1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return 3;  // rex.w, op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input,
 578 // it does if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input()
 580 {
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // The address of the call instruction needs to be 4-byte aligned to
 589 // ensure that it does not span a cache line so that it can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   current_offset += 1; // skip call opcode byte
 593   return round_to(current_offset, alignment_required()) - current_offset;
 594 }
 595 
 596 // The address of the call instruction needs to be 4-byte aligned to
 597 // ensure that it does not span a cache line so that it can be patched.
 598 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 599 {
 600   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 601   current_offset += 1; // skip call opcode byte
 602   return round_to(current_offset, alignment_required()) - current_offset;
 603 }
 604 
 605 // The address of the call instruction needs to be 4-byte aligned to
 606 // ensure that it does not span a cache line so that it can be patched.
 607 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 608 {
 609   current_offset += 11; // skip movq instruction + call opcode byte
 610   return round_to(current_offset, alignment_required()) - current_offset;
 611 }
 612 
 613 #ifndef PRODUCT
 614 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 615 {
 616   st->print("INT3");
 617 }
 618 #endif
 619 
 620 // EMIT_RM()
 621 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 622   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 623   cbuf.insts()->emit_int8(c);
 624 }
 625 
 626 // EMIT_CC()
 627 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 628   unsigned char c = (unsigned char) (f1 | f2);
 629   cbuf.insts()->emit_int8(c);
 630 }
 631 
 632 // EMIT_OPCODE()
 633 void emit_opcode(CodeBuffer &cbuf, int code) {
 634   cbuf.insts()->emit_int8((unsigned char) code);
 635 }
 636 
 637 // EMIT_OPCODE() w/ relocation information
 638 void emit_opcode(CodeBuffer &cbuf,
 639                  int code, relocInfo::relocType reloc, int offset, int format)
 640 {
 641   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 642   emit_opcode(cbuf, code);
 643 }
 644 
 645 // EMIT_D8()
 646 void emit_d8(CodeBuffer &cbuf, int d8) {
 647   cbuf.insts()->emit_int8((unsigned char) d8);
 648 }
 649 
 650 // EMIT_D16()
 651 void emit_d16(CodeBuffer &cbuf, int d16) {
 652   cbuf.insts()->emit_int16(d16);
 653 }
 654 
 655 // EMIT_D32()
 656 void emit_d32(CodeBuffer &cbuf, int d32) {
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // EMIT_D64()
 661 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 662   cbuf.insts()->emit_int64(d64);
 663 }
 664 
 665 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 666 void emit_d32_reloc(CodeBuffer& cbuf,
 667                     int d32,
 668                     relocInfo::relocType reloc,
 669                     int format)
 670 {
 671   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 672   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 673   cbuf.insts()->emit_int32(d32);
 674 }
 675 
 676 // emit 32 bit value and construct relocation entry from RelocationHolder
 677 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 678 #ifdef ASSERT
 679   if (rspec.reloc()->type() == relocInfo::oop_type &&
 680       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 681     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 682   }
 683 #endif
 684   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 685   cbuf.insts()->emit_int32(d32);
 686 }
 687 
 688 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 689   address next_ip = cbuf.insts_end() + 4;
 690   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 691                  external_word_Relocation::spec(addr),
 692                  RELOC_DISP32);
 693 }
 694 
 695 
 696 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 697 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 698   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 699   cbuf.insts()->emit_int64(d64);
 700 }
 701 
 702 // emit 64 bit value and construct relocation entry from RelocationHolder
 703 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 704 #ifdef ASSERT
 705   if (rspec.reloc()->type() == relocInfo::oop_type &&
 706       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 707     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 708            "cannot embed scavengable oops in code");
 709   }
 710 #endif
 711   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 712   cbuf.insts()->emit_int64(d64);
 713 }
 714 
 715 // Access stack slot for load or store
 716 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 717 {
 718   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 719   if (-0x80 <= disp && disp < 0x80) {
 720     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 721     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 722     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 723   } else {
 724     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 725     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 726     emit_d32(cbuf, disp);     // Displacement // R/M byte
 727   }
 728 }
 729 
 730    // rRegI ereg, memory mem) %{    // emit_reg_mem
 731 void encode_RegMem(CodeBuffer &cbuf,
 732                    int reg,
 733                    int base, int index, int scale, int disp, bool disp_is_oop)
 734 {
 735   assert(!disp_is_oop, "cannot have disp");
 736   int regenc = reg & 7;
 737   int baseenc = base & 7;
 738   int indexenc = index & 7;
 739 
 740   // There is no index & no scale, use form without SIB byte
 741   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 742     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 743     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 744       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 745     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 746       // If 8-bit displacement, mode 0x1
 747       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 748       emit_d8(cbuf, disp);
 749     } else {
 750       // If 32-bit displacement
 751       if (base == -1) { // Special flag for absolute address
 752         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 753         if (disp_is_oop) {
 754           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 755         } else {
 756           emit_d32(cbuf, disp);
 757         }
 758       } else {
 759         // Normal base + offset
 760         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 761         if (disp_is_oop) {
 762           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 763         } else {
 764           emit_d32(cbuf, disp);
 765         }
 766       }
 767     }
 768   } else {
 769     // Else, encode with the SIB byte
 770     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 771     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 772       // If no displacement
 773       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 774       emit_rm(cbuf, scale, indexenc, baseenc);
 775     } else {
 776       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 777         // If 8-bit displacement, mode 0x1
 778         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 779         emit_rm(cbuf, scale, indexenc, baseenc);
 780         emit_d8(cbuf, disp);
 781       } else {
 782         // If 32-bit displacement
 783         if (base == 0x04 ) {
 784           emit_rm(cbuf, 0x2, regenc, 0x4);
 785           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 786         } else {
 787           emit_rm(cbuf, 0x2, regenc, 0x4);
 788           emit_rm(cbuf, scale, indexenc, baseenc); // *
 789         }
 790         if (disp_is_oop) {
 791           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 792         } else {
 793           emit_d32(cbuf, disp);
 794         }
 795       }
 796     }
 797   }
 798 }
 799 
 800 // This could be in MacroAssembler but it's fairly C2 specific
 801 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 802   Label exit;
 803   __ jccb(Assembler::noParity, exit);
 804   __ pushf();
 805   //
 806   // comiss/ucomiss instructions set ZF,PF,CF flags and
 807   // zero OF,AF,SF for NaN values.
 808   // Fixup flags by zeroing ZF,PF so that compare of NaN
 809   // values returns 'less than' result (CF is set).
 810   // Leave the rest of flags unchanged.
 811   //
 812   //    7 6 5 4 3 2 1 0
 813   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 814   //    0 0 1 0 1 0 1 1   (0x2B)
 815   //
 816   __ andq(Address(rsp, 0), 0xffffff2b);
 817   __ popf();
 818   __ bind(exit);
 819 }
 820 
 821 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 822   Label done;
 823   __ movl(dst, -1);
 824   __ jcc(Assembler::parity, done);
 825   __ jcc(Assembler::below, done);
 826   __ setb(Assembler::notEqual, dst);
 827   __ movzbl(dst, dst);
 828   __ bind(done);
 829 }
 830 
 831 
 832 //=============================================================================
 833 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 834 
 835 int Compile::ConstantTable::calculate_table_base_offset() const {
 836   return 0;  // absolute addressing, no offset
 837 }
 838 
 839 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 840   // Empty encoding
 841 }
 842 
 843 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 844   return 0;
 845 }
 846 
 847 #ifndef PRODUCT
 848 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 849   st->print("# MachConstantBaseNode (empty encoding)");
 850 }
 851 #endif
 852 
 853 
 854 //=============================================================================
 855 #ifndef PRODUCT
 856 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return addr which is already pushed.
 862   framesize -= wordSize;
 863 
 864   if (C->need_stack_bang(framesize)) {
 865     framesize -= wordSize;
 866     st->print("# stack bang");
 867     st->print("\n\t");
 868     st->print("pushq   rbp\t# Save rbp");
 869     if (framesize) {
 870       st->print("\n\t");
 871       st->print("subq    rsp, #%d\t# Create frame",framesize);
 872     }
 873   } else {
 874     st->print("subq    rsp, #%d\t# Create frame",framesize);
 875     st->print("\n\t");
 876     framesize -= wordSize;
 877     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 878   }
 879 
 880   if (VerifyStackAtCalls) {
 881     st->print("\n\t");
 882     framesize -= wordSize;
 883     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 884 #ifdef ASSERT
 885     st->print("\n\t");
 886     st->print("# stack alignment check");
 887 #endif
 888   }
 889   st->cr();
 890 }
 891 #endif
 892 
 893 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 894   Compile* C = ra_->C;
 895   MacroAssembler _masm(&cbuf);
 896 
 897   int framesize = C->frame_slots() << LogBytesPerInt;
 898 
 899   __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 900 
 901   C->set_frame_complete(cbuf.insts_size());
 902 
 903   if (C->has_mach_constant_base_node()) {
 904     // NOTE: We set the table base offset here because users might be
 905     // emitted before MachConstantBaseNode.
 906     Compile::ConstantTable& constant_table = C->constant_table();
 907     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 908   }
 909 }
 910 
 911 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 912 {
 913   return MachNode::size(ra_); // too many variables; just compute it
 914                               // the hard way
 915 }
 916 
 917 int MachPrologNode::reloc() const
 918 {
 919   return 0; // a large enough number
 920 }
 921 
 922 //=============================================================================
 923 #ifndef PRODUCT
 924 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 925 {
 926   Compile* C = ra_->C;
 927   int framesize = C->frame_slots() << LogBytesPerInt;
 928   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 929   // Remove word for return adr already pushed
 930   // and RBP
 931   framesize -= 2*wordSize;
 932 
 933   if (framesize) {
 934     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 935     st->print("\t");
 936   }
 937 
 938   st->print_cr("popq   rbp");
 939   if (do_polling() && C->is_method_compilation()) {
 940     st->print("\t");
 941     if (Assembler::is_polling_page_far()) {
 942       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 943                    "testl  rax, [rscratch1]\t"
 944                    "# Safepoint: poll for GC");
 945     } else {
 946       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 947                    "# Safepoint: poll for GC");
 948     }
 949   }
 950 }
 951 #endif
 952 
 953 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 954 {
 955   Compile* C = ra_->C;
 956   int framesize = C->frame_slots() << LogBytesPerInt;
 957   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 958   // Remove word for return adr already pushed
 959   // and RBP
 960   framesize -= 2*wordSize;
 961 
 962   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 963 
 964   if (framesize) {
 965     emit_opcode(cbuf, Assembler::REX_W);
 966     if (framesize < 0x80) {
 967       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 968       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 969       emit_d8(cbuf, framesize);
 970     } else {
 971       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 972       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 973       emit_d32(cbuf, framesize);
 974     }
 975   }
 976 
 977   // popq rbp
 978   emit_opcode(cbuf, 0x58 | RBP_enc);
 979 
 980   if (do_polling() && C->is_method_compilation()) {
 981     MacroAssembler _masm(&cbuf);
 982     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 983     if (Assembler::is_polling_page_far()) {
 984       __ lea(rscratch1, polling_page);
 985       __ relocate(relocInfo::poll_return_type);
 986       __ testl(rax, Address(rscratch1, 0));
 987     } else {
 988       __ testl(rax, polling_page);
 989     }
 990   }
 991 }
 992 
 993 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 994 {
 995   return MachNode::size(ra_); // too many variables; just compute it
 996                               // the hard way
 997 }
 998 
 999 int MachEpilogNode::reloc() const
1000 {
1001   return 2; // a large enough number
1002 }
1003 
1004 const Pipeline* MachEpilogNode::pipeline() const
1005 {
1006   return MachNode::pipeline_class();
1007 }
1008 
1009 int MachEpilogNode::safepoint_offset() const
1010 {
1011   return 0;
1012 }
1013 
1014 //=============================================================================
1015 
// Coarse register classes used to pick a spill-copy strategy.
enum RC {
  rc_bad,    // not a valid register
  rc_int,    // general-purpose register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1022 
1023 static enum RC rc_class(OptoReg::Name reg)
1024 {
1025   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1026 
1027   if (OptoReg::is_stack(reg)) return rc_stack;
1028 
1029   VMReg r = OptoReg::as_VMReg(reg);
1030 
1031   if (r->is_Register()) return rc_int;
1032 
1033   assert(r->is_XMMRegister(), "must be");
1034   return rc_float;
1035 }
1036 
1037 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1038                                        PhaseRegAlloc* ra_,
1039                                        bool do_size,
1040                                        outputStream* st) const
1041 {
1042 
1043   // Get registers to move
1044   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1045   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1046   OptoReg::Name dst_second = ra_->get_reg_second(this);
1047   OptoReg::Name dst_first = ra_->get_reg_first(this);
1048 
1049   enum RC src_second_rc = rc_class(src_second);
1050   enum RC src_first_rc = rc_class(src_first);
1051   enum RC dst_second_rc = rc_class(dst_second);
1052   enum RC dst_first_rc = rc_class(dst_first);
1053 
1054   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1055          "must move at least 1 register" );
1056 
1057   if (src_first == dst_first && src_second == dst_second) {
1058     // Self copy, no move
1059     return 0;
1060   } else if (src_first_rc == rc_stack) {
1061     // mem ->
1062     if (dst_first_rc == rc_stack) {
1063       // mem -> mem
1064       assert(src_second != dst_first, "overlap");
1065       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1066           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1067         // 64-bit
1068         int src_offset = ra_->reg2offset(src_first);
1069         int dst_offset = ra_->reg2offset(dst_first);
1070         if (cbuf) {
1071           emit_opcode(*cbuf, 0xFF);
1072           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1073 
1074           emit_opcode(*cbuf, 0x8F);
1075           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1076 
1077 #ifndef PRODUCT
1078         } else if (!do_size) {
1079           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1080                      "popq    [rsp + #%d]",
1081                      src_offset,
1082                      dst_offset);
1083 #endif
1084         }
1085         return
1086           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1087           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1088       } else {
1089         // 32-bit
1090         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1091         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1092         // No pushl/popl, so:
1093         int src_offset = ra_->reg2offset(src_first);
1094         int dst_offset = ra_->reg2offset(dst_first);
1095         if (cbuf) {
1096           emit_opcode(*cbuf, Assembler::REX_W);
1097           emit_opcode(*cbuf, 0x89);
1098           emit_opcode(*cbuf, 0x44);
1099           emit_opcode(*cbuf, 0x24);
1100           emit_opcode(*cbuf, 0xF8);
1101 
1102           emit_opcode(*cbuf, 0x8B);
1103           encode_RegMem(*cbuf,
1104                         RAX_enc,
1105                         RSP_enc, 0x4, 0, src_offset,
1106                         false);
1107 
1108           emit_opcode(*cbuf, 0x89);
1109           encode_RegMem(*cbuf,
1110                         RAX_enc,
1111                         RSP_enc, 0x4, 0, dst_offset,
1112                         false);
1113 
1114           emit_opcode(*cbuf, Assembler::REX_W);
1115           emit_opcode(*cbuf, 0x8B);
1116           emit_opcode(*cbuf, 0x44);
1117           emit_opcode(*cbuf, 0x24);
1118           emit_opcode(*cbuf, 0xF8);
1119 
1120 #ifndef PRODUCT
1121         } else if (!do_size) {
1122           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1123                      "movl    rax, [rsp + #%d]\n\t"
1124                      "movl    [rsp + #%d], rax\n\t"
1125                      "movq    rax, [rsp - #8]",
1126                      src_offset,
1127                      dst_offset);
1128 #endif
1129         }
1130         return
1131           5 + // movq
1132           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1133           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1134           5; // movq
1135       }
1136     } else if (dst_first_rc == rc_int) {
1137       // mem -> gpr
1138       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1139           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1140         // 64-bit
1141         int offset = ra_->reg2offset(src_first);
1142         if (cbuf) {
1143           if (Matcher::_regEncode[dst_first] < 8) {
1144             emit_opcode(*cbuf, Assembler::REX_W);
1145           } else {
1146             emit_opcode(*cbuf, Assembler::REX_WR);
1147           }
1148           emit_opcode(*cbuf, 0x8B);
1149           encode_RegMem(*cbuf,
1150                         Matcher::_regEncode[dst_first],
1151                         RSP_enc, 0x4, 0, offset,
1152                         false);
1153 #ifndef PRODUCT
1154         } else if (!do_size) {
1155           st->print("movq    %s, [rsp + #%d]\t# spill",
1156                      Matcher::regName[dst_first],
1157                      offset);
1158 #endif
1159         }
1160         return
1161           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1162       } else {
1163         // 32-bit
1164         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1165         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1166         int offset = ra_->reg2offset(src_first);
1167         if (cbuf) {
1168           if (Matcher::_regEncode[dst_first] >= 8) {
1169             emit_opcode(*cbuf, Assembler::REX_R);
1170           }
1171           emit_opcode(*cbuf, 0x8B);
1172           encode_RegMem(*cbuf,
1173                         Matcher::_regEncode[dst_first],
1174                         RSP_enc, 0x4, 0, offset,
1175                         false);
1176 #ifndef PRODUCT
1177         } else if (!do_size) {
1178           st->print("movl    %s, [rsp + #%d]\t# spill",
1179                      Matcher::regName[dst_first],
1180                      offset);
1181 #endif
1182         }
1183         return
1184           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1185           ((Matcher::_regEncode[dst_first] < 8)
1186            ? 3
1187            : 4); // REX
1188       }
1189     } else if (dst_first_rc == rc_float) {
1190       // mem-> xmm
1191       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1192           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1193         // 64-bit
1194         int offset = ra_->reg2offset(src_first);
1195         if (cbuf) {
1196           MacroAssembler _masm(cbuf);
1197           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1198 #ifndef PRODUCT
1199         } else if (!do_size) {
1200           st->print("%s  %s, [rsp + #%d]\t# spill",
1201                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1202                      Matcher::regName[dst_first],
1203                      offset);
1204 #endif
1205         }
1206         return
1207           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1208           ((Matcher::_regEncode[dst_first] >= 8)
1209            ? 6
1210            : (5 + ((UseAVX>0)?1:0))); // REX
1211       } else {
1212         // 32-bit
1213         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1214         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1215         int offset = ra_->reg2offset(src_first);
1216         if (cbuf) {
1217           MacroAssembler _masm(cbuf);
1218           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1219 #ifndef PRODUCT
1220         } else if (!do_size) {
1221           st->print("movss   %s, [rsp + #%d]\t# spill",
1222                      Matcher::regName[dst_first],
1223                      offset);
1224 #endif
1225         }
1226         return
1227           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1228           ((Matcher::_regEncode[dst_first] >= 8)
1229            ? 6
1230            : (5 + ((UseAVX>0)?1:0))); // REX
1231       }
1232     }
1233   } else if (src_first_rc == rc_int) {
1234     // gpr ->
1235     if (dst_first_rc == rc_stack) {
1236       // gpr -> mem
1237       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1238           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1239         // 64-bit
1240         int offset = ra_->reg2offset(dst_first);
1241         if (cbuf) {
1242           if (Matcher::_regEncode[src_first] < 8) {
1243             emit_opcode(*cbuf, Assembler::REX_W);
1244           } else {
1245             emit_opcode(*cbuf, Assembler::REX_WR);
1246           }
1247           emit_opcode(*cbuf, 0x89);
1248           encode_RegMem(*cbuf,
1249                         Matcher::_regEncode[src_first],
1250                         RSP_enc, 0x4, 0, offset,
1251                         false);
1252 #ifndef PRODUCT
1253         } else if (!do_size) {
1254           st->print("movq    [rsp + #%d], %s\t# spill",
1255                      offset,
1256                      Matcher::regName[src_first]);
1257 #endif
1258         }
1259         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1260       } else {
1261         // 32-bit
1262         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1263         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1264         int offset = ra_->reg2offset(dst_first);
1265         if (cbuf) {
1266           if (Matcher::_regEncode[src_first] >= 8) {
1267             emit_opcode(*cbuf, Assembler::REX_R);
1268           }
1269           emit_opcode(*cbuf, 0x89);
1270           encode_RegMem(*cbuf,
1271                         Matcher::_regEncode[src_first],
1272                         RSP_enc, 0x4, 0, offset,
1273                         false);
1274 #ifndef PRODUCT
1275         } else if (!do_size) {
1276           st->print("movl    [rsp + #%d], %s\t# spill",
1277                      offset,
1278                      Matcher::regName[src_first]);
1279 #endif
1280         }
1281         return
1282           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1283           ((Matcher::_regEncode[src_first] < 8)
1284            ? 3
1285            : 4); // REX
1286       }
1287     } else if (dst_first_rc == rc_int) {
1288       // gpr -> gpr
1289       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1290           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1291         // 64-bit
1292         if (cbuf) {
1293           if (Matcher::_regEncode[dst_first] < 8) {
1294             if (Matcher::_regEncode[src_first] < 8) {
1295               emit_opcode(*cbuf, Assembler::REX_W);
1296             } else {
1297               emit_opcode(*cbuf, Assembler::REX_WB);
1298             }
1299           } else {
1300             if (Matcher::_regEncode[src_first] < 8) {
1301               emit_opcode(*cbuf, Assembler::REX_WR);
1302             } else {
1303               emit_opcode(*cbuf, Assembler::REX_WRB);
1304             }
1305           }
1306           emit_opcode(*cbuf, 0x8B);
1307           emit_rm(*cbuf, 0x3,
1308                   Matcher::_regEncode[dst_first] & 7,
1309                   Matcher::_regEncode[src_first] & 7);
1310 #ifndef PRODUCT
1311         } else if (!do_size) {
1312           st->print("movq    %s, %s\t# spill",
1313                      Matcher::regName[dst_first],
1314                      Matcher::regName[src_first]);
1315 #endif
1316         }
1317         return 3; // REX
1318       } else {
1319         // 32-bit
1320         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1321         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1322         if (cbuf) {
1323           if (Matcher::_regEncode[dst_first] < 8) {
1324             if (Matcher::_regEncode[src_first] >= 8) {
1325               emit_opcode(*cbuf, Assembler::REX_B);
1326             }
1327           } else {
1328             if (Matcher::_regEncode[src_first] < 8) {
1329               emit_opcode(*cbuf, Assembler::REX_R);
1330             } else {
1331               emit_opcode(*cbuf, Assembler::REX_RB);
1332             }
1333           }
1334           emit_opcode(*cbuf, 0x8B);
1335           emit_rm(*cbuf, 0x3,
1336                   Matcher::_regEncode[dst_first] & 7,
1337                   Matcher::_regEncode[src_first] & 7);
1338 #ifndef PRODUCT
1339         } else if (!do_size) {
1340           st->print("movl    %s, %s\t# spill",
1341                      Matcher::regName[dst_first],
1342                      Matcher::regName[src_first]);
1343 #endif
1344         }
1345         return
1346           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1347           ? 2
1348           : 3; // REX
1349       }
1350     } else if (dst_first_rc == rc_float) {
1351       // gpr -> xmm
1352       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1353           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1354         // 64-bit
1355         if (cbuf) {
1356           MacroAssembler _masm(cbuf);
1357           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1358 #ifndef PRODUCT
1359         } else if (!do_size) {
1360           st->print("movdq   %s, %s\t# spill",
1361                      Matcher::regName[dst_first],
1362                      Matcher::regName[src_first]);
1363 #endif
1364         }
1365         return 5; // REX
1366       } else {
1367         // 32-bit
1368         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1369         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1370         if (cbuf) {
1371           MacroAssembler _masm(cbuf);
1372           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1373 #ifndef PRODUCT
1374         } else if (!do_size) {
1375           st->print("movdl   %s, %s\t# spill",
1376                      Matcher::regName[dst_first],
1377                      Matcher::regName[src_first]);
1378 #endif
1379         }
1380         return
1381           (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1382           ? 5
1383           : (4 + ((UseAVX>0)?1:0)); // REX
1384       }
1385     }
1386   } else if (src_first_rc == rc_float) {
1387     // xmm ->
1388     if (dst_first_rc == rc_stack) {
1389       // xmm -> mem
1390       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1391           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1392         // 64-bit
1393         int offset = ra_->reg2offset(dst_first);
1394         if (cbuf) {
1395           MacroAssembler _masm(cbuf);
1396           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1397 #ifndef PRODUCT
1398         } else if (!do_size) {
1399           st->print("movsd   [rsp + #%d], %s\t# spill",
1400                      offset,
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404         return
1405           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1406           ((Matcher::_regEncode[src_first] >= 8)
1407            ? 6
1408            : (5 + ((UseAVX>0)?1:0))); // REX
1409       } else {
1410         // 32-bit
1411         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1412         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1413         int offset = ra_->reg2offset(dst_first);
1414         if (cbuf) {
1415           MacroAssembler _masm(cbuf);
1416           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1417 #ifndef PRODUCT
1418         } else if (!do_size) {
1419           st->print("movss   [rsp + #%d], %s\t# spill",
1420                      offset,
1421                      Matcher::regName[src_first]);
1422 #endif
1423         }
1424         return
1425           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1426           ((Matcher::_regEncode[src_first] >=8)
1427            ? 6
1428            : (5 + ((UseAVX>0)?1:0))); // REX
1429       }
1430     } else if (dst_first_rc == rc_int) {
1431       // xmm -> gpr
1432       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1433           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1434         // 64-bit
1435         if (cbuf) {
1436           MacroAssembler _masm(cbuf);
1437           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1438 #ifndef PRODUCT
1439         } else if (!do_size) {
1440           st->print("movdq   %s, %s\t# spill",
1441                      Matcher::regName[dst_first],
1442                      Matcher::regName[src_first]);
1443 #endif
1444         }
1445         return 5; // REX
1446       } else {
1447         // 32-bit
1448         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1449         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1450         if (cbuf) {
1451           MacroAssembler _masm(cbuf);
1452           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1453 #ifndef PRODUCT
1454         } else if (!do_size) {
1455           st->print("movdl   %s, %s\t# spill",
1456                      Matcher::regName[dst_first],
1457                      Matcher::regName[src_first]);
1458 #endif
1459         }
1460         return
1461           (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1462           ? 5
1463           : (4 + ((UseAVX>0)?1:0)); // REX
1464       }
1465     } else if (dst_first_rc == rc_float) {
1466       // xmm -> xmm
1467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1469         // 64-bit
1470         if (cbuf) {
1471           MacroAssembler _masm(cbuf);
1472           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1473 #ifndef PRODUCT
1474         } else if (!do_size) {
1475           st->print("%s  %s, %s\t# spill",
1476                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1477                      Matcher::regName[dst_first],
1478                      Matcher::regName[src_first]);
1479 #endif
1480         }
1481         return
1482           (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1483           ? 5
1484           : (4 + ((UseAVX>0)?1:0)); // REX
1485       } else {
1486         // 32-bit
1487         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1488         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1489         if (cbuf) {
1490           MacroAssembler _masm(cbuf);
1491           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1492 #ifndef PRODUCT
1493         } else if (!do_size) {
1494           st->print("%s  %s, %s\t# spill",
1495                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1496                      Matcher::regName[dst_first],
1497                      Matcher::regName[src_first]);
1498 #endif
1499         }
1500         return ((UseAVX>0) ? 5:
1501           ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1502            ? (UseXmmRegToRegMoveAll ? 4 : 5)
1503            : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
1504       }
1505     }
1506   }
1507 
1508   assert(0," foo ");
1509   Unimplemented();
1510 
1511   return 0;
1512 }
1513 
1514 #ifndef PRODUCT
1515 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1516 {
1517   implementation(NULL, ra_, false, st);
1518 }
1519 #endif
1520 
1521 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1522 {
1523   implementation(&cbuf, ra_, false, NULL);
1524 }
1525 
1526 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1527 {
1528   return implementation(NULL, ra_, true, NULL);
1529 }
1530 
1531 //=============================================================================
1532 #ifndef PRODUCT
1533 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1534 {
1535   st->print("nop \t# %d bytes pad for loops and calls", _count);
1536 }
1537 #endif
1538 
1539 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1540 {
1541   MacroAssembler _masm(&cbuf);
1542   __ nop(_count);
1543 }
1544 
1545 uint MachNopNode::size(PhaseRegAlloc*) const
1546 {
1547   return _count;
1548 }
1549 
1550 
1551 //=============================================================================
1552 #ifndef PRODUCT
1553 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1554 {
1555   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1556   int reg = ra_->get_reg_first(this);
1557   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1558             Matcher::regName[reg], offset);
1559 }
1560 #endif
1561 
1562 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1563 {
1564   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1565   int reg = ra_->get_encode(this);
1566   if (offset >= 0x80) {
1567     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1568     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1569     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1570     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1571     emit_d32(cbuf, offset);
1572   } else {
1573     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1574     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1575     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1576     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1577     emit_d8(cbuf, offset);
1578   }
1579 }
1580 
1581 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1582 {
1583   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1584   return (offset < 0x80) ? 5 : 8; // REX
1585 }
1586 
1587 //=============================================================================
1588 
1589 // emit call stub, compiled java to interpreter
1590 void emit_java_to_interp(CodeBuffer& cbuf)
1591 {
1592   // Stub is fixed up when the corresponding call is converted from
1593   // calling compiled code to calling interpreted code.
1594   // movq rbx, 0
1595   // jmp -5 # to self
1596 
1597   address mark = cbuf.insts_mark();  // get mark within main instrs section
1598 
1599   // Note that the code buffer's insts_mark is always relative to insts.
1600   // That's why we must use the macroassembler to generate a stub.
1601   MacroAssembler _masm(&cbuf);
1602 
1603   address base =
1604   __ start_a_stub(Compile::MAX_stubs_size);
1605   if (base == NULL)  return;  // CodeBuffer::expand failed
1606   // static stub relocation stores the instruction address of the call
1607   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1608   // static stub relocation also tags the methodOop in the code-stream.
1609   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1610   // This is recognized as unresolved by relocs/nativeinst/ic code
1611   __ jump(RuntimeAddress(__ pc()));
1612 
1613   // Update current stubs pointer and restore insts_end.
1614   __ end_a_stub();
1615 }
1616 
1617 // size of call stub, compiled java to interpretor
1618 uint size_java_to_interp()
1619 {
1620   return 15;  // movq (1+1+8); jmp (1+4)
1621 }
1622 
1623 // relocation entries for call stub, compiled java to interpretor
1624 uint reloc_java_to_interp()
1625 {
1626   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1627 }
1628 
1629 //=============================================================================
1630 #ifndef PRODUCT
1631 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1632 {
1633   if (UseCompressedOops) {
1634     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1635     if (Universe::narrow_oop_shift() != 0) {
1636       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1637     }
1638     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1639   } else {
1640     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1641                  "# Inline cache check");
1642   }
1643   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1644   st->print_cr("\tnop\t# nops to align entry point");
1645 }
1646 #endif
1647 
1648 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1649 {
1650   MacroAssembler masm(&cbuf);
1651   uint insts_size = cbuf.insts_size();
1652   if (UseCompressedOops) {
1653     masm.load_klass(rscratch1, j_rarg0);
1654     masm.cmpptr(rax, rscratch1);
1655   } else {
1656     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1657   }
1658 
1659   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1660 
1661   /* WARNING these NOPs are critical so that verified entry point is properly
1662      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1663   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1664   if (OptoBreakpoint) {
1665     // Leave space for int3
1666     nops_cnt -= 1;
1667   }
1668   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1669   if (nops_cnt > 0)
1670     masm.nop(nops_cnt);
1671 }
1672 
1673 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1674 {
1675   return MachNode::size(ra_); // too many variables; just compute it
1676                               // the hard way
1677 }
1678 
1679 
1680 //=============================================================================
1681 uint size_exception_handler()
1682 {
1683   // NativeCall instruction size is the same as NativeJump.
1684   // Note that this value is also credited (in output.cpp) to
1685   // the size of the code section.
1686   return NativeJump::instruction_size;
1687 }
1688 
1689 // Emit exception handler code.
1690 int emit_exception_handler(CodeBuffer& cbuf)
1691 {
1692 
1693   // Note that the code buffer's insts_mark is always relative to insts.
1694   // That's why we must use the macroassembler to generate a handler.
1695   MacroAssembler _masm(&cbuf);
1696   address base =
1697   __ start_a_stub(size_exception_handler());
1698   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1699   int offset = __ offset();
1700   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1701   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1702   __ end_a_stub();
1703   return offset;
1704 }
1705 
1706 uint size_deopt_handler()
1707 {
1708   // three 5 byte instructions
1709   return 15;
1710 }
1711 
1712 // Emit deopt handler code.
1713 int emit_deopt_handler(CodeBuffer& cbuf)
1714 {
1715 
1716   // Note that the code buffer's insts_mark is always relative to insts.
1717   // That's why we must use the macroassembler to generate a handler.
1718   MacroAssembler _masm(&cbuf);
1719   address base =
1720   __ start_a_stub(size_deopt_handler());
1721   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1722   int offset = __ offset();
1723   address the_pc = (address) __ pc();
1724   Label next;
1725   // push a "the_pc" on the stack without destroying any registers
1726   // as they all may be live.
1727 
1728   // push address of "next"
1729   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1730   __ bind(next);
1731   // adjust it so it matches "the_pc"
1732   __ subptr(Address(rsp, 0), __ offset() - offset);
1733   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1734   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1735   __ end_a_stub();
1736   return offset;
1737 }
1738 
1739 
1740 const bool Matcher::match_rule_supported(int opcode) {
1741   if (!has_match_rule(opcode))
1742     return false;
1743 
1744   return true;  // Per default match rules are supported.
1745 }
1746 
1747 int Matcher::regnum_to_fpu_offset(int regnum)
1748 {
1749   return regnum - 32; // The FP registers are in the second chunk
1750 }
1751 
1752 // This is UltraSparc specific, true just means we have fast l2f conversion
1753 const bool Matcher::convL2FSupported(void) {
1754   return true;
1755 }
1756 
1757 // Vector width in bytes
1758 const uint Matcher::vector_width_in_bytes(void) {
1759   return 8;
1760 }
1761 
1762 // Vector ideal reg
1763 const uint Matcher::vector_ideal_reg(void) {
1764   return Op_RegD;
1765 }
1766 
1767 // Is this branch offset short enough that a short branch can be used?
1768 //
1769 // NOTE: If the platform does not provide any short branch variants, then
1770 //       this method should return false for offset 0.
1771 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1772   // The passed offset is relative to address of the branch.
1773   // On 86 a branch displacement is calculated relative to address
1774   // of a next instruction.
1775   offset -= br_size;
1776 
1777   // the short version of jmpConUCF2 contains multiple branches,
1778   // making the reach slightly less
1779   if (rule == jmpConUCF2_rule)
1780     return (-126 <= offset && offset <= 125);
1781   return (-128 <= offset && offset <= 127);
1782 }
1783 
1784 const bool Matcher::isSimpleConstant64(jlong value) {
1785   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1786   //return value == (int) value;  // Cf. storeImmL and immL32.
1787 
1788   // Probably always true, even if a temp register is required.
1789   return true;
1790 }
1791 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// so the array initialization count is not given in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, expressed as 8 times BytesPerLong.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
1797 
1798 // No additional cost for CMOVL.
1799 const int Matcher::long_cmove_cost() { return 0; }
1800 
1801 // No CMOVF/CMOVD with SSE2
1802 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1803 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions, or does
// the cpu only look at the lower 5/6 bits anyway?  No explicit masking
// is requested here.
const bool Matcher::need_masked_shift_count = false;
1812 
1813 bool Matcher::narrow_oop_use_complex_address() {
1814   assert(UseCompressedOops, "only for compressed oops code");
1815   return (LogMinObjAlignmentInBytes <= 3);
1816 }
1817 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  This only happens when passing doubles
// into C code, as the Java calling convention forces doubles to be
// aligned.
const bool Matcher::misaligned_doubles_ok = true;
1830 
1831 // No-op on amd64
1832 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1833 
// Advertise here whether the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1837 
1838 // Are floats conerted to double when stored to stack during deoptimization?
1839 // On x64 it is stored without convertion so we can use normal access.
1840 bool Matcher::float_in_double() { return false; }
1841 
// Do ints take an entire long register or just half?
// Set to true here: an int takes the entire long register.
const bool Matcher::int_in_long = true;
1844 
1845 // Return whether or not this register is ever used as an argument.
1846 // This function is used on startup to build the trampoline stubs in
1847 // generateOptoStub.  Registers not mentioned will be killed by the VM
1848 // call in the trampoline, and arguments in those registers not be
1849 // available to the callee.
1850 bool Matcher::can_be_java_arg(int reg)
1851 {
1852   return
1853     reg ==  RDI_num || reg ==  RDI_H_num ||
1854     reg ==  RSI_num || reg ==  RSI_H_num ||
1855     reg ==  RDX_num || reg ==  RDX_H_num ||
1856     reg ==  RCX_num || reg ==  RCX_H_num ||
1857     reg ==   R8_num || reg ==   R8_H_num ||
1858     reg ==   R9_num || reg ==   R9_H_num ||
1859     reg ==  R12_num || reg ==  R12_H_num ||
1860     reg == XMM0_num || reg == XMM0_H_num ||
1861     reg == XMM1_num || reg == XMM1_H_num ||
1862     reg == XMM2_num || reg == XMM2_H_num ||
1863     reg == XMM3_num || reg == XMM3_H_num ||
1864     reg == XMM4_num || reg == XMM4_H_num ||
1865     reg == XMM5_num || reg == XMM5_H_num ||
1866     reg == XMM6_num || reg == XMM6_H_num ||
1867     reg == XMM7_num || reg == XMM7_H_num;
1868 }
1869 
1870 bool Matcher::is_spillable_arg(int reg)
1871 {
1872   return can_be_java_arg(reg);
1873 }
1874 
1875 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1876   // In 64 bit mode a code which use multiply when
1877   // devisor is constant is faster than hardware
1878   // DIV instruction (it uses MulHiL).
1879   return false;
1880 }
1881 
1882 // Register for DIVI projection of divmodI
1883 RegMask Matcher::divI_proj_mask() {
1884   return INT_RAX_REG_mask();
1885 }
1886 
1887 // Register for MODI projection of divmodI
1888 RegMask Matcher::modI_proj_mask() {
1889   return INT_RDX_REG_mask();
1890 }
1891 
1892 // Register for DIVL projection of divmodL
1893 RegMask Matcher::divL_proj_mask() {
1894   return LONG_RAX_REG_mask();
1895 }
1896 
1897 // Register for MODL projection of divmodL
1898 RegMask Matcher::modL_proj_mask() {
1899   return LONG_RDX_REG_mask();
1900 }
1901 
1902 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1903   return PTR_RBP_REG_mask();
1904 }
1905 
1906 static Address build_address(int b, int i, int s, int d) {
1907   Register index = as_Register(i);
1908   Address::ScaleFactor scale = (Address::ScaleFactor)s;
1909   if (index == rsp) {
1910     index = noreg;
1911     scale = Address::no_scale;
1912   }
1913   Address addr(as_Register(b), index, scale, d);
1914   return addr;
1915 }
1916 
1917 %}
1918 
//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to
// output byte streams.  Encoding classes are parameterized macros
// used by Machine Instruction Nodes in order to generate the bit
// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
// which returns its register number when queried.  CONST_INTER causes
// an operand to generate a function which returns the value of the
// constant when queried.  MEMORY_INTER causes an operand to generate
// four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
// associated with each basic boolean condition for a conditional
// instruction.
//
// Instructions specify two basic values for encoding.  Again, a
// function is available to check if the constant displacement is an
// oop.  Instructions use the ins_encode keyword to specify their
// encoding classes (which must be a sequence of enc_class names, and
// their parameters, specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular
// instruction needs for encoding need to be specified.
1945 encode %{
1946   // Build emit functions for each basic byte or larger field in the
1947   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1948   // from C++ code in the enc_class source block.  Emit functions will
1949   // live in the main source block for now.  In future, we can
1950   // generalize this by adding a syntax that specifies the sizes of
1951   // fields in an order, so that the adlc can build the emit functions
1952   // automagically
1953 
1954   // Emit primary opcode
1955   enc_class OpcP
1956   %{
1957     emit_opcode(cbuf, $primary);
1958   %}
1959 
1960   // Emit secondary opcode
1961   enc_class OpcS
1962   %{
1963     emit_opcode(cbuf, $secondary);
1964   %}
1965 
1966   // Emit tertiary opcode
1967   enc_class OpcT
1968   %{
1969     emit_opcode(cbuf, $tertiary);
1970   %}
1971 
1972   // Emit opcode directly
1973   enc_class Opcode(immI d8)
1974   %{
1975     emit_opcode(cbuf, $d8$$constant);
1976   %}
1977 
1978   // Emit size prefix
1979   enc_class SizePrefix
1980   %{
1981     emit_opcode(cbuf, 0x66);
1982   %}
1983 
1984   enc_class reg(rRegI reg)
1985   %{
1986     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1987   %}
1988 
1989   enc_class reg_reg(rRegI dst, rRegI src)
1990   %{
1991     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1992   %}
1993 
1994   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1995   %{
1996     emit_opcode(cbuf, $opcode$$constant);
1997     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1998   %}
1999 
2000   enc_class cdql_enc(no_rax_rdx_RegI div)
2001   %{
2002     // Full implementation of Java idiv and irem; checks for
2003     // special case as described in JVM spec., p.243 & p.271.
2004     //
2005     //         normal case                           special case
2006     //
2007     // input : rax: dividend                         min_int
2008     //         reg: divisor                          -1
2009     //
2010     // output: rax: quotient  (= rax idiv reg)       min_int
2011     //         rdx: remainder (= rax irem reg)       0
2012     //
2013     //  Code sequnce:
2014     //
2015     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2016     //    5:   75 07/08                jne    e <normal>
2017     //    7:   33 d2                   xor    %edx,%edx
2018     //  [div >= 8 -> offset + 1]
2019     //  [REX_B]
2020     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2021     //    c:   74 03/04                je     11 <done>
2022     // 000000000000000e <normal>:
2023     //    e:   99                      cltd
2024     //  [div >= 8 -> offset + 1]
2025     //  [REX_B]
2026     //    f:   f7 f9                   idiv   $div
2027     // 0000000000000011 <done>:
2028 
2029     // cmp    $0x80000000,%eax
2030     emit_opcode(cbuf, 0x3d);
2031     emit_d8(cbuf, 0x00);
2032     emit_d8(cbuf, 0x00);
2033     emit_d8(cbuf, 0x00);
2034     emit_d8(cbuf, 0x80);
2035 
2036     // jne    e <normal>
2037     emit_opcode(cbuf, 0x75);
2038     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2039 
2040     // xor    %edx,%edx
2041     emit_opcode(cbuf, 0x33);
2042     emit_d8(cbuf, 0xD2);
2043 
2044     // cmp    $0xffffffffffffffff,%ecx
2045     if ($div$$reg >= 8) {
2046       emit_opcode(cbuf, Assembler::REX_B);
2047     }
2048     emit_opcode(cbuf, 0x83);
2049     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2050     emit_d8(cbuf, 0xFF);
2051 
2052     // je     11 <done>
2053     emit_opcode(cbuf, 0x74);
2054     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2055 
2056     // <normal>
2057     // cltd
2058     emit_opcode(cbuf, 0x99);
2059 
2060     // idivl (note: must be emitted by the user of this rule)
2061     // <done>
2062   %}
2063 
2064   enc_class cdqq_enc(no_rax_rdx_RegL div)
2065   %{
2066     // Full implementation of Java ldiv and lrem; checks for
2067     // special case as described in JVM spec., p.243 & p.271.
2068     //
2069     //         normal case                           special case
2070     //
2071     // input : rax: dividend                         min_long
2072     //         reg: divisor                          -1
2073     //
2074     // output: rax: quotient  (= rax idiv reg)       min_long
2075     //         rdx: remainder (= rax irem reg)       0
2076     //
2077     //  Code sequnce:
2078     //
2079     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2080     //    7:   00 00 80
2081     //    a:   48 39 d0                cmp    %rdx,%rax
2082     //    d:   75 08                   jne    17 <normal>
2083     //    f:   33 d2                   xor    %edx,%edx
2084     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2085     //   15:   74 05                   je     1c <done>
2086     // 0000000000000017 <normal>:
2087     //   17:   48 99                   cqto
2088     //   19:   48 f7 f9                idiv   $div
2089     // 000000000000001c <done>:
2090 
2091     // mov    $0x8000000000000000,%rdx
2092     emit_opcode(cbuf, Assembler::REX_W);
2093     emit_opcode(cbuf, 0xBA);
2094     emit_d8(cbuf, 0x00);
2095     emit_d8(cbuf, 0x00);
2096     emit_d8(cbuf, 0x00);
2097     emit_d8(cbuf, 0x00);
2098     emit_d8(cbuf, 0x00);
2099     emit_d8(cbuf, 0x00);
2100     emit_d8(cbuf, 0x00);
2101     emit_d8(cbuf, 0x80);
2102 
2103     // cmp    %rdx,%rax
2104     emit_opcode(cbuf, Assembler::REX_W);
2105     emit_opcode(cbuf, 0x39);
2106     emit_d8(cbuf, 0xD0);
2107 
2108     // jne    17 <normal>
2109     emit_opcode(cbuf, 0x75);
2110     emit_d8(cbuf, 0x08);
2111 
2112     // xor    %edx,%edx
2113     emit_opcode(cbuf, 0x33);
2114     emit_d8(cbuf, 0xD2);
2115 
2116     // cmp    $0xffffffffffffffff,$div
2117     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2118     emit_opcode(cbuf, 0x83);
2119     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2120     emit_d8(cbuf, 0xFF);
2121 
2122     // je     1e <done>
2123     emit_opcode(cbuf, 0x74);
2124     emit_d8(cbuf, 0x05);
2125 
2126     // <normal>
2127     // cqto
2128     emit_opcode(cbuf, Assembler::REX_W);
2129     emit_opcode(cbuf, 0x99);
2130 
2131     // idivq (note: must be emitted by the user of this rule)
2132     // <done>
2133   %}
2134 
2135   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2136   enc_class OpcSE(immI imm)
2137   %{
2138     // Emit primary opcode and set sign-extend bit
2139     // Check for 8-bit immediate, and set sign extend bit in opcode
2140     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2141       emit_opcode(cbuf, $primary | 0x02);
2142     } else {
2143       // 32-bit immediate
2144       emit_opcode(cbuf, $primary);
2145     }
2146   %}
2147 
2148   enc_class OpcSErm(rRegI dst, immI imm)
2149   %{
2150     // OpcSEr/m
2151     int dstenc = $dst$$reg;
2152     if (dstenc >= 8) {
2153       emit_opcode(cbuf, Assembler::REX_B);
2154       dstenc -= 8;
2155     }
2156     // Emit primary opcode and set sign-extend bit
2157     // Check for 8-bit immediate, and set sign extend bit in opcode
2158     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2159       emit_opcode(cbuf, $primary | 0x02);
2160     } else {
2161       // 32-bit immediate
2162       emit_opcode(cbuf, $primary);
2163     }
2164     // Emit r/m byte with secondary opcode, after primary opcode.
2165     emit_rm(cbuf, 0x3, $secondary, dstenc);
2166   %}
2167 
2168   enc_class OpcSErm_wide(rRegL dst, immI imm)
2169   %{
2170     // OpcSEr/m
2171     int dstenc = $dst$$reg;
2172     if (dstenc < 8) {
2173       emit_opcode(cbuf, Assembler::REX_W);
2174     } else {
2175       emit_opcode(cbuf, Assembler::REX_WB);
2176       dstenc -= 8;
2177     }
2178     // Emit primary opcode and set sign-extend bit
2179     // Check for 8-bit immediate, and set sign extend bit in opcode
2180     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2181       emit_opcode(cbuf, $primary | 0x02);
2182     } else {
2183       // 32-bit immediate
2184       emit_opcode(cbuf, $primary);
2185     }
2186     // Emit r/m byte with secondary opcode, after primary opcode.
2187     emit_rm(cbuf, 0x3, $secondary, dstenc);
2188   %}
2189 
2190   enc_class Con8or32(immI imm)
2191   %{
2192     // Check for 8-bit immediate, and set sign extend bit in opcode
2193     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2194       $$$emit8$imm$$constant;
2195     } else {
2196       // 32-bit immediate
2197       $$$emit32$imm$$constant;
2198     }
2199   %}
2200 
2201   enc_class opc2_reg(rRegI dst)
2202   %{
2203     // BSWAP
2204     emit_cc(cbuf, $secondary, $dst$$reg);
2205   %}
2206 
2207   enc_class opc3_reg(rRegI dst)
2208   %{
2209     // BSWAP
2210     emit_cc(cbuf, $tertiary, $dst$$reg);
2211   %}
2212 
2213   enc_class reg_opc(rRegI div)
2214   %{
2215     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2216     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2217   %}
2218 
2219   enc_class enc_cmov(cmpOp cop)
2220   %{
2221     // CMOV
2222     $$$emit8$primary;
2223     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2224   %}
2225 
2226   enc_class enc_PartialSubtypeCheck()
2227   %{
2228     Register Rrdi = as_Register(RDI_enc); // result register
2229     Register Rrax = as_Register(RAX_enc); // super class
2230     Register Rrcx = as_Register(RCX_enc); // killed
2231     Register Rrsi = as_Register(RSI_enc); // sub class
2232     Label miss;
2233     const bool set_cond_codes = true;
2234 
2235     MacroAssembler _masm(&cbuf);
2236     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2237                                      NULL, &miss,
2238                                      /*set_cond_codes:*/ true);
2239     if ($primary) {
2240       __ xorptr(Rrdi, Rrdi);
2241     }
2242     __ bind(miss);
2243   %}
2244 
2245   enc_class Java_To_Interpreter(method meth)
2246   %{
2247     // CALL Java_To_Interpreter
2248     // This is the instruction starting address for relocation info.
2249     cbuf.set_insts_mark();
2250     $$$emit8$primary;
2251     // CALL directly to the runtime
2252     emit_d32_reloc(cbuf,
2253                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2254                    runtime_call_Relocation::spec(),
2255                    RELOC_DISP32);
2256   %}
2257 
2258   enc_class preserve_SP %{
2259     debug_only(int off0 = cbuf.insts_size());
2260     MacroAssembler _masm(&cbuf);
2261     // RBP is preserved across all calls, even compiled calls.
2262     // Use it to preserve RSP in places where the callee might change the SP.
2263     __ movptr(rbp_mh_SP_save, rsp);
2264     debug_only(int off1 = cbuf.insts_size());
2265     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2266   %}
2267 
2268   enc_class restore_SP %{
2269     MacroAssembler _masm(&cbuf);
2270     __ movptr(rsp, rbp_mh_SP_save);
2271   %}
2272 
2273   enc_class Java_Static_Call(method meth)
2274   %{
2275     // JAVA STATIC CALL
2276     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2277     // determine who we intended to call.
2278     cbuf.set_insts_mark();
2279     $$$emit8$primary;
2280 
2281     if (!_method) {
2282       emit_d32_reloc(cbuf,
2283                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2284                      runtime_call_Relocation::spec(),
2285                      RELOC_DISP32);
2286     } else if (_optimized_virtual) {
2287       emit_d32_reloc(cbuf,
2288                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2289                      opt_virtual_call_Relocation::spec(),
2290                      RELOC_DISP32);
2291     } else {
2292       emit_d32_reloc(cbuf,
2293                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2294                      static_call_Relocation::spec(),
2295                      RELOC_DISP32);
2296     }
2297     if (_method) {
2298       // Emit stub for static call
2299       emit_java_to_interp(cbuf);
2300     }
2301   %}
2302 
2303   enc_class Java_Dynamic_Call(method meth)
2304   %{
2305     // JAVA DYNAMIC CALL
2306     // !!!!!
2307     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2308     // emit_call_dynamic_prologue( cbuf );
2309     cbuf.set_insts_mark();
2310 
2311     // movq rax, -1
2312     emit_opcode(cbuf, Assembler::REX_W);
2313     emit_opcode(cbuf, 0xB8 | RAX_enc);
2314     emit_d64_reloc(cbuf,
2315                    (int64_t) Universe::non_oop_word(),
2316                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2317     address virtual_call_oop_addr = cbuf.insts_mark();
2318     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2319     // who we intended to call.
2320     cbuf.set_insts_mark();
2321     $$$emit8$primary;
2322     emit_d32_reloc(cbuf,
2323                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2324                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2325                    RELOC_DISP32);
2326   %}
2327 
2328   enc_class Java_Compiled_Call(method meth)
2329   %{
2330     // JAVA COMPILED CALL
2331     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2332 
2333     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2334     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2335 
2336     // callq *disp(%rax)
2337     cbuf.set_insts_mark();
2338     $$$emit8$primary;
2339     if (disp < 0x80) {
2340       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2341       emit_d8(cbuf, disp); // Displacement
2342     } else {
2343       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2344       emit_d32(cbuf, disp); // Displacement
2345     }
2346   %}
2347 
2348   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2349   %{
2350     // SAL, SAR, SHR
2351     int dstenc = $dst$$reg;
2352     if (dstenc >= 8) {
2353       emit_opcode(cbuf, Assembler::REX_B);
2354       dstenc -= 8;
2355     }
2356     $$$emit8$primary;
2357     emit_rm(cbuf, 0x3, $secondary, dstenc);
2358     $$$emit8$shift$$constant;
2359   %}
2360 
2361   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2362   %{
2363     // SAL, SAR, SHR
2364     int dstenc = $dst$$reg;
2365     if (dstenc < 8) {
2366       emit_opcode(cbuf, Assembler::REX_W);
2367     } else {
2368       emit_opcode(cbuf, Assembler::REX_WB);
2369       dstenc -= 8;
2370     }
2371     $$$emit8$primary;
2372     emit_rm(cbuf, 0x3, $secondary, dstenc);
2373     $$$emit8$shift$$constant;
2374   %}
2375 
2376   enc_class load_immI(rRegI dst, immI src)
2377   %{
2378     int dstenc = $dst$$reg;
2379     if (dstenc >= 8) {
2380       emit_opcode(cbuf, Assembler::REX_B);
2381       dstenc -= 8;
2382     }
2383     emit_opcode(cbuf, 0xB8 | dstenc);
2384     $$$emit32$src$$constant;
2385   %}
2386 
2387   enc_class load_immL(rRegL dst, immL src)
2388   %{
2389     int dstenc = $dst$$reg;
2390     if (dstenc < 8) {
2391       emit_opcode(cbuf, Assembler::REX_W);
2392     } else {
2393       emit_opcode(cbuf, Assembler::REX_WB);
2394       dstenc -= 8;
2395     }
2396     emit_opcode(cbuf, 0xB8 | dstenc);
2397     emit_d64(cbuf, $src$$constant);
2398   %}
2399 
2400   enc_class load_immUL32(rRegL dst, immUL32 src)
2401   %{
2402     // same as load_immI, but this time we care about zeroes in the high word
2403     int dstenc = $dst$$reg;
2404     if (dstenc >= 8) {
2405       emit_opcode(cbuf, Assembler::REX_B);
2406       dstenc -= 8;
2407     }
2408     emit_opcode(cbuf, 0xB8 | dstenc);
2409     $$$emit32$src$$constant;
2410   %}
2411 
2412   enc_class load_immL32(rRegL dst, immL32 src)
2413   %{
2414     int dstenc = $dst$$reg;
2415     if (dstenc < 8) {
2416       emit_opcode(cbuf, Assembler::REX_W);
2417     } else {
2418       emit_opcode(cbuf, Assembler::REX_WB);
2419       dstenc -= 8;
2420     }
2421     emit_opcode(cbuf, 0xC7);
2422     emit_rm(cbuf, 0x03, 0x00, dstenc);
2423     $$$emit32$src$$constant;
2424   %}
2425 
2426   enc_class load_immP31(rRegP dst, immP32 src)
2427   %{
2428     // same as load_immI, but this time we care about zeroes in the high word
2429     int dstenc = $dst$$reg;
2430     if (dstenc >= 8) {
2431       emit_opcode(cbuf, Assembler::REX_B);
2432       dstenc -= 8;
2433     }
2434     emit_opcode(cbuf, 0xB8 | dstenc);
2435     $$$emit32$src$$constant;
2436   %}
2437 
2438   enc_class load_immP(rRegP dst, immP src)
2439   %{
2440     int dstenc = $dst$$reg;
2441     if (dstenc < 8) {
2442       emit_opcode(cbuf, Assembler::REX_W);
2443     } else {
2444       emit_opcode(cbuf, Assembler::REX_WB);
2445       dstenc -= 8;
2446     }
2447     emit_opcode(cbuf, 0xB8 | dstenc);
2448     // This next line should be generated from ADLC
2449     if ($src->constant_is_oop()) {
2450       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2451     } else {
2452       emit_d64(cbuf, $src$$constant);
2453     }
2454   %}
2455 
2456   enc_class Con32(immI src)
2457   %{
2458     // Output immediate
2459     $$$emit32$src$$constant;
2460   %}
2461 
2462   enc_class Con64(immL src)
2463   %{
2464     // Output immediate
2465     emit_d64($src$$constant);
2466   %}
2467 
2468   enc_class Con32F_as_bits(immF src)
2469   %{
2470     // Output Float immediate bits
2471     jfloat jf = $src$$constant;
2472     jint jf_as_bits = jint_cast(jf);
2473     emit_d32(cbuf, jf_as_bits);
2474   %}
2475 
2476   enc_class Con16(immI src)
2477   %{
2478     // Output immediate
2479     $$$emit16$src$$constant;
2480   %}
2481 
2482   // How is this different from Con32??? XXX
2483   enc_class Con_d32(immI src)
2484   %{
2485     emit_d32(cbuf,$src$$constant);
2486   %}
2487 
2488   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2489     // Output immediate memory reference
2490     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2491     emit_d32(cbuf, 0x00);
2492   %}
2493 
2494   enc_class lock_prefix()
2495   %{
2496     if (os::is_MP()) {
2497       emit_opcode(cbuf, 0xF0); // lock
2498     }
2499   %}
2500 
2501   enc_class REX_mem(memory mem)
2502   %{
2503     if ($mem$$base >= 8) {
2504       if ($mem$$index < 8) {
2505         emit_opcode(cbuf, Assembler::REX_B);
2506       } else {
2507         emit_opcode(cbuf, Assembler::REX_XB);
2508       }
2509     } else {
2510       if ($mem$$index >= 8) {
2511         emit_opcode(cbuf, Assembler::REX_X);
2512       }
2513     }
2514   %}
2515 
2516   enc_class REX_mem_wide(memory mem)
2517   %{
2518     if ($mem$$base >= 8) {
2519       if ($mem$$index < 8) {
2520         emit_opcode(cbuf, Assembler::REX_WB);
2521       } else {
2522         emit_opcode(cbuf, Assembler::REX_WXB);
2523       }
2524     } else {
2525       if ($mem$$index < 8) {
2526         emit_opcode(cbuf, Assembler::REX_W);
2527       } else {
2528         emit_opcode(cbuf, Assembler::REX_WX);
2529       }
2530     }
2531   %}
2532 
2533   // for byte regs
2534   enc_class REX_breg(rRegI reg)
2535   %{
2536     if ($reg$$reg >= 4) {
2537       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2538     }
2539   %}
2540 
2541   // for byte regs: the ">= 4" check applies to $src only.  When $dst is
2542   // below 8, $src encodings 4-7 get a plain REX and 8+ get REX_B; when
2543   // $dst is 8 or above the R form is always used.
2544   enc_class REX_reg_breg(rRegI dst, rRegI src)
2545   %{
2546     const int dst_enc = $dst$$reg;
2547     const int src_enc = $src$$reg;
2548     if (dst_enc >= 8) {
2549       emit_opcode(cbuf, src_enc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
2550     } else if (src_enc >= 8) {
2551       emit_opcode(cbuf, Assembler::REX_B);
2552     } else if (src_enc >= 4) {
2553       emit_opcode(cbuf, Assembler::REX);
2554     }
2555   %}
2556 
2557   // for byte regs: a plain REX is emitted for $reg encodings 4-7 only
2558   // when neither base nor index already selects a prefix
2559   enc_class REX_breg_mem(rRegI reg, memory mem)
2560   %{
2561     const int  reg_enc    = $reg$$reg;
2562     const bool high_base  = ($mem$$base  >= 8);
2563     const bool high_index = ($mem$$index >= 8);
2564     if (reg_enc >= 8) {
2565       // R form, extended with X and/or B
2566       if (high_base) {
2567         emit_opcode(cbuf, high_index ? Assembler::REX_RXB : Assembler::REX_RB);
2568       } else {
2569         emit_opcode(cbuf, high_index ? Assembler::REX_RX : Assembler::REX_R);
2570       }
2571     } else if (high_base) {
2572       emit_opcode(cbuf, high_index ? Assembler::REX_XB : Assembler::REX_B);
2573     } else if (high_index) {
2574       emit_opcode(cbuf, Assembler::REX_X);
2575     } else if (reg_enc >= 4) {
2576       emit_opcode(cbuf, Assembler::REX);
2577     }
2578   %}
2590 
2591   enc_class REX_reg(rRegI reg)
2592   %{
2593     // Only encodings 8 and above produce a prefix here (the B form).
2594     const bool high_reg = ($reg$$reg >= 8);
2595     if (high_reg) { emit_opcode(cbuf, Assembler::REX_B); }
2596   %}
2597 
2598   enc_class REX_reg_wide(rRegI reg)
2599   %{
2600     // Wide variant for a single register: one prefix is always
2601     // emitted -- REX_W for encodings below 8, REX_WB otherwise.
2602     const bool high_reg = ($reg$$reg >= 8);
2603     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2604   %}
2606 
2607   enc_class REX_reg_reg(rRegI dst, rRegI src)
2608   %{
2609     // Optional prefix for a register/register form: the R form when
2610     // $dst >= 8, the B form when $src >= 8, RB for both; nothing when
2611     // both encodings are below 8.
2612     const bool high_dst = ($dst$$reg >= 8);
2613     const bool high_src = ($src$$reg >= 8);
2614     if (high_dst) {
2615       emit_opcode(cbuf, high_src ? Assembler::REX_RB : Assembler::REX_R);
2616     } else if (high_src) {
2617       emit_opcode(cbuf, Assembler::REX_B);
2618     }
2619   %}
2621 
2622   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2623   %{
2624     // Wide register/register prefix; one entry is always emitted.
2625     // Selector bits: 2 = $dst encoding >= 8, 1 = $src encoding >= 8.
2626     static const int wide_prefix[4] = {
2627       Assembler::REX_W,    // dst <  8, src <  8
2628       Assembler::REX_WB,   // dst <  8, src >= 8
2629       Assembler::REX_WR,   // dst >= 8, src <  8
2630       Assembler::REX_WRB   // dst >= 8, src >= 8
2631     };
2632     const int sel = ($dst$$reg >= 8 ? 2 : 0)
2633                   | ($src$$reg >= 8 ? 1 : 0);
2634     emit_opcode(cbuf, wide_prefix[sel]);
2635   %}
2638 
2639   enc_class REX_reg_mem(rRegI reg, memory mem)
2640   %{
2641     // Optional prefix for a register/memory form.
2642     // Selector bits: 4 = $reg >= 8, 2 = index >= 8, 1 = base >= 8.
2643     const int sel = ($reg$$reg   >= 8 ? 4 : 0)
2644                   | ($mem$$index >= 8 ? 2 : 0)
2645                   | ($mem$$base  >= 8 ? 1 : 0);
2646     switch (sel) {
2647       case 1:  emit_opcode(cbuf, Assembler::REX_B);   break;
2648       case 2:  emit_opcode(cbuf, Assembler::REX_X);   break;
2649       case 3:  emit_opcode(cbuf, Assembler::REX_XB);  break;
2650       case 4:  emit_opcode(cbuf, Assembler::REX_R);   break;
2651       case 5:  emit_opcode(cbuf, Assembler::REX_RB);  break;
2652       case 6:  emit_opcode(cbuf, Assembler::REX_RX);  break;
2653       case 7:  emit_opcode(cbuf, Assembler::REX_RXB); break;
2654       default: break;  // every encoding is below 8: no prefix
2655     }
2656   %}
2669 
2670   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2671   %{
2672     // Wide register/memory prefix; one entry is always emitted.
2673     // Selector bits: 4 = $reg >= 8, 2 = index >= 8, 1 = base >= 8.
2674     static const int wide_prefix[8] = {
2675       Assembler::REX_W,     // 0: nothing >= 8
2676       Assembler::REX_WB,    // 1: base
2677       Assembler::REX_WX,    // 2: index
2678       Assembler::REX_WXB,   // 3: index + base
2679       Assembler::REX_WR,    // 4: reg
2680       Assembler::REX_WRB,   // 5: reg + base
2681       Assembler::REX_WRX,   // 6: reg + index
2682       Assembler::REX_WRXB   // 7: reg + index + base
2683     };
2684     const int sel = ($reg$$reg   >= 8 ? 4 : 0)
2685                   | ($mem$$index >= 8 ? 2 : 0)
2686                   | ($mem$$base  >= 8 ? 1 : 0);
2687     emit_opcode(cbuf, wide_prefix[sel]);
2688   %}
2702 
2703   enc_class reg_mem(rRegI ereg, memory mem)
2704   %{
2705     // Hand the register encoding and every memory-operand field straight
2706     // to encode_RegMem; this class emits no prefix of its own.
2707     encode_RegMem(cbuf,
2708                   $ereg$$reg,
2709                   $mem$$base,
2710                   $mem$$index,
2711                   $mem$$scale,
2712                   $mem$$disp,
2713                   $mem->disp_is_oop());
2714   %}
2715 
2716   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2717   %{
2718     // Like a register/memory encoding, except that the slot
2719     // encode_RegMem receives as its register argument carries the
2720     // constant $rm_opcode.  No prefix is emitted by this class.
2721     const int  opc_ext   = $rm_opcode$$constant;
2722     const int  mem_base  = $mem$$base;
2723     const int  mem_index = $mem$$index;
2724     const int  mem_scale = $mem$$scale;
2725     const int  mem_disp  = $mem$$disp;
2726     // disp-as-oop when working with static globals
2727     const bool oop_disp  = $mem->disp_is_oop();
2728     encode_RegMem(cbuf, opc_ext, mem_base, mem_index, mem_scale,
2729                   mem_disp, oop_disp);
2730   %}
2732 
2733   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2734   %{
2735     // Encodes $dst against the address [$src0 + $src1]: no index, no
2736     // scale, and a displacement that is never treated as an oop.
2737     const int no_index = 0x04;            // 0x04 indicates no index
2738     const int no_scale = 0x00;            // 0x00 indicates no scale
2739     const int offset   = $src1$$constant; // 0x00 indicates no displacement
2740     encode_RegMem(cbuf, $dst$$reg, $src0$$reg, no_index, no_scale,
2741                   offset, false);
2742   %}
2744 
2745   enc_class neg_reg(rRegI dst)
2746   %{
2747     // NEG $dst: opcode 0xF7 with 0x03 in the middle emit_rm field.
2748     const int  enc      = $dst$$reg;
2749     const bool high_reg = (enc >= 8);
2750     if (high_reg) {
2751       emit_opcode(cbuf, Assembler::REX_B);
2752     }
2753     emit_opcode(cbuf, 0xF7);
2754     emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
2755   %}
2756 
2757   enc_class neg_reg_wide(rRegI dst)
2758   %{
2759     // NEG $dst, wide form: a prefix is always emitted (REX_W, or REX_WB
2760     // when the encoding is 8 or above), then opcode 0xF7 with 0x03 in
2761     // the middle emit_rm field.
2762     const int  enc      = $dst$$reg;
2763     const bool high_reg = (enc >= 8);
2764     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2765     emit_opcode(cbuf, 0xF7);
2766     emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
2767   %}
2770 
2771   enc_class setLT_reg(rRegI dst)
2772   %{
2773     // SETLT $dst: bytes 0x0F 0x9C followed by emit_rm(0x3, 0x0, enc).
2774     // Encodings 8+ take REX_B (and have 8 subtracted for emit_rm),
2775     // 4-7 take a plain REX, 0-3 take no prefix.
2776     const int enc = $dst$$reg;
2777     if (enc >= 4) {
2778       emit_opcode(cbuf, enc >= 8 ? Assembler::REX_B : Assembler::REX);
2779     }
2780     emit_opcode(cbuf, 0x0F);
2781     emit_opcode(cbuf, 0x9C);
2782     emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
2783   %}
2785 
2786   enc_class setNZ_reg(rRegI dst)
2787   %{
2788     // SETNZ $dst: bytes 0x0F 0x95 followed by emit_rm(0x3, 0x0, enc).
2789     // Encodings 8+ take REX_B (and have 8 subtracted for emit_rm),
2790     // 4-7 take a plain REX, 0-3 take no prefix.
2791     const int enc = $dst$$reg;
2792     if (enc >= 4) {
2793       emit_opcode(cbuf, enc >= 8 ? Assembler::REX_B : Assembler::REX);
2794     }
2795     emit_opcode(cbuf, 0x0F);
2796     emit_opcode(cbuf, 0x95);
2797     emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
2798   %}
2800 
2801 
2802   // Compare the longs and set -1, 0, or 1 into dst
       // Emitted sequence: cmpq; movl dst,-1; jl,s done; setne dst; movzbl dst,dst.
2803   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2804   %{
2805     int src1enc = $src1$$reg;
2806     int src2enc = $src2$$reg;
2807     int dstenc = $dst$$reg;
2808 
2809     // cmpq $src1, $src2
         // (a W-form prefix is always emitted; R is added for src1 >= 8 and
         // B for src2 >= 8)
2810     if (src1enc < 8) {
2811       if (src2enc < 8) {
2812         emit_opcode(cbuf, Assembler::REX_W);
2813       } else {
2814         emit_opcode(cbuf, Assembler::REX_WB);
2815       }
2816     } else {
2817       if (src2enc < 8) {
2818         emit_opcode(cbuf, Assembler::REX_WR);
2819       } else {
2820         emit_opcode(cbuf, Assembler::REX_WRB);
2821       }
2822     }
2823     emit_opcode(cbuf, 0x3B);
2824     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2825 
2826     // movl $dst, -1
2827     if (dstenc >= 8) {
2828       emit_opcode(cbuf, Assembler::REX_B);
2829     }
2830     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2831     emit_d32(cbuf, -1);
2832 
2833     // jl,s done
         // The displacement skips the setne and movzbl emitted below: three
         // emits each (0x06 in total), plus one prefix emit each when
         // dstenc >= 4 (0x08 in total).  Keep it in step with those sequences.
         // NOTE(review): assumes each emit_opcode/emit_rm call writes one byte.
2834     emit_opcode(cbuf, 0x7C);
2835     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2836 
2837     // setne $dst
2838     if (dstenc >= 4) {
2839       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2840     }
2841     emit_opcode(cbuf, 0x0F);
2842     emit_opcode(cbuf, 0x95);
2843     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2844 
2845     // movzbl $dst, $dst
         // ($dst fills both emit_rm register fields, hence REX_RB for 8+)
2846     if (dstenc >= 4) {
2847       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2848     }
2849     emit_opcode(cbuf, 0x0F);
2850     emit_opcode(cbuf, 0xB6);
2851     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2852   %}
2853 
2854   enc_class Push_ResultXD(regD dst) %{
         // Calls fstp_d on [rsp], loads that slot into the XMM register of
         // $dst with movdbl, then adds 8 to rsp.
         // NOTE(review): presumably moves an x87 result into $dst through a
         // stack slot reserved by an earlier Push_SrcXD -- confirm with the
         // instructs that pair the two classes.
2855     MacroAssembler _masm(&cbuf);
2856     __ fstp_d(Address(rsp, 0));
2857     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2858     __ addptr(rsp, 8);
2859   %}
2860 
2861   enc_class Push_SrcXD(regD src) %{
         // Subtracts 8 from rsp, stores the XMM register of $src into the
         // new [rsp] slot with movdbl, then calls fld_d on that slot.
         // rsp is left 8 bytes lower on exit; nothing here restores it.
2862     MacroAssembler _masm(&cbuf);
2863     __ subptr(rsp, 8);
2864     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2865     __ fld_d(Address(rsp, 0));
2866   %}
2867 
2868 
2869   // obj: object to lock
2870   // box: box address (header location) -- killed
2871   // tmp: rax -- killed
2872   // scr: rbx -- killed
2873   //
2874   // What follows is a direct transliteration of fast_lock() and fast_unlock()
2875   // from i486.ad.  See that file for comments.
2876   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
2877   // use the shorter encoding.  (Movl clears the high-order 32-bits).
2878 
2879 
2880   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
2881   %{
         // Emits the inline lock sequence for $obj.  One of three code shapes
         // is generated, selected when the code is emitted:
         //   EmitSync & 1 : store unused_mark() into the box and compare rsp
         //                  with NULL_WORD; no lock attempt is emitted.
         //   EmitSync & 2 : optional biased_locking_enter, then a cmpxchg of
         //                  the box address into the object's first word,
         //                  with a recursive-lock fallback.
         //   otherwise    : tests bit 0x02 of the object's first word and
         //                  branches to separate stack-lock and inflated paths.
2882     Register objReg = as_Register((int)$obj$$reg);
2883     Register boxReg = as_Register((int)$box$$reg);
2884     Register tmpReg = as_Register($tmp$$reg);
2885     Register scrReg = as_Register($scr$$reg);
2886     MacroAssembler masm(&cbuf);
2887 
2888     // Verify uniqueness of register assignments -- necessary but not sufficient
2889     assert (objReg != boxReg && objReg != tmpReg &&
2890             objReg != scrReg && tmpReg != scrReg, "invariant") ;
2891 
         // Count every entry when counters are present.
2892     if (_counters != NULL) {
2893       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
2894     }
2895     if (EmitSync & 1) {
2896         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2897         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2898         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
2899     } else
2900     if (EmitSync & 2) {
2901         Label DONE_LABEL;
2902         if (UseBiasedLocking) {
2903            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
2904           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
2905         }
2906         // QQQ was movl...
             // tmpReg = (object's first word | 1); saved in the box before
             // the cmpxchg below.
2907         masm.movptr(tmpReg, 0x1);
2908         masm.orptr(tmpReg, Address(objReg, 0));
2909         masm.movptr(Address(boxReg, 0), tmpReg);
2910         if (os::is_MP()) {
2911           masm.lock();
2912         }
2913         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2914         masm.jcc(Assembler::equal, DONE_LABEL);
2915 
2916         // Recursive locking
             // box = (tmpReg - rsp) & (7 - page size)
2917         masm.subptr(tmpReg, rsp);
2918         masm.andptr(tmpReg, 7 - os::vm_page_size());
2919         masm.movptr(Address(boxReg, 0), tmpReg);
2920 
2921         masm.bind(DONE_LABEL);
2922         masm.nop(); // avoid branch to branch
2923     } else {
             // NOTE(review): Egress is declared but never bound or used in
             // this block.
2924         Label DONE_LABEL, IsInflated, Egress;
2925 
2926         masm.movptr(tmpReg, Address(objReg, 0)) ;
2927         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
2928         masm.jcc   (Assembler::notZero, IsInflated) ;
2929 
2930         // it's stack-locked, biased or neutral
2931         // TODO: optimize markword triage order to reduce the number of
2932         // conditional branches in the most common cases.
2933         // Beware -- there's a subtle invariant that fetch of the markword
2934         // at [FETCH], below, will never observe a biased encoding (*101b).
2935         // If this invariant is not held we'll suffer exclusion (safety) failure.
2936 
2937         if (UseBiasedLocking && !UseOptoBiasInlining) {
2938           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
2939           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
2940         }
2941 
2942         // was q will it destroy high?
2943         masm.orl   (tmpReg, 1) ;
2944         masm.movptr(Address(boxReg, 0), tmpReg) ;
2945         if (os::is_MP()) { masm.lock(); }
2946         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
2947         if (_counters != NULL) {
2948            masm.cond_inc32(Assembler::equal,
2949                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2950         }
2951         masm.jcc   (Assembler::equal, DONE_LABEL);
2952 
2953         // Recursive locking
             // Same computation as the EmitSync & 2 shape; the counter bumped
             // on "equal" is again fast_path_entry_count_addr().
2954         masm.subptr(tmpReg, rsp);
2955         masm.andptr(tmpReg, 7 - os::vm_page_size());
2956         masm.movptr(Address(boxReg, 0), tmpReg);
2957         if (_counters != NULL) {
2958            masm.cond_inc32(Assembler::equal,
2959                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
2960         }
2961         masm.jmp   (DONE_LABEL) ;
2962 
2963         masm.bind  (IsInflated) ;
2964         // It's inflated
2965 
2966         // TODO: someday avoid the ST-before-CAS penalty by
2967         // relocating (deferring) the following ST.
2968         // We should also think about trying a CAS without having
2969         // fetched _owner.  If the CAS is successful we may
2970         // avoid an RTO->RTS upgrade on the $line.
2971         // Without cast to int32_t a movptr will destroy r10 which is typically obj
2972         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
2973 
             // boxReg takes over the value loaded from the object's first
             // word; the field offsets below are all biased by -2.
             // NOTE(review): presumably -2 removes the 0x02 tag tested above,
             // giving the ObjectMonitor address -- confirm.
2974         masm.mov    (boxReg, tmpReg) ;
2975         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2976         masm.testptr(tmpReg, tmpReg) ;
2977         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
2978 
2979         // It's inflated and appears unlocked
2980         if (os::is_MP()) { masm.lock(); }
2981         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
2982         // Intentional fall-through into DONE_LABEL ...
2983 
2984         masm.bind  (DONE_LABEL) ;
2985         masm.nop   () ;                 // avoid jmp to jmp
2986     }
2987   %}
2988 
2989   // obj: object to unlock
2990   // box: box address (displaced header location), killed
2991   // RBX: killed tmp; cannot be obj nor box
2992   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
2993   %{
         // Emits the inline unlock sequence for $obj.  One of three code shapes
         // is generated, selected when the code is emitted:
         //   EmitSync & 4 : only a compare of rsp with 0.
         //   EmitSync & 8 : optional biased_locking_exit, recursive-unlock
         //                  test on the box, then a cmpxchg of the box
         //                  contents back into the object's first word.
         //   otherwise    : separate paths for the inflated and stack-locked
         //                  cases; EmitSync bits 65536 and 32768 adjust it.
2994 
2995     Register objReg = as_Register($obj$$reg);
2996     Register boxReg = as_Register($box$$reg);
2997     Register tmpReg = as_Register($tmp$$reg);
2998     MacroAssembler masm(&cbuf);
2999 
3000     if (EmitSync & 4) {
3001        masm.cmpptr(rsp, 0) ;
3002     } else
3003     if (EmitSync & 8) {
3004        Label DONE_LABEL;
3005        if (UseBiasedLocking) {
3006          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3007        }
3008 
3009        // Check whether the displaced header is 0
3010        //(=> recursive unlock)
3011        masm.movptr(tmpReg, Address(boxReg, 0));
3012        masm.testptr(tmpReg, tmpReg);
3013        masm.jcc(Assembler::zero, DONE_LABEL);
3014 
3015        // If not recursive lock, reset the header to displaced header
3016        if (os::is_MP()) {
3017          masm.lock();
3018        }
3019        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3020        masm.bind(DONE_LABEL);
3021        masm.nop(); // avoid branch to branch
3022     } else {
3023        Label DONE_LABEL, Stacked, CheckSucc ;
3024 
3025        if (UseBiasedLocking && !UseOptoBiasInlining) {
3026          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3027        }
3028 
            // A zero word in the box ends the sequence at once; otherwise
            // bit 0x02 of the object's first word picks the inflated path
            // (set) or the Stacked path (clear).
3029        masm.movptr(tmpReg, Address(objReg, 0)) ;
3030        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
3031        masm.jcc   (Assembler::zero, DONE_LABEL) ;
3032        masm.testl (tmpReg, 0x02) ;
3033        masm.jcc   (Assembler::zero, Stacked) ;
3034 
3035        // It's inflated
            // (owner ^ r15_thread) | recursions must be zero to continue; then
            // cxq | EntryList non-zero diverts to CheckSucc, else owner is
            // cleared.  All field offsets are biased by -2.
3036        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3037        masm.xorptr(boxReg, r15_thread) ;
3038        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3039        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3040        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3041        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3042        masm.jcc   (Assembler::notZero, CheckSucc) ;
3043        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3044        masm.jmp   (DONE_LABEL) ;
3045 
            // CheckSucc is bound here when bit 65536 is clear, or just before
            // DONE_LABEL (see below) when it is set.
3046        if ((EmitSync & 65536) == 0) {
3047          Label LSuccess, LGoSlowPath ;
3048          masm.bind  (CheckSucc) ;
3049          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3050          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3051 
3052          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3053          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3054          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3055          // are all faster when the write buffer is populated.
3056          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3057          if (os::is_MP()) {
3058             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3059          }
3060          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3061          masm.jcc   (Assembler::notZero, LSuccess) ;
3062 
3063          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3064          if (os::is_MP()) { masm.lock(); }
3065          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3066          masm.jcc   (Assembler::notEqual, LSuccess) ;
3067          // Intentional fall-through into slow-path
3068 
3069          masm.bind  (LGoSlowPath) ;
3070          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3071          masm.jmp   (DONE_LABEL) ;
3072 
3073          masm.bind  (LSuccess) ;
3074          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3075          masm.jmp   (DONE_LABEL) ;
3076        }
3077 
3078        masm.bind  (Stacked) ;
3079        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3080        if (os::is_MP()) { masm.lock(); }
3081        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3082 
3083        if (EmitSync & 65536) {
3084           masm.bind (CheckSucc) ;
3085        }
3086        masm.bind(DONE_LABEL);
3087        if (EmitSync & 32768) {
3088           masm.nop();                      // avoid branch to branch
3089        }
3090     }
3091   %}
3092 
3093 
3094   enc_class enc_rethrow()
3095   %{
         // Emits opcode 0xE9 followed by a relocated 32-bit displacement to
         // OptoRuntime::rethrow_stub().  The displacement is computed as
         // target - insts_end() - 4, with insts_end() read after the opcode
         // has been emitted; the relocation is a runtime_call_Relocation in
         // RELOC_DISP32 format.
3096     cbuf.set_insts_mark();
3097     emit_opcode(cbuf, 0xE9); // jmp entry
3098     emit_d32_reloc(cbuf,
3099                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3100                    runtime_call_Relocation::spec(),
3101                    RELOC_DISP32);
3102   %}
3103 
3104 %}
3105 
3106 
3107 
3108 //----------FRAME--------------------------------------------------------------
3109 // Definition of frame structure and management information.
3110 //
3111 //  S T A C K   L A Y O U T    Allocators stack-slot number
3112 //                             |   (to get allocators register number
3113 //  G  Owned by    |        |  v    add OptoReg::stack0())
3114 //  r   CALLER     |        |
3115 //  o     |        +--------+      pad to even-align allocators stack-slot
3116 //  w     V        |  pad0  |        numbers; owned by CALLER
3117 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3118 //  h     ^        |   in   |  5
3119 //        |        |  args  |  4   Holes in incoming args owned by SELF
3120 //  |     |        |        |  3
3121 //  |     |        +--------+
3122 //  V     |        | old out|      Empty on Intel, window on Sparc
3123 //        |    old |preserve|      Must be even aligned.
3124 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3125 //        |        |   in   |  3   area for Intel ret address
3126 //     Owned by    |preserve|      Empty on Sparc.
3127 //       SELF      +--------+
3128 //        |        |  pad2  |  2   pad to align old SP
3129 //        |        +--------+  1
3130 //        |        | locks  |  0
3131 //        |        +--------+----> OptoReg::stack0(), even aligned
3132 //        |        |  pad1  | 11   pad to align new SP
3133 //        |        +--------+
3134 //        |        |        | 10
3135 //        |        | spills |  9   spills
3136 //        V        |        |  8   (pad0 slot for callee)
3137 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3138 //        ^        |  out   |  7
3139 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3140 //     Owned by    +--------+
3141 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3142 //        |    new |preserve|      Must be even-aligned.
3143 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3144 //        |        |        |
3145 //
3146 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3147 //         known from SELF's arguments and the Java calling convention.
3148 //         Region 6-7 is determined per call site.
3149 // Note 2: If the calling convention leaves holes in the incoming argument
3150 //         area, those holes are owned by SELF.  Holes in the outgoing area
3151 //         are owned by the CALLEE.  Holes should not be necessary in the
3152 //         incoming area, as the Java calling convention is completely under
3153 //         the control of the AD file.  Doubles can be sorted and packed to
3154 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3155 //         varargs C calling conventions.
3156 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3157 //         even aligned with pad0 as needed.
3158 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3159 //         region 6-11 is even aligned; it may be padded out more so that
3160 //         the region from SP to FP meets the minimum stack alignment.
3161 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3162 //         alignment.  Region 11, pad1, may be dynamically extended so that
3163 //         SP meets the minimum alignment.
3164 
3165 frame
3166 %{
3167   // What direction does stack grow in (assumed to be same for C & Java)
3168   stack_direction(TOWARDS_LOW);
3169 
3170   // These three registers define part of the calling convention
3171   // between compiled code and the interpreter.
3172   inline_cache_reg(RAX);                // Inline Cache Register
3173   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3174                                         // calling interpreter
3175 
3176   // Optional: name the operand used by cisc-spilling to access
3177   // [stack_pointer + offset]
3178   cisc_spilling_operand_name(indOffset32);
3179 
3180   // Number of stack slots consumed by locking an object
3181   sync_stack_slots(2);
3182 
3183   // Compiled code's Frame Pointer
3184   frame_pointer(RSP);
3185 
3186   // Interpreter stores its frame pointer in a register which is
3187   // stored to the stack by I2CAdaptors.
3188   // I2CAdaptors convert from interpreted java to compiled java.
3189   interpreter_frame_pointer(RBP);
3190 
3191   // Stack alignment requirement
3192   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
3193 
3194   // Number of stack slots between incoming argument block and the start of
3195   // a new frame.  The PROLOG must add this many slots to the stack.  The
3196   // EPILOG must remove this many slots.  amd64 needs two slots for
3197   // return address.
       // Two further slots are added when VerifyStackAtCalls is set.
       // NOTE(review): the value below starts at 4, not the 2 the sentence
       // above accounts for -- confirm what the other two slots hold.
3198   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
3199 
3200   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3201   // for calls to C.  Supports the var-args backing area for register parms.
3202   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
3203 
3204   // The after-PROLOG location of the return address.  Location of
3205   // return address specifies a type (REG or STACK) and a number
3206   // representing the register number (i.e. - use a register name) or
3207   // stack slot.
3208   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3209   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + fixed_slots()) is rounded
       // to a multiple of WordsPerLong * 2 before the 2 is subtracted again.
3210   return_addr(STACK - 2 +
3211               round_to(2 + 2 * VerifyStackAtCalls +
3212                        Compile::current()->fixed_slots(),
3213                        WordsPerLong * 2));
3214 
3215   // Body of function which returns an integer array locating
3216   // arguments either in registers or in stack slots.  Passed an array
3217   // of ideal registers called "sig" and a "length" count.  Stack-slot
3218   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3219   // arguments for a CALLEE.  Incoming stack arguments are
3220   // automatically biased by the preserve_stack_slots field above.
3221 
3222   calling_convention
3223   %{
3224     // No difference between ingoing/outgoing just pass false
3225     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3226   %}
3227 
3228   c_calling_convention
3229   %{
3230     // This is obviously always outgoing
         // (the value returned by c_calling_convention is discarded)
3231     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3232   %}
3233 
3234   // Location of compiled Java return values.  Same as C for now.
       // lo[] and hi[] are indexed by ideal_reg and give the two halves of
       // the returned OptoRegPair; the first two entries of each table are
       // placeholders.
       // NOTE(review): the first assert's lower bound is Op_RegI although
       // both tables carry an Op_RegN entry -- confirm whether Op_RegN is
       // meant to be rejected here.
3235   return_value
3236   %{
3237     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
3238            "only return normal values");
3239 
3240     static const int lo[Op_RegL + 1] = {
3241       0,
3242       0,
3243       RAX_num,  // Op_RegN
3244       RAX_num,  // Op_RegI
3245       RAX_num,  // Op_RegP
3246       XMM0_num, // Op_RegF
3247       XMM0_num, // Op_RegD
3248       RAX_num   // Op_RegL
3249     };
3250     static const int hi[Op_RegL + 1] = {
3251       0,
3252       0,
3253       OptoReg::Bad, // Op_RegN
3254       OptoReg::Bad, // Op_RegI
3255       RAX_H_num,    // Op_RegP
3256       OptoReg::Bad, // Op_RegF
3257       XMM0_H_num,   // Op_RegD
3258       RAX_H_num     // Op_RegL
3259     };
         // Guards the table length against _last_machine_leaf.
3260     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
3261     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
3262   %}
3263 %}
3264 
3265 //----------ATTRIBUTES---------------------------------------------------------
3266 //----------Operand Attributes-------------------------------------------------
3267 op_attrib op_cost(0);        // Required cost attribute
                                  // (operands below set their own value
                                  // with op_cost(...); 0 is presumably the
                                  // default for those that do not)
3268 
3269 //----------Instruction Attributes---------------------------------------------
3270 ins_attrib ins_cost(100);       // Required cost attribute
3271 ins_attrib ins_size(8);         // Required size attribute (in bits)
3272 ins_attrib ins_short_branch(0); // Required flag: is this instruction
3273                                 // a non-matching short branch variant
3274                                 // of some long branch?
3275 ins_attrib ins_alignment(1);    // Required alignment attribute (must
3276                                 // be a power of 2) specifies the
3277                                 // alignment that some part of the
3278                                 // instruction (not necessarily the
3279                                 // start) requires.  If > 1, a
3280                                 // compute_padding() function must be
3281                                 // provided for the instruction
3282 
3283 //----------OPERANDS-----------------------------------------------------------
3284 // Operand definitions must precede instruction definitions for correct parsing
3285 // in the ADLC because operands constitute user defined types which are used in
3286 // instruction definitions.
3287 
3288 //----------Simple Operands----------------------------------------------------
3289 // Immediate Operands
3290 // Integer Immediate
     // Matches any ConI node (no predicate); op_cost 10, constant interface.
3291 operand immI()
3292 %{
3293   match(ConI);
3294 
3295   op_cost(10);
3296   format %{ %}
3297   interface(CONST_INTER);
3298 %}
3299 
3300 // Constant for test vs zero
     // Matches a ConI node only when n->get_int() == 0; op_cost 0.
3301 operand immI0()
3302 %{
3303   predicate(n->get_int() == 0);
3304   match(ConI);
3305 
3306   op_cost(0);
3307   format %{ %}
3308   interface(CONST_INTER);
3309 %}
3310 
3311 // Constant for increment
     // Matches a ConI node only when n->get_int() == 1; op_cost 0.
3312 operand immI1()
3313 %{
3314   predicate(n->get_int() == 1);
3315   match(ConI);
3316 
3317   op_cost(0);
3318   format %{ %}
3319   interface(CONST_INTER);
3320 %}
3321 
3322 // Constant for decrement
     // Matches a ConI node only when n->get_int() == -1; op_cost 0.
3323 operand immI_M1()
3324 %{
3325   predicate(n->get_int() == -1);
3326   match(ConI);
3327 
3328   op_cost(0);
3329   format %{ %}
3330   interface(CONST_INTER);
3331 %}
3332 
3333 // Valid scale values for addressing modes
     // Matches a ConI node whose value lies in 0..3 inclusive.  No op_cost
     // is given in this operand.
3334 operand immI2()
3335 %{
3336   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3337   match(ConI);
3338 
3339   format %{ %}
3340   interface(CONST_INTER);
3341 %}
3342 
3343 operand immI8()
3344 %{
       // Integer constant in the signed 8-bit range: -0x80 <= value < 0x80.
       // op_cost 5.
3345   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
3346   match(ConI);
3347 
3348   op_cost(5);
3349   format %{ %}
3350   interface(CONST_INTER);
3351 %}
3352 
3353 operand immI16()
3354 %{
       // Integer constant in the signed 16-bit range: -32768 <= value <= 32767.
       // op_cost 10.
3355   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3356   match(ConI);
3357 
3358   op_cost(10);
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 // Constant for long shifts
     // Matches a ConI node only when n->get_int() == 32; op_cost 0.
3364 operand immI_32()
3365 %{
3366   predicate( n->get_int() == 32 );
3367   match(ConI);
3368 
3369   op_cost(0);
3370   format %{ %}
3371   interface(CONST_INTER);
3372 %}
3373 
3374 // Constant for long shifts
     // Matches a ConI node only when n->get_int() == 64; op_cost 0.
3375 operand immI_64()
3376 %{
3377   predicate( n->get_int() == 64 );
3378   match(ConI);
3379 
3380   op_cost(0);
3381   format %{ %}
3382   interface(CONST_INTER);
3383 %}
3384 
3385 // Pointer Immediate
     // Matches any ConP node (no predicate); op_cost 10.
3386 operand immP()
3387 %{
3388   match(ConP);
3389 
3390   op_cost(10);
3391   format %{ %}
3392   interface(CONST_INTER);
3393 %}
3394 
3395 // NULL Pointer Immediate
     // Matches a ConP node only when n->get_ptr() == 0; op_cost 5.
3396 operand immP0()
3397 %{
3398   predicate(n->get_ptr() == 0);
3399   match(ConP);
3400 
3401   op_cost(5);
3402   format %{ %}
3403   interface(CONST_INTER);
3404 %}
3405 
3406 operand immP_poll() %{
       // Matches a ConP node whose value is non-zero and equal to the address
       // returned by os::get_polling_page().  No op_cost is given.
3407   predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
3408   match(ConP);
3409 
3410   // formats are generated automatically for constants and base registers
3411   format %{ %}
3412   interface(CONST_INTER);
3413 %}
3414 
3415 // Narrow Pointer Immediate
     // Matches any ConN node (no predicate); op_cost 10.
3416 operand immN() %{
3417   match(ConN);
3418 
3419   op_cost(10);
3420   format %{ %}
3421   interface(CONST_INTER);
3422 %}
3423 
3424 // NULL Narrow Pointer Immediate: a ConN whose narrow constant is 0
3425 operand immN0() %{
3426   predicate(n->get_narrowcon() == 0);
3427   match(ConN);
3428 
3429   op_cost(5);  // lower cost than the general immN (10)
3430   format %{ %}
3431   interface(CONST_INTER);
3432 %}
3433 
3434 operand immP31()  // Non-oop pointer constant whose value has no bits set at or above bit 31
3435 %{
3436   predicate(!n->as_Type()->type()->isa_oopptr()
3437             && (n->get_ptr() >> 31) == 0);
3438   match(ConP);
3439 
3440   op_cost(5);
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 
3446 // Long Immediate: matches any ConL (no predicate)
3447 operand immL()
3448 %{
3449   match(ConL);
3450 
3451   op_cost(20);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 // Long Immediate 8-bit: a ConL in the signed 8-bit range [-0x80, 0x7F]
3457 operand immL8()
3458 %{
3459   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3460   match(ConL);
3461 
3462   op_cost(5);
3463   format %{ %}
3464   interface(CONST_INTER);
3465 %}
3466 
3467 // Long Immediate 32-bit unsigned: a ConL that survives a round trip through unsigned int
3468 operand immUL32()
3469 %{
3470   predicate(n->get_long() == (unsigned int) (n->get_long()));  // value unchanged by truncation to unsigned int
3471   match(ConL);
3472 
3473   op_cost(10);
3474   format %{ %}
3475   interface(CONST_INTER);
3476 %}
3477 
3478 // Long Immediate 32-bit signed: a ConL that survives a round trip through int
3479 operand immL32()
3480 %{
3481   predicate(n->get_long() == (int) (n->get_long()));  // value unchanged by truncation to int
3482   match(ConL);
3483 
3484   op_cost(15);
3485   format %{ %}
3486   interface(CONST_INTER);
3487 %}
3488 
3489 // Long Immediate zero: a ConL equal to 0
3490 operand immL0()
3491 %{
3492   predicate(n->get_long() == 0L);
3493   match(ConL);
3494 
3495   op_cost(10);
3496   format %{ %}
3497   interface(CONST_INTER);
3498 %}
3499 
3500 // Constant for increment: a ConL equal to 1
3501 operand immL1()
3502 %{
3503   predicate(n->get_long() == 1);
3504   match(ConL);
3505 
3506   format %{ %}
3507   interface(CONST_INTER);
3508 %}
3509 
3510 // Constant for decrement: a ConL equal to -1
3511 operand immL_M1()
3512 %{
3513   predicate(n->get_long() == -1);
3514   match(ConL);
3515 
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
3520 // Long Immediate: a ConL equal to the value 10
3521 operand immL10()
3522 %{
3523   predicate(n->get_long() == 10);
3524   match(ConL);
3525 
3526   format %{ %}
3527   interface(CONST_INTER);
3528 %}
3529 
3530 // Long immediate from 0 to 127 (inclusive).
3531 // Used for a shorter form of long mul by 10.
3532 operand immL_127()
3533 %{
3534   predicate(0 <= n->get_long() && n->get_long() < 0x80);  // 0 <= value <= 0x7F
3535   match(ConL);
3536 
3537   op_cost(10);
3538   format %{ %}
3539   interface(CONST_INTER);
3540 %}
3541 
3542 // Long Immediate: low 32-bit mask (the value 0xFFFFFFFF)
3543 operand immL_32bits()
3544 %{
3545   predicate(n->get_long() == 0xFFFFFFFFL);
3546   match(ConL);
3547   op_cost(20);
3548 
3549   format %{ %}
3550   interface(CONST_INTER);
3551 %}
3552 
3553 // Float Immediate zero: a ConF whose jint_cast value is 0
3554 operand immF0()
3555 %{
3556   predicate(jint_cast(n->getf()) == 0);  // NOTE(review): presumably a bit-pattern test (so -0.0f would not match) -- confirm jint_cast semantics
3557   match(ConF);
3558 
3559   op_cost(5);
3560   format %{ %}
3561   interface(CONST_INTER);
3562 %}
3563 
3564 // Float Immediate: matches any ConF (no predicate)
3565 operand immF()
3566 %{
3567   match(ConF);
3568 
3569   op_cost(15);
3570   format %{ %}
3571   interface(CONST_INTER);
3572 %}
3573 
3574 // Double Immediate zero: a ConD whose jlong_cast value is 0
3575 operand immD0()
3576 %{
3577   predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): presumably a bit-pattern test (so -0.0 would not match) -- confirm jlong_cast semantics
3578   match(ConD);
3579 
3580   op_cost(5);
3581   format %{ %}
3582   interface(CONST_INTER);
3583 %}
3584 
3585 // Double Immediate: matches any ConD (no predicate)
3586 operand immD()
3587 %{
3588   match(ConD);
3589 
3590   op_cost(15);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Immediates for special shifts (sign extend)
3596 
3597 // Int constant 16 (listed under the "special shifts" immediates)
3598 operand immI_16()
3599 %{
3600   predicate(n->get_int() == 16);
3601   match(ConI);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 operand immI_24()  // Int constant 24
3608 %{
3609   predicate(n->get_int() == 24);
3610   match(ConI);
3611 
3612   format %{ %}
3613   interface(CONST_INTER);
3614 %}
3615 
3616 // Constant for byte-wide masking: the int value 255 (0xFF)
3617 operand immI_255()
3618 %{
3619   predicate(n->get_int() == 255);
3620   match(ConI);
3621 
3622   format %{ %}
3623   interface(CONST_INTER);
3624 %}
3625 
3626 // Constant for short-wide masking: the int value 65535 (0xFFFF)
3627 operand immI_65535()
3628 %{
3629   predicate(n->get_int() == 65535);
3630   match(ConI);
3631 
3632   format %{ %}
3633   interface(CONST_INTER);
3634 %}
3635 
3636 // Constant for byte-wide masking: the long value 255 (0xFF)
3637 operand immL_255()
3638 %{
3639   predicate(n->get_long() == 255);
3640   match(ConL);
3641 
3642   format %{ %}
3643   interface(CONST_INTER);
3644 %}
3645 
3646 // Constant for short-wide masking: the long value 65535 (0xFFFF)
3647 operand immL_65535()
3648 %{
3649   predicate(n->get_long() == 65535);
3650   match(ConL);
3651 
3652   format %{ %}
3653   interface(CONST_INTER);
3654 %}
3655 
3656 // Register Operands
3657 // Integer Register: allocated from register class int_reg
3658 operand rRegI()
3659 %{
3660   constraint(ALLOC_IN_RC(int_reg));
3661   match(RegI);
3662   // The fixed-register int operands are also accepted where an rRegI is expected.
3663   match(rax_RegI);
3664   match(rbx_RegI);
3665   match(rcx_RegI);
3666   match(rdx_RegI);
3667   match(rdi_RegI);
3668 
3669   format %{ %}
3670   interface(REG_INTER);
3671 %}
3672 
3673 // Special Registers: int operand allocated from register class int_rax_reg
3674 operand rax_RegI()
3675 %{
3676   constraint(ALLOC_IN_RC(int_rax_reg));
3677   match(RegI);
3678   match(rRegI);
3679 
3680   format %{ "RAX" %}
3681   interface(REG_INTER);
3682 %}
3683 
3684 // Special Registers: int operand allocated from register class int_rbx_reg
3685 operand rbx_RegI()
3686 %{
3687   constraint(ALLOC_IN_RC(int_rbx_reg));
3688   match(RegI);
3689   match(rRegI);
3690 
3691   format %{ "RBX" %}
3692   interface(REG_INTER);
3693 %}
3694 
3695 operand rcx_RegI()  // Int operand allocated from register class int_rcx_reg
3696 %{
3697   constraint(ALLOC_IN_RC(int_rcx_reg));
3698   match(RegI);
3699   match(rRegI);
3700 
3701   format %{ "RCX" %}
3702   interface(REG_INTER);
3703 %}
3704 
3705 operand rdx_RegI()  // Int operand allocated from register class int_rdx_reg
3706 %{
3707   constraint(ALLOC_IN_RC(int_rdx_reg));
3708   match(RegI);
3709   match(rRegI);
3710 
3711   format %{ "RDX" %}
3712   interface(REG_INTER);
3713 %}
3714 
3715 operand rdi_RegI()  // Int operand allocated from register class int_rdi_reg
3716 %{
3717   constraint(ALLOC_IN_RC(int_rdi_reg));
3718   match(RegI);
3719   match(rRegI);
3720 
3721   format %{ "RDI" %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 operand no_rcx_RegI()  // Int operand allocated from int_no_rcx_reg; rcx_RegI is not among its match rules
3726 %{
3727   constraint(ALLOC_IN_RC(int_no_rcx_reg));
3728   match(RegI);
3729   match(rax_RegI);
3730   match(rbx_RegI);
3731   match(rdx_RegI);
3732   match(rdi_RegI);
3733 
3734   format %{ %}
3735   interface(REG_INTER);
3736 %}
3737 
3738 operand no_rax_rdx_RegI()  // Int operand allocated from int_no_rax_rdx_reg; rax_RegI/rdx_RegI are not among its match rules
3739 %{
3740   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
3741   match(RegI);
3742   match(rbx_RegI);
3743   match(rcx_RegI);
3744   match(rdi_RegI);
3745 
3746   format %{ %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 // Pointer Register: allocated from register class any_reg; accepts every pointer operand listed below
3751 operand any_RegP()
3752 %{
3753   constraint(ALLOC_IN_RC(any_reg));
3754   match(RegP);
3755   match(rax_RegP);
3756   match(rbx_RegP);
3757   match(rdi_RegP);
3758   match(rsi_RegP);
3759   match(rbp_RegP);
3760   match(r15_RegP);
3761   match(rRegP);
3762 
3763   format %{ %}
3764   interface(REG_INTER);
3765 %}
3766 
3767 operand rRegP()  // General pointer operand allocated from register class ptr_reg
3768 %{
3769   constraint(ALLOC_IN_RC(ptr_reg));
3770   match(RegP);
3771   match(rax_RegP);
3772   match(rbx_RegP);
3773   match(rdi_RegP);
3774   match(rsi_RegP);
3775   match(rbp_RegP);
3776   match(r15_RegP);  // See Q&A below about r15_RegP.
3777 
3778   format %{ %}
3779   interface(REG_INTER);
3780 %}
3781 
3782 operand rRegN() %{  // Narrow (RegN) operand; note it is allocated from the int register class
3783   constraint(ALLOC_IN_RC(int_reg));
3784   match(RegN);
3785 
3786   format %{ %}
3787   interface(REG_INTER);
3788 %}
3789 
3790 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3791 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3792 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
3793 // The output of an instruction is controlled by the allocator, which respects
3794 // register class masks, not match rules.  Unless an instruction mentions
3795 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3796 // by the allocator as an input.
3797 
3798 operand no_rax_RegP()  // Pointer operand allocated from ptr_no_rax_reg; accepts rbx/rsi/rdi pointer operands
3799 %{
3800   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
3801   match(RegP);
3802   match(rbx_RegP);
3803   match(rsi_RegP);
3804   match(rdi_RegP);
3805 
3806   format %{ %}
3807   interface(REG_INTER);
3808 %}
3809 
3810 operand no_rbp_RegP()  // Pointer operand allocated from ptr_no_rbp_reg; accepts rbx/rsi/rdi pointer operands
3811 %{
3812   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
3813   match(RegP);
3814   match(rbx_RegP);
3815   match(rsi_RegP);
3816   match(rdi_RegP);
3817 
3818   format %{ %}
3819   interface(REG_INTER);
3820 %}
3821 
3822 operand no_rax_rbx_RegP()  // Pointer operand allocated from ptr_no_rax_rbx_reg; accepts rsi/rdi pointer operands
3823 %{
3824   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
3825   match(RegP);
3826   match(rsi_RegP);
3827   match(rdi_RegP);
3828 
3829   format %{ %}
3830   interface(REG_INTER);
3831 %}
3832 
3833 // Special Registers
3834 // Return a pointer value: pointer operand allocated from register class ptr_rax_reg
3835 operand rax_RegP()
3836 %{
3837   constraint(ALLOC_IN_RC(ptr_rax_reg));
3838   match(RegP);
3839   match(rRegP);
3840 
3841   format %{ %}
3842   interface(REG_INTER);
3843 %}
3844 
3845 // Special Registers
3846 // Return a compressed pointer value: RegN operand allocated from register class int_rax_reg
3847 operand rax_RegN()
3848 %{
3849   constraint(ALLOC_IN_RC(int_rax_reg));
3850   match(RegN);
3851   match(rRegN);
3852 
3853   format %{ %}
3854   interface(REG_INTER);
3855 %}
3856 
3857 // Used in AtomicAdd: pointer operand allocated from register class ptr_rbx_reg
3858 operand rbx_RegP()
3859 %{
3860   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3861   match(RegP);
3862   match(rRegP);
3863 
3864   format %{ %}
3865   interface(REG_INTER);
3866 %}
3867 
3868 operand rsi_RegP()  // Pointer operand allocated from register class ptr_rsi_reg
3869 %{
3870   constraint(ALLOC_IN_RC(ptr_rsi_reg));
3871   match(RegP);
3872   match(rRegP);
3873 
3874   format %{ %}
3875   interface(REG_INTER);
3876 %}
3877 
3878 // Used in rep stosq: pointer operand allocated from register class ptr_rdi_reg
3879 operand rdi_RegP()
3880 %{
3881   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3882   match(RegP);
3883   match(rRegP);
3884 
3885   format %{ %}
3886   interface(REG_INTER);
3887 %}
3888 
3889 operand rbp_RegP()  // Pointer operand allocated from register class ptr_rbp_reg
3890 %{
3891   constraint(ALLOC_IN_RC(ptr_rbp_reg));
3892   match(RegP);
3893   match(rRegP);
3894 
3895   format %{ %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 operand r15_RegP()  // Pointer operand allocated from register class ptr_r15_reg (r15 is described in this file as the read-only TLS register)
3900 %{
3901   constraint(ALLOC_IN_RC(ptr_r15_reg));
3902   match(RegP);
3903   match(rRegP);
3904 
3905   format %{ %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 operand rRegL()  // General long operand allocated from register class long_reg
3910 %{
3911   constraint(ALLOC_IN_RC(long_reg));
3912   match(RegL);
3913   match(rax_RegL);
3914   match(rdx_RegL);
3915 
3916   format %{ %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 // Special Registers: long operand allocated from register class long_no_rax_rdx_reg
3921 operand no_rax_rdx_RegL()
3922 %{
3923   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3924   match(RegL);
3925   match(rRegL);
3926 
3927   format %{ %}
3928   interface(REG_INTER);
3929 %}
3930 
3931 operand no_rax_RegL()  // Long operand; like no_rax_rdx_RegL but additionally accepts rdx_RegL as an input
3932 %{
3933   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL, so rdx is excluded from allocation despite the operand name -- confirm intended
3934   match(RegL);
3935   match(rRegL);
3936   match(rdx_RegL);
3937 
3938   format %{ %}
3939   interface(REG_INTER);
3940 %}
3941 
3942 operand no_rcx_RegL()  // Long operand allocated from register class long_no_rcx_reg
3943 %{
3944   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3945   match(RegL);
3946   match(rRegL);
3947 
3948   format %{ %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand rax_RegL()  // Long operand allocated from register class long_rax_reg
3953 %{
3954   constraint(ALLOC_IN_RC(long_rax_reg));
3955   match(RegL);
3956   match(rRegL);
3957 
3958   format %{ "RAX" %}
3959   interface(REG_INTER);
3960 %}
3961 
3962 operand rcx_RegL()  // Long operand allocated from register class long_rcx_reg
3963 %{
3964   constraint(ALLOC_IN_RC(long_rcx_reg));
3965   match(RegL);
3966   match(rRegL);
3967 
3968   format %{ %}
3969   interface(REG_INTER);
3970 %}
3971 
3972 operand rdx_RegL()  // Long operand allocated from register class long_rdx_reg
3973 %{
3974   constraint(ALLOC_IN_RC(long_rdx_reg));
3975   match(RegL);
3976   match(rRegL);
3977 
3978   format %{ %}
3979   interface(REG_INTER);
3980 %}
3981 
3982 // Flags register, used as output of compare instructions (register class int_flags)
3983 operand rFlagsReg()
3984 %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987 
3988   format %{ "RFLAGS" %}
3989   interface(REG_INTER);
3990 %}
3991 
3992 // Flags register, used as output of FLOATING POINT compare instructions.
3993 operand rFlagsRegU()  // Same register class and match rule as rFlagsReg; a distinct operand type shown as "RFLAGS_U"
3994 %{
3995   constraint(ALLOC_IN_RC(int_flags));
3996   match(RegFlags);
3997 
3998   format %{ "RFLAGS_U" %}
3999   interface(REG_INTER);
4000 %}
4001 
4002 operand rFlagsRegUCF() %{  // Flags operand variant in int_flags, shown as "RFLAGS_U_CF"
4003   constraint(ALLOC_IN_RC(int_flags));
4004   match(RegFlags);
4005   predicate(false);  // NOTE(review): the predicate is constant false; presumably this operand is only reached where an instruction names it explicitly -- confirm
4006 
4007   format %{ "RFLAGS_U_CF" %}
4008   interface(REG_INTER);
4009 %}
4010 
4011 // Float register operands: allocated from register class float_reg
4012 operand regF()
4013 %{
4014   constraint(ALLOC_IN_RC(float_reg));
4015   match(RegF);
4016 
4017   format %{ %}
4018   interface(REG_INTER);
4019 %}
4020 
4021 // Double register operands: allocated from register class double_reg
4022 operand regD()
4023 %{
4024   constraint(ALLOC_IN_RC(double_reg));
4025   match(RegD);
4026 
4027   format %{ %}
4028   interface(REG_INTER);
4029 %}
4030 
4031 
4032 //----------Memory Operands----------------------------------------------------
4033 // Direct Memory Operand
4034 // operand direct(immP addr)
4035 // %{
4036 //   match(addr);
4037 
4038 //   format %{ "[$addr]" %}
4039 //   interface(MEMORY_INTER) %{
4040 //     base(0xFFFFFFFF);
4041 //     index(0x4);
4042 //     scale(0x0);
4043 //     disp($addr);
4044 //   %}
4045 // %}
4046 
4047 // Indirect Memory Operand: address is the base register alone, [reg]
4048 operand indirect(any_RegP reg)
4049 %{
4050   constraint(ALLOC_IN_RC(ptr_reg));
4051   match(reg);
4052 
4053   format %{ "[$reg]" %}
4054   interface(MEMORY_INTER) %{
4055     base($reg);
4056     index(0x4);  // No Index
4057     scale(0x0);  // No Scale
4058     disp(0x0);   // No Displacement
4059   %}
4060 %}
4061 
4062 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8
4063 operand indOffset8(any_RegP reg, immL8 off)
4064 %{
4065   constraint(ALLOC_IN_RC(ptr_reg));
4066   match(AddP reg off);
4067 
4068   format %{ "[$reg + $off (8-bit)]" %}
4069   interface(MEMORY_INTER) %{
4070     base($reg);
4071     index(0x4);  // No Index
4072     scale(0x0);  // No Scale
4073     disp($off);
4074   %}
4075 %}
4076 
4077 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32
4078 operand indOffset32(any_RegP reg, immL32 off)
4079 %{
4080   constraint(ALLOC_IN_RC(ptr_reg));
4081   match(AddP reg off);
4082 
4083   format %{ "[$reg + $off (32-bit)]" %}
4084   interface(MEMORY_INTER) %{
4085     base($reg);
4086     index(0x4);  // No Index
4087     scale(0x0);  // No Scale
4088     disp($off);
4089   %}
4090 %}
4091 
4092 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
4093 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4094 %{
4095   constraint(ALLOC_IN_RC(ptr_reg));
4096   match(AddP (AddP reg lreg) off);
4097 
4098   op_cost(10);
4099   format %{"[$reg + $off + $lreg]" %}
4100   interface(MEMORY_INTER) %{
4101     base($reg);
4102     index($lreg);
4103     scale(0x0);  // No Scale
4104     disp($off);
4105   %}
4106 %}
4107 
4108 // Indirect Memory Plus Index Register Operand: [reg + lreg], no displacement
4109 operand indIndex(any_RegP reg, rRegL lreg)
4110 %{
4111   constraint(ALLOC_IN_RC(ptr_reg));
4112   match(AddP reg lreg);
4113 
4114   op_cost(10);
4115   format %{"[$reg + $lreg]" %}
4116   interface(MEMORY_INTER) %{
4117     base($reg);
4118     index($lreg);
4119     scale(0x0);  // No Scale
4120     disp(0x0);   // No Displacement
4121   %}
4122 %}
4123 
4124 // Indirect Memory Plus Scaled Index Register Operand: [reg + (lreg << scale)]
4125 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4126 %{
4127   constraint(ALLOC_IN_RC(ptr_reg));
4128   match(AddP reg (LShiftL lreg scale));
4129 
4130   op_cost(10);
4131   format %{"[$reg + $lreg << $scale]" %}
4132   interface(MEMORY_INTER) %{
4133     base($reg);
4134     index($lreg);
4135     scale($scale);  // immI2, i.e. a shift count of 0..3
4136     disp(0x0);      // No Displacement
4137   %}
4138 %}
4139 
4140 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
4141 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4142 %{
4143   constraint(ALLOC_IN_RC(ptr_reg));
4144   match(AddP (AddP reg (LShiftL lreg scale)) off);
4145 
4146   op_cost(10);
4147   format %{"[$reg + $off + $lreg << $scale]" %}
4148   interface(MEMORY_INTER) %{
4149     base($reg);
4150     index($lreg);
4151     scale($scale);  // immI2, i.e. a shift count of 0..3
4152     disp($off);
4153   %}
4154 %}
4155 
4156 // Indirect Memory Plus Scaled Positive (int) Index Register Plus Offset Operand: [reg + (idx << scale) + off]
4157 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4158 %{
4159   constraint(ALLOC_IN_RC(ptr_reg));
4160   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // requires a long type with lower bound (_lo) >= 0; presumably the node reached is the ConvI2L of the match rule -- confirm input numbering
4161   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4162 
4163   op_cost(10);
4164   format %{"[$reg + $off + $idx << $scale]" %}
4165   interface(MEMORY_INTER) %{
4166     base($reg);
4167     index($idx);    // the int register is used directly as the index
4168     scale($scale);
4169     disp($off);
4170   %}
4171 %}
4172 
4173 // Indirect Narrow Oop Plus Offset Operand: [R12 + (reg << 3) + off]
4174 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
4175 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4176 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4177   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only when the narrow oop shift equals Address::times_8
4178   constraint(ALLOC_IN_RC(ptr_reg));
4179   match(AddP (DecodeN reg) off);
4180 
4181   op_cost(10);
4182   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
4183   interface(MEMORY_INTER) %{
4184     base(0xc); // R12
4185     index($reg);   // the narrow oop register serves as the index
4186     scale(0x3);    // matches the "<< 3" in the format string
4187     disp($off);
4188   %}
4189 %}
4190 
4191 // Indirect Narrow Oop Operand: [reg], where reg holds a narrow oop and the narrow oop shift is 0
4192 operand indirectNarrow(rRegN reg)
4193 %{
4194   predicate(Universe::narrow_oop_shift() == 0);
4195   constraint(ALLOC_IN_RC(ptr_reg));
4196   match(DecodeN reg);
4197 
4198   format %{ "[$reg]" %}
4199   interface(MEMORY_INTER) %{
4200     base($reg);
4201     index(0x4);  // No Index
4202     scale(0x0);  // No Scale
4203     disp(0x0);   // No Displacement
4204   %}
4205 %}
4206 
4207 // Indirect Narrow Oop Plus Short Offset Operand: [reg + off], off is an immL8; narrow oop shift must be 0
4208 operand indOffset8Narrow(rRegN reg, immL8 off)
4209 %{
4210   predicate(Universe::narrow_oop_shift() == 0);
4211   constraint(ALLOC_IN_RC(ptr_reg));
4212   match(AddP (DecodeN reg) off);
4213 
4214   format %{ "[$reg + $off (8-bit)]" %}
4215   interface(MEMORY_INTER) %{
4216     base($reg);
4217     index(0x4);  // No Index
4218     scale(0x0);  // No Scale
4219     disp($off);
4220   %}
4221 %}
4222 
4223 // Indirect Narrow Oop Plus Long Offset Operand: [reg + off], off is an immL32; narrow oop shift must be 0
4224 operand indOffset32Narrow(rRegN reg, immL32 off)
4225 %{
4226   predicate(Universe::narrow_oop_shift() == 0);
4227   constraint(ALLOC_IN_RC(ptr_reg));
4228   match(AddP (DecodeN reg) off);
4229 
4230   format %{ "[$reg + $off (32-bit)]" %}
4231   interface(MEMORY_INTER) %{
4232     base($reg);
4233     index(0x4);  // No Index
4234     scale(0x0);  // No Scale
4235     disp($off);
4236   %}
4237 %}
4238 
4239 // Indirect Narrow Oop Plus Index Register Plus Offset Operand: [reg + lreg + off]; narrow oop shift must be 0
4240 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4241 %{
4242   predicate(Universe::narrow_oop_shift() == 0);
4243   constraint(ALLOC_IN_RC(ptr_reg));
4244   match(AddP (AddP (DecodeN reg) lreg) off);
4245 
4246   op_cost(10);
4247   format %{"[$reg + $off + $lreg]" %}
4248   interface(MEMORY_INTER) %{
4249     base($reg);
4250     index($lreg);
4251     scale(0x0);  // No Scale
4252     disp($off);
4253   %}
4254 %}
4255 
4256 // Indirect Narrow Oop Plus Index Register Operand: [reg + lreg], no displacement; narrow oop shift must be 0
4257 operand indIndexNarrow(rRegN reg, rRegL lreg)
4258 %{
4259   predicate(Universe::narrow_oop_shift() == 0);
4260   constraint(ALLOC_IN_RC(ptr_reg));
4261   match(AddP (DecodeN reg) lreg);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $lreg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($lreg);
4268     scale(0x0);  // No Scale
4269     disp(0x0);   // No Displacement
4270   %}
4271 %}
4272 
4273 // Indirect Narrow Oop Plus Scaled Index Register Operand: [reg + (lreg << scale)]; narrow oop shift must be 0
4274 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4275 %{
4276   predicate(Universe::narrow_oop_shift() == 0);
4277   constraint(ALLOC_IN_RC(ptr_reg));
4278   match(AddP (DecodeN reg) (LShiftL lreg scale));
4279 
4280   op_cost(10);
4281   format %{"[$reg + $lreg << $scale]" %}
4282   interface(MEMORY_INTER) %{
4283     base($reg);
4284     index($lreg);
4285     scale($scale);  // immI2, i.e. a shift count of 0..3
4286     disp(0x0);      // No Displacement
4287   %}
4288 %}
4289 
4290 // Indirect Narrow Oop Plus Scaled Index Register Plus Offset Operand: [reg + (lreg << scale) + off]; narrow oop shift must be 0
4291 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4292 %{
4293   predicate(Universe::narrow_oop_shift() == 0);
4294   constraint(ALLOC_IN_RC(ptr_reg));
4295   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4296 
4297   op_cost(10);
4298   format %{"[$reg + $off + $lreg << $scale]" %}
4299   interface(MEMORY_INTER) %{
4300     base($reg);
4301     index($lreg);
4302     scale($scale);  // immI2, i.e. a shift count of 0..3
4303     disp($off);
4304   %}
4305 %}
4306 
4307 // Indirect Narrow Oop Plus Scaled Positive (int) Index Register Plus Offset Operand: [reg + (idx << scale) + off]
4308 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4309 %{
4310   constraint(ALLOC_IN_RC(ptr_reg));
4311   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // narrow oop shift 0 and a long type with lower bound (_lo) >= 0; presumably on the ConvI2L of the match rule -- confirm input numbering
4312   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4313 
4314   op_cost(10);
4315   format %{"[$reg + $off + $idx << $scale]" %}
4316   interface(MEMORY_INTER) %{
4317     base($reg);
4318     index($idx);    // the int register is used directly as the index
4319     scale($scale);
4320     disp($off);
4321   %}
4322 %}
4323 
4324 
4325 //----------Special Memory Operands--------------------------------------------
4326 // Stack Slot Operand - This operand is used for loading and storing temporary
4327 //                      values on the stack where a match requires a value to
4328 //                      flow through memory.
4329 operand stackSlotP(sRegP reg)  // Pointer stack slot: RSP-based address with $reg as displacement
4330 %{
4331   constraint(ALLOC_IN_RC(stack_slots));
4332   // No match rule because this operand is only generated in matching
4333 
4334   format %{ "[$reg]" %}
4335   interface(MEMORY_INTER) %{
4336     base(0x4);   // RSP
4337     index(0x4);  // No Index
4338     scale(0x0);  // No Scale
4339     disp($reg);  // Stack Offset
4340   %}
4341 %}
4342 
4343 operand stackSlotI(sRegI reg)  // Int stack slot: RSP-based address with $reg as displacement
4344 %{
4345   constraint(ALLOC_IN_RC(stack_slots));
4346   // No match rule because this operand is only generated in matching
4347 
4348   format %{ "[$reg]" %}
4349   interface(MEMORY_INTER) %{
4350     base(0x4);   // RSP
4351     index(0x4);  // No Index
4352     scale(0x0);  // No Scale
4353     disp($reg);  // Stack Offset
4354   %}
4355 %}
4356 
4357 operand stackSlotF(sRegF reg)  // Float stack slot: RSP-based address with $reg as displacement
4358 %{
4359   constraint(ALLOC_IN_RC(stack_slots));
4360   // No match rule because this operand is only generated in matching
4361 
4362   format %{ "[$reg]" %}
4363   interface(MEMORY_INTER) %{
4364     base(0x4);   // RSP
4365     index(0x4);  // No Index
4366     scale(0x0);  // No Scale
4367     disp($reg);  // Stack Offset
4368   %}
4369 %}
4370 
4371 operand stackSlotD(sRegD reg)  // Double stack slot: RSP-based address with $reg as displacement
4372 %{
4373   constraint(ALLOC_IN_RC(stack_slots));
4374   // No match rule because this operand is only generated in matching
4375 
4376   format %{ "[$reg]" %}
4377   interface(MEMORY_INTER) %{
4378     base(0x4);   // RSP
4379     index(0x4);  // No Index
4380     scale(0x0);  // No Scale
4381     disp($reg);  // Stack Offset
4382   %}
4383 %}
4384 operand stackSlotL(sRegL reg)  // Long stack slot: RSP-based address with $reg as displacement
4385 %{
4386   constraint(ALLOC_IN_RC(stack_slots));
4387   // No match rule because this operand is only generated in matching
4388 
4389   format %{ "[$reg]" %}
4390   interface(MEMORY_INTER) %{
4391     base(0x4);   // RSP
4392     index(0x4);  // No Index
4393     scale(0x0);  // No Scale
4394     disp($reg);  // Stack Offset
4395   %}
4396 %}
4397 
4398 //----------Conditional Branch Operands----------------------------------------
4399 // Comparison Op  - This is the operation of the comparison, and is limited to
4400 //                  the following set of codes:
4401 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4402 //
4403 // Other attributes of the comparison, such as unsignedness, are specified
4404 // by the comparison instruction that sets a condition code flags register.
4405 // That result is represented by a flags operand whose subtype is appropriate
4406 // to the unsignedness (etc.) of the comparison.
4407 //
4408 // Later, the instruction which matches both the Comparison Op (a Bool) and
4409 // the flags (produced by the Cmp) specifies the coding of the comparison op
4410 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4411 
4412 // Comparison Code (signed): maps each Bool test to an encoding value and a mnemonic suffix
4413 operand cmpOp()
4414 %{
4415   match(Bool);
4416 
4417   format %{ "" %}
4418   interface(COND_INTER) %{
4419     equal(0x4, "e");
4420     not_equal(0x5, "ne");
4421     less(0xC, "l");
4422     greater_equal(0xD, "ge");
4423     less_equal(0xE, "le");
4424     greater(0xF, "g");
4425   %}
4426 %}
4427 
4428 // Comparison Code, unsigned compare.  Used by FP also, with
4429 // C2 (unordered) turned into GT or LT already.  The other bits
4430 // C0 and C3 are turned into Carry & Zero flags.
4431 operand cmpOpU()  // less/greater map to the below/above forms ("b", "nb", "be", "nbe")
4432 %{
4433   match(Bool);
4434 
4435   format %{ "" %}
4436   interface(COND_INTER) %{
4437     equal(0x4, "e");
4438     not_equal(0x5, "ne");
4439     less(0x2, "b");
4440     greater_equal(0x3, "nb");
4441     less_equal(0x6, "be");
4442     greater(0x7, "nbe");
4443   %}
4444 %}
4445 
4446 
4447 // Floating comparisons that don't require any fixup for the unordered case:
4448 operand cmpOpUCF() %{  // restricted by the predicate to the lt, ge, le and gt tests; same encodings as cmpOpU
4449   match(Bool);
4450   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4451             n->as_Bool()->_test._test == BoolTest::ge ||
4452             n->as_Bool()->_test._test == BoolTest::le ||
4453             n->as_Bool()->_test._test == BoolTest::gt);
4454   format %{ "" %}
4455   interface(COND_INTER) %{
4456     equal(0x4, "e");
4457     not_equal(0x5, "ne");
4458     less(0x2, "b");
4459     greater_equal(0x3, "nb");
4460     less_equal(0x6, "be");
4461     greater(0x7, "nbe");
4462   %}
4463 %}
4464 
4465 
4466 // Floating comparisons that can be fixed up with extra conditional jumps:
4467 operand cmpOpUCF2() %{  // restricted by the predicate to the ne and eq tests; same encodings as cmpOpU
4468   match(Bool);
4469   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4470             n->as_Bool()->_test._test == BoolTest::eq);
4471   format %{ "" %}
4472   interface(COND_INTER) %{
4473     equal(0x4, "e");
4474     not_equal(0x5, "ne");
4475     less(0x2, "b");
4476     greater_equal(0x3, "nb");
4477     less_equal(0x6, "be");
4478     greater(0x7, "nbe");
4479   %}
4480 %}
4481 
4482 
4483 //----------OPERAND CLASSES----------------------------------------------------
4484 // Operand Classes are groups of operands that are used to simplify
4485 // instruction definitions by not requiring the AD writer to specify separate
4486 // instructions for every form of operand when the instruction accepts
4487 // multiple operand types with the same basic encoding and format.  The classic
4488 // case of this is memory operands.
4489 // The "memory" class groups all MEMORY_INTER addressing operands defined above except the stackSlot* operands.
4490 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4491                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
4492                indCompressedOopOffset,
4493                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4494                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4495                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
4496 
4497 //----------PIPELINE-----------------------------------------------------------
4498 // Rules which define the behavior of the target architecture's pipeline.
4499 pipeline %{
4500 
4501 //----------ATTRIBUTES---------------------------------------------------------
4502 attributes %{
4503   variable_size_instructions;        // Variable size instructions
4504   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4505   instruction_unit_size = 1;         // Instruction unit size is 1 byte
4506   instruction_fetch_unit_size = 16;  // The processor fetches one line
4507   instruction_fetch_units = 1;       // of 16 bytes
4508 
4509   // List of nop instructions
4510   nops( MachNop );
4511 %}
4512 
4513 //----------RESOURCES----------------------------------------------------------
4514 // Resources are the functional units available to the machine
4515 
4516 // Generic P2/P3 pipeline
4517 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4518 // 3 instructions decoded per cycle.
4519 // 2 load/store ops per cycle (NOTE(review): three units MS0-MS2 are declared below -- confirm), 1 branch, 1 FPU,
4520 // 3 ALU op, only ALU0 handles mul instructions.
4521 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4522            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4523            BR, FPU,
4524            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4525 
4526 //----------PIPELINE DESCRIPTION-----------------------------------------------
4527 // Pipeline Description specifies the stages in the machine's pipeline
4528 
4529 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes
4530 pipe_desc(S0, S1, S2, S3, S4, S5);
4531 
4532 //----------PIPELINE CLASSES---------------------------------------------------
4533 // Pipeline Classes describe the stages in which input and output are
4534 // referenced by the hardware pipeline.
4535 
4536 // Naming convention: ialu or fpu
4537 // Then: _reg
4538 // Then: _reg if there is a 2nd register
4539 // Then: _long if it's a pair of instructions implementing a long
4540 // Then: _fat if it requires the big decoder
4541 //   Or: _mem if it requires the big decoder and a memory unit.
4542 
4543 // Integer ALU reg operation: dst is both read (S3) and written (S4)
4544 pipe_class ialu_reg(rRegI dst)
4545 %{
4546     single_instruction;
4547     dst    : S4(write);
4548     dst    : S3(read);
4549     DECODE : S0;        // any decoder
4550     ALU    : S3;        // any alu
4551 %}
4552 
4553 // Long ALU reg operation: counted as two instructions; dst is read (S3) and written (S4)
4554 pipe_class ialu_reg_long(rRegL dst)
4555 %{
4556     instruction_count(2);
4557     dst    : S4(write);
4558     dst    : S3(read);
4559     DECODE : S0(2);     // any 2 decoders
4560     ALU    : S3(2);     // any 2 alus
4561 %}
4562 
4563 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
4564 pipe_class ialu_reg_fat(rRegI dst)
4565 %{
4566     single_instruction;
4567     dst    : S4(write);
4568     dst    : S3(read);
4569     D0     : S0;        // big decoder only
4570     ALU    : S3;        // any alu
4571 %}
4572 
4573 // Long ALU reg operation using big decoder: counted as two instructions
4574 pipe_class ialu_reg_long_fat(rRegL dst)
4575 %{
4576     instruction_count(2);
4577     dst    : S4(write);
4578     dst    : S3(read);
4579     D0     : S0(2);     // big decoder only; twice
4580     ALU    : S3(2);     // any 2 alus
4581 %}
4582 
4583 // Integer ALU reg-reg operation: src read in S3, dst written in S4
4584 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4585 %{
4586     single_instruction;
4587     dst    : S4(write);
4588     src    : S3(read);
4589     DECODE : S0;        // any decoder
4590     ALU    : S3;        // any alu
4591 %}
4592 
4593 // Long ALU reg-reg operation: counted as two instructions
4594 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4595 %{
4596     instruction_count(2);
4597     dst    : S4(write);
4598     src    : S3(read);
4599     DECODE : S0(2);     // any 2 decoders
4600     ALU    : S3(2);     // any 2 alus
4601 %}
4602 
4603 // Integer ALU reg-reg operation using big decoder
4604 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)  // NOTE(review): src is declared as a memory operand although the class is named reg_reg -- confirm
4605 %{
4606     single_instruction;
4607     dst    : S4(write);
4608     src    : S3(read);
4609     D0     : S0;        // big decoder only
4610     ALU    : S3;        // any alu
4611 %}
4612 
4613 // Long ALU reg-reg operation using big decoder: counted as two instructions
4614 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4615 %{
4616     instruction_count(2);
4617     dst    : S4(write);
4618     src    : S3(read);
4619     D0     : S0(2);     // big decoder only; twice
4620     ALU    : S3(2);     // any 2 alus
4621 %}
4622 
4623 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
4624 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4625 %{
4626     single_instruction;
4627     dst    : S5(write);
4628     mem    : S3(read);
4629     D0     : S0;        // big decoder only
4630     ALU    : S4;        // any alu
4631     MEM    : S3;        // any mem
4632 %}
4633 
4634 // Integer mem operation (prefetch): uses a decoder and a memory unit, no ALU and no register operand
4635 pipe_class ialu_mem(memory mem)
4636 %{
4637     single_instruction;
4638     mem    : S3(read);
4639     D0     : S0;        // big decoder only
4640     MEM    : S3;        // any mem
4641 %}
4642 
4643 // Integer Store to Memory: mem operand read in S3, src register read in S5
4644 pipe_class ialu_mem_reg(memory mem, rRegI src)
4645 %{
4646     single_instruction;
4647     mem    : S3(read);
4648     src    : S5(read);
4649     D0     : S0;        // big decoder only
4650     ALU    : S4;        // any alu
4651     MEM    : S3;        // any mem
4652 %}
4653 
4654 // // Long Store to Memory
4655 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4656 // %{
4657 //     instruction_count(2);
4658 //     mem    : S3(read);
4659 //     src    : S5(read);
4660 //     D0     : S0(2);          // big decoder only; twice
4661 //     ALU    : S4(2);     // any 2 alus
4662 //     MEM    : S3(2);  // Both mems
4663 // %}
4664 
4665 // Integer Store to Memory with no register source (immediate form, going by the class name)
4666 pipe_class ialu_mem_imm(memory mem)
4667 %{
4668     single_instruction;
4669     mem    : S3(read);
4670     D0     : S0;        // big decoder only
4671     ALU    : S4;        // any alu
4672     MEM    : S3;        // any mem
4673 %}
4674 
4675 // Integer ALU0 reg-reg operation: like ialu_reg_reg, but tied to ALU0 and to the big decoder
4676 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4677 %{
4678     single_instruction;
4679     dst    : S4(write);
4680     src    : S3(read);
4681     D0     : S0;        // Big decoder only
4682     ALU0   : S3;        // only alu0
4683 %}
4684 
4685 // Integer ALU0 reg-mem operation: like ialu_reg_mem, but the ALU stage must use ALU0
4686 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4687 %{
4688     single_instruction;
4689     dst    : S5(write);
4690     mem    : S3(read);
4691     D0     : S0;        // big decoder only
4692     ALU0   : S4;        // ALU0 only
4693     MEM    : S3;        // any mem
4694 %}
4695 
4696 // Integer ALU reg-reg operation whose only declared result is the flags register (cr)
4697 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4698 %{
4699     single_instruction;
4700     cr     : S4(write); // flags are produced in stage S4
4701     src1   : S3(read);
4702     src2   : S3(read);
4703     DECODE : S0;        // any decoder
4704     ALU    : S3;        // any alu
4705 %}
4706 
4707 // Integer ALU reg-imm operation writing the flags register; only one register source is declared
4708 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4709 %{
4710     single_instruction;
4711     cr     : S4(write); // flags are produced in stage S4
4712     src1   : S3(read);
4713     DECODE : S0;        // any decoder
4714     ALU    : S3;        // any alu
4715 %}
4716 
4717 // Integer ALU reg-mem operation writing the flags register; the ALU stage (S4) follows the memory stage (S3)
4718 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4719 %{
4720     single_instruction;
4721     cr     : S4(write);
4722     src1   : S3(read);
4723     src2   : S3(read);
4724     D0     : S0;        // big decoder only
4725     ALU    : S4;        // any alu
4726     MEM    : S3;        // any mem
4727 %}
4728 
4729 // Four-instruction sequence reading p, q and y; NOTE(review): the name suggests a compare-less-than mask idiom -- confirm against the instructs that use this class
4730 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4731 %{
4732     instruction_count(4);
4733     y      : S4(read);  // all three operands are declared as reads; no write is declared
4734     q      : S3(read);
4735     p      : S3(read);
4736     DECODE : S0(4);     // any 4 decoders
4737 %}
4738 
4739 // Conditional move reg-reg: reads the source and the flags, writes dst; no ALU resource is claimed
4740 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4741 %{
4742     single_instruction;
4743     dst    : S4(write);
4744     src    : S3(read);
4745     cr     : S3(read);  // the condition comes from the flags register
4746     DECODE : S0;        // any decoder
4747 %}
4748 
4749 // Conditional move reg-mem: as pipe_cmov_reg, with a memory source and a MEM stage (note: cr is the first parameter here)
4750 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4751 %{
4752     single_instruction;
4753     dst    : S4(write);
4754     src    : S3(read);
4755     cr     : S3(read);
4756     DECODE : S0;        // any decoder
4757     MEM    : S3;        // any mem
4758 %}
4759 
4760 // Conditional move reg-reg long: long-register form of pipe_cmov_reg, using two decoders
4761 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4762 %{
4763     single_instruction; // NOTE(review): ialu_reg_reg_long pairs S0(2) with instruction_count(2); confirm single_instruction is intended here
4764     dst    : S4(write);
4765     src    : S3(read);
4766     cr     : S3(read);
4767     DECODE : S0(2);     // any 2 decoders
4768 %}
4769 
4770 // XXX
4771 // // Conditional move double reg-reg
4772 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4773 // %{
4774 //     single_instruction;
4775 //     dst    : S4(write);
4776 //     src    : S3(read);
4777 //     cr     : S3(read);
4778 //     DECODE : S0;     // any decoder
4779 // %}
4780 
4781 // Float single-register operation: two instructions; dst is only declared as a read
4782 pipe_class fpu_reg(regD dst)
4783 %{
4784     instruction_count(2);
4785     dst    : S3(read);
4786     DECODE : S0(2);     // any 2 decoders
4787     FPU    : S3;
4788 %}
4789 
4790 // Float reg-reg operation: two instructions, one FPU slot in S3
4791 pipe_class fpu_reg_reg(regD dst, regD src)
4792 %{
4793     instruction_count(2);
4794     dst    : S4(write);
4795     src    : S3(read);
4796     DECODE : S0(2);     // any 2 decoders
4797     FPU    : S3;
4798 %}
4799 
4800 // Float operation with two register sources: three instructions, FPU claimed with a count of 2
4801 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4802 %{
4803     instruction_count(3);
4804     dst    : S4(write);
4805     src1   : S3(read);
4806     src2   : S3(read);
4807     DECODE : S0(3);     // any 3 decoders
4808     FPU    : S3(2);
4809 %}
4810 
4811 // Float operation with three register sources: four instructions, FPU claimed with a count of 2
4812 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4813 %{
4814     instruction_count(4);
4815     dst    : S4(write);
4816     src1   : S3(read);
4817     src2   : S3(read);
4818     src3   : S3(read);
4819     DECODE : S0(4);     // any 4 decoders
4820     FPU    : S3(2);
4821 %}
4822 
4823 // Float operation with one memory source and two register sources: four instructions
4824 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4825 %{
4826     instruction_count(4);
4827     dst    : S4(write);
4828     src1   : S3(read);  // the memory operand
4829     src2   : S3(read);
4830     src3   : S3(read);
4831     DECODE : S1(3);     // any 3 decoders
4832     D0     : S0;        // Big decoder only
4833     FPU    : S3(2);
4834     MEM    : S3;        // any mem
4835 %}
4836 
4837 // Float reg-mem operation: two instructions (the load on the big decoder, then an FPU POP on any decoder)
4838 pipe_class fpu_reg_mem(regD dst, memory mem)
4839 %{
4840     instruction_count(2);
4841     dst    : S5(write);
4842     mem    : S3(read);
4843     D0     : S0;        // big decoder only
4844     DECODE : S1;        // any decoder for FPU POP
4845     FPU    : S4;
4846     MEM    : S3;        // any mem
4847 %}
4848 
4849 // Float operation with one register source and one memory source: three instructions
4850 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4851 %{
4852     instruction_count(3);
4853     dst    : S5(write);
4854     src1   : S3(read);
4855     mem    : S3(read);
4856     D0     : S0;        // big decoder only
4857     DECODE : S1(2);     // any 2 decoders (FPU POP)
4858     FPU    : S4;
4859     MEM    : S3;        // any mem
4860 %}
4861 
4862 // Float mem-reg operation: two instructions; both operands are declared as reads, nothing is declared written
4863 pipe_class fpu_mem_reg(memory mem, regD src)
4864 %{
4865     instruction_count(2);
4866     src    : S5(read);
4867     mem    : S3(read);
4868     DECODE : S0;        // any decoder for FPU PUSH
4869     D0     : S1;        // big decoder only
4870     FPU    : S4;
4871     MEM    : S3;        // any mem
4872 %}
4873 
4874 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4875 %{
4876     instruction_count(3); // float mem-reg operation with two register sources
4877     src1   : S3(read);
4878     src2   : S3(read);
4879     mem    : S3(read);
4880     DECODE : S0(2);     // any 2 decoders (FPU PUSH)
4881     D0     : S1;        // big decoder only
4882     FPU    : S4;
4883     MEM    : S3;        // any mem
4884 %}
4885 
4886 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4887 %{
4888     instruction_count(3); // float operation with one register and two memory operands
4889     src1   : S3(read);
4890     src2   : S3(read);
4891     mem    : S4(read);
4892     DECODE : S0;        // any decoder for FPU PUSH
4893     D0     : S0(2);     // big decoder only; twice
4894     FPU    : S4;
4895     MEM    : S3(2);     // any mem; twice, matching the two memory operands
4896 %}
4897 
4898 pipe_class fpu_mem_mem(memory dst, memory src1)
4899 %{
4900     instruction_count(2); // float memory-to-memory operation; no FPU resource is claimed
4901     src1   : S3(read);
4902     dst    : S4(read);  // the destination memory operand is declared as a read
4903     D0     : S0(2);     // big decoder only; twice
4904     MEM    : S3(2);     // any mem; twice
4905 %}
4906 
4907 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4908 %{
4909     instruction_count(3); // float operation on three memory operands
4910     src1   : S3(read);
4911     src2   : S3(read);
4912     dst    : S4(read);  // the destination memory operand is declared as a read
4913     D0     : S0(3);     // big decoder only; three times
4914     FPU    : S4;
4915     MEM    : S3(3);     // any mem; three times
4916 %}
4917 
4918 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4919 %{
4920     instruction_count(3); // float mem-reg operation; the class name indicates a constant is also involved
4921     src1   : S4(read);
4922     mem    : S4(read);
4923     DECODE : S0;        // any decoder for FPU PUSH
4924     D0     : S0(2);     // big decoder only; twice
4925     FPU    : S4;
4926     MEM    : S3(2);     // any mem; twice
4927 %}
4928 
4929 // Float load constant: two instructions; the only operand is dst, yet a MEM stage is claimed for the load
4930 pipe_class fpu_reg_con(regD dst)
4931 %{
4932     instruction_count(2);
4933     dst    : S5(write);
4934     D0     : S0;        // big decoder only for the load
4935     DECODE : S1;        // any decoder for FPU POP
4936     FPU    : S4;
4937     MEM    : S3;        // any mem
4938 %}
4939 
4940 // Float load constant combined with a register source: three instructions
4941 pipe_class fpu_reg_reg_con(regD dst, regD src)
4942 %{
4943     instruction_count(3);
4944     dst    : S5(write);
4945     src    : S3(read);
4946     D0     : S0;        // big decoder only for the load
4947     DECODE : S1(2);     // any 2 decoders (FPU POP)
4948     FPU    : S4;
4949     MEM    : S3;        // any mem
4950 %}
4951 
4952 // Unconditional branch: a single instruction that only claims the BR resource in S3
4953 pipe_class pipe_jmp(label labl)
4954 %{
4955     single_instruction;
4956     BR   : S3;
4957 %}
4958 
4959 // Conditional branch: reads the flags early (S1) and claims the BR resource in S3
4960 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4961 %{
4962     single_instruction;
4963     cr    : S1(read);
4964     BR    : S3;
4965 %}
4966 
4967 // Allocation idiom built around a compare-and-exchange (going by the class name): serializing, fixed latency
4968 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4969 %{
4970     instruction_count(1); force_serialization;
4971     fixed_latency(6);   // latency is pinned at 6 for this class
4972     heap_ptr : S3(read);
4973     DECODE   : S0(3);
4974     D0       : S2;
4975     MEM      : S3;
4976     ALU      : S3(2);
4977     dst      : S5(write);
4978     BR       : S5;
4979 %}
4980 
4981 // Generic big/slow expanded idiom: catch-all class (10 instructions, latency 100) also used by the XMM loads in this file
4982 pipe_class pipe_slow()
4983 %{
4984     instruction_count(10); multiple_bundles; force_serialization;
4985     fixed_latency(100);
4986     D0  : S0(2);
4987     MEM : S3(2);
4988 %}
4989 
4990 // The real do-nothing guy: zero instructions and no operand or resource usage
4991 pipe_class empty()
4992 %{
4993     instruction_count(0);
4994 %}
4995 
4996 // Define the class for the Nop node: MachNop is bound to the pipe class named empty
4997 define
4998 %{
4999    MachNop = empty;
5000 %}
5001 
5002 %}
5003 
5004 //----------INSTRUCTIONS-------------------------------------------------------
5005 //
5006 // match      -- States which machine-independent subtree may be replaced
5007 //               by this instruction.
5008 // ins_cost   -- The estimated cost of this instruction is used by instruction
5009 //               selection to identify a minimum cost tree of machine
5010 //               instructions that matches a tree of machine-independent
5011 //               instructions.
5012 // format     -- A string providing the disassembly for this instruction.
5013 //               The value of an instruction's operand may be inserted
5014 //               by referring to it with a '$' prefix.
5015 // opcode     -- Three instruction opcodes may be provided.  These are referred
5016 //               to within an encode class as $primary, $secondary, and $tertiary
5017 //               respectively.  The primary opcode is commonly used to
5018 //               indicate the type of machine instruction, while secondary
5019 //               and tertiary are often used for prefix options or addressing
5020 //               modes.
5021 // ins_encode -- A list of encode classes with parameters. The encode class
5022 //               name must have been defined in an 'enc_class' specification
5023 //               in the encode section of the architecture description.
5024 
5025 
5026 //----------Load/Store/Move Instructions---------------------------------------
5027 //----------Load Instructions--------------------------------------------------
5028 
5029 // Load Byte (8 bit signed): a LoadB node becomes one sign-extending movsbl from memory
5030 instruct loadB(rRegI dst, memory mem)
5031 %{
5032   match(Set dst (LoadB mem));
5033 
5034   ins_cost(125);
5035   format %{ "movsbl  $dst, $mem\t# byte" %}
5036 
5037   ins_encode %{
5038     __ movsbl($dst$$Register, $mem$$Address);
5039   %}
5040 
5041   ins_pipe(ialu_reg_mem);
5042 %}
5043 
5044 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
5045 instruct loadB2L(rRegL dst, memory mem)
5046 %{
5047   match(Set dst (ConvI2L (LoadB mem)));
5048 
5049   ins_cost(125);
5050   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5051 
5052   ins_encode %{
5053     __ movsbq($dst$$Register, $mem$$Address);
5054   %}
5055 
5056   ins_pipe(ialu_reg_mem);
5057 %}
5058 
5059 // Load Unsigned Byte (8 bit UNsigned): a LoadUB node becomes one zero-extending movzbl
5060 instruct loadUB(rRegI dst, memory mem)
5061 %{
5062   match(Set dst (LoadUB mem));
5063 
5064   ins_cost(125);
5065   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5066 
5067   ins_encode %{
5068     __ movzbl($dst$$Register, $mem$$Address);
5069   %}
5070 
5071   ins_pipe(ialu_reg_mem);
5072 %}
5073 
5074 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq
5075 instruct loadUB2L(rRegL dst, memory mem)
5076 %{
5077   match(Set dst (ConvI2L (LoadUB mem)));
5078 
5079   ins_cost(125);
5080   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5081 
5082   ins_encode %{
5083     __ movzbq($dst$$Register, $mem$$Address);
5084   %}
5085 
5086   ins_pipe(ialu_reg_mem);
5087 %}
5088 
5089 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register (load, then mask in place)
5090 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5091   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5092   effect(KILL cr); // the andl emitted below modifies the flags
5093 
5094   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5095             "andl    $dst, $mask" %}
5096   ins_encode %{
5097     Register Rdst = $dst$$Register;
5098     __ movzbq(Rdst, $mem$$Address); // zero-extending byte load
5099     __ andl(Rdst, $mask$$constant); // apply the mask to the loaded value
5100   %}
5101   ins_pipe(ialu_reg_mem);
5102 %}
5103 
5104 // Load Short (16 bit signed): a LoadS node becomes one sign-extending movswl from memory
5105 instruct loadS(rRegI dst, memory mem)
5106 %{
5107   match(Set dst (LoadS mem));
5108 
5109   ins_cost(125);
5110   format %{ "movswl $dst, $mem\t# short" %}
5111 
5112   ins_encode %{
5113     __ movswl($dst$$Register, $mem$$Address);
5114   %}
5115 
5116   ins_pipe(ialu_reg_mem);
5117 %}
5118 
5119 // Load Short (16 bit signed) to Byte (8 bit signed): the shift-left/shift-right pair is replaced by one sign-extending byte load
5120 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5121   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5122 
5123   ins_cost(125);
5124   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5125   ins_encode %{
5126     __ movsbl($dst$$Register, $mem$$Address); // only the low byte at the address is needed
5127   %}
5128   ins_pipe(ialu_reg_mem);
5129 %}
5130 
5131 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
5132 instruct loadS2L(rRegL dst, memory mem)
5133 %{
5134   match(Set dst (ConvI2L (LoadS mem)));
5135 
5136   ins_cost(125);
5137   format %{ "movswq $dst, $mem\t# short -> long" %}
5138 
5139   ins_encode %{
5140     __ movswq($dst$$Register, $mem$$Address);
5141   %}
5142 
5143   ins_pipe(ialu_reg_mem);
5144 %}
5145 
5146 // Load Unsigned Short/Char (16 bit UNsigned): a LoadUS node becomes one zero-extending movzwl
5147 instruct loadUS(rRegI dst, memory mem)
5148 %{
5149   match(Set dst (LoadUS mem));
5150 
5151   ins_cost(125);
5152   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5153 
5154   ins_encode %{
5155     __ movzwl($dst$$Register, $mem$$Address);
5156   %}
5157 
5158   ins_pipe(ialu_reg_mem);
5159 %}
5160 
5161 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the shift pair is replaced by one sign-extending byte load
5162 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5163   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5164 
5165   ins_cost(125);
5166   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5167   ins_encode %{
5168     __ movsbl($dst$$Register, $mem$$Address); // only the low byte at the address is needed
5169   %}
5170   ins_pipe(ialu_reg_mem);
5171 %}
5172 
5173 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
5174 instruct loadUS2L(rRegL dst, memory mem)
5175 %{
5176   match(Set dst (ConvI2L (LoadUS mem)));
5177 
5178   ins_cost(125);
5179   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5180 
5181   ins_encode %{
5182     __ movzwq($dst$$Register, $mem$$Address);
5183   %}
5184 
5185   ins_pipe(ialu_reg_mem);
5186 %}
5187 
5188 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: only the low byte survives, so a byte load suffices
5189 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5190   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5191 
5192   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5193   ins_encode %{
5194     __ movzbq($dst$$Register, $mem$$Address); // the mask is implied by the zero-extending byte load
5195   %}
5196   ins_pipe(ialu_reg_mem);
5197 %}
5198 
5199 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register (load, then mask in place)
5200 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
5201   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5202   effect(KILL cr); // the andl emitted below modifies the flags
5203 
5204   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5205             "andl    $dst, $mask" %}
5206   ins_encode %{
5207     Register Rdst = $dst$$Register;
5208     __ movzwq(Rdst, $mem$$Address); // zero-extending 16-bit load
5209     __ andl(Rdst, $mask$$constant); // apply the mask to the loaded value
5210   %}
5211   ins_pipe(ialu_reg_mem);
5212 %}
5213 
5214 // Load Integer: a LoadI node becomes one 32-bit movl from memory
5215 instruct loadI(rRegI dst, memory mem)
5216 %{
5217   match(Set dst (LoadI mem));
5218 
5219   ins_cost(125);
5220   format %{ "movl    $dst, $mem\t# int" %}
5221 
5222   ins_encode %{
5223     __ movl($dst$$Register, $mem$$Address);
5224   %}
5225 
5226   ins_pipe(ialu_reg_mem);
5227 %}
5228 
5229 // Load Integer (32 bit signed) to Byte (8 bit signed): the shift pair is replaced by one sign-extending byte load
5230 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5231   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5232 
5233   ins_cost(125);
5234   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5235   ins_encode %{
5236     __ movsbl($dst$$Register, $mem$$Address); // only the low byte at the address is needed
5237   %}
5238   ins_pipe(ialu_reg_mem);
5239 %}
5240 
5241 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): the AndI with 255 is replaced by one zero-extending byte load
5242 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5243   match(Set dst (AndI (LoadI mem) mask));
5244 
5245   ins_cost(125);
5246   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5247   ins_encode %{
5248     __ movzbl($dst$$Register, $mem$$Address); // the mask operand itself is never emitted
5249   %}
5250   ins_pipe(ialu_reg_mem);
5251 %}
5252 
5253 // Load Integer (32 bit signed) to Short (16 bit signed): the shift pair is replaced by one sign-extending 16-bit load
5254 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5255   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5256 
5257   ins_cost(125);
5258   format %{ "movswl  $dst, $mem\t# int -> short" %}
5259   ins_encode %{
5260     __ movswl($dst$$Register, $mem$$Address); // only the low 16 bits at the address are needed
5261   %}
5262   ins_pipe(ialu_reg_mem);
5263 %}
5264 
5265 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): the AndI with 65535 is replaced by one zero-extending 16-bit load
5266 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5267   match(Set dst (AndI (LoadI mem) mask));
5268 
5269   ins_cost(125);
5270   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5271   ins_encode %{
5272     __ movzwl($dst$$Register, $mem$$Address); // the mask operand itself is never emitted
5273   %}
5274   ins_pipe(ialu_reg_mem);
5275 %}
5276 
5277 // Load Integer into Long Register: folds ConvI2L(LoadI) into one sign-extending movslq
5278 instruct loadI2L(rRegL dst, memory mem)
5279 %{
5280   match(Set dst (ConvI2L (LoadI mem)));
5281 
5282   ins_cost(125);
5283   format %{ "movslq  $dst, $mem\t# int -> long" %}
5284 
5285   ins_encode %{
5286     __ movslq($dst$$Register, $mem$$Address);
5287   %}
5288 
5289   ins_pipe(ialu_reg_mem);
5290 %}
5291 
5292 // Load Integer with mask 0xFF into Long Register: only the low byte survives, so a zero-extending byte load suffices
5293 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5294   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5295 
5296   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5297   ins_encode %{
5298     __ movzbq($dst$$Register, $mem$$Address); // the mask is implied by the zero-extending byte load
5299   %}
5300   ins_pipe(ialu_reg_mem);
5301 %}
5302 
5303 // Load Integer with mask 0xFFFF into Long Register: only the low 16 bits survive, so a zero-extending 16-bit load suffices
5304 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5305   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5306 
5307   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5308   ins_encode %{
5309     __ movzwq($dst$$Register, $mem$$Address); // the mask is implied by the zero-extending 16-bit load
5310   %}
5311   ins_pipe(ialu_reg_mem);
5312 %}
5313 
5314 // Load Integer with a 32-bit mask into Long Register.
     // ConvI2L sign-extends the masked int, whereas movl and andl are 32-bit
     // operations that leave bits 63..32 of the destination zero.  The two only
     // agree when the masked value cannot be negative, i.e. when the mask has
     // its sign bit clear.  For a mask with the sign bit set an explicit sign
     // extension is emitted so a negative masked value widens correctly.
     // The format string still lists only the movl/andl pair.
5315 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5316   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5317   effect(KILL cr); // the andl emitted below modifies the flags
5318 
5319   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
5320             "andl    $dst, $mask" %}
5321   ins_encode %{
5322     Register Rdst = $dst$$Register;
5323     __ movl(Rdst, $mem$$Address);
5324     __ andl(Rdst, $mask$$constant);
         if ($mask$$constant < 0) {
           // Sign bit of the mask is set, so the masked int may be negative:
           // widen it with sign extension, as the ConvI2L in the match rule requires.
           __ movslq(Rdst, Rdst);
         }
5325   %}
5326   ins_pipe(ialu_reg_mem);
5327 %}
5328 
5329 // Load Unsigned Integer into Long Register: a LoadUI2L node becomes one movl (a 32-bit load zero-extends into the 64-bit register)
5330 instruct loadUI2L(rRegL dst, memory mem)
5331 %{
5332   match(Set dst (LoadUI2L mem));
5333 
5334   ins_cost(125);
5335   format %{ "movl    $dst, $mem\t# uint -> long" %}
5336 
5337   ins_encode %{
5338     __ movl($dst$$Register, $mem$$Address);
5339   %}
5340 
5341   ins_pipe(ialu_reg_mem);
5342 %}
5343 
5344 // Load Long: a LoadL node becomes one 64-bit movq from memory
5345 instruct loadL(rRegL dst, memory mem)
5346 %{
5347   match(Set dst (LoadL mem));
5348 
5349   ins_cost(125);
5350   format %{ "movq    $dst, $mem\t# long" %}
5351 
5352   ins_encode %{
5353     __ movq($dst$$Register, $mem$$Address);
5354   %}
5355 
5356   ins_pipe(ialu_reg_mem); // XXX (reuses the integer reg-mem class)
5357 %}
5358 
5359 // Load Range: a LoadRange node becomes a 32-bit movl, encoded by hand via opcode and enc_classes
5360 instruct loadRange(rRegI dst, memory mem)
5361 %{
5362   match(Set dst (LoadRange mem));
5363 
5364   ins_cost(125); // XXX
5365   format %{ "movl    $dst, $mem\t# range" %}
5366   opcode(0x8B); // MOV r, r/m
5367   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem)); // enc_classes are defined elsewhere in the file
5368   ins_pipe(ialu_reg_mem);
5369 %}
5370 
5371 // Load Pointer: a LoadP node becomes a 64-bit movq, encoded by hand via opcode and enc_classes
5372 instruct loadP(rRegP dst, memory mem)
5373 %{
5374   match(Set dst (LoadP mem));
5375 
5376   ins_cost(125); // XXX
5377   format %{ "movq    $dst, $mem\t# ptr" %}
5378   opcode(0x8B); // MOV r, r/m
5379   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // presumably the _wide variant sets REX.W -- confirm in the encode section
5380   ins_pipe(ialu_reg_mem); // XXX
5381 %}
5382 
5383 // Load Compressed Pointer: a LoadN node becomes one 32-bit movl into a narrow-oop register
5384 instruct loadN(rRegN dst, memory mem)
5385 %{
5386    match(Set dst (LoadN mem));
5387 
5388    ins_cost(125); // XXX
5389    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5390    ins_encode %{
5391      __ movl($dst$$Register, $mem$$Address);
5392    %}
5393    ins_pipe(ialu_reg_mem); // XXX
5394 %}
5395 
5396 
5397 // Load Klass Pointer: a LoadKlass node becomes a 64-bit movq, encoded exactly like loadP
5398 instruct loadKlass(rRegP dst, memory mem)
5399 %{
5400   match(Set dst (LoadKlass mem));
5401 
5402   ins_cost(125); // XXX
5403   format %{ "movq    $dst, $mem\t# class" %}
5404   opcode(0x8B); // MOV r, r/m
5405   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // presumably the _wide variant sets REX.W -- confirm in the encode section
5406   ins_pipe(ialu_reg_mem); // XXX
5407 %}
5408 
5409 // Load narrow Klass Pointer: a LoadNKlass node becomes one 32-bit movl into a narrow register
5410 instruct loadNKlass(rRegN dst, memory mem)
5411 %{
5412   match(Set dst (LoadNKlass mem));
5413 
5414   ins_cost(125); // XXX
5415   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5416   ins_encode %{
5417     __ movl($dst$$Register, $mem$$Address);
5418   %}
5419   ins_pipe(ialu_reg_mem); // XXX
5420 %}
5421 
5422 // Load Float: a LoadF node is emitted through the movflt assembler helper into an XMM register
5423 instruct loadF(regF dst, memory mem)
5424 %{
5425   match(Set dst (LoadF mem));
5426 
5427   ins_cost(145); // XXX
5428   format %{ "movss   $dst, $mem\t# float" %}
5429   ins_encode %{
5430     __ movflt($dst$$XMMRegister, $mem$$Address);
5431   %}
5432   ins_pipe(pipe_slow); // XXX
5433 %}
5434 
5435 // Load Double, variant selected when UseXmmLoadAndClearUpper is off (format shows movlpd)
5436 instruct loadD_partial(regD dst, memory mem)
5437 %{
5438   predicate(!UseXmmLoadAndClearUpper);
5439   match(Set dst (LoadD mem));
5440 
5441   ins_cost(145); // XXX
5442   format %{ "movlpd  $dst, $mem\t# double" %}
5443   ins_encode %{
5444     // Same helper call as loadD; presumably movdbl picks the actual instruction from the flag -- confirm in the assembler
5445     __ movdbl($dst$$XMMRegister, $mem$$Address);
5446   %}
5447   ins_pipe(pipe_slow); // XXX
5448 %}
5448 
5449 instruct loadD(regD dst, memory mem)
5450 %{
5451   predicate(UseXmmLoadAndClearUpper); // counterpart of loadD_partial, which carries the negated predicate
5452   match(Set dst (LoadD mem));
5453 
5454   ins_cost(145); // XXX
5455   format %{ "movsd   $dst, $mem\t# double" %}
5456   ins_encode %{
5457     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper call as loadD_partial
5458   %}
5459   ins_pipe(pipe_slow); // XXX
5460 %}
5461 
5462 // Load Aligned Packed Byte to XMM register: a Load8B node becomes one movq into an XMM register
5463 instruct loadA8B(regD dst, memory mem) %{
5464   match(Set dst (Load8B mem));
5465   ins_cost(125);
5466   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
5467   ins_encode %{
5468     __ movq($dst$$XMMRegister, $mem$$Address);
5469   %}
5470   ins_pipe( pipe_slow );
5471 %}
5472 
5473 // Load Aligned Packed Short to XMM register: a Load4S node becomes one movq into an XMM register
5474 instruct loadA4S(regD dst, memory mem) %{
5475   match(Set dst (Load4S mem));
5476   ins_cost(125);
5477   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
5478   ins_encode %{
5479     __ movq($dst$$XMMRegister, $mem$$Address);
5480   %}
5481   ins_pipe( pipe_slow );
5482 %}
5483 
5484 // Load Aligned Packed Char to XMM register: a Load4C node becomes one movq into an XMM register
5485 instruct loadA4C(regD dst, memory mem) %{
5486   match(Set dst (Load4C mem));
5487   ins_cost(125);
5488   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
5489   ins_encode %{
5490     __ movq($dst$$XMMRegister, $mem$$Address);
5491   %}
5492   ins_pipe( pipe_slow );
5493 %}
5494 
5495 // Load Aligned Packed Integer to XMM register: a Load2I node becomes one movq into an XMM register
5496 instruct load2IU(regD dst, memory mem) %{
5497   match(Set dst (Load2I mem));
5498   ins_cost(125);
5499   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
5500   ins_encode %{
5501     __ movq($dst$$XMMRegister, $mem$$Address);
5502   %}
5503   ins_pipe( pipe_slow );
5504 %}
5505 
5506 // Load Aligned Packed Single to XMM: a Load2F node becomes one movq into an XMM register
5507 instruct loadA2F(regD dst, memory mem) %{
5508   match(Set dst (Load2F mem));
5509   ins_cost(125);
5510   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
5511   ins_encode %{
5512     __ movq($dst$$XMMRegister, $mem$$Address);
5513   %}
5514   ins_pipe( pipe_slow );
5515 %}
5516 
5517 // Load Effective Address: materializes an indOffset8 address expression into a pointer register with leaq
5518 instruct leaP8(rRegP dst, indOffset8 mem)
5519 %{
5520   match(Set dst mem); // the address operand itself is the matched subtree
5521 
5522   ins_cost(110); // XXX
5523   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5524   opcode(0x8D); // LEA
5525   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // presumably the _wide variant sets REX.W -- confirm in the encode section
5526   ins_pipe(ialu_reg_reg_fat);
5527 %}
5528 
5529 instruct leaP32(rRegP dst, indOffset32 mem)
5530 %{
5531   match(Set dst mem); // leaq of an indOffset32 address operand into a pointer register
5532 
5533   ins_cost(110);
5534   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5535   opcode(0x8D); // LEA
5536   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5537   ins_pipe(ialu_reg_reg_fat);
5538 %}
5539 
5540 // instruct leaPIdx(rRegP dst, indIndex mem)
5541 // %{
5542 //   match(Set dst mem);
5543 
5544 //   ins_cost(110);
5545 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5546 //   opcode(0x8D);
5547 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5548 //   ins_pipe(ialu_reg_reg_fat);
5549 // %}
5550 
5551 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5552 %{
5553   match(Set dst mem); // leaq of an indIndexOffset address operand into a pointer register
5554 
5555   ins_cost(110);
5556   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5557   opcode(0x8D); // LEA
5558   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5559   ins_pipe(ialu_reg_reg_fat);
5560 %}
5561 
5562 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5563 %{
5564   match(Set dst mem); // leaq of an indIndexScale address operand into a pointer register
5565 
5566   ins_cost(110);
5567   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5568   opcode(0x8D); // LEA
5569   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5570   ins_pipe(ialu_reg_reg_fat);
5571 %}
5572 
5573 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5574 %{
5575   match(Set dst mem); // leaq of an indIndexScaleOffset address operand into a pointer register
5576 
5577   ins_cost(110);
5578   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5579   opcode(0x8D); // LEA
5580   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5581   ins_pipe(ialu_reg_reg_fat);
5582 %}
5583 
5584 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5585 %{
5586   match(Set dst mem); // leaq of an indPosIndexScaleOffset address operand into a pointer register
5587 
5588   ins_cost(110);
5589   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5590   opcode(0x8D); // LEA
5591   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5592   ins_pipe(ialu_reg_reg_fat);
5593 %}
5594 
5595 // Load Effective Address which uses Narrow (32-bits) oop; only enabled with compressed oops and a non-zero narrow-oop shift
5596 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5597 %{
5598   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5599   match(Set dst mem);
5600 
5601   ins_cost(110);
5602   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5603   opcode(0x8D); // LEA
5604   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5605   ins_pipe(ialu_reg_reg_fat);
5606 %}
5607 
5608 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5609 %{
5610   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaP8; enabled only when the narrow-oop shift is zero
5611   match(Set dst mem);
5612 
5613   ins_cost(110); // XXX
5614   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5615   opcode(0x8D); // LEA
5616   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5617   ins_pipe(ialu_reg_reg_fat);
5618 %}
5619 
5620 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5621 %{
5622   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaP32; enabled only when the narrow-oop shift is zero
5623   match(Set dst mem);
5624 
5625   ins_cost(110);
5626   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5627   opcode(0x8D); // LEA
5628   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5629   ins_pipe(ialu_reg_reg_fat);
5630 %}
5631 
5632 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5633 %{
5634   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaPIdxOff; enabled only when the narrow-oop shift is zero
5635   match(Set dst mem);
5636 
5637   ins_cost(110);
5638   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5639   opcode(0x8D); // LEA
5640   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5641   ins_pipe(ialu_reg_reg_fat);
5642 %}
5643 
5644 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5645 %{
5646   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaPIdxScale; enabled only when the narrow-oop shift is zero
5647   match(Set dst mem);
5648 
5649   ins_cost(110);
5650   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5651   opcode(0x8D); // LEA
5652   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5653   ins_pipe(ialu_reg_reg_fat);
5654 %}
5655 
5656 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5657 %{
5658   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaPIdxScaleOff; enabled only when the narrow-oop shift is zero
5659   match(Set dst mem);
5660 
5661   ins_cost(110);
5662   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5663   opcode(0x8D); // LEA
5664   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5665   ins_pipe(ialu_reg_reg_fat);
5666 %}
5667 
5668 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5669 %{
5670   predicate(Universe::narrow_oop_shift() == 0); // narrow-operand form of leaPPosIdxScaleOff; enabled only when the narrow-oop shift is zero
5671   match(Set dst mem);
5672 
5673   ins_cost(110);
5674   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5675   opcode(0x8D); // LEA
5676   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5677   ins_pipe(ialu_reg_reg_fat);
5678 %}
5679 
5680 instruct loadConI(rRegI dst, immI src)
5681 %{
5682   match(Set dst src); // general int constant load; no explicit ins_cost is given
5683 
5684   format %{ "movl    $dst, $src\t# int" %}
5685   ins_encode(load_immI(dst, src)); // enc_class defined elsewhere in the file
5686   ins_pipe(ialu_reg_fat); // XXX
5687 %}
5688 
5689 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5690 %{
5691   match(Set dst src); // int zero is produced by xor-ing the register with itself
5692   effect(KILL cr);    // xor modifies the flags
5693 
5694   ins_cost(50);
5695   format %{ "xorl    $dst, $dst\t# int" %}
5696   opcode(0x33); /* + rd */
5697   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5698   ins_pipe(ialu_reg);
5699 %}
5700 
5701 instruct loadConL(rRegL dst, immL src)
5702 %{
5703   match(Set dst src); // general long constant load
5704 
5705   ins_cost(150); // costlier than the loadConL0 / loadConUL32 / loadConL32 special cases (50 / 60 / 70)
5706   format %{ "movq    $dst, $src\t# long" %}
5707   ins_encode(load_immL(dst, src)); // enc_class defined elsewhere in the file
5708   ins_pipe(ialu_reg);
5709 %}
5710 
5711 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5712 %{
5713   match(Set dst src); // long zero via xor; a 32-bit register write on x86-64 also zeroes the upper half
5714   effect(KILL cr);    // xor modifies the flags
5715 
5716   ins_cost(50);
5717   format %{ "xorl    $dst, $dst\t# long" %}
5718   opcode(0x33); /* + rd */
5719   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // same non-wide REX enc_class as loadConI0
5720   ins_pipe(ialu_reg); // XXX
5721 %}
5722 
5723 instruct loadConUL32(rRegL dst, immUL32 src)
5724 %{
5725   match(Set dst src); // long constant that fits an unsigned 32-bit immediate (per the format text)
5726 
5727   ins_cost(60);
5728   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5729   ins_encode(load_immUL32(dst, src)); // enc_class defined elsewhere in the file
5730   ins_pipe(ialu_reg);
5731 %}
5732 
5733 instruct loadConL32(rRegL dst, immL32 src)
5734 %{
       // Load a long constant that matches the immL32 operand using the
       // load_immL32 encoding class; the format shows a movq with a 32-bit
       // immediate.
5735   match(Set dst src);
5736 
5737   ins_cost(70);
5738   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5739   ins_encode(load_immL32(dst, src));
5740   ins_pipe(ialu_reg);
5741 %}
5742 
5743 instruct loadConP(rRegP dst, immP con) %{
       // Load a general pointer constant (immP) through the load_immP encoding
       // class. No explicit ins_cost is declared here.
5744   match(Set dst con);
5745 
5746   format %{ "movq    $dst, $con\t# ptr" %}
5747   ins_encode(load_immP(dst, con));
5748   ins_pipe(ialu_reg_fat); // XXX
5749 %}
5750 
5751 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5752 %{
       // Load the NULL pointer constant by xor-ing dst with itself (opcode 0x33,
       // non-wide REX class, shown as xorl in the format). The xor writes the
       // condition flags, hence KILL cr.
5753   match(Set dst src);
5754   effect(KILL cr);
5755 
5756   ins_cost(50);
5757   format %{ "xorl    $dst, $dst\t# ptr" %}
5758   opcode(0x33); /* + rd */
5759   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5760   ins_pipe(ialu_reg);
5761 %}
5762 
5763 instruct loadConP_poll(rRegP dst, immP_poll src) %{
       // Load the polling-page address. The constant operand itself is not
       // used by the encoding: the address comes from os::get_polling_page(),
       // wrapped in an AddressLiteral carrying relocInfo::poll_type, and is
       // handed to the macro assembler's lea().
5764   match(Set dst src);
5765   format %{ "movq    $dst, $src\t!ptr" %}
5766   ins_encode %{
5767     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
5768     __ lea($dst$$Register, polling_page);
5769   %}
5770   ins_pipe(ialu_reg_fat);
5771 %}
5772 
5773 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5774 %{
       // Load a pointer constant that matches the immP31 operand ("positive
       // 32-bit" per the format comment) through the load_immP31 encoding class.
       // NOTE(review): cr is declared killed although the format shows a plain
       // movl; whether load_immP31 really touches the flags is not visible
       // from this rule - confirm against the enc_class before relying on it.
5775   match(Set dst src);
5776   effect(KILL cr);
5777 
5778   ins_cost(60);
5779   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5780   ins_encode(load_immP31(dst, src));
5781   ins_pipe(ialu_reg);
5782 %}
5783 
5784 instruct loadConF(regF dst, immF con) %{
       // Load a float constant into an XMM register from the constant table:
       // $constantaddress($con) yields the table entry, movflt performs the load.
5785   match(Set dst con);
5786   ins_cost(125);
5787   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5788   ins_encode %{
5789     __ movflt($dst$$XMMRegister, $constantaddress($con));
5790   %}
5791   ins_pipe(pipe_slow);
5792 %}
5793 
5794 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL pointer by xor-ing dst with itself. The xor
       // writes the condition flags, hence KILL cr.
5795   match(Set dst src);
5796   effect(KILL cr);
       // The format names $dst twice to mirror the emitted xorq(dst, dst); the
       // immediate operand $src never appears in the instruction.
5797   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5798   ins_encode %{
5799     __ xorq($dst$$Register, $dst$$Register);
5800   %}
5801   ins_pipe(ialu_reg);
5802 %}
5803 
5804 instruct loadConN(rRegN dst, immN src) %{
       // Load a compressed-oop constant into a narrow register via
       // set_narrow_oop. A NULL constant is treated as a matcher error here
       // (ShouldNotReachHere); the immN0 operand has its own rule, loadConN0.
5805   match(Set dst src);
5806 
5807   ins_cost(125);
5808   format %{ "movl    $dst, $src\t# compressed ptr" %}
5809   ins_encode %{
5810     address con = (address)$src$$constant;
5811     if (con == NULL) {
5812       ShouldNotReachHere();
5813     } else {
5814       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5815     }
5816   %}
5817   ins_pipe(ialu_reg_fat); // XXX
5818 %}
5819 
5820 instruct loadConF0(regF dst, immF0 src)
5821 %{
       // Load float 0.0 by xor-ing the XMM register with itself (xorps), which
       // avoids the constant-table load used by loadConF (cost 100 vs. 125).
5822   match(Set dst src);
5823   ins_cost(100);
5824 
5825   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5826   ins_encode %{
5827     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5828   %}
5829   ins_pipe(pipe_slow);
5830 %}
5831 
5832 // Use the same format since predicate() can not be used here.
5833 instruct loadConD(regD dst, immD con) %{
       // Load a double constant into an XMM register from the constant table:
       // $constantaddress($con) yields the table entry, movdbl performs the load.
5834   match(Set dst con);
5835   ins_cost(125);
5836   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5837   ins_encode %{
5838     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5839   %}
5840   ins_pipe(pipe_slow);
5841 %}
5842 
5843 instruct loadConD0(regD dst, immD0 src)
5844 %{
       // Load double 0.0 by xor-ing the XMM register with itself (xorpd), which
       // avoids the constant-table load used by loadConD (cost 100 vs. 125).
5845   match(Set dst src);
5846   ins_cost(100);
5847 
5848   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5849   ins_encode %{
5850     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5851   %}
5852   ins_pipe(pipe_slow);
5853 %}
5854 
5855 instruct loadSSI(rRegI dst, stackSlotI src)
5856 %{
       // Move an int from a stack slot into a register (opcode 0x8B with the
       // non-wide REX class, shown as movl).
5857   match(Set dst src);
5858 
5859   ins_cost(125);
5860   format %{ "movl    $dst, $src\t# int stk" %}
5861   opcode(0x8B);
5862   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5863   ins_pipe(ialu_reg_mem);
5864 %}
5865 
5866 instruct loadSSL(rRegL dst, stackSlotL src)
5867 %{
       // Move a long from a stack slot into a register (opcode 0x8B with the
       // wide REX class, shown as movq).
5868   match(Set dst src);
5869 
5870   ins_cost(125);
5871   format %{ "movq    $dst, $src\t# long stk" %}
5872   opcode(0x8B);
5873   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5874   ins_pipe(ialu_reg_mem);
5875 %}
5876 
5877 instruct loadSSP(rRegP dst, stackSlotP src)
5878 %{
       // Move a pointer from a stack slot into a register; identical encoding
       // to loadSSL (opcode 0x8B, wide REX class, shown as movq).
5879   match(Set dst src);
5880 
5881   ins_cost(125);
5882   format %{ "movq    $dst, $src\t# ptr stk" %}
5883   opcode(0x8B);
5884   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5885   ins_pipe(ialu_reg_mem);
5886 %}
5887 
5888 instruct loadSSF(regF dst, stackSlotF src)
5889 %{
       // Move a float from a stack slot into an XMM register. The slot is
       // addressed as rsp + the operand's displacement.
5890   match(Set dst src);
5891 
5892   ins_cost(125);
5893   format %{ "movss   $dst, $src\t# float stk" %}
5894   ins_encode %{
5895     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5896   %}
5897   ins_pipe(pipe_slow); // XXX
5898 %}
5899 
5900 // Use the same format since predicate() can not be used here.
5901 instruct loadSSD(regD dst, stackSlotD src)
5902 %{
       // Move a double from a stack slot into an XMM register. The slot is
       // addressed as rsp + the operand's displacement.
5903   match(Set dst src);
5904 
5905   ins_cost(125);
5906   format %{ "movsd   $dst, $src\t# double stk" %}
5907   ins_encode  %{
5908     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5909   %}
5910   ins_pipe(pipe_slow); // XXX
5911 %}
5912 
5913 // Prefetch instructions.
5914 // Must be safe to execute with invalid address (cannot fault).
5915 
5916 instruct prefetchr( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 3; emits the
       // assembler's prefetchr on the memory operand.
5917   predicate(ReadPrefetchInstr==3);
5918   match(PrefetchRead mem);
5919   ins_cost(125);
5920 
5921   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
5922   ins_encode %{
5923     __ prefetchr($mem$$Address);
5924   %}
5925   ins_pipe(ialu_mem);
5926 %}
5927 
5928 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 0; emits
       // prefetchnta on the memory operand.
5929   predicate(ReadPrefetchInstr==0);
5930   match(PrefetchRead mem);
5931   ins_cost(125);
5932 
5933   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
5934   ins_encode %{
5935     __ prefetchnta($mem$$Address);
5936   %}
5937   ins_pipe(ialu_mem);
5938 %}
5939 
5940 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 1; emits
       // prefetcht0 on the memory operand.
5941   predicate(ReadPrefetchInstr==1);
5942   match(PrefetchRead mem);
5943   ins_cost(125);
5944 
5945   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
5946   ins_encode %{
5947     __ prefetcht0($mem$$Address);
5948   %}
5949   ins_pipe(ialu_mem);
5950 %}
5951 
5952 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant selected when ReadPrefetchInstr == 2; emits
       // prefetcht2 on the memory operand.
5953   predicate(ReadPrefetchInstr==2);
5954   match(PrefetchRead mem);
5955   ins_cost(125);
5956 
5957   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
5958   ins_encode %{
5959     __ prefetcht2($mem$$Address);
5960   %}
5961   ins_pipe(ialu_mem);
5962 %}
5963 
5964 instruct prefetchwNTA( memory mem ) %{
       // The only PrefetchWrite rule in this group: it has no predicate, so
       // PrefetchWrite is always emitted as prefetchnta.
5965   match(PrefetchWrite mem);
5966   ins_cost(125);
5967 
5968   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
5969   ins_encode %{
5970     __ prefetchnta($mem$$Address);
5971   %}
5972   ins_pipe(ialu_mem);
5973 %}
5974 
5975 // Prefetch instructions for allocation.
5976 
5977 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 3;
       // emits prefetchw on the memory operand.
5978   predicate(AllocatePrefetchInstr==3);
5979   match(PrefetchAllocation mem);
5980   ins_cost(125);
5981 
5982   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5983   ins_encode %{
5984     __ prefetchw($mem$$Address);
5985   %}
5986   ins_pipe(ialu_mem);
5987 %}
5988 
5989 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 0;
       // emits prefetchnta on the memory operand.
5990   predicate(AllocatePrefetchInstr==0);
5991   match(PrefetchAllocation mem);
5992   ins_cost(125);
5993 
5994   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5995   ins_encode %{
5996     __ prefetchnta($mem$$Address);
5997   %}
5998   ins_pipe(ialu_mem);
5999 %}
6000 
6001 instruct prefetchAllocT0( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 1;
       // emits prefetcht0 on the memory operand.
6002   predicate(AllocatePrefetchInstr==1);
6003   match(PrefetchAllocation mem);
6004   ins_cost(125);
6005 
6006   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
6007   ins_encode %{
6008     __ prefetcht0($mem$$Address);
6009   %}
6010   ins_pipe(ialu_mem);
6011 %}
6012 
6013 instruct prefetchAllocT2( memory mem ) %{
       // PrefetchAllocation variant selected when AllocatePrefetchInstr == 2;
       // emits prefetcht2 on the memory operand.
6014   predicate(AllocatePrefetchInstr==2);
6015   match(PrefetchAllocation mem);
6016   ins_cost(125);
6017 
6018   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
6019   ins_encode %{
6020     __ prefetcht2($mem$$Address);
6021   %}
6022   ins_pipe(ialu_mem);
6023 %}
6024 
6025 //----------Store Instructions-------------------------------------------------
6026 
6027 // Store Byte
6028 instruct storeB(memory mem, rRegI src)
6029 %{
       // Store the low byte of an int register to memory (opcode 0x88, movb).
       // Uses the byte-register REX class REX_breg_mem rather than REX_reg_mem.
6030   match(Set mem (StoreB mem src));
6031 
6032   ins_cost(125); // XXX
6033   format %{ "movb    $mem, $src\t# byte" %}
6034   opcode(0x88);
6035   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6036   ins_pipe(ialu_mem_reg);
6037 %}
6038 
6039 // Store Char/Short
6040 instruct storeC(memory mem, rRegI src)
6041 %{
       // Store a char/short from an int register to memory. Same opcode (0x89)
       // as the int store, preceded by SizePrefix to get the 16-bit movw form.
6042   match(Set mem (StoreC mem src));
6043 
6044   ins_cost(125); // XXX
6045   format %{ "movw    $mem, $src\t# char/short" %}
6046   opcode(0x89);
6047   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6048   ins_pipe(ialu_mem_reg);
6049 %}
6050 
6051 // Store Integer
6052 instruct storeI(memory mem, rRegI src)
6053 %{
       // Store an int register to memory (opcode 0x89, non-wide REX class, movl).
6054   match(Set mem (StoreI mem src));
6055 
6056   ins_cost(125); // XXX
6057   format %{ "movl    $mem, $src\t# int" %}
6058   opcode(0x89);
6059   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6060   ins_pipe(ialu_mem_reg);
6061 %}
6062 
6063 // Store Long
6064 instruct storeL(memory mem, rRegL src)
6065 %{
       // Store a long register to memory (opcode 0x89, wide REX class, movq).
6066   match(Set mem (StoreL mem src));
6067 
6068   ins_cost(125); // XXX
6069   format %{ "movq    $mem, $src\t# long" %}
6070   opcode(0x89);
6071   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6072   ins_pipe(ialu_mem_reg); // XXX
6073 %}
6074 
6075 // Store Pointer
6076 instruct storeP(memory mem, any_RegP src)
6077 %{
       // Store a pointer register to memory (opcode 0x89, wide REX class, movq).
       // The source operand class is any_RegP rather than rRegP.
6078   match(Set mem (StoreP mem src));
6079 
6080   ins_cost(125); // XXX
6081   format %{ "movq    $mem, $src\t# ptr" %}
6082   opcode(0x89);
6083   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6084   ins_pipe(ialu_mem_reg);
6085 %}
6086 
6087 instruct storeImmP0(memory mem, immP0 zero)
6088 %{
       // Store a NULL pointer by storing r12 instead of an immediate. Only
       // valid under the predicate: compressed oops enabled with a NULL
       // narrow_oop_base, the situation the format annotates as R12_heapbase==0.
6089   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6090   match(Set mem (StoreP mem zero));
6091 
6092   ins_cost(125); // XXX
6093   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6094   ins_encode %{
6095     __ movq($mem$$Address, r12);
6096   %}
6097   ins_pipe(ialu_mem_reg);
6098 %}
6099 
6100 // Store NULL Pointer, mark word, or other simple pointer constant.
6101 instruct storeImmP(memory mem, immP31 src)
6102 %{
       // Store a pointer constant matching immP31 as a 32-bit immediate
       // (opcode C7 /0 with the wide REX class, Con32 for the immediate).
       // Costs more (150) than the r12-based storeImmP0 (125).
6103   match(Set mem (StoreP mem src));
6104 
6105   ins_cost(150); // XXX
6106   format %{ "movq    $mem, $src\t# ptr" %}
6107   opcode(0xC7); /* C7 /0 */
6108   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6109   ins_pipe(ialu_mem_imm);
6110 %}
6111 
6112 // Store Compressed Pointer
6113 instruct storeN(memory mem, rRegN src)
6114 %{
       // Store a compressed pointer register to memory with a 32-bit movl.
6115   match(Set mem (StoreN mem src));
6116 
6117   ins_cost(125); // XXX
6118   format %{ "movl    $mem, $src\t# compressed ptr" %}
6119   ins_encode %{
6120     __ movl($mem$$Address, $src$$Register);
6121   %}
6122   ins_pipe(ialu_mem_reg);
6123 %}
6124 
6125 instruct storeImmN0(memory mem, immN0 zero)
6126 %{
       // Store a compressed NULL by storing the low 32 bits of r12. Only
       // selected when narrow_oop_base() is NULL (format: R12_heapbase==0).
       // Unlike the sibling *0 store rules, the predicate does not test
       // UseCompressedOops itself.
6127   predicate(Universe::narrow_oop_base() == NULL);
6128   match(Set mem (StoreN mem zero));
6129 
6130   ins_cost(125); // XXX
6131   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6132   ins_encode %{
6133     __ movl($mem$$Address, r12);
6134   %}
6135   ins_pipe(ialu_mem_reg);
6136 %}
6137 
6138 instruct storeImmN(memory mem, immN src)
6139 %{
       // Store a compressed-oop constant directly to memory. A NULL constant
       // is written as a plain 32-bit zero; any other constant goes through
       // set_narrow_oop on the memory address.
6140   match(Set mem (StoreN mem src));
6141 
6142   ins_cost(150); // XXX
6143   format %{ "movl    $mem, $src\t# compressed ptr" %}
6144   ins_encode %{
6145     address con = (address)$src$$constant;
6146     if (con == NULL) {
6147       __ movl($mem$$Address, (int32_t)0);
6148     } else {
6149       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6150     }
6151   %}
6152   ins_pipe(ialu_mem_imm);
6153 %}
6154 
6155 // Store Integer Immediate
6156 instruct storeImmI0(memory mem, immI0 zero)
6157 %{
       // Store int zero by storing the low 32 bits of r12. Only selected when
       // compressed oops are on and narrow_oop_base() is NULL
       // (format: R12_heapbase==0).
6158   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6159   match(Set mem (StoreI mem zero));
6160 
6161   ins_cost(125); // XXX
6162   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6163   ins_encode %{
6164     __ movl($mem$$Address, r12);
6165   %}
6166   ins_pipe(ialu_mem_reg);
6167 %}
6168 
6169 instruct storeImmI(memory mem, immI src)
6170 %{
       // Store an int constant as a 32-bit immediate (opcode C7 /0, non-wide
       // REX class, Con32 for the immediate).
6171   match(Set mem (StoreI mem src));
6172 
6173   ins_cost(150);
6174   format %{ "movl    $mem, $src\t# int" %}
6175   opcode(0xC7); /* C7 /0 */
6176   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6177   ins_pipe(ialu_mem_imm);
6178 %}
6179 
6180 // Store Long Immediate
6181 instruct storeImmL0(memory mem, immL0 zero)
6182 %{
       // Store long zero by storing all 64 bits of r12. Only selected when
       // compressed oops are on and narrow_oop_base() is NULL
       // (format: R12_heapbase==0).
6183   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6184   match(Set mem (StoreL mem zero));
6185 
6186   ins_cost(125); // XXX
6187   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6188   ins_encode %{
6189     __ movq($mem$$Address, r12);
6190   %}
6191   ins_pipe(ialu_mem_reg);
6192 %}
6193 
6194 instruct storeImmL(memory mem, immL32 src)
6195 %{
       // Store a long constant matching immL32 as a 32-bit immediate
       // (opcode C7 /0 with the wide REX class, Con32 for the immediate).
6196   match(Set mem (StoreL mem src));
6197 
6198   ins_cost(150);
6199   format %{ "movq    $mem, $src\t# long" %}
6200   opcode(0xC7); /* C7 /0 */
6201   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6202   ins_pipe(ialu_mem_imm);
6203 %}
6204 
6205 // Store Short/Char Immediate
6206 instruct storeImmC0(memory mem, immI0 zero)
6207 %{
       // Store short/char zero by storing the low 16 bits of r12 (movw). Only
       // selected when compressed oops are on and narrow_oop_base() is NULL
       // (format: R12_heapbase==0).
6208   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6209   match(Set mem (StoreC mem zero));
6210 
6211   ins_cost(125); // XXX
6212   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6213   ins_encode %{
6214     __ movw($mem$$Address, r12);
6215   %}
6216   ins_pipe(ialu_mem_reg);
6217 %}
6218 
6219 instruct storeImmI16(memory mem, immI16 src)
6220 %{
       // Store a 16-bit immediate for StoreC. Gated on the UseStoreImmI16 flag.
       // Same opcode as the 32-bit immediate store, preceded by SizePrefix and
       // followed by a 16-bit constant (Con16).
6221   predicate(UseStoreImmI16);
6222   match(Set mem (StoreC mem src));
6223 
6224   ins_cost(150);
6225   format %{ "movw    $mem, $src\t# short/char" %}
6226   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6227   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6228   ins_pipe(ialu_mem_imm);
6229 %}
6230 
6231 // Store Byte Immediate
6232 instruct storeImmB0(memory mem, immI0 zero)
6233 %{
       // Store byte zero by storing the low byte of r12 (movb). Only selected
       // when compressed oops are on and narrow_oop_base() is NULL
       // (format: R12_heapbase==0).
6234   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6235   match(Set mem (StoreB mem zero));
6236 
6237   ins_cost(125); // XXX
       // The format comment says "byte": this rule matches StoreB and emits movb.
6238   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6239   ins_encode %{
6240     __ movb($mem$$Address, r12);
6241   %}
6242   ins_pipe(ialu_mem_reg);
6243 %}
6244 
6245 instruct storeImmB(memory mem, immI8 src)
6246 %{
       // Store a byte constant matching immI8 as an immediate (opcode C6 /0).
       // The immediate is emitted through the Con8or32 encoding class.
6247   match(Set mem (StoreB mem src));
6248 
6249   ins_cost(150); // XXX
6250   format %{ "movb    $mem, $src\t# byte" %}
6251   opcode(0xC6); /* C6 /0 */
6252   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6253   ins_pipe(ialu_mem_imm);
6254 %}
6255 
6256 // Store Aligned Packed Byte XMM register to memory
6257 instruct storeA8B(memory mem, regD src) %{
       // Store8B: write the packed value held in an XMM register (regD class)
       // to memory with a single movq.
6258   match(Set mem (Store8B mem src));
6259   ins_cost(145);
6260   format %{ "MOVQ  $mem,$src\t! packed8B" %}
6261   ins_encode %{
6262     __ movq($mem$$Address, $src$$XMMRegister);
6263   %}
6264   ins_pipe( pipe_slow );
6265 %}
6266 
6267 // Store Aligned Packed Char/Short XMM register to memory
6268 instruct storeA4C(memory mem, regD src) %{
       // Store4C: write the packed value held in an XMM register (regD class)
       // to memory with a single movq.
6269   match(Set mem (Store4C mem src));
6270   ins_cost(145);
6271   format %{ "MOVQ  $mem,$src\t! packed4C" %}
6272   ins_encode %{
6273     __ movq($mem$$Address, $src$$XMMRegister);
6274   %}
6275   ins_pipe( pipe_slow );
6276 %}
6277 
6278 // Store Aligned Packed Integer XMM register to memory
6279 instruct storeA2I(memory mem, regD src) %{
       // Store2I: write the packed value held in an XMM register (regD class)
       // to memory with a single movq.
6280   match(Set mem (Store2I mem src));
6281   ins_cost(145);
6282   format %{ "MOVQ  $mem,$src\t! packed2I" %}
6283   ins_encode %{
6284     __ movq($mem$$Address, $src$$XMMRegister);
6285   %}
6286   ins_pipe( pipe_slow );
6287 %}
6288 
6289 // Store CMS card-mark Immediate
6290 instruct storeImmCM0_reg(memory mem, immI0 zero)
6291 %{
       // Card-mark store of zero (StoreCM) done by storing the low byte of r12.
       // Only selected when compressed oops are on and narrow_oop_base() is
       // NULL (format: R12_heapbase==0); cheaper (125) than storeImmCM0 (150).
6292   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6293   match(Set mem (StoreCM mem zero));
6294 
6295   ins_cost(125); // XXX
6296   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6297   ins_encode %{
6298     __ movb($mem$$Address, r12);
6299   %}
6300   ins_pipe(ialu_mem_reg);
6301 %}
6302 
6303 instruct storeImmCM0(memory mem, immI0 src)
6304 %{
       // Card-mark store of zero (StoreCM) as a byte immediate (opcode C6 /0).
       // Has no predicate, so it is always available as the fallback for
       // storeImmCM0_reg.
6305   match(Set mem (StoreCM mem src));
6306 
6307   ins_cost(150); // XXX
6308   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6309   opcode(0xC6); /* C6 /0 */
6310   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6311   ins_pipe(ialu_mem_imm);
6312 %}
6313 
6314 // Store Aligned Packed Single Float XMM register to memory
6315 instruct storeA2F(memory mem, regD src) %{
       // Store2F: write the packed value held in an XMM register (regD class)
       // to memory with a single movq.
6316   match(Set mem (Store2F mem src));
6317   ins_cost(145);
6318   format %{ "MOVQ  $mem,$src\t! packed2F" %}
6319   ins_encode %{
6320     __ movq($mem$$Address, $src$$XMMRegister);
6321   %}
6322   ins_pipe( pipe_slow );
6323 %}
6324 
6325 // Store Float
6326 instruct storeF(memory mem, regF src)
6327 %{
       // Store a float from an XMM register to memory via movflt.
6328   match(Set mem (StoreF mem src));
6329 
6330   ins_cost(95); // XXX
6331   format %{ "movss   $mem, $src\t# float" %}
6332   ins_encode %{
6333     __ movflt($mem$$Address, $src$$XMMRegister);
6334   %}
6335   ins_pipe(pipe_slow); // XXX
6336 %}
6337 
6338 // Store immediate Float value (it is faster than store from XMM register)
6339 instruct storeF0(memory mem, immF0 zero)
6340 %{
       // Store float 0.0 by storing the low 32 bits of r12 with an integer
       // movl. Only selected when compressed oops are on and narrow_oop_base()
       // is NULL (format: R12_heapbase==0). Lowest cost (25) of the StoreF rules.
6341   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6342   match(Set mem (StoreF mem zero));
6343 
6344   ins_cost(25); // XXX
6345   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6346   ins_encode %{
6347     __ movl($mem$$Address, r12);
6348   %}
6349   ins_pipe(ialu_mem_reg);
6350 %}
6351 
6352 instruct storeF_imm(memory mem, immF src)
6353 %{
       // Store a float constant as a 32-bit integer immediate (opcode C7 /0);
       // Con32F_as_bits supplies the constant's bit pattern as the immediate.
6354   match(Set mem (StoreF mem src));
6355 
6356   ins_cost(50);
6357   format %{ "movl    $mem, $src\t# float" %}
6358   opcode(0xC7); /* C7 /0 */
6359   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6360   ins_pipe(ialu_mem_imm);
6361 %}
6362 
6363 // Store Double
6364 instruct storeD(memory mem, regD src)
6365 %{
       // Store a double from an XMM register to memory via movdbl.
6366   match(Set mem (StoreD mem src));
6367 
6368   ins_cost(95); // XXX
6369   format %{ "movsd   $mem, $src\t# double" %}
6370   ins_encode %{
6371     __ movdbl($mem$$Address, $src$$XMMRegister);
6372   %}
6373   ins_pipe(pipe_slow); // XXX
6374 %}
6375 
6376 // Store immediate double 0.0 (it is faster than store from XMM register)
6377 instruct storeD0_imm(memory mem, immD0 src)
6378 %{
       // Store double 0.0 as an immediate (opcode C7 /0 with the wide REX
       // class). The predicate is the exact complement of storeD0's, so this
       // rule covers the configurations where r12 cannot be used as zero.
6379   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
6380   match(Set mem (StoreD mem src));
6381 
6382   ins_cost(50);
6383   format %{ "movq    $mem, $src\t# double 0." %}
6384   opcode(0xC7); /* C7 /0 */
6385   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6386   ins_pipe(ialu_mem_imm);
6387 %}
6388 
6389 instruct storeD0(memory mem, immD0 zero)
6390 %{
       // Store double 0.0 by storing all 64 bits of r12 with an integer movq.
       // Only selected when compressed oops are on and narrow_oop_base() is
       // NULL (format: R12_heapbase==0); storeD0_imm covers the other case.
6391   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6392   match(Set mem (StoreD mem zero));
6393 
6394   ins_cost(25); // XXX
6395   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6396   ins_encode %{
6397     __ movq($mem$$Address, r12);
6398   %}
6399   ins_pipe(ialu_mem_reg);
6400 %}
6401 
6402 instruct storeSSI(stackSlotI dst, rRegI src)
6403 %{
       // Move an int register into a stack slot (opcode 0x89, non-wide REX
       // class, movl).
6404   match(Set dst src);
6405 
6406   ins_cost(100);
6407   format %{ "movl    $dst, $src\t# int stk" %}
6408   opcode(0x89);
6409   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6410   ins_pipe( ialu_mem_reg );
6411 %}
6412 
6413 instruct storeSSL(stackSlotL dst, rRegL src)
6414 %{
       // Move a long register into a stack slot (opcode 0x89, wide REX class,
       // movq).
6415   match(Set dst src);
6416 
6417   ins_cost(100);
6418   format %{ "movq    $dst, $src\t# long stk" %}
6419   opcode(0x89);
6420   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6421   ins_pipe(ialu_mem_reg);
6422 %}
6423 
6424 instruct storeSSP(stackSlotP dst, rRegP src)
6425 %{
       // Move a pointer register into a stack slot; identical encoding to
       // storeSSL (opcode 0x89, wide REX class, movq).
6426   match(Set dst src);
6427 
6428   ins_cost(100);
6429   format %{ "movq    $dst, $src\t# ptr stk" %}
6430   opcode(0x89);
6431   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6432   ins_pipe(ialu_mem_reg);
6433 %}
6434 
6435 instruct storeSSF(stackSlotF dst, regF src)
6436 %{
       // Move a float from an XMM register into a stack slot. The slot is
       // addressed as rsp + the operand's displacement.
6437   match(Set dst src);
6438 
6439   ins_cost(95); // XXX
6440   format %{ "movss   $dst, $src\t# float stk" %}
6441   ins_encode %{
6442     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6443   %}
6444   ins_pipe(pipe_slow); // XXX
6445 %}
6446 
6447 instruct storeSSD(stackSlotD dst, regD src)
6448 %{
       // Move a double from an XMM register into a stack slot. The slot is
       // addressed as rsp + the operand's displacement.
6449   match(Set dst src);
6450 
6451   ins_cost(95); // XXX
6452   format %{ "movsd   $dst, $src\t# double stk" %}
6453   ins_encode %{
6454     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6455   %}
6456   ins_pipe(pipe_slow); // XXX
6457 %}
6458 
6459 //----------BSWAP Instructions-------------------------------------------------
6460 instruct bytes_reverse_int(rRegI dst) %{
       // ReverseBytesI performed in place (dst is both source and result)
       // with the two-byte opcode 0F C8+reg, shown as bswapl.
6461   match(Set dst (ReverseBytesI dst));
6462 
6463   format %{ "bswapl  $dst" %}
6464   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6465   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6466   ins_pipe( ialu_reg );
6467 %}
6468 
6469 instruct bytes_reverse_long(rRegL dst) %{
       // ReverseBytesL performed in place; same opcode bytes as the int form
       // but with the wide REX class (REX_reg_wide), shown as bswapq.
6470   match(Set dst (ReverseBytesL dst));
6471 
6472   format %{ "bswapq  $dst" %}
6473 
6474   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6475   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6476   ins_pipe( ialu_reg);
6477 %}
6478 
6479 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // ReverseBytesUS performed in place: a 32-bit bswap moves the swapped
       // 16-bit value into the upper half, and a logical right shift by 16
       // brings it back down zero-extended.
6480   match(Set dst (ReverseBytesUS dst));
       // shrl writes the condition flags, so the flags register must be
       // declared killed; otherwise a live flags value could be silently
       // clobbered across this instruction.
       effect(KILL cr);
6481 
6482   format %{ "bswapl  $dst\n\t"
6483             "shrl    $dst,16\n\t" %}
6484   ins_encode %{
6485     __ bswapl($dst$$Register);
6486     __ shrl($dst$$Register, 16);
6487   %}
6488   ins_pipe( ialu_reg );
6489 %}
6490 
6491 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // ReverseBytesS performed in place: a 32-bit bswap moves the swapped
       // 16-bit value into the upper half, and an arithmetic right shift by 16
       // brings it back down sign-extended.
6492   match(Set dst (ReverseBytesS dst));
       // sarl writes the condition flags, so the flags register must be
       // declared killed; otherwise a live flags value could be silently
       // clobbered across this instruction.
       effect(KILL cr);
6493 
6494   format %{ "bswapl  $dst\n\t"
6495             "sarl    $dst,16\n\t" %}
6496   ins_encode %{
6497     __ bswapl($dst$$Register);
6498     __ sarl($dst$$Register, 16);
6499   %}
6500   ins_pipe( ialu_reg );
6501 %}
6502 
6503 //---------- Zeros Count Instructions ------------------------------------------
6504 
6505 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI as a single lzcntl. Only selected when
       // UseCountLeadingZerosInstruction is set; countLeadingZerosI_bsr is the
       // fallback. The flags register is declared killed.
6506   predicate(UseCountLeadingZerosInstruction);
6507   match(Set dst (CountLeadingZerosI src));
6508   effect(KILL cr);
6509 
6510   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6511   ins_encode %{
6512     __ lzcntl($dst$$Register, $src$$Register);
6513   %}
6514   ins_pipe(ialu_reg);
6515 %}
6516 
6517 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI fallback when lzcnt is not used. bsrl yields the
       // index of the highest set bit; the result is computed as
       // (BitsPerInt - 1) - index via negl + addl. When the branch on notZero
       // is not taken (zero source), dst is forced to -1 first so the same
       // arithmetic produces BitsPerInt.
6518   predicate(!UseCountLeadingZerosInstruction);
6519   match(Set dst (CountLeadingZerosI src));
6520   effect(KILL cr);
6521 
6522   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6523             "jnz     skip\n\t"
6524             "movl    $dst, -1\n"
6525       "skip:\n\t"
6526             "negl    $dst\n\t"
6527             "addl    $dst, 31" %}
6528   ins_encode %{
6529     Register Rdst = $dst$$Register;
6530     Register Rsrc = $src$$Register;
6531     Label skip;
6532     __ bsrl(Rdst, Rsrc);
6533     __ jccb(Assembler::notZero, skip);
6534     __ movl(Rdst, -1);
6535     __ bind(skip);
6536     __ negl(Rdst);
6537     __ addl(Rdst, BitsPerInt - 1);
6538   %}
6539   ins_pipe(ialu_reg);
6540 %}
6541 
6542 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL as a single lzcntq; the result lands in an int
       // register. Only selected when UseCountLeadingZerosInstruction is set;
       // countLeadingZerosL_bsr is the fallback.
6543   predicate(UseCountLeadingZerosInstruction);
6544   match(Set dst (CountLeadingZerosL src));
6545   effect(KILL cr);
6546 
6547   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6548   ins_encode %{
6549     __ lzcntq($dst$$Register, $src$$Register);
6550   %}
6551   ins_pipe(ialu_reg);
6552 %}
6553 
6554 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL fallback when lzcnt is not used. bsrq yields the
       // index of the highest set bit; the result is computed as
       // (BitsPerLong - 1) - index via negl + addl. When the branch on notZero
       // is not taken (zero source), dst is forced to -1 first so the same
       // arithmetic produces BitsPerLong.
6555   predicate(!UseCountLeadingZerosInstruction);
6556   match(Set dst (CountLeadingZerosL src));
6557   effect(KILL cr);
6558 
6559   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6560             "jnz     skip\n\t"
6561             "movl    $dst, -1\n"
6562       "skip:\n\t"
6563             "negl    $dst\n\t"
6564             "addl    $dst, 63" %}
6565   ins_encode %{
6566     Register Rdst = $dst$$Register;
6567     Register Rsrc = $src$$Register;
6568     Label skip;
6569     __ bsrq(Rdst, Rsrc);
6570     __ jccb(Assembler::notZero, skip);
6571     __ movl(Rdst, -1);
6572     __ bind(skip);
6573     __ negl(Rdst);
6574     __ addl(Rdst, BitsPerLong - 1);
6575   %}
6576   ins_pipe(ialu_reg);
6577 %}
6578 
6579 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl, which yields the index of the lowest
       // set bit. When the branch on notZero is not taken (zero source), dst
       // is explicitly set to BitsPerInt.
6580   match(Set dst (CountTrailingZerosI src));
6581   effect(KILL cr);
6582 
6583   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6584             "jnz     done\n\t"
6585             "movl    $dst, 32\n"
6586       "done:" %}
6587   ins_encode %{
6588     Register Rdst = $dst$$Register;
6589     Label done;
6590     __ bsfl(Rdst, $src$$Register);
6591     __ jccb(Assembler::notZero, done);
6592     __ movl(Rdst, BitsPerInt);
6593     __ bind(done);
6594   %}
6595   ins_pipe(ialu_reg);
6596 %}
6597 
6598 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq, which yields the index of the lowest
       // set bit; the result lands in an int register. When the branch on
       // notZero is not taken (zero source), dst is explicitly set to
       // BitsPerLong.
6599   match(Set dst (CountTrailingZerosL src));
6600   effect(KILL cr);
6601 
6602   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6603             "jnz     done\n\t"
6604             "movl    $dst, 64\n"
6605       "done:" %}
6606   ins_encode %{
6607     Register Rdst = $dst$$Register;
6608     Label done;
6609     __ bsfq(Rdst, $src$$Register);
6610     __ jccb(Assembler::notZero, done);
6611     __ movl(Rdst, BitsPerLong);
6612     __ bind(done);
6613   %}
6614   ins_pipe(ialu_reg);
6615 %}
6616 
6617 
6618 //---------- Population Count Instructions -------------------------------------
6619 
6620 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI of a register operand as a single popcntl. Only selected
       // when UsePopCountInstruction is set.
6621   predicate(UsePopCountInstruction);
6622   match(Set dst (PopCountI src));
       // POPCNT writes the condition flags (ZF reflects a zero source, the other
       // status flags are cleared), so the flags register must be declared
       // killed to keep a live flags value from being clobbered.
       effect(KILL cr);
6623 
6624   format %{ "popcnt  $dst, $src" %}
6625   ins_encode %{
6626     __ popcntl($dst$$Register, $src$$Register);
6627   %}
6628   ins_pipe(ialu_reg);
6629 %}
6630 
6631 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI with the LoadI folded into the instruction's memory operand
       // (popcntl reg, mem). Only selected when UsePopCountInstruction is set.
6632   predicate(UsePopCountInstruction);
6633   match(Set dst (PopCountI (LoadI mem)));
       // POPCNT writes the condition flags (ZF reflects a zero source, the other
       // status flags are cleared), so the flags register must be declared
       // killed to keep a live flags value from being clobbered.
       effect(KILL cr);
6634 
6635   format %{ "popcnt  $dst, $mem" %}
6636   ins_encode %{
6637     __ popcntl($dst$$Register, $mem$$Address);
6638   %}
6639   ins_pipe(ialu_reg);
6640 %}
6641 
6642 // Note: Long.bitCount(long) returns an int.
6643 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL of a register operand as a single popcntq; the result is
       // an int, hence the rRegI destination. Only selected when
       // UsePopCountInstruction is set.
6644   predicate(UsePopCountInstruction);
6645   match(Set dst (PopCountL src));
       // POPCNT writes the condition flags (ZF reflects a zero source, the other
       // status flags are cleared), so the flags register must be declared
       // killed to keep a live flags value from being clobbered.
       effect(KILL cr);
6646 
6647   format %{ "popcnt  $dst, $src" %}
6648   ins_encode %{
6649     __ popcntq($dst$$Register, $src$$Register);
6650   %}
6651   ins_pipe(ialu_reg);
6652 %}
6653 
6654 // Note: Long.bitCount(long) returns an int.
6655 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL with the LoadL folded into the instruction's memory operand
       // (popcntq reg, mem); the result is an int, hence the rRegI destination.
       // Only selected when UsePopCountInstruction is set.
6656   predicate(UsePopCountInstruction);
6657   match(Set dst (PopCountL (LoadL mem)));
       // POPCNT writes the condition flags (ZF reflects a zero source, the other
       // status flags are cleared), so the flags register must be declared
       // killed to keep a live flags value from being clobbered.
       effect(KILL cr);
6658 
6659   format %{ "popcnt  $dst, $mem" %}
6660   ins_encode %{
6661     __ popcntq($dst$$Register, $mem$$Address);
6662   %}
6663   ins_pipe(ialu_reg);
6664 %}
6665 
6666 
6667 //----------MemBar Instructions-----------------------------------------------
6668 // Memory barrier flavors
6669 
6670 instruct membar_acquire()
6671 %{
       // MemBarAcquire emits no code: size(0), empty ins_encode, zero cost.
6672   match(MemBarAcquire);
6673   ins_cost(0);
6674 
6675   size(0);
6676   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6677   ins_encode();
6678   ins_pipe(empty);
6679 %}
6680 
6681 instruct membar_acquire_lock()
6682 %{
       // MemBarAcquireLock emits no code (size(0), empty ins_encode). Per the
       // format text, the CMPXCHG in the preceding FastLock is what makes a
       // separate barrier unnecessary.
6683   match(MemBarAcquireLock);
6684   ins_cost(0);
6685 
6686   size(0);
6687   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6688   ins_encode();
6689   ins_pipe(empty);
6690 %}
6691 
6692 instruct membar_release()
6693 %{
       // MemBarRelease emits no code: size(0), empty ins_encode, zero cost.
6694   match(MemBarRelease);
6695   ins_cost(0);
6696 
6697   size(0);
6698   format %{ "MEMBAR-release ! (empty encoding)" %}
6699   ins_encode();
6700   ins_pipe(empty);
6701 %}
6702 
6703 instruct membar_release_lock()
6704 %{
       // MemBarReleaseLock emits no code (size(0), empty ins_encode). Per the
       // format text, the FastUnlock that follows is what makes a separate
       // barrier unnecessary.
6705   match(MemBarReleaseLock);
6706   ins_cost(0);
6707 
6708   size(0);
6709   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6710   ins_encode();
6711   ins_pipe(empty);
6712 %}
6713 
6714 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile: requests a StoreLoad barrier from the macro assembler.
       // The format template prints a "lock addl [rsp + #0], 0" on MP systems
       // and an empty encoding otherwise; the flags register is declared
       // killed. The high cost (400) lets the zero-cost
       // unnecessary_membar_volatile rule win whenever its predicate holds.
6715   match(MemBarVolatile);
6716   effect(KILL cr);
6717   ins_cost(400);
6718 
6719   format %{
6720     $$template
6721     if (os::is_MP()) {
6722       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6723     } else {
6724       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6725     }
6726   %}
6727   ins_encode %{
6728     __ membar(Assembler::StoreLoad);
6729   %}
6730   ins_pipe(pipe_slow);
6731 %}
6732 
6733 instruct unnecessary_membar_volatile()
6734 %{
       // MemBarVolatile that can be dropped: selected when
       // Matcher::post_store_load_barrier(n) holds for the node, in which case
       // nothing is emitted (size(0), empty ins_encode, zero cost).
6735   match(MemBarVolatile);
6736   predicate(Matcher::post_store_load_barrier(n));
6737   ins_cost(0);
6738 
6739   size(0);
6740   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6741   ins_encode();
6742   ins_pipe(empty);
6743 %}
6744 
6745 instruct membar_storestore() %{
       // MemBarStoreStore emits no code: size(0), empty ins_encode, zero cost.
6746   match(MemBarStoreStore);
6747   ins_cost(0);
6748 
6749   size(0);
6750   format %{ "MEMBAR-storestore (empty encoding)" %}
6751   ins_encode( );
6752   ins_pipe(empty);
6753 %}
6754 
6755 //----------Move Instructions--------------------------------------------------
6756 
6757 instruct castX2P(rRegP dst, rRegL src)
6758 %{
       // Reinterpret a long as a pointer (CastX2P). This is a plain register
       // copy, and nothing is emitted when dst and src were allocated to the
       // same register.
6759   match(Set dst (CastX2P src));
6760 
6761   format %{ "movq    $dst, $src\t# long->ptr" %}
6762   ins_encode %{
6763     if ($dst$$reg != $src$$reg) {
6764       __ movptr($dst$$Register, $src$$Register);
6765     }
6766   %}
6767   ins_pipe(ialu_reg_reg); // XXX
6768 %}
6769 
6770 instruct castP2X(rRegL dst, rRegP src)
6771 %{
       // Reinterpret a pointer as a long (CastP2X). This is a plain register
       // copy, and nothing is emitted when dst and src were allocated to the
       // same register.
6772   match(Set dst (CastP2X src));
6773 
6774   format %{ "movq    $dst, $src\t# ptr -> long" %}
6775   ins_encode %{
6776     if ($dst$$reg != $src$$reg) {
6777       __ movptr($dst$$Register, $src$$Register);
6778     }
6779   %}
6780   ins_pipe(ialu_reg_reg); // XXX
6781 %}
6782 
6783 
6784 // Convert oop pointer into compressed form
6785 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values not known to be non-null (node type's ptr() is not
       // TypePtr::NotNull). The source is copied into dst when the registers
       // differ, then encoded in place with the one-register encode_heap_oop.
       // The flags register is declared killed.
6786   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6787   match(Set dst (EncodeP src));
6788   effect(KILL cr);
6789   format %{ "encode_heap_oop $dst,$src" %}
6790   ins_encode %{
6791     Register s = $src$$Register;
6792     Register d = $dst$$Register;
6793     if (s != d) {
6794       __ movq(d, s);
6795     }
6796     __ encode_heap_oop(d);
6797   %}
6798   ins_pipe(ialu_reg_long);
6799 %}
6800 
6801 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values whose node type is TypePtr::NotNull; uses the
       // two-register encode_heap_oop_not_null(dst, src) form directly.
       // The flags register is declared killed.
6802   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6803   match(Set dst (EncodeP src));
6804   effect(KILL cr);
6805   format %{ "encode_heap_oop_not_null $dst,$src" %}
6806   ins_encode %{
6807     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6808   %}
6809   ins_pipe(ialu_reg_long);
6810 %}
6811 
6812 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for values that may be null: the node type's ptr() is neither
       // TypePtr::NotNull nor TypePtr::Constant. The source is copied into dst
       // when the registers differ, then decoded in place with the
       // one-register decode_heap_oop. The flags register is declared killed.
6813   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
6814             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
6815   match(Set dst (DecodeN src));
6816   effect(KILL cr);
6817   format %{ "decode_heap_oop $dst,$src" %}
6818   ins_encode %{
6819     Register s = $src$$Register;
6820     Register d = $dst$$Register;
6821     if (s != d) {
6822       __ movq(d, s);
6823     }
6824     __ decode_heap_oop(d);
6825   %}
6826   ins_pipe(ialu_reg_long);
6827 %}
6828 
// Expand a compressed oop (DecodeN) whose type is NotNull or Constant.
// Picks the in-place helper when dst and src share a register, and the
// two-register helper otherwise.
6829 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6830   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
6831             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
6832   match(Set dst (DecodeN src));
6833   effect(KILL cr);
6834   format %{ "decode_heap_oop_not_null $dst,$src" %}
6835   ins_encode %{
6836     Register dst_reg = $dst$$Register;
6837     Register src_reg = $src$$Register;
6838     if (dst_reg == src_reg) {
6839       __ decode_heap_oop_not_null(dst_reg);
6840     } else {
6841       __ decode_heap_oop_not_null(dst_reg, src_reg);
6842     }
6843   %}
6844   ins_pipe(ialu_reg_long);
6845 %}
6846 
6847 
6848 //----------Conditional Move---------------------------------------------------
6849 // Jump
6850 // dummy instruction for generating temp registers
// Table jump indexed by (switch_val << shift).
// predicate(false) means the matcher never selects this rule as written.
// dest is a TEMP register that receives the table base address before the
// indirect jump through [dest + switch_val << shift].
6851 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6852   match(Jump (LShiftL switch_val shift));
6853   ins_cost(350);
6854   predicate(false);
6855   effect(TEMP dest);
6856
6857   format %{ "leaq    $dest, [$constantaddress]\n\t"
6858             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6859   ins_encode %{
6860     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6861     // to do that and the compiler is using that register as one it can allocate.
6862     // So we build it all by hand.
6863     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6864     // ArrayAddress dispatch(table, index);
6865     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6866     __ lea($dest$$Register, $constantaddress);
6867     __ jmp(dispatch);
6868   %}
6869   ins_pipe(pipe_jmp);
6870 %}
6871 
// Table jump indexed by (switch_val << shift) + offset.
// dest is a TEMP register that receives the table base address; the jump
// then goes through [dest + switch_val << shift + offset].
6872 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
6873   match(Jump (AddL (LShiftL switch_val shift) offset));
6874   ins_cost(350);
6875   effect(TEMP dest);
6876
6877   format %{ "leaq    $dest, [$constantaddress]\n\t"
6878             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6879   ins_encode %{
6880     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6881     // to do that and the compiler is using that register as one it can allocate.
6882     // So we build it all by hand.
6883     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6884     // ArrayAddress dispatch(table, index);
6885     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6886     __ lea($dest$$Register, $constantaddress);
6887     __ jmp(dispatch);
6888   %}
6889   ins_pipe(pipe_jmp);
6890 %}
6891 
// Table jump indexed directly by switch_val (scale factor times_1).
// dest is a TEMP register that receives the table base address; the jump
// then goes through [dest + switch_val].
6892 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
6893   match(Jump switch_val);
6894   ins_cost(350);
6895   effect(TEMP dest);
6896
6897   format %{ "leaq    $dest, [$constantaddress]\n\t"
6898             "jmp     [$dest + $switch_val]\n\t" %}
6899   ins_encode %{
6900     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6901     // to do that and the compiler is using that register as one it can allocate.
6902     // So we build it all by hand.
6903     // Address index(noreg, switch_reg, Address::times_1);
6904     // ArrayAddress dispatch(table, index);
6905     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
6906     __ lea($dest$$Register, $constantaddress);
6907     __ jmp(dispatch);
6908   %}
6909   ins_pipe(pipe_jmp);
6910 %}
6911 
6912 // Conditional move, int register to register, on signed flags (cmpOp / rFlagsReg).
// dst appears on both sides of the match, so it keeps its value when the
// condition does not hold.
6913 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6914 %{
6915   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6916
6917   ins_cost(200); // XXX
6918   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6919   opcode(0x0F, 0x40);
6920   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6921   ins_pipe(pipe_cmov_reg);
6922 %}
6923 
// Conditional move, int register to register, on unsigned flags
// (cmpOpU / rFlagsRegU). Same encoding sequence as cmovI_reg.
6924 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
6925   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6926
6927   ins_cost(200); // XXX
6928   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6929   opcode(0x0F, 0x40);
6930   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6931   ins_pipe(pipe_cmov_reg);
6932 %}
6933 
// Int register cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovI_regU.
6934 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
6935   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6936   ins_cost(200);
6937   expand %{
6938     cmovI_regU(cop, cr, dst, src);
6939   %}
6940 %}
6941 
6942 // Conditional move, int loaded from memory into a register, on signed flags.
// Folds the LoadI into the cmov; costed higher (250) than the register form.
6943 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
6944   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6945
6946   ins_cost(250); // XXX
6947   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6948   opcode(0x0F, 0x40);
6949   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6950   ins_pipe(pipe_cmov_mem);
6951 %}
6952 
6953 // Conditional move, int loaded from memory into a register, on unsigned flags.
6954 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6955 %{
6956   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6957
6958   ins_cost(250); // XXX
6959   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6960   opcode(0x0F, 0x40);
6961   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6962   ins_pipe(pipe_cmov_mem);
6963 %}
6964 
// Int memory-source cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovI_memU.
6965 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
6966   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6967   ins_cost(250);
6968   expand %{
6969     cmovI_memU(cop, cr, dst, src);
6970   %}
6971 %}
6972 
6973 // Conditional move of a compressed pointer, register to register, signed flags.
// Uses the non-wide REX form; the format string shows the 32-bit cmovl mnemonic.
6974 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6975 %{
6976   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6977
6978   ins_cost(200); // XXX
6979   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6980   opcode(0x0F, 0x40);
6981   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6982   ins_pipe(pipe_cmov_reg);
6983 %}
6984 
6985 // Conditional move of a compressed pointer, register to register, unsigned flags.
6986 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6987 %{
6988   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6989
6990   ins_cost(200); // XXX
6991   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6992   opcode(0x0F, 0x40);
6993   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6994   ins_pipe(pipe_cmov_reg);
6995 %}
6996 
// Compressed-pointer cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovN_regU.
6997 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
6998   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6999   ins_cost(200);
7000   expand %{
7001     cmovN_regU(cop, cr, dst, src);
7002   %}
7003 %}
7004 
7005 // Conditional move of a pointer, register to register, signed flags.
// Uses the wide REX form; the format string shows the 64-bit cmovq mnemonic.
7006 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7007 %{
7008   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7009
7010   ins_cost(200); // XXX
7011   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7012   opcode(0x0F, 0x40);
7013   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7014   ins_pipe(pipe_cmov_reg);  // XXX
7015 %}
7016 
7017 // Conditional move of a pointer, register to register, unsigned flags.
7018 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7019 %{
7020   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7021
7022   ins_cost(200); // XXX
7023   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7024   opcode(0x0F, 0x40);
7025   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7026   ins_pipe(pipe_cmov_reg); // XXX
7027 %}
7028 
// Pointer cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovP_regU.
7029 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7030   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7031   ins_cost(200);
7032   expand %{
7033     cmovP_regU(cop, cr, dst, src);
7034   %}
7035 %}
7036 
7037 // DISABLED: Requires the ADLC to emit a bottom_type call that
7038 // correctly meets the two pointer arguments; one is an incoming
7039 // register but the other is a memory operand.  ALSO appears to
7040 // be buggy with implicit null checks.
7041 //
7042 //// Conditional move
7043 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7044 //%{
7045 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7046 //  ins_cost(250);
7047 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7048 //  opcode(0x0F,0x40);
7049 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7050 //  ins_pipe( pipe_cmov_mem );
7051 //%}
7052 //
7053 //// Conditional move
7054 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7055 //%{
7056 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7057 //  ins_cost(250);
7058 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7059 //  opcode(0x0F,0x40);
7060 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7061 //  ins_pipe( pipe_cmov_mem );
7062 //%}
7063 
// Conditional move of a long, register to register, signed flags.
// Uses the wide REX form; the format string shows the 64-bit cmovq mnemonic.
7064 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7065 %{
7066   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7067
7068   ins_cost(200); // XXX
7069   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7070   opcode(0x0F, 0x40);
7071   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7072   ins_pipe(pipe_cmov_reg);  // XXX
7073 %}
7074 
// Conditional move of a long loaded from memory, signed flags.
// NOTE(review): cost is 200 here, while the int memory forms (cmovI_mem,
// cmovI_memU) use 250 - confirm whether the difference is intended.
7075 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7076 %{
7077   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7078
7079   ins_cost(200); // XXX
7080   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7081   opcode(0x0F, 0x40);
7082   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7083   ins_pipe(pipe_cmov_mem);  // XXX
7084 %}
7085 
// Conditional move of a long, register to register, unsigned flags.
7086 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7087 %{
7088   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7089
7090   ins_cost(200); // XXX
7091   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7092   opcode(0x0F, 0x40);
7093   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7094   ins_pipe(pipe_cmov_reg); // XXX
7095 %}
7096 
// Long register cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovL_regU.
7097 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7098   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7099   ins_cost(200);
7100   expand %{
7101     cmovL_regU(cop, cr, dst, src);
7102   %}
7103 %}
7104 
// Conditional move of a long loaded from memory, unsigned flags.
7105 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7106 %{
7107   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7108
7109   ins_cost(200); // XXX
7110   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7111   opcode(0x0F, 0x40);
7112   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7113   ins_pipe(pipe_cmov_mem); // XXX
7114 %}
7115 
// Long memory-source cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovL_memU.
7116 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
7117   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7118   ins_cost(200);
7119   expand %{
7120     cmovL_memU(cop, cr, dst, src);
7121   %}
7122 %}
7123 
// Conditional move of a float between XMM registers, signed flags.
// Implemented as a short branch around a register move: the branch uses the
// inverted condition (cmpcode ^ 1), so the move runs only when cop holds.
7124 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7125 %{
7126   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7127
7128   ins_cost(200); // XXX
7129   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7130             "movss     $dst, $src\n"
7131     "skip:" %}
7132   ins_encode %{
7133     Label Lskip;
7134     // Invert sense of branch from sense of CMOV
7135     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7136     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7137     __ bind(Lskip);
7138   %}
7139   ins_pipe(pipe_slow);
7140 %}
7141 
7142 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7143 // %{
7144 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7145 
7146 //   ins_cost(200); // XXX
7147 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7148 //             "movss     $dst, $src\n"
7149 //     "skip:" %}
7150 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7151 //   ins_pipe(pipe_slow);
7152 // %}
7153 
// Conditional move of a float between XMM registers, unsigned flags.
// Implemented as a short branch around a register move: the branch uses the
// inverted condition (cmpcode ^ 1), so the move runs only when cop holds.
7154 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7155 %{
7156   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7157
7158   ins_cost(200); // XXX
7159   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7160             "movss     $dst, $src\n"
7161     "skip:" %}
7162   ins_encode %{
7163     Label Lskip;
7164     // Invert sense of branch from sense of CMOV
7165     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7166     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7167     __ bind(Lskip);
7168   %}
7169   ins_pipe(pipe_slow);
7170 %}
7171 
// Float cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovF_regU.
7172 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7173   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7174   ins_cost(200);
7175   expand %{
7176     cmovF_regU(cop, cr, dst, src);
7177   %}
7178 %}
7179 
// Conditional move of a double between XMM registers, signed flags.
// Implemented as a short branch around a register move: the branch uses the
// inverted condition (cmpcode ^ 1), so the move runs only when cop holds.
7180 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7181 %{
7182   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7183
7184   ins_cost(200); // XXX
7185   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7186             "movsd     $dst, $src\n"
7187     "skip:" %}
7188   ins_encode %{
7189     Label Lskip;
7190     // Invert sense of branch from sense of CMOV
7191     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7192     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7193     __ bind(Lskip);
7194   %}
7195   ins_pipe(pipe_slow);
7196 %}
7197 
// Conditional move of a double between XMM registers, unsigned flags.
// Implemented as a short branch around a register move: the branch uses the
// inverted condition (cmpcode ^ 1), so the move runs only when cop holds.
7198 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7199 %{
7200   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7201
7202   ins_cost(200); // XXX
7203   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7204             "movsd     $dst, $src\n"
7205     "skip:" %}
7206   ins_encode %{
7207     Label Lskip;
7208     // Invert sense of branch from sense of CMOV
7209     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7210     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7211     __ bind(Lskip);
7212   %}
7213   ins_pipe(pipe_slow);
7214 %}
7215 
// Double cmov for the cmpOpUCF / rFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovD_regU.
7216 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7217   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7218   ins_cost(200);
7219   expand %{
7220     cmovD_regU(cop, cr, dst, src);
7221   %}
7222 %}
7223 
7224 //----------Arithmetic Instructions--------------------------------------------
7225 //----------Addition Instructions----------------------------------------------
7226 
// Int add, register += register (destructive two-operand form). Kills flags.
7227 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7228 %{
7229   match(Set dst (AddI dst src));
7230   effect(KILL cr);
7231
7232   format %{ "addl    $dst, $src\t# int" %}
7233   opcode(0x03);
7234   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7235   ins_pipe(ialu_reg_reg);
7236 %}
7237 
// Int add, register += immediate. Kills flags.
// The immediate is emitted through Con8or32, i.e. as an 8- or 32-bit constant.
7238 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7239 %{
7240   match(Set dst (AddI dst src));
7241   effect(KILL cr);
7242
7243   format %{ "addl    $dst, $src\t# int" %}
7244   opcode(0x81, 0x00); /* /0 id */
7245   ins_encode(OpcSErm(dst, src), Con8or32(src));
7246   ins_pipe( ialu_reg );
7247 %}
7248 
// Int add, register += value loaded from memory (LoadI folded into the add).
// Kills flags.
7249 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7250 %{
7251   match(Set dst (AddI dst (LoadI src)));
7252   effect(KILL cr);
7253
7254   ins_cost(125); // XXX
7255   format %{ "addl    $dst, $src\t# int" %}
7256   opcode(0x03);
7257   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7258   ins_pipe(ialu_reg_mem);
7259 %}
7260 
// Int read-modify-write add: matches a StoreI of (LoadI dst) + src back to the
// same memory operand and emits one addl to memory. Kills flags.
7261 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7262 %{
7263   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7264   effect(KILL cr);
7265
7266   ins_cost(150); // XXX
7267   format %{ "addl    $dst, $src\t# int" %}
7268   opcode(0x01); /* Opcode 01 /r */
7269   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7270   ins_pipe(ialu_mem_reg);
7271 %}
7272 
// Int read-modify-write add of an immediate to memory: matches a StoreI of
// (LoadI dst) + src back to the same memory operand. Kills flags.
7273 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7274 %{
7275   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7276   effect(KILL cr);
7277
7278   ins_cost(125); // XXX
7279   format %{ "addl    $dst, $src\t# int" %}
7280   opcode(0x81); /* Opcode 81 /0 id */
7281   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7282   ins_pipe(ialu_mem_imm);
7283 %}
7284 
// Int increment of a register: AddI with the immI1 operand, emitted as incl.
// Only eligible when the UseIncDec flag is set. Kills flags.
7285 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7286 %{
7287   predicate(UseIncDec);
7288   match(Set dst (AddI dst src));
7289   effect(KILL cr);
7290
7291   format %{ "incl    $dst\t# int" %}
7292   opcode(0xFF, 0x00); // FF /0
7293   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7294   ins_pipe(ialu_reg);
7295 %}
7296 
// Int increment of a memory location: read-modify-write AddI with the immI1
// operand, emitted as incl. Only eligible when UseIncDec is set. Kills flags.
7297 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7298 %{
7299   predicate(UseIncDec);
7300   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7301   effect(KILL cr);
7302
7303   ins_cost(125); // XXX
7304   format %{ "incl    $dst\t# int" %}
7305   opcode(0xFF); /* Opcode FF /0 */
7306   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7307   ins_pipe(ialu_mem_imm);
7308 %}
7309 
7310 // Int decrement of a register, emitted as decl. XXX why does that use AddI
// The rule matches AddI with the immI_M1 operand (presumably the constant -1 -
// confirm against the operand definition). Only eligible when UseIncDec is set.
7311 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7312 %{
7313   predicate(UseIncDec);
7314   match(Set dst (AddI dst src));
7315   effect(KILL cr);
7316
7317   format %{ "decl    $dst\t# int" %}
7318   opcode(0xFF, 0x01); // FF /1
7319   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7320   ins_pipe(ialu_reg);
7321 %}
7322 
7323 // Int decrement of a memory location, emitted as decl. XXX why does that use AddI
// The rule matches a read-modify-write AddI with the immI_M1 operand
// (presumably the constant -1 - confirm against the operand definition).
// Only eligible when UseIncDec is set.
7324 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7325 %{
7326   predicate(UseIncDec);
7327   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7328   effect(KILL cr);
7329
7330   ins_cost(125); // XXX
7331   format %{ "decl    $dst\t# int" %}
7332   opcode(0xFF); /* Opcode FF /1 */
7333   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7334   ins_pipe(ialu_mem_imm);
7335 %}
7336 
// Non-destructive int add of a register and an immediate via lea: dst may
// differ from src0, and no flags KILL is declared. The encoding starts with a
// 0x67 prefix byte, shown as "addr32" in the format string.
7337 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7338 %{
7339   match(Set dst (AddI src0 src1));
7340
7341   ins_cost(110);
7342   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7343   opcode(0x8D); /* 0x8D /r */
7344   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7345   ins_pipe(ialu_reg_reg);
7346 %}
7347 
// Long add, register += register, using the wide REX form. Kills flags.
7348 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7349 %{
7350   match(Set dst (AddL dst src));
7351   effect(KILL cr);
7352
7353   format %{ "addq    $dst, $src\t# long" %}
7354   opcode(0x03);
7355   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7356   ins_pipe(ialu_reg_reg);
7357 %}
7358 
// Long add, register += immediate (immL32 operand). Kills flags.
// The immediate is emitted through Con8or32, i.e. as an 8- or 32-bit constant.
7359 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7360 %{
7361   match(Set dst (AddL dst src));
7362   effect(KILL cr);
7363
7364   format %{ "addq    $dst, $src\t# long" %}
7365   opcode(0x81, 0x00); /* /0 id */
7366   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7367   ins_pipe( ialu_reg );
7368 %}
7369 
// Long add, register += value loaded from memory (LoadL folded into the add).
// Kills flags.
7370 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7371 %{
7372   match(Set dst (AddL dst (LoadL src)));
7373   effect(KILL cr);
7374
7375   ins_cost(125); // XXX
7376   format %{ "addq    $dst, $src\t# long" %}
7377   opcode(0x03);
7378   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7379   ins_pipe(ialu_reg_mem);
7380 %}
7381 
// Long read-modify-write add: matches a StoreL of (LoadL dst) + src back to the
// same memory operand and emits one addq to memory. Kills flags.
7382 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7383 %{
7384   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7385   effect(KILL cr);
7386
7387   ins_cost(150); // XXX
7388   format %{ "addq    $dst, $src\t# long" %}
7389   opcode(0x01); /* Opcode 01 /r */
7390   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7391   ins_pipe(ialu_mem_reg);
7392 %}
7393 
// Long read-modify-write add of an immediate to memory: matches a StoreL of
// (LoadL dst) + src back to the same memory operand. Kills flags.
7394 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7395 %{
7396   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7397   effect(KILL cr);
7398
7399   ins_cost(125); // XXX
7400   format %{ "addq    $dst, $src\t# long" %}
7401   opcode(0x81); /* Opcode 81 /0 id */
7402   ins_encode(REX_mem_wide(dst),
7403              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
7404   ins_pipe(ialu_mem_imm);
7405 %}
7406 
// Long increment of a register: AddL with the immL1 operand, emitted as incq.
// Only eligible when the UseIncDec flag is set. Kills flags.
// dst is a long register (rRegL): the rule matches an AddL result and encodes
// with the wide REX form, in line with decL_rReg. It was previously declared
// rRegI, which disagrees with the long value this rule produces.
7407 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7408 %{
7409   predicate(UseIncDec);
7410   match(Set dst (AddL dst src));
7411   effect(KILL cr);
7412
7413   format %{ "incq    $dst\t# long" %}
7414   opcode(0xFF, 0x00); // FF /0
7415   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7416   ins_pipe(ialu_reg);
7417 %}
7418 
// Long increment of a memory location: read-modify-write AddL with the immL1
// operand, emitted as incq. Only eligible when UseIncDec is set. Kills flags.
7419 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7420 %{
7421   predicate(UseIncDec);
7422   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7423   effect(KILL cr);
7424
7425   ins_cost(125); // XXX
7426   format %{ "incq    $dst\t# long" %}
7427   opcode(0xFF); /* Opcode FF /0 */
7428   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7429   ins_pipe(ialu_mem_imm);
7430 %}
7431 
7432 // Long decrement of a register, emitted as decq. XXX why does that use AddL
// The rule matches AddL with the immL_M1 operand (presumably the constant -1 -
// confirm against the operand definition). Only eligible when UseIncDec is set.
7433 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7434 %{
7435   predicate(UseIncDec);
7436   match(Set dst (AddL dst src));
7437   effect(KILL cr);
7438
7439   format %{ "decq    $dst\t# long" %}
7440   opcode(0xFF, 0x01); // FF /1
7441   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7442   ins_pipe(ialu_reg);
7443 %}
7444 
7445 // Long decrement of a memory location, emitted as decq. XXX why does that use AddL
// The rule matches a read-modify-write AddL with the immL_M1 operand
// (presumably the constant -1 - confirm against the operand definition).
// Only eligible when UseIncDec is set.
7446 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7447 %{
7448   predicate(UseIncDec);
7449   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7450   effect(KILL cr);
7451
7452   ins_cost(125); // XXX
7453   format %{ "decq    $dst\t# long" %}
7454   opcode(0xFF); /* Opcode FF /1 */
7455   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7456   ins_pipe(ialu_mem_imm);
7457 %}
7458 
// Non-destructive long add of a register and an immL32 immediate via leaq:
// dst may differ from src0, and no flags KILL is declared.
7459 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7460 %{
7461   match(Set dst (AddL src0 src1));
7462
7463   ins_cost(110);
7464   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7465   opcode(0x8D); /* 0x8D /r */
7466   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7467   ins_pipe(ialu_reg_reg);
7468 %}
7469 
// Pointer add, pointer register += long register offset. Kills flags.
7470 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7471 %{
7472   match(Set dst (AddP dst src));
7473   effect(KILL cr);
7474
7475   format %{ "addq    $dst, $src\t# ptr" %}
7476   opcode(0x03);
7477   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7478   ins_pipe(ialu_reg_reg);
7479 %}
7480 
// Pointer add, pointer register += immL32 immediate offset. Kills flags.
7481 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7482 %{
7483   match(Set dst (AddP dst src));
7484   effect(KILL cr);
7485
7486   format %{ "addq    $dst, $src\t# ptr" %}
7487   opcode(0x81, 0x00); /* /0 id */
7488   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7489   ins_pipe( ialu_reg );
7490 %}
7491 
7492 // XXX addP mem ops ????
7493 
// Non-destructive pointer add of a register and an immL32 immediate via leaq:
// dst may differ from src0, and no flags KILL is declared.
7494 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7495 %{
7496   match(Set dst (AddP src0 src1));
7497
7498   ins_cost(110);
7499   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7500   opcode(0x8D); /* 0x8D /r */
7501   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7502   ins_pipe(ialu_reg_reg);
7503 %}
7504 
// CheckCastPP on a pointer register: declared with size(0) and an empty
// encoding, so the rule emits no machine code.
7505 instruct checkCastPP(rRegP dst)
7506 %{
7507   match(Set dst (CheckCastPP dst));
7508
7509   size(0);
7510   format %{ "# checkcastPP of $dst" %}
7511   ins_encode(/* empty encoding */);
7512   ins_pipe(empty);
7513 %}
7514 
// CastPP on a pointer register: declared with size(0) and an empty encoding,
// so the rule emits no machine code.
7515 instruct castPP(rRegP dst)
7516 %{
7517   match(Set dst (CastPP dst));
7518
7519   size(0);
7520   format %{ "# castPP of $dst" %}
7521   ins_encode(/* empty encoding */);
7522   ins_pipe(empty);
7523 %}
7524 
// CastII on an int register: declared with size(0), an empty encoding and
// zero cost, so the rule emits no machine code.
7525 instruct castII(rRegI dst)
7526 %{
7527   match(Set dst (CastII dst));
7528
7529   size(0);
7530   format %{ "# castII of $dst" %}
7531   ins_encode(/* empty encoding */);
7532   ins_cost(0);
7533   ins_pipe(empty);
7534 %}
7535 
7536 // LoadP-locked: same as a regular LoadP when used with compare-swap.
// Emitted as a plain 64-bit movq from memory (opcode 0x8B, wide REX form).
7537 instruct loadPLocked(rRegP dst, memory mem)
7538 %{
7539   match(Set dst (LoadPLocked mem));
7540
7541   ins_cost(125); // XXX
7542   format %{ "movq    $dst, $mem\t# ptr locked" %}
7543   opcode(0x8B);
7544   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7545   ins_pipe(ialu_reg_mem); // XXX
7546 %}
7547 
7548 // LoadL-locked: same as a regular LoadL when used with compare-swap.
// Emitted as a plain 64-bit movq from memory (opcode 0x8B, wide REX form).
7549 instruct loadLLocked(rRegL dst, memory mem)
7550 %{
7551   match(Set dst (LoadLLocked mem));
7552
7553   ins_cost(125); // XXX
7554   format %{ "movq    $dst, $mem\t# long locked" %}
7555   opcode(0x8B);
7556   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7557   ins_pipe(ialu_reg_mem); // XXX
7558 %}
7559 
7560 // Conditional-store of the updated heap-top.
7561 // Used during allocation of the shared heap.
7562 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7563 
// Pointer conditional store: a lock-prefixed 64-bit cmpxchg of newval into
// heap_top_ptr, with oldval pinned to rax. The result of the rule is the flags
// register (Set cr ...).
// NOTE(review): unlike storeIConditional and storeLConditional, this rule
// declares no KILL of oldval - confirm that is intended.
7564 instruct storePConditional(memory heap_top_ptr,
7565                            rax_RegP oldval, rRegP newval,
7566                            rFlagsReg cr)
7567 %{
7568   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7569
7570   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7571             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7572   opcode(0x0F, 0xB1);
7573   ins_encode(lock_prefix,
7574              REX_reg_mem_wide(newval, heap_top_ptr),
7575              OpcP, OpcS,
7576              reg_mem(newval, heap_top_ptr));
7577   ins_pipe(pipe_cmpxchg);
7578 %}
7579 
7580 // Conditional-store of an int value.
7581 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// oldval is pinned to rax and declared KILLed; the rule's result is the flags
// register (Set cr ...). Emitted as a lock-prefixed 32-bit cmpxchg.
7582 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7583 %{
7584   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7585   effect(KILL oldval);
7586
7587   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7588   opcode(0x0F, 0xB1);
7589   ins_encode(lock_prefix,
7590              REX_reg_mem(newval, mem),
7591              OpcP, OpcS,
7592              reg_mem(newval, mem));
7593   ins_pipe(pipe_cmpxchg);
7594 %}
7595 
7596 // Conditional-store of a long value.
7597 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// oldval is pinned to rax and declared KILLed; the rule's result is the flags
// register (Set cr ...). Emitted as a lock-prefixed 64-bit cmpxchg.
7598 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7599 %{
7600   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7601   effect(KILL oldval);
7602
7603   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7604   opcode(0x0F, 0xB1);
7605   ins_encode(lock_prefix,
7606              REX_reg_mem_wide(newval, mem),
7607              OpcP, OpcS,
7608              reg_mem(newval, mem));
7609   ins_pipe(pipe_cmpxchg);
7610 %}
7611 
7612 
7613 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Compare-and-swap of a pointer, producing an int result in res.
// Emits a lock-prefixed 64-bit cmpxchg (oldval pinned to rax), then sete and
// movzbl on res to turn the outcome into an int. Kills flags and oldval.
7614 instruct compareAndSwapP(rRegI res,
7615                          memory mem_ptr,
7616                          rax_RegP oldval, rRegP newval,
7617                          rFlagsReg cr)
7618 %{
7619   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7620   effect(KILL cr, KILL oldval);
7621
7622   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7623             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7624             "sete    $res\n\t"
7625             "movzbl  $res, $res" %}
7626   opcode(0x0F, 0xB1);
7627   ins_encode(lock_prefix,
7628              REX_reg_mem_wide(newval, mem_ptr),
7629              OpcP, OpcS,
7630              reg_mem(newval, mem_ptr),
7631              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7632              REX_reg_breg(res, res), // movzbl
7633              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7634   ins_pipe( pipe_cmpxchg );
7635 %}
7636 
// Compare-and-swap of a long, producing an int result in res.
// Emits a lock-prefixed 64-bit cmpxchg (oldval pinned to rax), then sete and
// movzbl on res to turn the outcome into an int. Kills flags and oldval.
7637 instruct compareAndSwapL(rRegI res,
7638                          memory mem_ptr,
7639                          rax_RegL oldval, rRegL newval,
7640                          rFlagsReg cr)
7641 %{
7642   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7643   effect(KILL cr, KILL oldval);
7644
7645   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7646             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7647             "sete    $res\n\t"
7648             "movzbl  $res, $res" %}
7649   opcode(0x0F, 0xB1);
7650   ins_encode(lock_prefix,
7651              REX_reg_mem_wide(newval, mem_ptr),
7652              OpcP, OpcS,
7653              reg_mem(newval, mem_ptr),
7654              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7655              REX_reg_breg(res, res), // movzbl
7656              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7657   ins_pipe( pipe_cmpxchg );
7658 %}
7659 
// Compare-and-swap of an int, producing an int result in res.
// Emits a lock-prefixed 32-bit cmpxchg (oldval pinned to rax), then sete and
// movzbl on res to turn the outcome into an int. Kills flags and oldval.
7660 instruct compareAndSwapI(rRegI res,
7661                          memory mem_ptr,
7662                          rax_RegI oldval, rRegI newval,
7663                          rFlagsReg cr)
7664 %{
7665   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7666   effect(KILL cr, KILL oldval);
7667
7668   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7669             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7670             "sete    $res\n\t"
7671             "movzbl  $res, $res" %}
7672   opcode(0x0F, 0xB1);
7673   ins_encode(lock_prefix,
7674              REX_reg_mem(newval, mem_ptr),
7675              OpcP, OpcS,
7676              reg_mem(newval, mem_ptr),
7677              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7678              REX_reg_breg(res, res), // movzbl
7679              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7680   ins_pipe( pipe_cmpxchg );
7681 %}
7682 
7683 
// Compare-and-swap of a compressed pointer, producing an int result in res.
// Emits a lock-prefixed 32-bit cmpxchg (oldval pinned to rax), then sete and
// movzbl on res to turn the outcome into an int. Kills flags and oldval.
7684 instruct compareAndSwapN(rRegI res,
7685                           memory mem_ptr,
7686                           rax_RegN oldval, rRegN newval,
7687                           rFlagsReg cr) %{
7688   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7689   effect(KILL cr, KILL oldval);
7690
7691   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7692             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7693             "sete    $res\n\t"
7694             "movzbl  $res, $res" %}
7695   opcode(0x0F, 0xB1);
7696   ins_encode(lock_prefix,
7697              REX_reg_mem(newval, mem_ptr),
7698              OpcP, OpcS,
7699              reg_mem(newval, mem_ptr),
7700              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7701              REX_reg_breg(res, res), // movzbl
7702              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7703   ins_pipe( pipe_cmpxchg );
7704 %}
7705 
7706 //----------Subtraction Instructions-------------------------------------------
7707 
7708 // Integer Subtraction Instructions
// Int subtract, register -= register (destructive two-operand form). Kills flags.
7709 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7710 %{
7711   match(Set dst (SubI dst src));
7712   effect(KILL cr);
7713
7714   format %{ "subl    $dst, $src\t# int" %}
7715   opcode(0x2B);
7716   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7717   ins_pipe(ialu_reg_reg);
7718 %}
7719 
// Int subtract, register -= immediate. Kills flags.
// The immediate is emitted through Con8or32, i.e. as an 8- or 32-bit constant.
7720 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7721 %{
7722   match(Set dst (SubI dst src));
7723   effect(KILL cr);
7724
7725   format %{ "subl    $dst, $src\t# int" %}
7726   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7727   ins_encode(OpcSErm(dst, src), Con8or32(src));
7728   ins_pipe(ialu_reg);
7729 %}
7730 
// Int subtract, register -= value loaded from memory (LoadI folded into the
// subtract). Kills flags.
7731 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7732 %{
7733   match(Set dst (SubI dst (LoadI src)));
7734   effect(KILL cr);
7735
7736   ins_cost(125);
7737   format %{ "subl    $dst, $src\t# int" %}
7738   opcode(0x2B);
7739   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7740   ins_pipe(ialu_reg_mem);
7741 %}
7742 
7743 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7744 %{
       // Read-modify-write 32-bit subtract: matches a StoreI back to dst of
       // (LoadI dst) - src, emitted as a single memory-destination subl.
7745   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7746   effect(KILL cr);
7747 
7748   ins_cost(150);
7749   format %{ "subl    $dst, $src\t# int" %}
7750   opcode(0x29); /* Opcode 29 /r */
       // The register operand goes in the ModRM reg field, memory in r/m.
7751   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7752   ins_pipe(ialu_mem_reg);
7753 %}
7754 
7755 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7756 %{
       // Read-modify-write 32-bit subtract of an immediate from memory:
       // matches a StoreI back to dst of (LoadI dst) - src.  Flags are killed.
7757   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7758   effect(KILL cr);
7759 
7760   ins_cost(125); // XXX
7761   format %{ "subl    $dst, $src\t# int" %}
7762   opcode(0x81); /* Opcode 81 /5 id */
       // The /5 opcode extension is passed literally (0x05) to RM_opc_mem.
7763   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7764   ins_pipe(ialu_mem_imm);
7765 %}
7766 
7767 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7768 %{
       // 64-bit register-register subtract: dst = dst - src.  Same opcode as
       // the int form but with the wide REX prefix.  Flags are killed.
7769   match(Set dst (SubL dst src));
7770   effect(KILL cr);
7771 
7772   format %{ "subq    $dst, $src\t# long" %}
7773   opcode(0x2B);
7774   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7775   ins_pipe(ialu_reg_reg);
7776 %}
7777 
7778 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7779 %{
       // 64-bit subtract of a 32-bit-representable long immediate (immL32) from
       // a register.  dst is a long register (rRegL): the rule matches SubL and
       // emits the wide (subq) encoding, like the other long subtract rules.
       // Flags are killed.
7780   match(Set dst (SubL dst src));
7781   effect(KILL cr);
7782 
7783   format %{ "subq    $dst, $src\t# long" %}
7784   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7785   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7786   ins_pipe(ialu_reg);
7787 %}
7788 
7789 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7790 %{
       // 64-bit subtract with the subtrahend loaded from memory: the LoadL is
       // folded into the instruction's memory operand.  Flags are killed.
7791   match(Set dst (SubL dst (LoadL src)));
7792   effect(KILL cr);
7793 
7794   ins_cost(125);
7795   format %{ "subq    $dst, $src\t# long" %}
7796   opcode(0x2B);
7797   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7798   ins_pipe(ialu_reg_mem);
7799 %}
7800 
7801 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7802 %{
       // Read-modify-write 64-bit subtract: matches a StoreL back to dst of
       // (LoadL dst) - src, emitted as a single memory-destination subq.
7803   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7804   effect(KILL cr);
7805 
7806   ins_cost(150);
7807   format %{ "subq    $dst, $src\t# long" %}
7808   opcode(0x29); /* Opcode 29 /r */
7809   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7810   ins_pipe(ialu_mem_reg);
7811 %}
7812 
7813 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7814 %{
       // Read-modify-write 64-bit subtract of an immL32 immediate from memory:
       // matches a StoreL back to dst of (LoadL dst) - src.  Flags are killed.
7815   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7816   effect(KILL cr);
7817 
7818   ins_cost(125); // XXX
7819   format %{ "subq    $dst, $src\t# long" %}
7820   opcode(0x81); /* Opcode 81 /5 id */
7821   ins_encode(REX_mem_wide(dst),
7822              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
7823   ins_pipe(ialu_mem_imm);
7824 %}
7825 
7826 // Subtract from a pointer
7827 // XXX hmpf???
7828 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7829 %{
       // Pointer minus int: matches AddP of dst and a negated int (0 - src)
       // and emits one 64-bit subq instead of a negate followed by an add.
       // NOTE(review): src is an int register (rRegI) but the encoding is the
       // wide form, so all 64 bits of src take part -- confirm the upper half
       // is guaranteed to be well-defined wherever this rule can match.
7830   match(Set dst (AddP dst (SubI zero src)));
7831   effect(KILL cr);
7832 
7833   format %{ "subq    $dst, $src\t# ptr - int" %}
7834   opcode(0x2B);
7835   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7836   ins_pipe(ialu_reg_reg);
7837 %}
7838 
7839 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
7840 %{
       // 32-bit negate in place: matches (0 - dst) and emits negl.
       // Flags are killed.
7841   match(Set dst (SubI zero dst));
7842   effect(KILL cr);
7843 
7844   format %{ "negl    $dst\t# int" %}
7845   opcode(0xF7, 0x03);  // Opcode F7 /3
7846   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7847   ins_pipe(ialu_reg);
7848 %}
7849 
7850 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
7851 %{
       // Read-modify-write 32-bit negate: matches a StoreI back to dst of
       // (0 - LoadI dst) and emits a memory-operand negl.  Flags are killed.
7852   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
7853   effect(KILL cr);
7854 
7855   format %{ "negl    $dst\t# int" %}
7856   opcode(0xF7, 0x03);  // Opcode F7 /3
7857   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
       // NOTE(review): this memory-destination form uses the register pipe
       // class, while the memory shift rules in this file use ialu_mem_imm --
       // confirm ialu_reg is intended here.
7858   ins_pipe(ialu_reg);
7859 %}
7860 
7861 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
7862 %{
       // 64-bit negate in place: matches (0L - dst) and emits negq.
       // Flags are killed.
7863   match(Set dst (SubL zero dst));
7864   effect(KILL cr);
7865 
7866   format %{ "negq    $dst\t# long" %}
7867   opcode(0xF7, 0x03);  // Opcode F7 /3
7868   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7869   ins_pipe(ialu_reg);
7870 %}
7871 
7872 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
7873 %{
       // Read-modify-write 64-bit negate: matches a StoreL back to dst of
       // (0L - LoadL dst) and emits a memory-operand negq.  Flags are killed.
7874   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
7875   effect(KILL cr);
7876 
7877   format %{ "negq    $dst\t# long" %}
7878   opcode(0xF7, 0x03);  // Opcode F7 /3
7879   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
       // NOTE(review): this memory-destination form uses the register pipe
       // class, while the memory shift rules in this file use ialu_mem_imm --
       // confirm ialu_reg is intended here.
7880   ins_pipe(ialu_reg);
7881 %}
7882 
7883 
7884 //----------Multiplication/Division Instructions-------------------------------
7885 // Integer Multiplication Instructions
7886 // Multiply Register
7887 
7888 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7889 %{
       // 32-bit register-register multiply: dst = dst * src using the
       // two-operand imull (0F AF).  Flags are killed.
7890   match(Set dst (MulI dst src));
7891   effect(KILL cr);
7892 
7893   ins_cost(300);
7894   format %{ "imull   $dst, $src\t# int" %}
7895   opcode(0x0F, 0xAF);
7896   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
7897   ins_pipe(ialu_reg_reg_alu0);
7898 %}
7899 
7900 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
7901 %{
       // 32-bit multiply by an immediate using the three-operand imull:
       // dst = src * imm, so dst does not have to equal src.  Flags are killed.
7902   match(Set dst (MulI src imm));
7903   effect(KILL cr);
7904 
7905   ins_cost(300);
7906   format %{ "imull   $dst, $src, $imm\t# int" %}
7907   opcode(0x69); /* 69 /r id */
7908   ins_encode(REX_reg_reg(dst, src),
7909              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7910   ins_pipe(ialu_reg_reg_alu0);
7911 %}
7912 
7913 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
7914 %{
       // 32-bit multiply with the second factor loaded from memory: the LoadI
       // is folded into the imull memory operand.  Flags are killed.
7915   match(Set dst (MulI dst (LoadI src)));
7916   effect(KILL cr);
7917 
7918   ins_cost(350);
7919   format %{ "imull   $dst, $src\t# int" %}
7920   opcode(0x0F, 0xAF);
7921   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
7922   ins_pipe(ialu_reg_mem_alu0);
7923 %}
7924 
7925 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
7926 %{
       // 32-bit multiply of a memory operand by an immediate using the
       // three-operand imull: dst = [src] * imm.  Flags are killed.
7927   match(Set dst (MulI (LoadI src) imm));
7928   effect(KILL cr);
7929 
7930   ins_cost(300);
7931   format %{ "imull   $dst, $src, $imm\t# int" %}
7932   opcode(0x69); /* 69 /r id */
7933   ins_encode(REX_reg_mem(dst, src),
7934              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7935   ins_pipe(ialu_reg_mem_alu0);
7936 %}
7937 
7938 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7939 %{
       // 64-bit register-register multiply: dst = dst * src using the
       // two-operand imulq (wide REX + 0F AF).  Flags are killed.
7940   match(Set dst (MulL dst src));
7941   effect(KILL cr);
7942 
7943   ins_cost(300);
7944   format %{ "imulq   $dst, $src\t# long" %}
7945   opcode(0x0F, 0xAF);
7946   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
7947   ins_pipe(ialu_reg_reg_alu0);
7948 %}
7949 
7950 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
7951 %{
       // 64-bit multiply by an immL32 immediate using the three-operand imulq:
       // dst = src * imm.  Flags are killed.
7952   match(Set dst (MulL src imm));
7953   effect(KILL cr);
7954 
7955   ins_cost(300);
7956   format %{ "imulq   $dst, $src, $imm\t# long" %}
7957   opcode(0x69); /* 69 /r id */
7958   ins_encode(REX_reg_reg_wide(dst, src),
7959              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
7960   ins_pipe(ialu_reg_reg_alu0);
7961 %}
7962 
7963 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
7964 %{
       // 64-bit multiply with the second factor loaded from memory: the LoadL
       // is folded into the imulq memory operand.  Flags are killed.
7965   match(Set dst (MulL dst (LoadL src)));
7966   effect(KILL cr);
7967 
7968   ins_cost(350);
7969   format %{ "imulq   $dst, $src\t# long" %}
7970   opcode(0x0F, 0xAF);
7971   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
7972   ins_pipe(ialu_reg_mem_alu0);
7973 %}
7974 
7975 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
7976 %{
       // 64-bit multiply of a memory operand by an immL32 immediate using the
       // three-operand imulq: dst = [src] * imm.  Flags are killed.
7977   match(Set dst (MulL (LoadL src) imm));
7978   effect(KILL cr);
7979 
7980   ins_cost(300);
7981   format %{ "imulq   $dst, $src, $imm\t# long" %}
7982   opcode(0x69); /* 69 /r id */
7983   ins_encode(REX_reg_mem_wide(dst, src),
7984              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
7985   ins_pipe(ialu_reg_mem_alu0);
7986 %}
7987 
7988 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
7989 %{
       // High half of a 64x64 signed multiply (MulHiL).  Uses the one-operand
       // imulq (F7 /5), which per the format below leaves the product in
       // RDX:RAX; the result operand is therefore pinned to rdx, one input is
       // pinned to rax (consumed and clobbered: USE_KILL), and src is drawn
       // from a register class that excludes rax.
7990   match(Set dst (MulHiL src rax));
7991   effect(USE_KILL rax, KILL cr);
7992 
7993   ins_cost(300);
7994   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
7995   opcode(0xF7, 0x5); /* Opcode F7 /5 */
7996   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
7997   ins_pipe(ialu_reg_reg_alu0);
7998 %}
7999 
8000 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8001                    rFlagsReg cr)
8002 %{
       // 32-bit signed divide: quotient is produced in rax, rdx is clobbered.
       // The divisor comes from a register class excluding rax and rdx.
8003   match(Set rax (DivI rax div));
8004   effect(KILL rdx, KILL cr);
8005 
8006   ins_cost(30*100+10*100); // XXX
       // The listing below special-cases rax == 0x80000000 with div == -1:
       // rdx is zeroed and the idivl is skipped, leaving rax unchanged.
       // Otherwise rax is sign-extended into rdx (cdql) before idivl.
8007   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8008             "jne,s   normal\n\t"
8009             "xorl    rdx, rdx\n\t"
8010             "cmpl    $div, -1\n\t"
8011             "je,s    done\n"
8012     "normal: cdql\n\t"
8013             "idivl   $div\n"
8014     "done:"        %}
8015   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // cdql_enc presumably emits the guard sequence and cdql shown above
       // (its definition is elsewhere in this file); the remaining pieces
       // encode the idivl itself.
8016   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8017   ins_pipe(ialu_reg_reg_alu0);
8018 %}
8019 
8020 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8021                    rFlagsReg cr)
8022 %{
       // 64-bit signed divide: quotient is produced in rax, rdx is clobbered.
       // The divisor comes from a register class excluding rax and rdx.
8023   match(Set rax (DivL rax div));
8024   effect(KILL rdx, KILL cr);
8025 
8026   ins_cost(30*100+10*100); // XXX
       // The listing below special-cases rax == 0x8000000000000000 with
       // div == -1: rdx is zeroed and the idivq is skipped, leaving rax
       // unchanged.  Otherwise rax is sign-extended into rdx before idivq.
8027   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8028             "cmpq    rax, rdx\n\t"
8029             "jne,s   normal\n\t"
8030             "xorl    rdx, rdx\n\t"
8031             "cmpq    $div, -1\n\t"
8032             "je,s    done\n"
8033     "normal: cdqq\n\t"
8034             "idivq   $div\n"
8035     "done:"        %}
8036   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // cdqq_enc presumably emits the guard sequence and sign extension shown
       // above (defined elsewhere); the rest encodes the idivq itself.
8037   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8038   ins_pipe(ialu_reg_reg_alu0);
8039 %}
8040 
8041 // Integer DIVMOD with Register, both quotient and mod results
8042 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8043                              rFlagsReg cr)
8044 %{
       // Combined 32-bit divide/remainder (DivModI).  Same instruction
       // sequence as divI_rReg, but neither rax nor rdx is declared killed
       // here: only the flags are.
8045   match(DivModI rax div);
8046   effect(KILL cr);
8047 
8048   ins_cost(30*100+10*100); // XXX
8049   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8050             "jne,s   normal\n\t"
8051             "xorl    rdx, rdx\n\t"
8052             "cmpl    $div, -1\n\t"
8053             "je,s    done\n"
8054     "normal: cdql\n\t"
8055             "idivl   $div\n"
8056     "done:"        %}
8057   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8058   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8059   ins_pipe(pipe_slow);
8060 %}
8061 
8062 // Long DIVMOD with Register, both quotient and mod results
8063 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8064                              rFlagsReg cr)
8065 %{
       // Combined 64-bit divide/remainder (DivModL).  Same instruction
       // sequence as divL_rReg, but neither rax nor rdx is declared killed
       // here: only the flags are.
8066   match(DivModL rax div);
8067   effect(KILL cr);
8068 
8069   ins_cost(30*100+10*100); // XXX
8070   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8071             "cmpq    rax, rdx\n\t"
8072             "jne,s   normal\n\t"
8073             "xorl    rdx, rdx\n\t"
8074             "cmpq    $div, -1\n\t"
8075             "je,s    done\n"
8076     "normal: cdqq\n\t"
8077             "idivq   $div\n"
8078     "done:"        %}
8079   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8080   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8081   ins_pipe(pipe_slow);
8082 %}
8083 
8084 //----------- DivL-By-Constant-Expansions--------------------------------------
8085 // DivI cases are handled by the compiler
8086 
8087 // Magic constant, reciprocal of 10
8088 instruct loadConL_0x6666666666666667(rRegL dst)
8089 %{
       // Loads the 64-bit constant 0x6666666666666667 into dst.  There is no
       // match rule; the instruction is only instantiated from the divL_10
       // expansion.  The format text prints the same 16-digit constant that
       // load_immL actually emits.
8090   effect(DEF dst);
8091 
8092   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8093   ins_encode(load_immL(dst, 0x6666666666666667));
8094   ins_pipe(ialu_reg);
8095 %}
8096 
8097 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8098 %{
       // One-operand imulq (F7 /5) of rax by src; per the format the product
       // lands in rdx:rax, and dst (pinned to rdx) is the defined result.
       // No match rule: instantiated only from the divL_10 expansion.
8099   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8100 
8101   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8102   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8103   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8104   ins_pipe(ialu_reg_reg_alu0);
8105 %}
8106 
8107 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8108 %{
       // In-place 64-bit arithmetic shift right by the fixed count 63 (0x3F).
       // No match rule: instantiated only from the divL_10 expansion.
8109   effect(USE_DEF dst, KILL cr);
8110 
8111   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8112   opcode(0xC1, 0x7); /* C1 /7 ib */
8113   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8114   ins_pipe(ialu_reg);
8115 %}
8116 
8117 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8118 %{
       // In-place 64-bit arithmetic shift right by the fixed count 2.
       // No match rule: instantiated only from the divL_10 expansion.
8119   effect(USE_DEF dst, KILL cr);
8120 
8121   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8122   opcode(0xC1, 0x7); /* C1 /7 ib */
8123   ins_encode(reg_opc_imm_wide(dst, 0x2));
8124   ins_pipe(ialu_reg);
8125 %}
8126 
8127 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8128 %{
       // Long division by the constant 10, expanded into a multiply-high by
       // 0x6666666666666667 plus shifts instead of an idivq:
       //   dst = (hi64(src * 0x6666666666666667) >> 2) - (src >> 63)
       // NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so src is
       // overwritten by the expansion although this rule declares no effect
       // on src -- confirm this is safe for the register allocator.
8129   match(Set dst (DivL src div));
8130 
8131   ins_cost((5+8)*100);
8132   expand %{
8133     rax_RegL rax;                     // Killed temp
8134     rFlagsReg cr;                     // Killed
8135     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8136     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8137     sarL_rReg_63(src, cr);            // sarq  src, 63
8138     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8139     subL_rReg(dst, src, cr);          // subq  rdx, src
8140   %}
8141 %}
8142 
8143 //-----------------------------------------------------------------------------
8144 
8145 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8146                    rFlagsReg cr)
8147 %{
       // 32-bit signed remainder: same instruction sequence as divI_rReg, but
       // the result is taken from rdx and rax is the clobbered register.
8148   match(Set rdx (ModI rax div));
8149   effect(KILL rax, KILL cr);
8150 
8151   ins_cost(300); // XXX
       // In the rax == 0x80000000, div == -1 case the listing zeroes rdx and
       // skips the idivl, so the remainder produced is 0.
8152   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8153             "jne,s   normal\n\t"
8154             "xorl    rdx, rdx\n\t"
8155             "cmpl    $div, -1\n\t"
8156             "je,s    done\n"
8157     "normal: cdql\n\t"
8158             "idivl   $div\n"
8159     "done:"        %}
8160   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8161   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8162   ins_pipe(ialu_reg_reg_alu0);
8163 %}
8164 
8165 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8166                    rFlagsReg cr)
8167 %{
       // 64-bit signed remainder: same instruction sequence as divL_rReg, but
       // the result is taken from rdx and rax is the clobbered register.
8168   match(Set rdx (ModL rax div));
8169   effect(KILL rax, KILL cr);
8170 
8171   ins_cost(300); // XXX
       // In the rax == 0x8000000000000000, div == -1 case the listing zeroes
       // rdx and skips the idivq, so the remainder produced is 0.
8172   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8173             "cmpq    rax, rdx\n\t"
8174             "jne,s   normal\n\t"
8175             "xorl    rdx, rdx\n\t"
8176             "cmpq    $div, -1\n\t"
8177             "je,s    done\n"
8178     "normal: cdqq\n\t"
8179             "idivq   $div\n"
8180     "done:"        %}
8181   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8182   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8183   ins_pipe(ialu_reg_reg_alu0);
8184 %}
8185 
8186 // Integer Shift Instructions
8187 // Shift Left by one
8188 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8189 %{
       // 32-bit left shift of a register by the constant 1, using the
       // shift-by-one opcode (D1 /4), which carries no immediate byte.
8190   match(Set dst (LShiftI dst shift));
8191   effect(KILL cr);
8192 
8193   format %{ "sall    $dst, $shift" %}
8194   opcode(0xD1, 0x4); /* D1 /4 */
8195   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8196   ins_pipe(ialu_reg);
8197 %}
8198 
8199 // Shift Left by one
8200 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8201 %{
       // Read-modify-write 32-bit left shift of a memory operand by 1:
       // matches a StoreI back to dst of (LoadI dst) << 1.  Flags are killed.
8202   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8203   effect(KILL cr);
8204 
8205   format %{ "sall    $dst, $shift\t" %}
8206   opcode(0xD1, 0x4); /* D1 /4 */
8207   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8208   ins_pipe(ialu_mem_imm);
8209 %}
8210 
8211 // Shift Left by 8-bit immediate
8212 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8213 %{
       // 32-bit left shift of a register by an 8-bit immediate count
       // (C1 /4 ib).  Flags are killed.
8214   match(Set dst (LShiftI dst shift));
8215   effect(KILL cr);
8216 
8217   format %{ "sall    $dst, $shift" %}
8218   opcode(0xC1, 0x4); /* C1 /4 ib */
8219   ins_encode(reg_opc_imm(dst, shift));
8220   ins_pipe(ialu_reg);
8221 %}
8222 
8223 // Shift Left by 8-bit immediate
8224 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8225 %{
       // Read-modify-write 32-bit left shift of a memory operand by an 8-bit
       // immediate count: StoreI back to dst of (LoadI dst) << shift.
8226   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8227   effect(KILL cr);
8228 
8229   format %{ "sall    $dst, $shift" %}
8230   opcode(0xC1, 0x4); /* C1 /4 ib */
8231   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8232   ins_pipe(ialu_mem_imm);
8233 %}
8234 
8235 // Shift Left by variable
8236 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8237 %{
       // 32-bit left shift of a register by a variable count.  The count is
       // pinned to rcx (rcx_RegI) because the D3 /4 form takes it implicitly;
       // only dst appears in the encoding.
8238   match(Set dst (LShiftI dst shift));
8239   effect(KILL cr);
8240 
8241   format %{ "sall    $dst, $shift" %}
8242   opcode(0xD3, 0x4); /* D3 /4 */
8243   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8244   ins_pipe(ialu_reg_reg);
8245 %}
8246 
8247 // Shift Left by variable
8248 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8249 %{
       // Read-modify-write 32-bit left shift of a memory operand by a variable
       // count held in rcx: StoreI back to dst of (LoadI dst) << shift.
8250   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8251   effect(KILL cr);
8252 
8253   format %{ "sall    $dst, $shift" %}
8254   opcode(0xD3, 0x4); /* D3 /4 */
8255   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8256   ins_pipe(ialu_mem_reg);
8257 %}
8258 
8259 // Arithmetic shift right by one
8260 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8261 %{
       // 32-bit arithmetic right shift of a register by the constant 1, using
       // the shift-by-one opcode (D1 /7).  Flags are killed.
8262   match(Set dst (RShiftI dst shift));
8263   effect(KILL cr);
8264 
8265   format %{ "sarl    $dst, $shift" %}
8266   opcode(0xD1, 0x7); /* D1 /7 */
8267   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8268   ins_pipe(ialu_reg);
8269 %}
8270 
8271 // Arithmetic shift right by one
8272 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8273 %{
       // Read-modify-write 32-bit arithmetic right shift of a memory operand
       // by 1: StoreI back to dst of (LoadI dst) >> 1.  Flags are killed.
8274   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8275   effect(KILL cr);
8276 
8277   format %{ "sarl    $dst, $shift" %}
8278   opcode(0xD1, 0x7); /* D1 /7 */
8279   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8280   ins_pipe(ialu_mem_imm);
8281 %}
8282 
8283 // Arithmetic Shift Right by 8-bit immediate
8284 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8285 %{
       // 32-bit arithmetic right shift of a register by an 8-bit immediate
       // count (C1 /7 ib).  Flags are killed.  This is a register-only form,
       // so it uses the register pipe class like salI_rReg_imm and
       // shrI_rReg_imm.
8286   match(Set dst (RShiftI dst shift));
8287   effect(KILL cr);
8288 
8289   format %{ "sarl    $dst, $shift" %}
8290   opcode(0xC1, 0x7); /* C1 /7 ib */
8291   ins_encode(reg_opc_imm(dst, shift));
8292   ins_pipe(ialu_reg);
8293 %}
8294 
8295 // Arithmetic Shift Right by 8-bit immediate
8296 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8297 %{
       // Read-modify-write 32-bit arithmetic right shift of a memory operand
       // by an 8-bit immediate: StoreI back to dst of (LoadI dst) >> shift.
8298   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8299   effect(KILL cr);
8300 
8301   format %{ "sarl    $dst, $shift" %}
8302   opcode(0xC1, 0x7); /* C1 /7 ib */
8303   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8304   ins_pipe(ialu_mem_imm);
8305 %}
8306 
8307 // Arithmetic Shift Right by variable
8308 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8309 %{
       // 32-bit arithmetic right shift of a register by a variable count held
       // in rcx (D3 /7); only dst appears in the encoding.  Flags are killed.
8310   match(Set dst (RShiftI dst shift));
8311   effect(KILL cr);
8312 
8313   format %{ "sarl    $dst, $shift" %}
8314   opcode(0xD3, 0x7); /* D3 /7 */
8315   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8316   ins_pipe(ialu_reg_reg);
8317 %}
8318 
8319 // Arithmetic Shift Right by variable
8320 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8321 %{
       // Read-modify-write 32-bit arithmetic right shift of a memory operand
       // by a variable count in rcx: StoreI of (LoadI dst) >> shift.
8322   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8323   effect(KILL cr);
8324 
8325   format %{ "sarl    $dst, $shift" %}
8326   opcode(0xD3, 0x7); /* D3 /7 */
8327   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8328   ins_pipe(ialu_mem_reg);
8329 %}
8330 
8331 // Logical shift right by one
8332 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8333 %{
       // 32-bit logical (unsigned) right shift of a register by the constant
       // 1, using the shift-by-one opcode (D1 /5).  Flags are killed.
8334   match(Set dst (URShiftI dst shift));
8335   effect(KILL cr);
8336 
8337   format %{ "shrl    $dst, $shift" %}
8338   opcode(0xD1, 0x5); /* D1 /5 */
8339   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8340   ins_pipe(ialu_reg);
8341 %}
8342 
8343 // Logical shift right by one
8344 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8345 %{
       // Read-modify-write 32-bit logical right shift of a memory operand by
       // 1: StoreI back to dst of (LoadI dst) >>> 1.  Flags are killed.
8346   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8347   effect(KILL cr);
8348 
8349   format %{ "shrl    $dst, $shift" %}
8350   opcode(0xD1, 0x5); /* D1 /5 */
8351   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8352   ins_pipe(ialu_mem_imm);
8353 %}
8354 
8355 // Logical Shift Right by 8-bit immediate
8356 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8357 %{
       // 32-bit logical right shift of a register by an 8-bit immediate count
       // (C1 /5 ib).  Flags are killed.
8358   match(Set dst (URShiftI dst shift));
8359   effect(KILL cr);
8360 
8361   format %{ "shrl    $dst, $shift" %}
8362   opcode(0xC1, 0x5); /* C1 /5 ib */
8363   ins_encode(reg_opc_imm(dst, shift));
8364   ins_pipe(ialu_reg);
8365 %}
8366 
8367 // Logical Shift Right by 8-bit immediate
8368 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8369 %{
       // Read-modify-write 32-bit logical right shift of a memory operand by
       // an 8-bit immediate: StoreI back to dst of (LoadI dst) >>> shift.
8370   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8371   effect(KILL cr);
8372 
8373   format %{ "shrl    $dst, $shift" %}
8374   opcode(0xC1, 0x5); /* C1 /5 ib */
8375   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8376   ins_pipe(ialu_mem_imm);
8377 %}
8378 
8379 // Logical Shift Right by variable
8380 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8381 %{
       // 32-bit logical right shift of a register by a variable count held in
       // rcx (D3 /5); only dst appears in the encoding.  Flags are killed.
8382   match(Set dst (URShiftI dst shift));
8383   effect(KILL cr);
8384 
8385   format %{ "shrl    $dst, $shift" %}
8386   opcode(0xD3, 0x5); /* D3 /5 */
8387   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8388   ins_pipe(ialu_reg_reg);
8389 %}
8390 
8391 // Logical Shift Right by variable
8392 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8393 %{
       // Read-modify-write 32-bit logical right shift of a memory operand by
       // a variable count in rcx: StoreI of (LoadI dst) >>> shift.
8394   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8395   effect(KILL cr);
8396 
8397   format %{ "shrl    $dst, $shift" %}
8398   opcode(0xD3, 0x5); /* D3 /5 */
8399   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8400   ins_pipe(ialu_mem_reg);
8401 %}
8402 
8403 // Long Shift Instructions
8404 // Shift Left by one
8405 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8406 %{
       // 64-bit left shift of a register by the constant 1 (wide REX +
       // D1 /4).  The shift count operand is an int even for long shifts.
8407   match(Set dst (LShiftL dst shift));
8408   effect(KILL cr);
8409 
8410   format %{ "salq    $dst, $shift" %}
8411   opcode(0xD1, 0x4); /* D1 /4 */
8412   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8413   ins_pipe(ialu_reg);
8414 %}
8415 
8416 // Shift Left by one
8417 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8418 %{
       // Read-modify-write 64-bit left shift of a memory operand by 1:
       // StoreL back to dst of (LoadL dst) << 1.  Flags are killed.
8419   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8420   effect(KILL cr);
8421 
8422   format %{ "salq    $dst, $shift" %}
8423   opcode(0xD1, 0x4); /* D1 /4 */
8424   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8425   ins_pipe(ialu_mem_imm);
8426 %}
8427 
8428 // Shift Left by 8-bit immediate
8429 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8430 %{
       // 64-bit left shift of a register by an 8-bit immediate count
       // (C1 /4 ib, wide form).  Flags are killed.
8431   match(Set dst (LShiftL dst shift));
8432   effect(KILL cr);
8433 
8434   format %{ "salq    $dst, $shift" %}
8435   opcode(0xC1, 0x4); /* C1 /4 ib */
8436   ins_encode(reg_opc_imm_wide(dst, shift));
8437   ins_pipe(ialu_reg);
8438 %}
8439 
8440 // Shift Left by 8-bit immediate
8441 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8442 %{
       // Read-modify-write 64-bit left shift of a memory operand by an 8-bit
       // immediate: StoreL back to dst of (LoadL dst) << shift.
8443   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8444   effect(KILL cr);
8445 
8446   format %{ "salq    $dst, $shift" %}
8447   opcode(0xC1, 0x4); /* C1 /4 ib */
8448   ins_encode(REX_mem_wide(dst), OpcP,
8449              RM_opc_mem(secondary, dst), Con8or32(shift));
8450   ins_pipe(ialu_mem_imm);
8451 %}
8452 
8453 // Shift Left by variable
8454 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8455 %{
       // 64-bit left shift of a register by a variable count held in rcx
       // (wide REX + D3 /4); only dst appears in the encoding.
8456   match(Set dst (LShiftL dst shift));
8457   effect(KILL cr);
8458 
8459   format %{ "salq    $dst, $shift" %}
8460   opcode(0xD3, 0x4); /* D3 /4 */
8461   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8462   ins_pipe(ialu_reg_reg);
8463 %}
8464 
8465 // Shift Left by variable
8466 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8467 %{
       // Read-modify-write 64-bit left shift of a memory operand by a variable
       // count in rcx: StoreL back to dst of (LoadL dst) << shift.
8468   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8469   effect(KILL cr);
8470 
8471   format %{ "salq    $dst, $shift" %}
8472   opcode(0xD3, 0x4); /* D3 /4 */
8473   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8474   ins_pipe(ialu_mem_reg);
8475 %}
8476 
8477 // Arithmetic shift right by one
8478 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8479 %{
       // 64-bit arithmetic right shift of a register by the constant 1
       // (wide REX + D1 /7).  Flags are killed.
8480   match(Set dst (RShiftL dst shift));
8481   effect(KILL cr);
8482 
8483   format %{ "sarq    $dst, $shift" %}
8484   opcode(0xD1, 0x7); /* D1 /7 */
8485   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8486   ins_pipe(ialu_reg);
8487 %}
8488 
8489 // Arithmetic shift right by one
8490 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8491 %{
       // Read-modify-write 64-bit arithmetic right shift of a memory operand
       // by 1: StoreL back to dst of (LoadL dst) >> 1.  Flags are killed.
8492   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8493   effect(KILL cr);
8494 
8495   format %{ "sarq    $dst, $shift" %}
8496   opcode(0xD1, 0x7); /* D1 /7 */
8497   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8498   ins_pipe(ialu_mem_imm);
8499 %}
8500 
8501 // Arithmetic Shift Right by 8-bit immediate
8502 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8503 %{
       // 64-bit arithmetic right shift of a register by an 8-bit immediate
       // count (C1 /7 ib, wide form).  Flags are killed.  This is a
       // register-only form, so it uses the register pipe class like
       // salL_rReg_imm and shrL_rReg_imm.
8504   match(Set dst (RShiftL dst shift));
8505   effect(KILL cr);
8506 
8507   format %{ "sarq    $dst, $shift" %}
8508   opcode(0xC1, 0x7); /* C1 /7 ib */
8509   ins_encode(reg_opc_imm_wide(dst, shift));
8510   ins_pipe(ialu_reg);
8511 %}
8512 
8513 // Arithmetic Shift Right by 8-bit immediate
8514 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8515 %{
       // Read-modify-write 64-bit arithmetic right shift of a memory operand
       // by an 8-bit immediate: StoreL back to dst of (LoadL dst) >> shift.
8516   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8517   effect(KILL cr);
8518 
8519   format %{ "sarq    $dst, $shift" %}
8520   opcode(0xC1, 0x7); /* C1 /7 ib */
8521   ins_encode(REX_mem_wide(dst), OpcP,
8522              RM_opc_mem(secondary, dst), Con8or32(shift));
8523   ins_pipe(ialu_mem_imm);
8524 %}
8525 
8526 // Arithmetic Shift Right by variable
8527 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8528 %{
       // 64-bit arithmetic right shift of a register by a variable count held
       // in rcx (wide REX + D3 /7); only dst appears in the encoding.
8529   match(Set dst (RShiftL dst shift));
8530   effect(KILL cr);
8531 
8532   format %{ "sarq    $dst, $shift" %}
8533   opcode(0xD3, 0x7); /* D3 /7 */
8534   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8535   ins_pipe(ialu_reg_reg);
8536 %}
8537 
8538 // Arithmetic Shift Right by variable
8539 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8540 %{
       // Read-modify-write 64-bit arithmetic right shift of a memory operand
       // by a variable count in rcx: StoreL of (LoadL dst) >> shift.
8541   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8542   effect(KILL cr);
8543 
8544   format %{ "sarq    $dst, $shift" %}
8545   opcode(0xD3, 0x7); /* D3 /7 */
8546   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8547   ins_pipe(ialu_mem_reg);
8548 %}
8549 
8550 // Logical shift right by one
8551 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8552 %{
       // 64-bit logical (unsigned) right shift of a register by the constant
       // 1 (wide REX + D1 /5).  Flags are killed.
8553   match(Set dst (URShiftL dst shift));
8554   effect(KILL cr);
8555 
8556   format %{ "shrq    $dst, $shift" %}
8557   opcode(0xD1, 0x5); /* D1 /5 */
8558   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8559   ins_pipe(ialu_reg);
8560 %}
8561 
8562 // Logical shift right by one
8563 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8564 %{
       // Read-modify-write 64-bit logical right shift of a memory operand by
       // 1: StoreL back to dst of (LoadL dst) >>> 1.  Flags are killed.
8565   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8566   effect(KILL cr);
8567 
8568   format %{ "shrq    $dst, $shift" %}
8569   opcode(0xD1, 0x5); /* D1 /5 */
8570   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8571   ins_pipe(ialu_mem_imm);
8572 %}
8573 
8574 // Logical Shift Right by 8-bit immediate
8575 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8576 %{
       // 64-bit logical right shift of a register by an 8-bit immediate count
       // (C1 /5 ib, wide form).  Flags are killed.
8577   match(Set dst (URShiftL dst shift));
8578   effect(KILL cr);
8579 
8580   format %{ "shrq    $dst, $shift" %}
8581   opcode(0xC1, 0x5); /* C1 /5 ib */
8582   ins_encode(reg_opc_imm_wide(dst, shift));
8583   ins_pipe(ialu_reg);
8584 %}
8585 
8586 
8587 // Logical Shift Right by 8-bit immediate
8588 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8589 %{
       // Read-modify-write 64-bit logical right shift of a memory operand by
       // an 8-bit immediate: StoreL back to dst of (LoadL dst) >>> shift.
8590   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8591   effect(KILL cr);
8592 
8593   format %{ "shrq    $dst, $shift" %}
8594   opcode(0xC1, 0x5); /* C1 /5 ib */
8595   ins_encode(REX_mem_wide(dst), OpcP,
8596              RM_opc_mem(secondary, dst), Con8or32(shift));
8597   ins_pipe(ialu_mem_imm);
8598 %}
8599 
8600 // Logical Shift Right by variable
8601 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8602 %{
       // 64-bit logical right shift of a register by a variable count held in
       // rcx (wide REX + D3 /5); only dst appears in the encoding.
8603   match(Set dst (URShiftL dst shift));
8604   effect(KILL cr);
8605 
8606   format %{ "shrq    $dst, $shift" %}
8607   opcode(0xD3, 0x5); /* D3 /5 */
8608   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8609   ins_pipe(ialu_reg_reg);
8610 %}
8611 
8612 // Logical Shift Right by variable
8613 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8614 %{
       // Read-modify-write 64-bit logical right shift of a memory operand by
       // a variable count in rcx: StoreL of (LoadL dst) >>> shift.
8615   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8616   effect(KILL cr);
8617 
8618   format %{ "shrq    $dst, $shift" %}
8619   opcode(0xD3, 0x5); /* D3 /5 */
8620   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8621   ins_pipe(ialu_mem_reg);
8622 %}
8623 
8624 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8625 // This idiom is used by the compiler for the i2b bytecode.
8626 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8627 %{
       // Int-to-byte narrowing: recognizes (src << 24) >> 24 (arithmetic
       // right shift) and replaces the pair with one sign-extending byte move.
       // No flags operand is declared for this rule.
8628   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8629 
8630   format %{ "movsbl  $dst, $src\t# i2b" %}
8631   opcode(0x0F, 0xBE);
       // Uses the byte-register REX helper (REX_reg_breg) since src is read
       // as an 8-bit register.
8632   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8633   ins_pipe(ialu_reg_reg);
8634 %}
8635 
8636 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8637 // This idiom is used by the compiler for the i2s bytecode.
8638 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8639 %{
       // Int-to-short narrowing: recognizes (src << 16) >> 16 (arithmetic
       // right shift) and replaces the pair with one sign-extending word move.
       // No flags operand is declared for this rule.
8640   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8641 
8642   format %{ "movswl  $dst, $src\t# i2s" %}
8643   opcode(0x0F, 0xBF);
8644   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8645   ins_pipe(ialu_reg_reg);
8646 %}
8647 
8648 // ROL/ROR instructions
8649 
8650 // ROL expand
8651 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // 32-bit rotate left by one, in place (D1 /0).  No match rule: this is
       // a building block instantiated from the rolI_rReg_i1 expansion.
8652   effect(KILL cr, USE_DEF dst);
8653 
8654   format %{ "roll    $dst" %}
8655   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8656   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8657   ins_pipe(ialu_reg);
8658 %}
8659 
8660 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // 32-bit rotate left by an 8-bit immediate, in place (C1 /0 ib).  No
       // match rule: instantiated from the rolI_rReg_i8 expansion.
8661   effect(USE_DEF dst, USE shift, KILL cr);
8662 
8663   format %{ "roll    $dst, $shift" %}
8664   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8665   ins_encode( reg_opc_imm(dst, shift) );
8666   ins_pipe(ialu_reg);
8667 %}
8668 
8669 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8670 %{
       // 32-bit rotate left by a variable count held in rcx (D3 /0).  dst is
       // drawn from a class excluding rcx so it cannot alias the count.  No
       // match rule: instantiated from the rolI_rReg_Var_* expansions.
8671   effect(USE_DEF dst, USE shift, KILL cr);
8672 
8673   format %{ "roll    $dst, $shift" %}
8674   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8675   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8676   ins_pipe(ialu_reg_reg);
8677 %}
8678 // end of ROL expand
8679 
8680 // Rotate Left by one
8681 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8682 %{
       // Recognizes the rotate-left-by-one idiom (dst << 1) | (dst >>> -1)
       // and expands it to a single roll.  The -1 right-shift count pairs
       // with 1 presumably because int shift counts are taken modulo 32.
8683   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8684 
8685   expand %{
8686     rolI_rReg_imm1(dst, cr);
8687   %}
8688 %}
8689 
8690 // Rotate Left by 8-bit immediate
8691 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8692 %{
       // Recognizes (dst << lshift) | (dst >>> rshift) as a rotate left by
       // lshift.  The predicate reads the two shift-count inputs of the OrI's
       // children and only accepts the pattern when they sum to a multiple of
       // 32, i.e. when the two shifts really form a 32-bit rotate.
8693   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8694   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8695 
8696   expand %{
8697     rolI_rReg_imm8(dst, lshift, cr);
8698   %}
8699 %}
8700 
8701 // Rotate Left by variable
8702 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8703 %{
       // Variable rotate left written as (dst << shift) | (dst >>> (0 - shift));
       // expands to a single roll by the count in rcx.
8704   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8705 
8706   expand %{
8707     rolI_rReg_CL(dst, shift, cr);
8708   %}
8709 %}
8710 
8711 // Rotate Left by variable
8712 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8713 %{
       // Variable rotate left written as (dst << shift) | (dst >>> (32 - shift));
       // expands to a single roll by the count in rcx.
8714   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8715 
8716   expand %{
8717     rolI_rReg_CL(dst, shift, cr);
8718   %}
8719 %}
8720 
8721 // ROR expand
// Encoding template for a 32-bit rotate-right by one (D1 /1). It has no match
// rule; rorI_rReg_i1 instantiates it through expand. dst is read and written
// and the flags register is killed.
8722 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8723 %{
8724   effect(USE_DEF dst, KILL cr);
8725 
8726   format %{ "rorl    $dst" %}
8727   opcode(0xD1, 0x1); /* D1 /1 */
8728   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8729   ins_pipe(ialu_reg);
8730 %}
8731 
// Encoding template for a 32-bit rotate-right by an 8-bit immediate (C1 /1 ib).
// No match rule; rorI_rReg_i8 instantiates it through expand.
8732 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8733 %{
8734   effect(USE_DEF dst, USE shift, KILL cr);
8735 
8736   format %{ "rorl    $dst, $shift" %}
8737   opcode(0xC1, 0x1); /* C1 /1 ib */
8738   ins_encode(reg_opc_imm(dst, shift));
8739   ins_pipe(ialu_reg);
8740 %}
8741 
// Encoding template for a 32-bit rotate-right by a register count (D3 /1).
// The count operand is an rcx_RegI and dst is a no_rcx_RegI. No match rule;
// rorI_rReg_Var_C0 and rorI_rReg_Var_C32 instantiate it through expand.
8742 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8743 %{
8744   effect(USE_DEF dst, USE shift, KILL cr);
8745 
8746   format %{ "rorl    $dst, $shift" %}
8747   opcode(0xD3, 0x1); /* D3 /1 */
8748   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8749   ins_pipe(ialu_reg_reg);
8750 %}
8751 // end of ROR expand
8752 
8753 // Rotate Right by one
// Matches (dst >>> rshift) | (dst << lshift) where rshift is an immI1 and lshift
// an immI_M1 operand, and expands to the rotate-by-one encoding rorI_rReg_imm1.
8754 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8755 %{
8756   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8757 
8758   expand %{
8759     rorI_rReg_imm1(dst, cr);
8760   %}
8761 %}
8762 
8763 // Rotate Right by 8-bit immediate
// Matches (dst >>> rshift) | (dst << lshift) for two 8-bit immediates. The
// predicate accepts the pair only when the low five bits of rshift + lshift are
// zero, i.e. the two shift counts add up to a multiple of 32. The rotate is
// then emitted with rshift as its count.
8764 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8765 %{
8766   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8767   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8768 
8769   expand %{
8770     rorI_rReg_imm8(dst, rshift, cr);
8771   %}
8772 %}
8773 
8774 // Rotate Right by variable
// Matches (dst >>> shift) | (dst << (zero - shift)) with a register count, where
// zero is an immI0 operand, and expands to rorI_rReg_CL.
8775 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8776 %{
8777   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8778 
8779   expand %{
8780     rorI_rReg_CL(dst, shift, cr);
8781   %}
8782 %}
8783 
8784 // Rotate Right by variable
// Same idiom as rorI_rReg_Var_C0 but with the left-shift count written as
// (c32 - shift), where c32 is an immI_32 operand; also expands to rorI_rReg_CL.
8785 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8786 %{
8787   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8788 
8789   expand %{
8790     rorI_rReg_CL(dst, shift, cr);
8791   %}
8792 %}
8793 
8794 // for long rotate
8795 // ROL expand
// Encoding template for a 64-bit rotate-left by one (D1 /0, using the
// REX_reg_wide prefix encoding). No match rule; rolL_rReg_i1 instantiates it
// through expand. dst is read and written and the flags register is killed.
8796 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
8797   effect(USE_DEF dst, KILL cr);
8798 
8799   format %{ "rolq    $dst" %}
8800   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8801   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8802   ins_pipe(ialu_reg);
8803 %}
8804 
// Encoding template for a 64-bit rotate-left by an 8-bit immediate (C1 /0 ib,
// via reg_opc_imm_wide). No match rule; rolL_rReg_i8 instantiates it through
// expand.
8805 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
8806   effect(USE_DEF dst, USE shift, KILL cr);
8807 
8808   format %{ "rolq    $dst, $shift" %}
8809   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8810   ins_encode( reg_opc_imm_wide(dst, shift) );
8811   ins_pipe(ialu_reg);
8812 %}
8813 
// Encoding template for a 64-bit rotate-left by a register count (D3 /0).
// The count is an int in an rcx_RegI operand; dst is a no_rcx_RegL. No match
// rule; rolL_rReg_Var_C0 and rolL_rReg_Var_C64 instantiate it through expand.
8814 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8815 %{
8816   effect(USE_DEF dst, USE shift, KILL cr);
8817 
8818   format %{ "rolq    $dst, $shift" %}
8819   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8820   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8821   ins_pipe(ialu_reg_reg);
8822 %}
8823 // end of ROL expand
8824 
8825 // Rotate Left by one
// Long form: matches (dst << lshift) | (dst >>> rshift) where lshift is an immI1
// and rshift an immI_M1 operand, and expands to rolL_rReg_imm1.
8826 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8827 %{
8828   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8829 
8830   expand %{
8831     rolL_rReg_imm1(dst, cr);
8832   %}
8833 %}
8834 
8835 // Rotate Left by 8-bit immediate
// Long form: matches (dst << lshift) | (dst >>> rshift) for two 8-bit
// immediates. The predicate accepts the pair only when the low six bits of
// lshift + rshift are zero, i.e. the counts add up to a multiple of 64. The
// rotate is then emitted with lshift as its count.
8836 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8837 %{
8838   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8839   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8840 
8841   expand %{
8842     rolL_rReg_imm8(dst, lshift, cr);
8843   %}
8844 %}
8845 
8846 // Rotate Left by variable
// Long form: matches (dst << shift) | (dst >>> (zero - shift)) with an int
// register count (the subtraction is a SubI), and expands to rolL_rReg_CL.
8847 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8848 %{
8849   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
8850 
8851   expand %{
8852     rolL_rReg_CL(dst, shift, cr);
8853   %}
8854 %}
8855 
8856 // Rotate Left by variable
// Same idiom as rolL_rReg_Var_C0 but with the right-shift count written as
// (c64 - shift), where c64 is an immI_64 operand; also expands to rolL_rReg_CL.
8857 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8858 %{
8859   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
8860 
8861   expand %{
8862     rolL_rReg_CL(dst, shift, cr);
8863   %}
8864 %}
8865 
8866 // ROR expand
// Encoding template for a 64-bit rotate-right by one (D1 /1, using the
// REX_reg_wide prefix encoding). No match rule; rorL_rReg_i1 instantiates it
// through expand.
8867 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
8868 %{
8869   effect(USE_DEF dst, KILL cr);
8870 
8871   format %{ "rorq    $dst" %}
8872   opcode(0xD1, 0x1); /* D1 /1 */
8873   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8874   ins_pipe(ialu_reg);
8875 %}
8876 
// Encoding template for a 64-bit rotate-right by an 8-bit immediate (C1 /1 ib,
// via reg_opc_imm_wide). No match rule; rorL_rReg_i8 instantiates it through
// expand.
8877 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
8878 %{
8879   effect(USE_DEF dst, USE shift, KILL cr);
8880 
8881   format %{ "rorq    $dst, $shift" %}
8882   opcode(0xC1, 0x1); /* C1 /1 ib */
8883   ins_encode(reg_opc_imm_wide(dst, shift));
8884   ins_pipe(ialu_reg);
8885 %}
8886 
// Encoding template for a 64-bit rotate-right by a register count (D3 /1).
// The count is an int in an rcx_RegI operand; dst is a no_rcx_RegL. No match
// rule; rorL_rReg_Var_C0 and rorL_rReg_Var_C64 instantiate it through expand.
8887 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8888 %{
8889   effect(USE_DEF dst, USE shift, KILL cr);
8890 
8891   format %{ "rorq    $dst, $shift" %}
8892   opcode(0xD3, 0x1); /* D3 /1 */
8893   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8894   ins_pipe(ialu_reg_reg);
8895 %}
8896 // end of ROR expand
8897 
8898 // Rotate Right by one
// Long form: matches (dst >>> rshift) | (dst << lshift) where rshift is an immI1
// and lshift an immI_M1 operand, and expands to rorL_rReg_imm1.
8899 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8900 %{
8901   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8902 
8903   expand %{
8904     rorL_rReg_imm1(dst, cr);
8905   %}
8906 %}
8907 
8908 // Rotate Right by 8-bit immediate
// Long form: matches (dst >>> rshift) | (dst << lshift) for two 8-bit
// immediates. The predicate accepts the pair only when the low six bits of
// rshift + lshift are zero, i.e. the counts add up to a multiple of 64. The
// rotate is then emitted with rshift as its count.
8909 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8910 %{
8911   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
8912   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
8913 
8914   expand %{
8915     rorL_rReg_imm8(dst, rshift, cr);
8916   %}
8917 %}
8918 
8919 // Rotate Right by variable
// Long form: matches (dst >>> shift) | (dst << (zero - shift)) with an int
// register count (the subtraction is a SubI), and expands to rorL_rReg_CL.
8920 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8921 %{
8922   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
8923 
8924   expand %{
8925     rorL_rReg_CL(dst, shift, cr);
8926   %}
8927 %}
8928 
8929 // Rotate Right by variable
// Same idiom as rorL_rReg_Var_C0 but with the left-shift count written as
// (c64 - shift), where c64 is an immI_64 operand; also expands to rorL_rReg_CL.
8930 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
8931 %{
8932   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
8933 
8934   expand %{
8935     rorL_rReg_CL(dst, shift, cr);
8936   %}
8937 %}
8938 
8939 // Logical Instructions
8940 
8941 // Integer Logical Instructions
8942 
8943 // And Instructions
8944 // And Register with Register
// dst = dst & src on int registers (opcode 0x23); the flags register is killed.
8945 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8946 %{
8947   match(Set dst (AndI dst src));
8948   effect(KILL cr);
8949 
8950   format %{ "andl    $dst, $src\t# int" %}
8951   opcode(0x23);
8952   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8953   ins_pipe(ialu_reg_reg);
8954 %}
8955 
8956 // And Register with Immediate 255
// An AND with an immI_255 mask is emitted as a zero-extending byte move
// (movzbl $dst, $dst; opcode 0F B6) rather than an andl. Unlike andI_rReg this
// rule declares no flags operand or KILL effect.
8957 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
8958 %{
8959   match(Set dst (AndI dst src));
8960 
8961   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
8962   opcode(0x0F, 0xB6);
8963   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8964   ins_pipe(ialu_reg);
8965 %}
8966 
8967 // And Register with Immediate 255 and promote to long
// Matches ConvI2L(src & mask) with an immI_255 mask and emits a single
// movzbl $dst, $src (0F B6), covering both the masking and the widening to long.
8968 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
8969 %{
8970   match(Set dst (ConvI2L (AndI src mask)));
8971 
8972   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
8973   opcode(0x0F, 0xB6);
8974   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8975   ins_pipe(ialu_reg);
8976 %}
8977 
8978 // And Register with Immediate 65535
// An AND with an immI_65535 mask is emitted as a zero-extending word move
// (movzwl $dst, $dst; opcode 0F B7). No flags operand or KILL effect is declared.
8979 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
8980 %{
8981   match(Set dst (AndI dst src));
8982 
8983   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
8984   opcode(0x0F, 0xB7);
8985   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
8986   ins_pipe(ialu_reg);
8987 %}
8988 
8989 // And Register with Immediate 65535 and promote to long
// Matches ConvI2L(src & mask) with an immI_65535 mask and emits a single
// movzwl $dst, $src (0F B7), covering both the masking and the widening to long.
8990 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
8991 %{
8992   match(Set dst (ConvI2L (AndI src mask)));
8993 
8994   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
8995   opcode(0x0F, 0xB7);
8996   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8997   ins_pipe(ialu_reg);
8998 %}
8999 
9000 // And Register with Immediate
// dst = dst & src for a general int immediate (opcode 81 /4); kills flags.
// NOTE(review): OpcSErm / Con8or32 are defined outside this chunk; presumably
// they pick between the 8-bit and 32-bit immediate forms — confirm there.
9001 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9002 %{
9003   match(Set dst (AndI dst src));
9004   effect(KILL cr);
9005 
9006   format %{ "andl    $dst, $src\t# int" %}
9007   opcode(0x81, 0x04); /* Opcode 81 /4 */
9008   ins_encode(OpcSErm(dst, src), Con8or32(src));
9009   ins_pipe(ialu_reg);
9010 %}
9011 
9012 // And Register with Memory
// dst = dst & LoadI(src): the int load is folded into the andl as a memory
// operand (opcode 0x23). Cost 125; kills flags.
9013 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9014 %{
9015   match(Set dst (AndI dst (LoadI src)));
9016   effect(KILL cr);
9017 
9018   ins_cost(125);
9019   format %{ "andl    $dst, $src\t# int" %}
9020   opcode(0x23);
9021   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9022   ins_pipe(ialu_reg_mem);
9023 %}
9024 
9025 // And Memory with Register
// Matches the load / AND / store-back of the same int memory location and emits
// one andl with a memory destination (opcode 21 /r). Cost 150; kills flags.
9026 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9027 %{
9028   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9029   effect(KILL cr);
9030 
9031   ins_cost(150);
9032   format %{ "andl    $dst, $src\t# int" %}
9033   opcode(0x21); /* Opcode 21 /r */
9034   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9035   ins_pipe(ialu_mem_reg);
9036 %}
9037 
9038 // And Memory with Immediate
// Matches the load / AND-with-immediate / store-back of the same int memory
// location and emits one andl on memory (opcode 81 /4). Cost 125; kills flags.
9039 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9040 %{
9041   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9042   effect(KILL cr);
9043 
9044   ins_cost(125);
9045   format %{ "andl    $dst, $src\t# int" %}
9046   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9047   ins_encode(REX_mem(dst), OpcSE(src),
9048              RM_opc_mem(secondary, dst), Con8or32(src));
9049   ins_pipe(ialu_mem_imm);
9050 %}
9051 
9052 // Or Instructions
9053 // Or Register with Register
// dst = dst | src on int registers (opcode 0x0B); the flags register is killed.
9054 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9055 %{
9056   match(Set dst (OrI dst src));
9057   effect(KILL cr);
9058 
9059   format %{ "orl     $dst, $src\t# int" %}
9060   opcode(0x0B);
9061   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9062   ins_pipe(ialu_reg_reg);
9063 %}
9064 
9065 // Or Register with Immediate
// dst = dst | src for an int immediate (opcode 81 /1); kills flags. Uses the
// same OpcSErm / Con8or32 encoding pair as andI_rReg_imm.
9066 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9067 %{
9068   match(Set dst (OrI dst src));
9069   effect(KILL cr);
9070 
9071   format %{ "orl     $dst, $src\t# int" %}
9072   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9073   ins_encode(OpcSErm(dst, src), Con8or32(src));
9074   ins_pipe(ialu_reg);
9075 %}
9076 
9077 // Or Register with Memory
// dst = dst | LoadI(src): the int load is folded into the orl as a memory
// operand (opcode 0x0B). Cost 125; kills flags.
9078 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9079 %{
9080   match(Set dst (OrI dst (LoadI src)));
9081   effect(KILL cr);
9082 
9083   ins_cost(125);
9084   format %{ "orl     $dst, $src\t# int" %}
9085   opcode(0x0B);
9086   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9087   ins_pipe(ialu_reg_mem);
9088 %}
9089 
9090 // Or Memory with Register
// Matches the load / OR / store-back of the same int memory location and emits
// one orl with a memory destination (opcode 09 /r). Cost 150; kills flags.
9091 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9092 %{
9093   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9094   effect(KILL cr);
9095 
9096   ins_cost(150);
9097   format %{ "orl     $dst, $src\t# int" %}
9098   opcode(0x09); /* Opcode 09 /r */
9099   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9100   ins_pipe(ialu_mem_reg);
9101 %}
9102 
9103 // Or Memory with Immediate
// Matches the load / OR-with-immediate / store-back of the same int memory
// location and emits one orl on memory (opcode 81 /1). Cost 125; kills flags.
9104 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9105 %{
9106   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9107   effect(KILL cr);
9108 
9109   ins_cost(125);
9110   format %{ "orl     $dst, $src\t# int" %}
9111   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9112   ins_encode(REX_mem(dst), OpcSE(src),
9113              RM_opc_mem(secondary, dst), Con8or32(src));
9114   ins_pipe(ialu_mem_imm);
9115 %}
9116 
9117 // Xor Instructions
9118 // Xor Register with Register
// dst = dst ^ src on int registers (opcode 0x33); the flags register is killed.
9119 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9120 %{
9121   match(Set dst (XorI dst src));
9122   effect(KILL cr);
9123 
9124   format %{ "xorl    $dst, $src\t# int" %}
9125   opcode(0x33);
9126   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9127   ins_pipe(ialu_reg_reg);
9128 %}
9129 
9130 // Xor Register with Immediate -1
// An XOR with an immI_M1 operand is emitted as a one-operand bitwise NOT of the
// int register. The rule declares no flags operand (x86 NOT leaves EFLAGS
// untouched). The format prints the "notl" mnemonic so the listing matches the
// notl that is actually assembled.
9131 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9132   match(Set dst (XorI dst imm));
9133 
9134   format %{ "notl   $dst" %}
9135   ins_encode %{
9136      __ notl($dst$$Register);
9137   %}
9138   ins_pipe(ialu_reg);
9139 %}
9140 
9141 // Xor Register with Immediate
// dst = dst ^ src for an int immediate (opcode 81 /6); kills flags. Uses the
// same OpcSErm / Con8or32 encoding pair as the AND/OR immediate forms.
9142 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9143 %{
9144   match(Set dst (XorI dst src));
9145   effect(KILL cr);
9146 
9147   format %{ "xorl    $dst, $src\t# int" %}
9148   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9149   ins_encode(OpcSErm(dst, src), Con8or32(src));
9150   ins_pipe(ialu_reg);
9151 %}
9152 
9153 // Xor Register with Memory
// dst = dst ^ LoadI(src): the int load is folded into the xorl as a memory
// operand (opcode 0x33). Cost 125; kills flags.
9154 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9155 %{
9156   match(Set dst (XorI dst (LoadI src)));
9157   effect(KILL cr);
9158 
9159   ins_cost(125);
9160   format %{ "xorl    $dst, $src\t# int" %}
9161   opcode(0x33);
9162   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9163   ins_pipe(ialu_reg_mem);
9164 %}
9165 
9166 // Xor Memory with Register
// Matches the load / XOR / store-back of the same int memory location and emits
// one xorl with a memory destination (opcode 31 /r). Cost 150; kills flags.
9167 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9168 %{
9169   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9170   effect(KILL cr);
9171 
9172   ins_cost(150);
9173   format %{ "xorl    $dst, $src\t# int" %}
9174   opcode(0x31); /* Opcode 31 /r */
9175   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9176   ins_pipe(ialu_mem_reg);
9177 %}
9178 
9179 // Xor Memory with Immediate
// Matches the load / XOR-with-immediate / store-back of the same int memory
// location and emits one xorl on memory (opcode 81 /6). Cost 125; kills flags.
9180 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9181 %{
9182   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9183   effect(KILL cr);
9184 
9185   ins_cost(125);
9186   format %{ "xorl    $dst, $src\t# int" %}
9187   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9188   ins_encode(REX_mem(dst), OpcSE(src),
9189              RM_opc_mem(secondary, dst), Con8or32(src));
9190   ins_pipe(ialu_mem_imm);
9191 %}
9192 
9193 
9194 // Long Logical Instructions
9195 
9196 // And Instructions
9197 // And Register with Register
// dst = dst & src on long registers (opcode 0x23 with the REX_reg_reg_wide
// prefix encoding); the flags register is killed.
9198 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9199 %{
9200   match(Set dst (AndL dst src));
9201   effect(KILL cr);
9202 
9203   format %{ "andq    $dst, $src\t# long" %}
9204   opcode(0x23);
9205   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9206   ins_pipe(ialu_reg_reg);
9207 %}
9208 
9209 // And Register with Immediate 255
// A long AND with an immL_255 mask is emitted as a zero-extending byte move
// (movzbq $dst, $dst; opcode 0F B6). No flags operand or KILL effect is declared.
9210 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9211 %{
9212   match(Set dst (AndL dst src));
9213 
9214   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9215   opcode(0x0F, 0xB6);
9216   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9217   ins_pipe(ialu_reg);
9218 %}
9219 
9220 // And Register with Immediate 65535
// A long AND with an immL_65535 mask is emitted as a zero-extending word move
// (movzwq $dst, $dst; opcode 0F B7). No flags operand or KILL effect is declared.
9221 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9222 %{
9223   match(Set dst (AndL dst src));
9224 
9225   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9226   opcode(0x0F, 0xB7);
9227   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9228   ins_pipe(ialu_reg);
9229 %}
9230 
9231 // And Register with Immediate
// dst = dst & src for a long register and an immL32 immediate (opcode 81 /4,
// wide form via OpcSErm_wide); kills flags.
9232 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9233 %{
9234   match(Set dst (AndL dst src));
9235   effect(KILL cr);
9236 
9237   format %{ "andq    $dst, $src\t# long" %}
9238   opcode(0x81, 0x04); /* Opcode 81 /4 */
9239   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9240   ins_pipe(ialu_reg);
9241 %}
9242 
9243 // And Register with Memory
// dst = dst & LoadL(src): the long load is folded into the andq as a memory
// operand (opcode 0x23). Cost 125; kills flags.
9244 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9245 %{
9246   match(Set dst (AndL dst (LoadL src)));
9247   effect(KILL cr);
9248 
9249   ins_cost(125);
9250   format %{ "andq    $dst, $src\t# long" %}
9251   opcode(0x23);
9252   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9253   ins_pipe(ialu_reg_mem);
9254 %}
9255 
9256 // And Memory with Register
// Matches the load / AND / store-back of the same long memory location and emits
// one andq with a memory destination (opcode 21 /r). Cost 150; kills flags.
9257 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9258 %{
9259   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9260   effect(KILL cr);
9261 
9262   ins_cost(150);
9263   format %{ "andq    $dst, $src\t# long" %}
9264   opcode(0x21); /* Opcode 21 /r */
9265   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9266   ins_pipe(ialu_mem_reg);
9267 %}
9268 
9269 // And Memory with Immediate
// Matches the load / AND-with-immL32 / store-back of the same long memory
// location and emits one andq on memory (opcode 81 /4). Cost 125; kills flags.
9270 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9271 %{
9272   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9273   effect(KILL cr);
9274 
9275   ins_cost(125);
9276   format %{ "andq    $dst, $src\t# long" %}
9277   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9278   ins_encode(REX_mem_wide(dst), OpcSE(src),
9279              RM_opc_mem(secondary, dst), Con8or32(src));
9280   ins_pipe(ialu_mem_imm);
9281 %}
9282 
9283 // Or Instructions
9284 // Or Register with Register
// dst = dst | src on long registers (opcode 0x0B with the REX_reg_reg_wide
// prefix encoding); the flags register is killed.
9285 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9286 %{
9287   match(Set dst (OrL dst src));
9288   effect(KILL cr);
9289 
9290   format %{ "orq     $dst, $src\t# long" %}
9291   opcode(0x0B);
9292   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9293   ins_pipe(ialu_reg_reg);
9294 %}
9295 
9296 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches dst | CastP2X(src): ORs a pointer register, reinterpreted as a long,
// into dst with the same orq encoding as orL_rReg. Kills flags.
9297 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9298   match(Set dst (OrL dst (CastP2X src)));
9299   effect(KILL cr);
9300 
9301   format %{ "orq     $dst, $src\t# long" %}
9302   opcode(0x0B);
9303   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9304   ins_pipe(ialu_reg_reg);
9305 %}
9306 
9307 
9308 // Or Register with Immediate
// dst = dst | src for a long register and an immL32 immediate (opcode 81 /1,
// wide form via OpcSErm_wide); kills flags.
9309 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9310 %{
9311   match(Set dst (OrL dst src));
9312   effect(KILL cr);
9313 
9314   format %{ "orq     $dst, $src\t# long" %}
9315   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9316   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9317   ins_pipe(ialu_reg);
9318 %}
9319 
9320 // Or Register with Memory
// dst = dst | LoadL(src): the long load is folded into the orq as a memory
// operand (opcode 0x0B). Cost 125; kills flags.
9321 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9322 %{
9323   match(Set dst (OrL dst (LoadL src)));
9324   effect(KILL cr);
9325 
9326   ins_cost(125);
9327   format %{ "orq     $dst, $src\t# long" %}
9328   opcode(0x0B);
9329   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9330   ins_pipe(ialu_reg_mem);
9331 %}
9332 
9333 // Or Memory with Register
// Matches the load / OR / store-back of the same long memory location and emits
// one orq with a memory destination (opcode 09 /r). Cost 150; kills flags.
9334 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9335 %{
9336   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9337   effect(KILL cr);
9338 
9339   ins_cost(150);
9340   format %{ "orq     $dst, $src\t# long" %}
9341   opcode(0x09); /* Opcode 09 /r */
9342   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9343   ins_pipe(ialu_mem_reg);
9344 %}
9345 
9346 // Or Memory with Immediate
// Matches the load / OR-with-immL32 / store-back of the same long memory
// location and emits one orq on memory (opcode 81 /1). Cost 125; kills flags.
9347 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9348 %{
9349   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9350   effect(KILL cr);
9351 
9352   ins_cost(125);
9353   format %{ "orq     $dst, $src\t# long" %}
9354   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9355   ins_encode(REX_mem_wide(dst), OpcSE(src),
9356              RM_opc_mem(secondary, dst), Con8or32(src));
9357   ins_pipe(ialu_mem_imm);
9358 %}
9359 
9360 // Xor Instructions
9361 // Xor Register with Register
// dst = dst ^ src on long registers (opcode 0x33 with the REX_reg_reg_wide
// prefix encoding); the flags register is killed.
9362 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9363 %{
9364   match(Set dst (XorL dst src));
9365   effect(KILL cr);
9366 
9367   format %{ "xorq    $dst, $src\t# long" %}
9368   opcode(0x33);
9369   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9370   ins_pipe(ialu_reg_reg);
9371 %}
9372 
9373 // Xor Register with Immediate -1
// A long XOR with an immL_M1 operand is emitted as a one-operand notq on the
// register. The rule declares no flags operand or KILL effect.
9374 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9375   match(Set dst (XorL dst imm));
9376 
9377   format %{ "notq   $dst" %}
9378   ins_encode %{
9379      __ notq($dst$$Register);
9380   %}
9381   ins_pipe(ialu_reg);
9382 %}
9383 
9384 // Xor Register with Immediate
// dst = dst ^ src for a long register and an immL32 immediate (opcode 81 /6,
// wide form via OpcSErm_wide); kills flags.
9385 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9386 %{
9387   match(Set dst (XorL dst src));
9388   effect(KILL cr);
9389 
9390   format %{ "xorq    $dst, $src\t# long" %}
9391   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9392   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9393   ins_pipe(ialu_reg);
9394 %}
9395 
9396 // Xor Register with Memory
// dst = dst ^ LoadL(src): the long load is folded into the xorq as a memory
// operand (opcode 0x33). Cost 125; kills flags.
9397 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9398 %{
9399   match(Set dst (XorL dst (LoadL src)));
9400   effect(KILL cr);
9401 
9402   ins_cost(125);
9403   format %{ "xorq    $dst, $src\t# long" %}
9404   opcode(0x33);
9405   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9406   ins_pipe(ialu_reg_mem);
9407 %}
9408 
9409 // Xor Memory with Register
// Matches the load / XOR / store-back of the same long memory location and emits
// one xorq with a memory destination (opcode 31 /r). Cost 150; kills flags.
9410 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9411 %{
9412   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9413   effect(KILL cr);
9414 
9415   ins_cost(150);
9416   format %{ "xorq    $dst, $src\t# long" %}
9417   opcode(0x31); /* Opcode 31 /r */
9418   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9419   ins_pipe(ialu_mem_reg);
9420 %}
9421 
9422 // Xor Memory with Immediate
// Matches the load / XOR-with-immL32 / store-back of the same long memory
// location and emits one xorq on memory (opcode 81 /6). Cost 125; kills flags.
9423 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9424 %{
9425   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9426   effect(KILL cr);
9427 
9428   ins_cost(125);
9429   format %{ "xorq    $dst, $src\t# long" %}
9430   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9431   ins_encode(REX_mem_wide(dst), OpcSE(src),
9432              RM_opc_mem(secondary, dst), Con8or32(src));
9433   ins_pipe(ialu_mem_imm);
9434 %}
9435 
9436 // Convert Int to Boolean
// Conv2B of an int register. The emitted sequence is testl src,src; setnz dst;
// movzbl dst,dst, so dst ends up 1 when src is non-zero and 0 otherwise.
// Kills flags.
9437 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9438 %{
9439   match(Set dst (Conv2B src));
9440   effect(KILL cr);
9441 
9442   format %{ "testl   $src, $src\t# ci2b\n\t"
9443             "setnz   $dst\n\t"
9444             "movzbl  $dst, $dst" %}
9445   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9446              setNZ_reg(dst),
9447              REX_reg_breg(dst, dst), // movzbl
9448              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9449   ins_pipe(pipe_slow); // XXX
9450 %}
9451 
9452 // Convert Pointer to Boolean
// Conv2B of a pointer register. Same sequence as convI2B but the test is the
// 64-bit testq (REX_reg_reg_wide), so dst is 1 for a non-null pointer and 0 for
// null. Kills flags.
9453 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9454 %{
9455   match(Set dst (Conv2B src));
9456   effect(KILL cr);
9457 
9458   format %{ "testq   $src, $src\t# cp2b\n\t"
9459             "setnz   $dst\n\t"
9460             "movzbl  $dst, $dst" %}
9461   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9462              setNZ_reg(dst),
9463              REX_reg_breg(dst, dst), // movzbl
9464              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9465   ins_pipe(pipe_slow); // XXX
9466 %}
9467 
// CmpLTMask p q into an int register. Per the format the sequence is
// cmpl p,q; setlt dst; movzbl dst,dst; negl dst, which leaves dst = -1 (all
// ones) when p < q and 0 otherwise. Cost 400; kills flags.
9468 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9469 %{
9470   match(Set dst (CmpLTMask p q));
9471   effect(KILL cr);
9472 
9473   ins_cost(400); // XXX
9474   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9475             "setlt   $dst\n\t"
9476             "movzbl  $dst, $dst\n\t"
9477             "negl    $dst" %}
9478   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9479              setLT_reg(dst),
9480              REX_reg_breg(dst, dst), // movzbl
9481              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9482              neg_reg(dst));
9483   ins_pipe(pipe_slow);
9484 %}
9485 
// CmpLTMask against an immI0 operand: a single arithmetic right shift by 31
// (C1 /7 ib with immediate 0x1F) replicates dst's sign bit across the register.
// Cost 100, versus 400 for the general cmpLTMask. Kills flags.
9486 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9487 %{
9488   match(Set dst (CmpLTMask dst zero));
9489   effect(KILL cr);
9490 
9491   ins_cost(100); // XXX
9492   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9493   opcode(0xC1, 0x7);  /* C1 /7 ib */
9494   ins_encode(reg_opc_imm(dst, 0x1F));
9495   ins_pipe(ialu_reg);
9496 %}
9497 
9498 
// Conditional add: p = ((p < q) ? y : 0) + (p - q), where p < q is a signed
// comparison (the same sense cmpLTMask and cmpLTMask0 implement).
// "subl p, q" leaves the same flags as "cmpl p, q", so a signed jge skips the
// add of y when p >= q. Do not build the mask with "sbbl tmp, tmp": that
// replicates the carry flag, which is the unsigned borrow, and gives the wrong
// answer whenever p and q differ in sign (e.g. p = -1, q = 0). The branch form
// needs no scratch register, so no TEMP operand is declared.
9499 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9500 %{
9501   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9502   effect(KILL cr);
9503 
9504   ins_cost(400); // XXX
9505   format %{ "subl    $p, $q\t# cadd_cmpLTMask\n\t"
9506             "jge,s   done\n\t"
9507             "addl    $p, $y\n"
9508     "done:" %}
9509   ins_encode %{
9510     Register Rp = $p$$Register;
9511     Register Rq = $q$$Register;
9512     Register Ry = $y$$Register;
9513     Label done;
9514     __ subl(Rp, Rq);                          // Rp = p - q, flags as for cmpl p, q
9515     __ jccb(Assembler::greaterEqual, done);   // signed p >= q: nothing to add
9516     __ addl(Rp, Ry);                          // signed p <  q: add y
9517     __ bind(done);
9518   %}
9519   ins_pipe(pipe_cmplt);
9520 %}
9521 
9522 //---------- FP Instructions------------------------------------------------
9523 
// Float compare of two XMM registers into an rFlagsRegU result: ucomiss followed
// by emit_cmpfp_fixup (defined outside this chunk). Per the format string the
// fix-up is skipped on jnp and otherwise ("saw NaN") rewrites the flags through
// pushfq / andq [rsp] / popfq. Cost 145.
9524 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9525 %{
9526   match(Set cr (CmpF src1 src2));
9527 
9528   ins_cost(145);
9529   format %{ "ucomiss $src1, $src2\n\t"
9530             "jnp,s   exit\n\t"
9531             "pushfq\t# saw NaN, set CF\n\t"
9532             "andq    [rsp], #0xffffff2b\n\t"
9533             "popfq\n"
9534     "exit:" %}
9535   ins_encode %{
9536     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9537     emit_cmpfp_fixup(_masm);
9538   %}
9539   ins_pipe(pipe_slow);
9540 %}
9541 
// Cheaper variant of cmpF_cc_reg (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: a bare ucomiss with no NaN fix-up afterwards.
9542 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9543   match(Set cr (CmpF src1 src2));
9544 
9545   ins_cost(100);
9546   format %{ "ucomiss $src1, $src2" %}
9547   ins_encode %{
9548     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9549   %}
9550   ins_pipe(pipe_slow);
9551 %}
9552 
// Float compare of an XMM register against a float loaded from memory (the
// LoadF is folded into ucomiss), producing rFlagsRegU. The ucomiss is followed
// by emit_cmpfp_fixup, whose NaN handling the format string spells out. Cost 145.
9553 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9554 %{
9555   match(Set cr (CmpF src1 (LoadF src2)));
9556 
9557   ins_cost(145);
9558   format %{ "ucomiss $src1, $src2\n\t"
9559             "jnp,s   exit\n\t"
9560             "pushfq\t# saw NaN, set CF\n\t"
9561             "andq    [rsp], #0xffffff2b\n\t"
9562             "popfq\n"
9563     "exit:" %}
9564   ins_encode %{
9565     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9566     emit_cmpfp_fixup(_masm);
9567   %}
9568   ins_pipe(pipe_slow);
9569 %}
9570 
// Cheaper variant of cmpF_cc_mem (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: ucomiss against memory with no NaN fix-up.
9571 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9572   match(Set cr (CmpF src1 (LoadF src2)));
9573 
9574   ins_cost(100);
9575   format %{ "ucomiss $src1, $src2" %}
9576   ins_encode %{
9577     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9578   %}
9579   ins_pipe(pipe_slow);
9580 %}
9581 
// Float compare of an XMM register against an immF constant, which is read from
// the constant table via $constantaddress. Produces rFlagsRegU; the ucomiss is
// followed by emit_cmpfp_fixup as in cmpF_cc_reg. Cost 145.
9582 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
9583   match(Set cr (CmpF src con));
9584 
9585   ins_cost(145);
9586   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9587             "jnp,s   exit\n\t"
9588             "pushfq\t# saw NaN, set CF\n\t"
9589             "andq    [rsp], #0xffffff2b\n\t"
9590             "popfq\n"
9591     "exit:" %}
9592   ins_encode %{
9593     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9594     emit_cmpfp_fixup(_masm);
9595   %}
9596   ins_pipe(pipe_slow);
9597 %}
9598 
// Cheaper variant of cmpF_cc_imm (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: ucomiss against the constant-table entry, no NaN fix-up.
9599 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
9600   match(Set cr (CmpF src con));
9601   ins_cost(100);
9602   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
9603   ins_encode %{
9604     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9605   %}
9606   ins_pipe(pipe_slow);
9607 %}
9608 
// Double compare of two XMM registers into an rFlagsRegU result: ucomisd
// followed by emit_cmpfp_fixup (defined outside this chunk). Per the format
// string the fix-up is skipped on jnp and otherwise ("saw NaN") rewrites the
// flags through pushfq / andq [rsp] / popfq. Cost 145.
9609 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
9610 %{
9611   match(Set cr (CmpD src1 src2));
9612 
9613   ins_cost(145);
9614   format %{ "ucomisd $src1, $src2\n\t"
9615             "jnp,s   exit\n\t"
9616             "pushfq\t# saw NaN, set CF\n\t"
9617             "andq    [rsp], #0xffffff2b\n\t"
9618             "popfq\n"
9619     "exit:" %}
9620   ins_encode %{
9621     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9622     emit_cmpfp_fixup(_masm);
9623   %}
9624   ins_pipe(pipe_slow);
9625 %}
9626 
// Cheaper variant of cmpD_cc_reg (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: a bare ucomisd with no NaN fix-up afterwards.
// The format string carries only the instruction text, like the float
// counterpart cmpF_cc_reg_CF.
9627 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
9628   match(Set cr (CmpD src1 src2));
9629 
9630   ins_cost(100);
9631   format %{ "ucomisd $src1, $src2" %}
9632   ins_encode %{
9633     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9634   %}
9635   ins_pipe(pipe_slow);
9636 %}
9637 
// Double compare of an XMM register against a double loaded from memory (the
// LoadD is folded into ucomisd), producing rFlagsRegU. The ucomisd is followed
// by emit_cmpfp_fixup, whose NaN handling the format string spells out. Cost 145.
9638 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
9639 %{
9640   match(Set cr (CmpD src1 (LoadD src2)));
9641 
9642   ins_cost(145);
9643   format %{ "ucomisd $src1, $src2\n\t"
9644             "jnp,s   exit\n\t"
9645             "pushfq\t# saw NaN, set CF\n\t"
9646             "andq    [rsp], #0xffffff2b\n\t"
9647             "popfq\n"
9648     "exit:" %}
9649   ins_encode %{
9650     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9651     emit_cmpfp_fixup(_masm);
9652   %}
9653   ins_pipe(pipe_slow);
9654 %}
9655 
// Cheaper variant of cmpD_cc_mem (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: ucomisd against memory with no NaN fix-up.
9656 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
9657   match(Set cr (CmpD src1 (LoadD src2)));
9658 
9659   ins_cost(100);
9660   format %{ "ucomisd $src1, $src2" %}
9661   ins_encode %{
9662     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9663   %}
9664   ins_pipe(pipe_slow);
9665 %}
9666 
// Double compare of an XMM register against an immD constant, which is read
// from the constant table via $constantaddress. Produces rFlagsRegU; the
// ucomisd is followed by emit_cmpfp_fixup as in cmpD_cc_reg. Cost 145.
9667 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
9668   match(Set cr (CmpD src con));
9669 
9670   ins_cost(145);
9671   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9672             "jnp,s   exit\n\t"
9673             "pushfq\t# saw NaN, set CF\n\t"
9674             "andq    [rsp], #0xffffff2b\n\t"
9675             "popfq\n"
9676     "exit:" %}
9677   ins_encode %{
9678     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9679     emit_cmpfp_fixup(_masm);
9680   %}
9681   ins_pipe(pipe_slow);
9682 %}
9683 
// Cheaper variant of cmpD_cc_imm (cost 100 vs. 145) that produces an
// rFlagsRegUCF result: ucomisd against the constant-table entry, no NaN fix-up.
9684 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
9685   match(Set cr (CmpD src con));
9686   ins_cost(100);
9687   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
9688   ins_encode %{
9689     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9690   %}
9691   ins_pipe(pipe_slow);
9692 %}
9693 
9694 // Compare into -1,0,1
// CmpF3 of two XMM registers into an int register. The code is ucomiss followed
// by emit_cmpfp3 (defined outside this chunk). Per the format string dst is
// preset to -1 and kept on jp (unordered) or jb (below); otherwise setne/movzbl
// yields 0 for equal and 1 for greater. Cost 275; kills flags.
9695 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
9696 %{
9697   match(Set dst (CmpF3 src1 src2));
9698   effect(KILL cr);
9699 
9700   ins_cost(275);
9701   format %{ "ucomiss $src1, $src2\n\t"
9702             "movl    $dst, #-1\n\t"
9703             "jp,s    done\n\t"
9704             "jb,s    done\n\t"
9705             "setne   $dst\n\t"
9706             "movzbl  $dst, $dst\n"
9707     "done:" %}
9708   ins_encode %{
9709     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9710     emit_cmpfp3(_masm, $dst$$Register);
9711   %}
9712   ins_pipe(pipe_slow);
9713 %}
9714 
9715 // Compare into -1,0,1
// CmpF3 of an XMM register against a float loaded from memory (the LoadF is
// folded into ucomiss). The -1/0/1 result is produced by emit_cmpfp3, with the
// sequence shown in the format string. Cost 275; kills flags.
9716 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
9717 %{
9718   match(Set dst (CmpF3 src1 (LoadF src2)));
9719   effect(KILL cr);
9720 
9721   ins_cost(275);
9722   format %{ "ucomiss $src1, $src2\n\t"
9723             "movl    $dst, #-1\n\t"
9724             "jp,s    done\n\t"
9725             "jb,s    done\n\t"
9726             "setne   $dst\n\t"
9727             "movzbl  $dst, $dst\n"
9728     "done:" %}
9729   ins_encode %{
9730     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9731     emit_cmpfp3(_masm, $dst$$Register);
9732   %}
9733   ins_pipe(pipe_slow);
9734 %}
9735 
9736 // Compare into -1,0,1
// CmpF3 of an XMM register against an immF constant read from the constant
// table via $constantaddress. The -1/0/1 result is produced by emit_cmpfp3,
// with the sequence shown in the format string. Cost 275; kills flags.
9737 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
9738   match(Set dst (CmpF3 src con));
9739   effect(KILL cr);
9740 
9741   ins_cost(275);
9742   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
9743             "movl    $dst, #-1\n\t"
9744             "jp,s    done\n\t"
9745             "jb,s    done\n\t"
9746             "setne   $dst\n\t"
9747             "movzbl  $dst, $dst\n"
9748     "done:" %}
9749   ins_encode %{
9750     __ ucomiss($src$$XMMRegister, $constantaddress($con));
9751     emit_cmpfp3(_masm, $dst$$Register);
9752   %}
9753   ins_pipe(pipe_slow);
9754 %}
9755 
9756 // Three-way double compare (CmpD3), both operands in XMM registers: dst becomes -1, 0 or 1.
9757 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
9758 %{
9759   match(Set dst (CmpD3 src1 src2));
9760   effect(KILL cr);
9761   // Per the format: dst is preset to -1 and kept when the compare is unordered (jp) or below (jb); otherwise setne gives 0 or 1.
9762   ins_cost(275);
9763   format %{ "ucomisd $src1, $src2\n\t"
9764             "movl    $dst, #-1\n\t"
9765             "jp,s    done\n\t"
9766             "jb,s    done\n\t"
9767             "setne   $dst\n\t"
9768             "movzbl  $dst, $dst\n"
9769     "done:" %}
9770   ins_encode %{
9771     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9772     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; presumably emits the movl/jp/jb/setne/movzbl tail shown in format - TODO confirm
9773   %}
9774   ins_pipe(pipe_slow);
9775 %}
9776 
9777 // Three-way double compare (CmpD3) with the second operand read from memory (LoadD folded into ucomisd): dst becomes -1, 0 or 1.
9778 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
9779 %{
9780   match(Set dst (CmpD3 src1 (LoadD src2)));
9781   effect(KILL cr);
9782   // Per the format: dst is preset to -1 and kept when the compare is unordered (jp) or below (jb); otherwise setne gives 0 or 1.
9783   ins_cost(275);
9784   format %{ "ucomisd $src1, $src2\n\t"
9785             "movl    $dst, #-1\n\t"
9786             "jp,s    done\n\t"
9787             "jb,s    done\n\t"
9788             "setne   $dst\n\t"
9789             "movzbl  $dst, $dst\n"
9790     "done:" %}
9791   ins_encode %{
9792     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9793     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; presumably emits the movl/jp/jb/setne/movzbl tail shown in format - TODO confirm
9794   %}
9795   ins_pipe(pipe_slow);
9796 %}
9797 
9798 // Three-way double compare (CmpD3) against a double constant read from the constant table: dst becomes -1, 0 or 1.
9799 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
9800   match(Set dst (CmpD3 src con));
9801   effect(KILL cr);
9802   // Per the format: dst is preset to -1 and kept when the compare is unordered (jp) or below (jb); otherwise setne gives 0 or 1.
9803   ins_cost(275);
9804   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
9805             "movl    $dst, #-1\n\t"
9806             "jp,s    done\n\t"
9807             "jb,s    done\n\t"
9808             "setne   $dst\n\t"
9809             "movzbl  $dst, $dst\n"
9810     "done:" %}
9811   ins_encode %{
9812     __ ucomisd($src$$XMMRegister, $constantaddress($con));
9813     emit_cmpfp3(_masm, $dst$$Register); // helper defined outside this chunk; presumably emits the movl/jp/jb/setne/movzbl tail shown in format - TODO confirm
9814   %}
9815   ins_pipe(pipe_slow);
9816 %}
9817 
9818 // -----------Trig and Transcendental Instructions------------------------------
9819 instruct cosD_reg(regD dst) %{
9820   match(Set dst (CosD dst));
9821   // In-place cosine of the double in dst, computed on the x87 unit; the format is one line, so it carries no trailing line break.
9822   format %{ "dcos   $dst" %}
9823   opcode(0xD9, 0xFF); // D9 FF is the x87 fcos encoding
9824   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9825   ins_pipe( pipe_slow );
9826 %}
9827 // In-place sine of the double in dst, computed on the x87 unit.
9828 instruct sinD_reg(regD dst) %{
9829   match(Set dst (SinD dst));
9830   // Push_SrcXD / Push_ResultXD are defined outside this chunk; presumably they move the value between XMM and the x87 stack - TODO confirm.
9831   format %{ "dsin   $dst" %}
9832   opcode(0xD9, 0xFE); // D9 FE is the x87 fsin encoding
9833   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
9834   ins_pipe( pipe_slow );
9835 %}
9836 // In-place tangent of the double in dst, computed on the x87 unit.
9837 instruct tanD_reg(regD dst) %{
9838   match(Set dst (TanD dst));
9839   // fptan is followed by an fstp that discards the extra value fptan leaves on top of the x87 stack.
9840   format %{ "dtan   $dst" %}
9841   ins_encode( Push_SrcXD(dst),
9842               Opcode(0xD9), Opcode(0xF2),   //fptan
9843               Opcode(0xDD), Opcode(0xD8),   //fstp st
9844               Push_ResultXD(dst) );
9845   ins_pipe( pipe_slow );
9846 %}
9847 // In-place base-10 logarithm of the double in dst, computed on the x87 unit as log_10(2) * log_2(x).
9848 instruct log10D_reg(regD dst) %{
9849   // The source and result Double operands in XMM registers
9850   match(Set dst (Log10D dst));
9851   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9852   // fyl2x        ; compute log_10(2) * log_2(x)
9853   format %{ "fldlg2\t\t\t#Log10\n\t"
9854             "fyl2x\t\t\t# Q=Log10*Log_2(x)"
9855          %}
9856    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
9857               Push_SrcXD(dst),
9858               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9859               Push_ResultXD(dst));
9860   // The constant is pushed before the operand so that fyl2x sees the operand on top of the x87 stack.
9861   ins_pipe( pipe_slow );
9862 %}
9863 // In-place natural logarithm of the double in dst, computed on the x87 unit as log_e(2) * log_2(x).
9864 instruct logD_reg(regD dst) %{
9865   // The source and result Double operands in XMM registers
9866   match(Set dst (LogD dst));
9867   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9868   // fyl2x        ; compute log_e(2) * log_2(x)
9869   format %{ "fldln2\t\t\t#Log_e\n\t"
9870             "fyl2x\t\t\t# Q=Log_e*Log_2(x)"
9871          %}
9872   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9873               Push_SrcXD(dst),
9874               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9875               Push_ResultXD(dst));
9876   ins_pipe( pipe_slow );
9877 %}
9878 
9879 
9880 
9881 //----------Arithmetic Conversion Instructions---------------------------------
9882 // RoundFloat is matched in place and emits no code: empty encoding, zero cost.
9883 instruct roundFloat_nop(regF dst)
9884 %{
9885   match(Set dst (RoundFloat dst));
9886 
9887   ins_cost(0);
9888   ins_encode();
9889   ins_pipe(empty);
9890 %}
9891 // RoundDouble is matched in place and emits no code: empty encoding, zero cost.
9892 instruct roundDouble_nop(regD dst)
9893 %{
9894   match(Set dst (RoundDouble dst));
9895 
9896   ins_cost(0);
9897   ins_encode();
9898   ins_pipe(empty);
9899 %}
9900 // Float to double conversion, XMM register source (cvtss2sd).
9901 instruct convF2D_reg_reg(regD dst, regF src)
9902 %{
9903   match(Set dst (ConvF2D src));
9904 
9905   format %{ "cvtss2sd $dst, $src" %}
9906   ins_encode %{
9907     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
9908   %}
9909   ins_pipe(pipe_slow); // XXX
9910 %}
9911 // Float to double conversion with the float load folded into cvtss2sd (memory source).
9912 instruct convF2D_reg_mem(regD dst, memory src)
9913 %{
9914   match(Set dst (ConvF2D (LoadF src)));
9915 
9916   format %{ "cvtss2sd $dst, $src" %}
9917   ins_encode %{
9918     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
9919   %}
9920   ins_pipe(pipe_slow); // XXX
9921 %}
9922 // Double to float conversion, XMM register source (cvtsd2ss).
9923 instruct convD2F_reg_reg(regF dst, regD src)
9924 %{
9925   match(Set dst (ConvD2F src));
9926 
9927   format %{ "cvtsd2ss $dst, $src" %}
9928   ins_encode %{
9929     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
9930   %}
9931   ins_pipe(pipe_slow); // XXX
9932 %}
9933 // Double to float conversion with the double load folded into cvtsd2ss (memory source).
9934 instruct convD2F_reg_mem(regF dst, memory src)
9935 %{
9936   match(Set dst (ConvD2F (LoadD src)));
9937 
9938   format %{ "cvtsd2ss $dst, $src" %}
9939   ins_encode %{
9940     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
9941   %}
9942   ins_pipe(pipe_slow); // XXX
9943 %}
9944 
9945 // XXX do mem variants
9946 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
9947 %{
9948   match(Set dst (ConvF2I src));
9949   effect(KILL cr);
9950   // Float to int: the fast path is a truncating cvttss2sil; a result of 0x80000000 diverts to the f2i_fixup stub.
9951   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
9952             "cmpl    $dst, #0x80000000\n\t"
9953             "jne,s   done\n\t"
9954             "subq    rsp, #8\n\t"
9955             "movss   [rsp], $src\n\t"
9956             "call    f2i_fixup\n\t"
9957             "popq    $dst\n"
9958     "done:   "%}
9959   ins_encode %{
9960     Label done;
9961     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
9962     __ cmpl($dst$$Register, 0x80000000); // presumably the x86 integer-indefinite marker for NaN or out-of-range input - confirm in the Intel SDM
9963     __ jccb(Assembler::notEqual, done);
9964     __ subptr(rsp, 8); // reserve one 8-byte stack slot for the stub argument
9965     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9966     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
9967     __ pop($dst$$Register); // releases the slot; assumes the stub left its result there - TODO confirm against the stub
9968     __ bind(done);
9969   %}
9970   ins_pipe(pipe_slow);
9971 %}
9972 // Float to long: the fast path is a truncating cvttss2siq; a result equal to the sign-flip constant diverts to the f2l_fixup stub.
9973 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
9974 %{
9975   match(Set dst (ConvF2L src));
9976   effect(KILL cr);
9977 
9978   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
9979             "cmpq    $dst, [0x8000000000000000]\n\t"
9980             "jne,s   done\n\t"
9981             "subq    rsp, #8\n\t"
9982             "movss   [rsp], $src\n\t"
9983             "call    f2l_fixup\n\t"
9984             "popq    $dst\n"
9985     "done:   "%}
9986   ins_encode %{
9987     Label done;
9988     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
9989     __ cmp64($dst$$Register, // compared against memory; the format shows the constant as 0x8000000000000000
9990              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
9991     __ jccb(Assembler::notEqual, done);
9992     __ subptr(rsp, 8); // reserve one 8-byte stack slot for the stub argument
9993     __ movflt(Address(rsp, 0), $src$$XMMRegister);
9994     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
9995     __ pop($dst$$Register); // releases the slot; assumes the stub left its result there - TODO confirm against the stub
9996     __ bind(done);
9997   %}
9998   ins_pipe(pipe_slow);
9999 %}
10000 // Double to int: the fast path is a truncating cvttsd2sil; a result of 0x80000000 diverts to the d2i_fixup stub.
10001 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10002 %{
10003   match(Set dst (ConvD2I src));
10004   effect(KILL cr);
10005 
10006   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10007             "cmpl    $dst, #0x80000000\n\t"
10008             "jne,s   done\n\t"
10009             "subq    rsp, #8\n\t"
10010             "movsd   [rsp], $src\n\t"
10011             "call    d2i_fixup\n\t"
10012             "popq    $dst\n"
10013     "done:   "%}
10014   ins_encode %{
10015     Label done;
10016     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10017     __ cmpl($dst$$Register, 0x80000000); // presumably the x86 integer-indefinite marker for NaN or out-of-range input - confirm in the Intel SDM
10018     __ jccb(Assembler::notEqual, done);
10019     __ subptr(rsp, 8); // reserve one 8-byte stack slot for the stub argument
10020     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10021     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
10022     __ pop($dst$$Register); // releases the slot; assumes the stub left its result there - TODO confirm against the stub
10023     __ bind(done);
10024   %}
10025   ins_pipe(pipe_slow);
10026 %}
10027 // Double to long: the fast path is a truncating cvttsd2siq; a result equal to the sign-flip constant diverts to the d2l_fixup stub.
10028 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10029 %{
10030   match(Set dst (ConvD2L src));
10031   effect(KILL cr);
10032 
10033   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10034             "cmpq    $dst, [0x8000000000000000]\n\t"
10035             "jne,s   done\n\t"
10036             "subq    rsp, #8\n\t"
10037             "movsd   [rsp], $src\n\t"
10038             "call    d2l_fixup\n\t"
10039             "popq    $dst\n"
10040     "done:   "%}
10041   ins_encode %{
10042     Label done;
10043     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10044     __ cmp64($dst$$Register, // compared against memory; the format shows the constant as 0x8000000000000000
10045              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10046     __ jccb(Assembler::notEqual, done);
10047     __ subptr(rsp, 8); // reserve one 8-byte stack slot for the stub argument
10048     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10049     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10050     __ pop($dst$$Register); // releases the slot; assumes the stub left its result there - TODO confirm against the stub
10051     __ bind(done);
10052   %}
10053   ins_pipe(pipe_slow);
10054 %}
10055 // Int to float via cvtsi2ssl from a general register; selected only when UseXmmI2F is off (convXI2F_reg covers the other case).
10056 instruct convI2F_reg_reg(regF dst, rRegI src)
10057 %{
10058   predicate(!UseXmmI2F);
10059   match(Set dst (ConvI2F src));
10060 
10061   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10062   ins_encode %{
10063     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10064   %}
10065   ins_pipe(pipe_slow); // XXX
10066 %}
10067 // Int to float with the int load folded into cvtsi2ssl (memory source); note this rule carries no UseXmmI2F predicate.
10068 instruct convI2F_reg_mem(regF dst, memory src)
10069 %{
10070   match(Set dst (ConvI2F (LoadI src)));
10071 
10072   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10073   ins_encode %{
10074     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10075   %}
10076   ins_pipe(pipe_slow); // XXX
10077 %}
10078 // Int to double via cvtsi2sdl from a general register; selected only when UseXmmI2D is off (convXI2D_reg covers the other case).
10079 instruct convI2D_reg_reg(regD dst, rRegI src)
10080 %{
10081   predicate(!UseXmmI2D);
10082   match(Set dst (ConvI2D src));
10083 
10084   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10085   ins_encode %{
10086     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10087   %}
10088   ins_pipe(pipe_slow); // XXX
10089 %}
10090 // Int to double with the int load folded into cvtsi2sdl (memory source); note this rule carries no UseXmmI2D predicate.
10091 instruct convI2D_reg_mem(regD dst, memory src)
10092 %{
10093   match(Set dst (ConvI2D (LoadI src)));
10094 
10095   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10096   ins_encode %{
10097     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10098   %}
10099   ins_pipe(pipe_slow); // XXX
10100 %}
10101 // Int to float when UseXmmI2F is on: move the int into the XMM register, then convert it there with cvtdq2ps.
10102 instruct convXI2F_reg(regF dst, rRegI src)
10103 %{
10104   predicate(UseXmmI2F);
10105   match(Set dst (ConvI2F src));
10106 
10107   format %{ "movdl $dst, $src\n\t"
10108             "cvtdq2psl $dst, $dst\t# i2f" %}
10109   ins_encode %{
10110     __ movdl($dst$$XMMRegister, $src$$Register);
10111     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // converts in place; dst is both source and destination
10112   %}
10113   ins_pipe(pipe_slow); // XXX
10114 %}
10115 // Int to double when UseXmmI2D is on: move the int into the XMM register, then convert it there with cvtdq2pd.
10116 instruct convXI2D_reg(regD dst, rRegI src)
10117 %{
10118   predicate(UseXmmI2D);
10119   match(Set dst (ConvI2D src));
10120 
10121   format %{ "movdl $dst, $src\n\t"
10122             "cvtdq2pdl $dst, $dst\t# i2d" %}
10123   ins_encode %{
10124     __ movdl($dst$$XMMRegister, $src$$Register);
10125     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // converts in place; dst is both source and destination
10126   %}
10127   ins_pipe(pipe_slow); // XXX
10128 %}
10129 // Long to float via cvtsi2ssq from a general register.
10130 instruct convL2F_reg_reg(regF dst, rRegL src)
10131 %{
10132   match(Set dst (ConvL2F src));
10133 
10134   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10135   ins_encode %{
10136     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10137   %}
10138   ins_pipe(pipe_slow); // XXX
10139 %}
10140 // Long to float with the long load folded into cvtsi2ssq (memory source).
10141 instruct convL2F_reg_mem(regF dst, memory src)
10142 %{
10143   match(Set dst (ConvL2F (LoadL src)));
10144 
10145   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10146   ins_encode %{
10147     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10148   %}
10149   ins_pipe(pipe_slow); // XXX
10150 %}
10151 // Long to double via cvtsi2sdq from a general register.
10152 instruct convL2D_reg_reg(regD dst, rRegL src)
10153 %{
10154   match(Set dst (ConvL2D src));
10155 
10156   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10157   ins_encode %{
10158     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10159   %}
10160   ins_pipe(pipe_slow); // XXX
10161 %}
10162 // Long to double with the long load folded into cvtsi2sdq (memory source).
10163 instruct convL2D_reg_mem(regD dst, memory src)
10164 %{
10165   match(Set dst (ConvL2D (LoadL src)));
10166 
10167   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10168   ins_encode %{
10169     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10170   %}
10171   ins_pipe(pipe_slow); // XXX
10172 %}
10173 // Sign-extending int to long conversion (movslq).
10174 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10175 %{
10176   match(Set dst (ConvI2L src));
10177 
10178   ins_cost(125);
10179   format %{ "movslq  $dst, $src\t# i2l" %}
10180   ins_encode %{
10181     __ movslq($dst$$Register, $src$$Register);
10182   %}
10183   ins_pipe(ialu_reg_reg);
10184 %}
10185 
10186 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10187 // %{
10188 //   match(Set dst (ConvI2L src));
10189 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10190 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10191 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10192 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10193 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10194 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10195 
10196 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10197 //   ins_encode(enc_copy(dst, src));
10198 // //   opcode(0x63); // needs REX.W
10199 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10200 //   ins_pipe(ialu_reg_reg);
10201 // %}
10202 
10203 // Zero-extend convert int to long: matches ConvI2L masked with the 32-bit mask operand and emits a single movl.
10204 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10205 %{
10206   match(Set dst (AndL (ConvI2L src) mask));
10207   // Single-line format, so no trailing line break; the listing adds its own.
10208   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10209   ins_encode %{
10210     if ($dst$$reg != $src$$reg) { // nothing is emitted when the allocator assigned the same register to both operands
10211       __ movl($dst$$Register, $src$$Register);
10212     }
10213   %}
10214   ins_pipe(ialu_reg_reg);
10215 %}
10216 
10217 // Zero-extend convert int to long with the int load folded in: a 32-bit movl from memory replaces load, convert and mask.
10218 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10219 %{
10220   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10221   // Single-line format, so no trailing line break; the listing adds its own.
10222   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10223   ins_encode %{
10224     __ movl($dst$$Register, $src$$Address);
10225   %}
10226   ins_pipe(ialu_reg_mem);
10227 %}
10228 // Long masked with the 32-bit mask operand, implemented as a 32-bit register move; unlike convI2L_reg_reg_zex the movl is always emitted.
10229 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10230 %{
10231   match(Set dst (AndL src mask));
10232 
10233   format %{ "movl    $dst, $src\t# zero-extend long" %}
10234   ins_encode %{
10235     __ movl($dst$$Register, $src$$Register);
10236   %}
10237   ins_pipe(ialu_reg_reg);
10238 %}
10239 // Long to int conversion, implemented as a 32-bit register move of the source.
10240 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10241 %{
10242   match(Set dst (ConvL2I src));
10243 
10244   format %{ "movl    $dst, $src\t# l2i" %}
10245   ins_encode %{
10246     __ movl($dst$$Register, $src$$Register);
10247   %}
10248   ins_pipe(ialu_reg_reg);
10249 %}
10250 
10251 // MoveF2I, stack slot to general register: loads the 32 bits of a float stack slot unchanged with movl.
10252 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10253   match(Set dst (MoveF2I src));
10254   effect(DEF dst, USE src);
10255 
10256   ins_cost(125);
10257   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10258   ins_encode %{
10259     __ movl($dst$$Register, Address(rsp, $src$$disp)); // the slot is addressed relative to rsp
10260   %}
10261   ins_pipe(ialu_reg_mem);
10262 %}
10263 // MoveI2F, stack slot to XMM register: loads the 32 bits of an int stack slot into a float register via movflt.
10264 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10265   match(Set dst (MoveI2F src));
10266   effect(DEF dst, USE src);
10267 
10268   ins_cost(125);
10269   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10270   ins_encode %{
10271     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); // the slot is addressed relative to rsp
10272   %}
10273   ins_pipe(pipe_slow);
10274 %}
10275 // MoveD2L, stack slot to general register: loads the 64 bits of a double stack slot unchanged with movq.
10276 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10277   match(Set dst (MoveD2L src));
10278   effect(DEF dst, USE src);
10279 
10280   ins_cost(125);
10281   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10282   ins_encode %{
10283     __ movq($dst$$Register, Address(rsp, $src$$disp)); // the slot is addressed relative to rsp
10284   %}
10285   ins_pipe(ialu_reg_mem);
10286 %}
10287 // MoveL2D, stack slot to XMM register, variant selected when UseXmmLoadAndClearUpper is off (format shows movlpd).
10288 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10289   predicate(!UseXmmLoadAndClearUpper);
10290   match(Set dst (MoveL2D src));
10291   effect(DEF dst, USE src);
10292 
10293   ins_cost(125);
10294   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10295   ins_encode %{
10296     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // same call as the sibling rule; presumably movdbl picks movlpd or movsd from the flag - TODO confirm
10297   %}
10298   ins_pipe(pipe_slow);
10299 %}
10300 // MoveL2D, stack slot to XMM register, variant selected when UseXmmLoadAndClearUpper is on (format shows movsd).
10301 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10302   predicate(UseXmmLoadAndClearUpper);
10303   match(Set dst (MoveL2D src));
10304   effect(DEF dst, USE src);
10305 
10306   ins_cost(125);
10307   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10308   ins_encode %{
10309     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // same call as the partial rule; presumably movdbl picks movlpd or movsd from the flag - TODO confirm
10310   %}
10311   ins_pipe(pipe_slow);
10312 %}
10313 
10314 // MoveF2I, XMM register to stack slot: stores the float bits into an int stack slot via movflt.
10315 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10316   match(Set dst (MoveF2I src));
10317   effect(DEF dst, USE src);
10318 
10319   ins_cost(95); // XXX
10320   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10321   ins_encode %{
10322     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); // the slot is addressed relative to rsp
10323   %}
10324   ins_pipe(pipe_slow);
10325 %}
10326 // MoveI2F, general register to stack slot: stores the int bits into a float stack slot with movl.
10327 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10328   match(Set dst (MoveI2F src));
10329   effect(DEF dst, USE src);
10330 
10331   ins_cost(100);
10332   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10333   ins_encode %{
10334     __ movl(Address(rsp, $dst$$disp), $src$$Register); // the slot is addressed relative to rsp
10335   %}
10336   ins_pipe( ialu_mem_reg );
10337 %}
10338 // MoveD2L, XMM register to stack slot: stores the double bits into a long stack slot via movdbl.
10339 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10340   match(Set dst (MoveD2L src));
10341   effect(DEF dst, USE src);
10342   // The format tag names this rule, so listings are not confused with the separate MoveL2D_reg_stack rule.
10343   ins_cost(95); // XXX
10344   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10345   ins_encode %{
10346     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); // the slot is addressed relative to rsp
10347   %}
10348   ins_pipe(pipe_slow);
10349 %}
10350 // MoveL2D, general register to stack slot: stores the long bits into a double stack slot with movq.
10351 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10352   match(Set dst (MoveL2D src));
10353   effect(DEF dst, USE src);
10354 
10355   ins_cost(100);
10356   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10357   ins_encode %{
10358     __ movq(Address(rsp, $dst$$disp), $src$$Register); // the slot is addressed relative to rsp
10359   %}
10360   ins_pipe(ialu_mem_reg);
10361 %}
10362 // MoveF2I, XMM register to general register: copies the 32 float bits directly with movdl, no stack round trip.
10363 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10364   match(Set dst (MoveF2I src));
10365   effect(DEF dst, USE src);
10366   ins_cost(85);
10367   format %{ "movd    $dst,$src\t# MoveF2I" %}
10368   ins_encode %{
10369     __ movdl($dst$$Register, $src$$XMMRegister);
10370   %}
10371   ins_pipe( pipe_slow );
10372 %}
10373 // MoveD2L, XMM register to general register: copies the 64 double bits directly with movdq, no stack round trip.
10374 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10375   match(Set dst (MoveD2L src));
10376   effect(DEF dst, USE src);
10377   ins_cost(85);
10378   format %{ "movd    $dst,$src\t# MoveD2L" %}
10379   ins_encode %{
10380     __ movdq($dst$$Register, $src$$XMMRegister);
10381   %}
10382   ins_pipe( pipe_slow );
10383 %}
10384 
10385 // The next instructions have long latency and use Int unit. Set high cost.
10386 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10387   match(Set dst (MoveI2F src)); // general register to XMM register, 32 bits copied unchanged
10388   effect(DEF dst, USE src);
10389   ins_cost(300); // deliberately higher than the stack-slot variants of MoveI2F (125 and 100)
10390   format %{ "movd    $dst,$src\t# MoveI2F" %}
10391   ins_encode %{
10392     __ movdl($dst$$XMMRegister, $src$$Register);
10393   %}
10394   ins_pipe( pipe_slow );
10395 %}
10396 // MoveL2D, general register to XMM register: copies the 64 bits directly with movdq; costed high like MoveI2F_reg_reg.
10397 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10398   match(Set dst (MoveL2D src));
10399   effect(DEF dst, USE src);
10400   ins_cost(300);
10401   format %{ "movd    $dst,$src\t# MoveL2D" %}
10402   ins_encode %{
10403      __ movdq($dst$$XMMRegister, $src$$Register);
10404   %}
10405   ins_pipe( pipe_slow );
10406 %}
10407 
10408 // Replicate scalar to packed byte (1 byte) values in xmm, source already in an XMM register.
10409 instruct Repl8B_reg(regD dst, regD src) %{
10410   match(Set dst (Replicate8B src));
10411   format %{ "MOVDQA  $dst,$src\n\t"
10412             "PUNPCKLBW $dst,$dst\n\t"
10413             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10414   ins_encode %{
10415     if ($dst$$reg != $src$$reg) { // the copy is skipped when both operands share a register, although the format always lists it
10416       __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
10417     }
10418     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); // interleaving dst with itself doubles each low byte into a 16-bit pair
10419     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10420   %}
10421   ins_pipe( pipe_slow );
10422 %}
10423 
10424 // Replicate scalar to packed byte (1 byte) values in xmm, source in a general register.
10425 instruct Repl8B_rRegI(regD dst, rRegI src) %{
10426   match(Set dst (Replicate8B src));
10427   format %{ "MOVD    $dst,$src\n\t"
10428             "PUNPCKLBW $dst,$dst\n\t"
10429             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10430   ins_encode %{
10431     __ movdl($dst$$XMMRegister, $src$$Register);
10432     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); // interleaving dst with itself doubles each low byte into a 16-bit pair
10433     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10434   %}
10435   ins_pipe( pipe_slow );
10436 %}
10437 
10438 // Replicate scalar zero to packed byte (1 byte) values in xmm: the register is cleared by xor-ing it with itself.
10439 instruct Repl8B_immI0(regD dst, immI0 zero) %{
10440   match(Set dst (Replicate8B zero));
10441   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
10442   ins_encode %{
10443     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10444   %}
10445   ins_pipe( fpu_reg_reg );
10446 %}
10447 
10448 // Replicate scalar to packed short (2 byte) values in xmm, source already in an XMM register.
10449 instruct Repl4S_reg(regD dst, regD src) %{
10450   match(Set dst (Replicate4S src));
10451   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
10452   ins_encode %{
10453     __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10454   %}
10455   ins_pipe( fpu_reg_reg );
10456 %}
10457 
10458 // Replicate scalar to packed short (2 byte) values in xmm, source in a general register.
10459 instruct Repl4S_rRegI(regD dst, rRegI src) %{
10460   match(Set dst (Replicate4S src));
10461   format %{ "MOVD    $dst,$src\n\t"
10462             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
10463   ins_encode %{
10464     __ movdl($dst$$XMMRegister, $src$$Register);
10465     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10466   %}
10467   ins_pipe( fpu_reg_reg );
10468 %}
10469 
10470 // Replicate scalar zero to packed short (2 byte) values in xmm: the register is cleared by xor-ing it with itself.
10471 instruct Repl4S_immI0(regD dst, immI0 zero) %{
10472   match(Set dst (Replicate4S zero));
10473   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
10474   ins_encode %{
10475     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10476   %}
10477   ins_pipe( fpu_reg_reg );
10478 %}
10479 
10480 // Replicate scalar to packed char (2 byte) values in xmm, source already in an XMM register.
10481 instruct Repl4C_reg(regD dst, regD src) %{
10482   match(Set dst (Replicate4C src));
10483   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
10484   ins_encode %{
10485     __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10486   %}
10487   ins_pipe( fpu_reg_reg );
10488 %}
10489 
10490 // Replicate scalar to packed char (2 byte) values in xmm, source in a general register.
10491 instruct Repl4C_rRegI(regD dst, rRegI src) %{
10492   match(Set dst (Replicate4C src));
10493   format %{ "MOVD    $dst,$src\n\t"
10494             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
10495   ins_encode %{
10496     __ movdl($dst$$XMMRegister, $src$$Register);
10497     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // selector 0x00 copies word 0 into all four low words
10498   %}
10499   ins_pipe( fpu_reg_reg );
10500 %}
10501 
10502 // Replicate scalar zero to packed char (2 byte) values in xmm: the register is cleared by xor-ing it with itself.
10503 instruct Repl4C_immI0(regD dst, immI0 zero) %{
10504   match(Set dst (Replicate4C zero));
10505   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
10506   ins_encode %{
10507     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10508   %}
10509   ins_pipe( fpu_reg_reg );
10510 %}
10511 
10512 // Replicate scalar to packed integer (4 byte) values in xmm, source already in an XMM register.
10513 instruct Repl2I_reg(regD dst, regD src) %{
10514   match(Set dst (Replicate2I src));
10515   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
10516   ins_encode %{
10517     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); // selector 0x00 copies dword 0 into every dword
10518   %}
10519   ins_pipe( fpu_reg_reg );
10520 %}
10521 
10522 // Replicate scalar to packed integer (4 byte) values in xmm, source in a general register.
10523 instruct Repl2I_rRegI(regD dst, rRegI src) %{
10524   match(Set dst (Replicate2I src));
10525   format %{ "MOVD   $dst,$src\n\t"
10526             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
10527   ins_encode %{
10528     __ movdl($dst$$XMMRegister, $src$$Register);
10529     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // selector 0x00 copies dword 0 into every dword
10530   %}
10531   ins_pipe( fpu_reg_reg );
10532 %}
10533 
10534 // Replicate scalar zero to packed integer (4 byte) values in xmm: the register is cleared by xor-ing it with itself.
10535 instruct Repl2I_immI0(regD dst, immI0 zero) %{
10536   match(Set dst (Replicate2I zero));
10537   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
10538   ins_encode %{
10539     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10540   %}
10541   ins_pipe( fpu_reg_reg );
10542 %}
10543 
10544 // Replicate scalar to packed single precision floating point values in xmm, source operand typed regD.
10545 instruct Repl2F_reg(regD dst, regD src) %{
10546   match(Set dst (Replicate2F src));
10547   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10548   ins_encode %{
10549     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); // selector 0xe0 puts source dword 0 in result dwords 0 and 1, keeps dwords 2 and 3
10550   %}
10551   ins_pipe( fpu_reg_reg );
10552 %}
10553 
10554 // Replicate scalar to packed single precision floating point values in xmm, source operand typed regF.
10555 instruct Repl2F_regF(regD dst, regF src) %{
10556   match(Set dst (Replicate2F src));
10557   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10558   ins_encode %{
10559     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); // selector 0xe0 puts source dword 0 in result dwords 0 and 1, keeps dwords 2 and 3
10560   %}
10561   ins_pipe( fpu_reg_reg );
10562 %}
10563 
10564 // Replicate scalar zero to packed single precision floating point values in xmm: the register is cleared by xor-ing it with itself.
10565 instruct Repl2F_immF0(regD dst, immF0 zero) %{
10566   match(Set dst (Replicate2F zero));
10567   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
10568   ins_encode %{
10569     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10570   %}
10571   ins_pipe( fpu_reg_reg );
10572 %}
10573 
10574 
10575 // =======================================================================
10576 // Fast clearing of an array: zero rax, then rep stosq stores rax to *rdi++ while rcx-- (count presumably in 8-byte units - TODO confirm).
10577 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
10578                   rFlagsReg cr)
10579 %{
10580   match(Set dummy (ClearArray cnt base));
10581   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
10582   // cnt and base are pinned to rcx and rdi and consumed by rep stos; rax and the flags are overwritten by the xorl.
10583   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
10584             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
10585   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
10586              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
10587   ins_pipe(pipe_slow);
10588 %}
10589 // String compare intrinsic (StrComp): delegates to the string_compare assembler routine with fixed input registers and one XMM temp.
10590 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10591                         rax_RegI result, regD tmp1, rFlagsReg cr)
10592 %{
10593   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10594   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10595   // All four inputs are declared USE_KILL, so none of them survives the instruction.
10596   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10597   ins_encode %{
10598     __ string_compare($str1$$Register, $str2$$Register,
10599                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10600                       $tmp1$$XMMRegister);
10601   %}
10602   ins_pipe( pipe_slow );
10603 %}
10604 
10605 // Fast search of a substring whose length is a compile-time constant (SSE4.2 only); the length picks one of two assembler routines.
10606 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
10607                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
10608 %{
10609   predicate(UseSSE42Intrinsics);
10610   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
10611   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
10612   // cnt2 (rax) is not a matched input here: it is only declared KILL and handed to the routine as a register.
10613   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
10614   ins_encode %{
10615     int icnt2 = (int)$int_cnt2$$constant;
10616     if (icnt2 >= 8) {
10617       // IndexOf for constant substrings with size >= 8 elements
10618       // which don't need to be loaded through stack.
10619       __ string_indexofC8($str1$$Register, $str2$$Register,
10620                           $cnt1$$Register, $cnt2$$Register,
10621                           icnt2, $result$$Register,
10622                           $vec$$XMMRegister, $tmp$$Register);
10623     } else {
10624       // Small strings are loaded through stack if they cross page boundary.
10625       __ string_indexof($str1$$Register, $str2$$Register,
10626                         $cnt1$$Register, $cnt2$$Register,
10627                         icnt2, $result$$Register,
10628                         $vec$$XMMRegister, $tmp$$Register);
10629     }
10630   %}
10631   ins_pipe( pipe_slow );
10632 %}
10633 // Substring search with a run-time substring length (SSE4.2 only); cnt2 is a real input here, unlike in string_indexof_con.
10634 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
10635                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
10636 %{
10637   predicate(UseSSE42Intrinsics);
10638   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
10639   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
10640 
10641   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
10642   ins_encode %{
10643     __ string_indexof($str1$$Register, $str2$$Register,
10644                       $cnt1$$Register, $cnt2$$Register,
10645                       (-1), $result$$Register, // -1 sits where the constant-length rule passes icnt2; presumably it means the length is not constant - TODO confirm
10646                       $vec$$XMMRegister, $tmp$$Register);
10647   %}
10648   ins_pipe( pipe_slow );
10649 %}
10650 
10651 // Fast string equals (StrEquals): calls char_arrays_equals with its leading flag false and an explicit element count.
10652 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
10653                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
10654 %{
10655   match(Set result (StrEquals (Binary str1 str2) cnt));
10656   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
10657   // Both strings and the count are USE_KILL; tmp3 (rbx) and the two XMM temps are scratch.
10658   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
10659   ins_encode %{
10660     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
10661                           $cnt$$Register, $result$$Register, $tmp3$$Register,
10662                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10663   %}
10664   ins_pipe( pipe_slow );
10665 %}
10666 
10667 // Fast array equals (AryEq): calls char_arrays_equals with its leading flag true; no count input, tmp3 (rcx) fills that argument slot.
10668 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
10669                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
10670 %{
10671   match(Set result (AryEq ary1 ary2));
10672   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
10673   //ins_cost(300);
10674   // Presumably the routine derives the length from the arrays when the flag is true - TODO confirm in MacroAssembler.
10675   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
10676   ins_encode %{
10677     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
10678                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
10679                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
10680   %}
10681   ins_pipe( pipe_slow );
10682 %}
10683 
10684 //----------Control Flow Instructions------------------------------------------
10685 // Signed compare Instructions
10686 
10687 // XXX more variants!!
10688 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
10689 %{
        // Signed 32-bit compare of two int registers: matches CmpI and defines
        // the flags register (stated explicitly by the effect line).
10690   match(Set cr (CmpI op1 op2));
10691   effect(DEF cr, USE op1, USE op2);
10692 
10693   format %{ "cmpl    $op1, $op2" %}
10694   opcode(0x3B);  /* Opcode 3B /r */
10695   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10696   ins_pipe(ialu_cr_reg_reg);
10697 %}
10698 
10699 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
10700 %{
        // CmpI of an int register against an immI constant (opcode 0x81 /7).
        // NOTE(review): the Con8or32 encoding class name suggests the constant
        // is emitted as 8 or 32 bits; confirm in the encode section.
10701   match(Set cr (CmpI op1 op2));
10702 
10703   format %{ "cmpl    $op1, $op2" %}
10704   opcode(0x81, 0x07); /* Opcode 81 /7 */
10705   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10706   ins_pipe(ialu_cr_reg_imm);
10707 %}
10708 
10709 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
10710 %{
        // CmpI of an int register against a LoadI that is folded into the
        // compare as a memory operand.  Cost is set to 500 (marked XXX).
10711   match(Set cr (CmpI op1 (LoadI op2)));
10712 
10713   ins_cost(500); // XXX
10714   format %{ "cmpl    $op1, $op2" %}
10715   opcode(0x3B); /* Opcode 3B /r */
10716   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10717   ins_pipe(ialu_cr_reg_mem);
10718 %}
10719 
10720 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
10721 %{
        // CmpI of an int register against the constant zero (immI0), encoded
        // as a test of the register with itself (opcode 0x85).
10722   match(Set cr (CmpI src zero));
10723 
10724   format %{ "testl   $src, $src" %}
10725   opcode(0x85);
10726   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10727   ins_pipe(ialu_cr_reg_imm);
10728 %}
10729 
10730 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
10731 %{
        // Matches the whole tree CmpI (AndI src con) against zero and emits a
        // single test with a 32-bit constant (opcode 0xF7 /0); only the flags
        // are produced, there is no destination for the AND result.
10732   match(Set cr (CmpI (AndI src con) zero));
10733 
10734   format %{ "testl   $src, $con" %}
10735   opcode(0xF7, 0x00);
10736   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
10737   ins_pipe(ialu_cr_reg_imm);
10738 %}
10739 
10740 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
10741 %{
        // Matches CmpI (AndI src (LoadI mem)) against zero: the load and the
        // AND are both folded into one test against memory (opcode 0x85).
10742   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
10743 
10744   format %{ "testl   $src, $mem" %}
10745   opcode(0x85);
10746   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
10747   ins_pipe(ialu_cr_reg_mem);
10748 %}
10749 
10750 // Unsigned compare Instructions; really, same as signed except they
10751 // produce an rFlagsRegU instead of rFlagsReg.
10752 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
10753 %{
        // Unsigned counterpart of compI_rReg: matches CmpU and produces an
        // rFlagsRegU; opcode and encoding classes are the same as the signed rule.
10754   match(Set cr (CmpU op1 op2));
10755 
10756   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10757   opcode(0x3B); /* Opcode 3B /r */
10758   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
10759   ins_pipe(ialu_cr_reg_reg);
10760 %}
10761 
10762 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
10763 %{
        // CmpU of an int register against an immI constant; same opcode
        // (0x81 /7) and encoding classes as compI_rReg_imm, unsigned flags.
10764   match(Set cr (CmpU op1 op2));
10765 
10766   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10767   opcode(0x81,0x07); /* Opcode 81 /7 */
10768   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
10769   ins_pipe(ialu_cr_reg_imm);
10770 %}
10771 
10772 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
10773 %{
        // CmpU of an int register against a LoadI folded in as a memory
        // operand; cost 500 (marked XXX), unsigned flags result.
10774   match(Set cr (CmpU op1 (LoadI op2)));
10775 
10776   ins_cost(500); // XXX
10777   format %{ "cmpl    $op1, $op2\t# unsigned" %}
10778   opcode(0x3B); /* Opcode 3B /r */
10779   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
10780   ins_pipe(ialu_cr_reg_mem);
10781 %}
10782 
10783 // // // Cisc-spilled version of cmpU_rReg
10784 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
10785 // //%{
10786 // //  match(Set cr (CmpU (LoadI op1) op2));
10787 // //
10788 // //  format %{ "CMPu   $op1,$op2" %}
10789 // //  ins_cost(500);
10790 // //  opcode(0x39);  /* Opcode 39 /r */
10791 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10792 // //%}
10793 
10794 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
10795 %{
        // CmpU of an int register against zero, encoded as a test of the
        // register with itself (opcode 0x85); produces unsigned flags.
10796   match(Set cr (CmpU src zero));
10797 
10798   format %{ "testl  $src, $src\t# unsigned" %}
10799   opcode(0x85);
10800   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
10801   ins_pipe(ialu_cr_reg_imm);
10802 %}
10803 
10804 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
10805 %{
        // Pointer compare of two registers: matches CmpP, produces unsigned
        // flags, and uses the wide REX encoding class (printed as cmpq).
10806   match(Set cr (CmpP op1 op2));
10807 
10808   format %{ "cmpq    $op1, $op2\t# ptr" %}
10809   opcode(0x3B); /* Opcode 3B /r */
10810   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10811   ins_pipe(ialu_cr_reg_reg);
10812 %}
10813 
10814 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
10815 %{
        // CmpP of a pointer register against a LoadP folded in as a memory
        // operand; cost 500 (marked XXX).  No predicate: applies to any LoadP.
10816   match(Set cr (CmpP op1 (LoadP op2)));
10817 
10818   ins_cost(500); // XXX
10819   format %{ "cmpq    $op1, $op2\t# ptr" %}
10820   opcode(0x3B); /* Opcode 3B /r */
10821   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10822   ins_pipe(ialu_cr_reg_mem);
10823 %}
10824 
10825 // // // Cisc-spilled version of cmpP_rReg
10826 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
10827 // //%{
10828 // //  match(Set cr (CmpP (LoadP op1) op2));
10829 // //
10830 // //  format %{ "CMPu   $op1,$op2" %}
10831 // //  ins_cost(500);
10832 // //  opcode(0x39);  /* Opcode 39 /r */
10833 // //  ins_encode( OpcP, reg_mem( op1, op2) );
10834 // //%}
10835 
10836 // XXX this is generalized by compP_rReg_mem???
10837 // Compare raw pointer (used in out-of-heap check).
10838 // Only works because non-oop pointers must be raw pointers
10839 // and raw pointers have no anti-dependencies.
10840 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
10841 %{
        // Same match pattern, opcode and encoding as compP_rReg_mem, but with
        // no explicit ins_cost and restricted by the predicate below.
        // n->in(2) is the second CmpP input (the LoadP in the match rule);
        // NOTE(review): its in(2) is presumably the load address -- confirm.
        // The rule applies only when that node's type is not an oop pointer.
10842   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
10843   match(Set cr (CmpP op1 (LoadP op2)));
10844 
10845   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
10846   opcode(0x3B); /* Opcode 3B /r */
10847   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10848   ins_pipe(ialu_cr_reg_mem);
10849 %}
10850 
10851 // This will generate a signed flags result. This should be OK since
10852 // any compare to a zero should be eq/neq.
10853 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
10854 %{
        // CmpP of a pointer register against the null constant (immP0),
        // encoded as a wide test of the register with itself (opcode 0x85).
        // Note the result is declared as rFlagsReg, not rFlagsRegU.
10855   match(Set cr (CmpP src zero));
10856 
10857   format %{ "testq   $src, $src\t# ptr" %}
10858   opcode(0x85);
10859   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
10860   ins_pipe(ialu_cr_reg_imm);
10861 %}
10862 
10863 // This will generate a signed flags result. This should be OK since
10864 // any compare to a zero should be eq/neq.
10865 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
10866 %{
        // Null test of a pointer loaded from memory: CmpP (LoadP op) against
        // immP0.  Selectable when compressed oops are off or the narrow oop
        // base is non-NULL; testP_mem_reg0 has the complementary predicate.
        // Encoded as opcode 0xF7 /0 with the 32-bit immediate 0xFFFFFFFF under
        // a wide REX prefix; the format prints this as a 64-bit all-ones mask
        // (NOTE(review): relies on the imm32 being sign-extended -- confirm).
10867   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
10868   match(Set cr (CmpP (LoadP op) zero));
10869 
10870   ins_cost(500); // XXX
10871   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
10872   opcode(0xF7); /* Opcode F7 /0 */
10873   ins_encode(REX_mem_wide(op),
10874              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
10875   ins_pipe(ialu_cr_reg_imm);
10876 %}
10877 
10878 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
10879 %{
        // Null test of a pointer loaded from memory for the configuration
        // where compressed oops are on and the narrow oop base is NULL.
        // Compares r12 with the memory operand; the format string annotates
        // this with R12_heapbase==0, i.e. r12 stands in for the constant zero.
10880   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
10881   match(Set cr (CmpP (LoadP mem) zero));
10882 
10883   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
10884   ins_encode %{
10885     __ cmpq(r12, $mem$$Address);
10886   %}
10887   ins_pipe(ialu_cr_reg_mem);
10888 %}
10889 
10890 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
10891 %{
        // Compare of two compressed-pointer registers: matches CmpN and emits
        // a 32-bit cmpl; produces unsigned flags.
10892   match(Set cr (CmpN op1 op2));
10893 
10894   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10895   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
10896   ins_pipe(ialu_cr_reg_reg);
10897 %}
10898 
10899 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
10900 %{
        // CmpN of a compressed-pointer register against a LoadN folded in as
        // a memory operand; emitted as a 32-bit cmpl register, memory.
10901   match(Set cr (CmpN src (LoadN mem)));
10902 
10903   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
10904   ins_encode %{
10905     __ cmpl($src$$Register, $mem$$Address);
10906   %}
10907   ins_pipe(ialu_cr_reg_mem);
10908 %}
10909 
10910 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // CmpN of a compressed-pointer register against an immN constant.
        // The constant is cast to jobject and handed to cmp_narrow_oop.
10911   match(Set cr (CmpN op1 op2));
10912 
10913   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
10914   ins_encode %{
10915     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
10916   %}
10917   ins_pipe(ialu_cr_reg_imm);
10918 %}
10919 
10920 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
10921 %{
        // CmpN of an immN constant against a LoadN folded in as a memory
        // operand, emitted through cmp_narrow_oop(address, constant).
        // NOTE(review): the match rule has the constant as the first CmpN
        // input while the encoding passes the memory operand first, so the
        // operands are swapped relative to the ideal node.  Presumably this is
        // harmless because consumers only test equal / not-equal -- confirm.
10922   match(Set cr (CmpN src (LoadN mem)));
10923 
10924   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
10925   ins_encode %{
10926     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
10927   %}
10928   ins_pipe(ialu_cr_reg_mem);
10929 %}
10930 
10931 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // CmpN of a compressed-pointer register against the null constant
        // (immN0), emitted as a 32-bit test of the register with itself.
10932   match(Set cr (CmpN src zero));
10933 
10934   format %{ "testl   $src, $src\t# compressed ptr" %}
10935   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
10936   ins_pipe(ialu_cr_reg_imm);
10937 %}
10938 
10939 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
10940 %{
        // Null test of a compressed pointer loaded from memory: matches
        // CmpN (LoadN mem) against immN0 when the narrow oop base is non-NULL
        // (testN_mem_reg0 covers the NULL-base case).
        // The 32-bit memory word is compared with 0 so that ZF is set exactly
        // when the narrow oop is null.  Comparing with 0xFFFFFFFF (-1) instead
        // would set ZF for an all-ones word, which is not what the match rule
        // asks for.
10941   predicate(Universe::narrow_oop_base() != NULL);
10942   match(Set cr (CmpN (LoadN mem) zero));
10943 
10944   ins_cost(500); // XXX
10945   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
10946   ins_encode %{
10947     __ cmpl($mem$$Address, 0);
10948   %}
10949   ins_pipe(ialu_cr_reg_mem);
10950 %}
10951 
10952 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
10953 %{
        // Null test of a compressed pointer loaded from memory for the case
        // where the narrow oop base is NULL.  Compares r12 with the memory
        // word; the format annotates this with R12_heapbase==0, i.e. r12
        // stands in for the constant zero.
10954   predicate(Universe::narrow_oop_base() == NULL);
10955   match(Set cr (CmpN (LoadN mem) zero));
10956 
10957   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
10958   ins_encode %{
10959     __ cmpl(r12, $mem$$Address);
10960   %}
10961   ins_pipe(ialu_cr_reg_mem);
10962 %}
10963 
10964 // Yanked all unsigned pointer compare operations.
10965 // Pointer compares are done with CmpP which is already unsigned.
10966 
10967 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
10968 %{
        // Signed 64-bit compare of two long registers: matches CmpL and uses
        // the wide REX encoding class (printed as cmpq).
10969   match(Set cr (CmpL op1 op2));
10970 
10971   format %{ "cmpq    $op1, $op2" %}
10972   opcode(0x3B);  /* Opcode 3B /r */
10973   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
10974   ins_pipe(ialu_cr_reg_reg);
10975 %}
10976 
10977 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
10978 %{
        // CmpL of a long register against an immL32 constant (opcode 0x81 /7,
        // wide form).  Only constants accepted by the immL32 operand match.
10979   match(Set cr (CmpL op1 op2));
10980 
10981   format %{ "cmpq    $op1, $op2" %}
10982   opcode(0x81, 0x07); /* Opcode 81 /7 */
10983   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
10984   ins_pipe(ialu_cr_reg_imm);
10985 %}
10986 
10987 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
10988 %{
        // CmpL of a long register against a LoadL folded in as a memory
        // operand.  Unlike the int and pointer memory forms, no explicit
        // ins_cost is given here.
10989   match(Set cr (CmpL op1 (LoadL op2)));
10990 
10991   format %{ "cmpq    $op1, $op2" %}
10992   opcode(0x3B); /* Opcode 3B /r */
10993   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
10994   ins_pipe(ialu_cr_reg_mem);
10995 %}
10996 
10997 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
10998 %{
        // CmpL of a long register against zero (immL0), encoded as a wide
        // test of the register with itself (opcode 0x85).
10999   match(Set cr (CmpL src zero));
11000 
11001   format %{ "testq   $src, $src" %}
11002   opcode(0x85);
11003   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11004   ins_pipe(ialu_cr_reg_imm);
11005 %}
11006 
11007 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11008 %{
        // Matches CmpL (AndL src con) against zero and emits one wide test
        // with a 32-bit constant (opcode 0xF7 /0); only flags are produced.
11009   match(Set cr (CmpL (AndL src con) zero));
11010 
11011   format %{ "testq   $src, $con\t# long" %}
11012   opcode(0xF7, 0x00);
11013   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11014   ins_pipe(ialu_cr_reg_imm);
11015 %}
11016 
11017 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11018 %{
        // Matches CmpL (AndL src (LoadL mem)) against zero: load and AND are
        // folded into one wide test against memory (opcode 0x85).
11019   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11020 
11021   format %{ "testq   $src, $mem" %}
11022   opcode(0x85);
11023   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11024   ins_pipe(ialu_cr_reg_mem);
11025 %}
11026 
11027 // Manifest a CmpL result in an integer register.  Very painful.
11028 // This is the test to avoid.
11029 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11030 %{
        // Materialises the CmpL3 result of two long registers in an int
        // register and kills the flags.  The format lists the sequence:
        // compare, preload -1, short branch on less, then setne + zero-extend.
        // The actual bytes come from the cmpl3_flag encoding class.
11031   match(Set dst (CmpL3 src1 src2));
11032   effect(KILL flags);
11033 
11034   ins_cost(275); // XXX
11035   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11036             "movl    $dst, -1\n\t"
11037             "jl,s    done\n\t"
11038             "setne   $dst\n\t"
11039             "movzbl  $dst, $dst\n\t"
11040     "done:" %}
11041   ins_encode(cmpl3_flag(src1, src2, dst));
11042   ins_pipe(pipe_slow);
11043 %}
11044 
11045 //----------Max and Min--------------------------------------------------------
11046 // Min Instructions
11047 
11048 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
11049 %{
        // Helper with no match rule; it is instantiated by the expand block
        // of minI_rReg.  Conditional move (opcode 0x0F 0x4F) of src into dst
        // that reads the flags; dst is both used and defined.
11050   effect(USE_DEF dst, USE src, USE cr);
11051 
11052   format %{ "cmovlgt $dst, $src\t# min" %}
11053   opcode(0x0F, 0x4F);
11054   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11055   ins_pipe(pipe_cmov_reg);
11056 %}
11057 
11058 
11059 instruct minI_rReg(rRegI dst, rRegI src)
11060 %{
        // MinI of dst and src, result in dst.  Not encoded directly: expands
        // into compI_rReg(dst, src) followed by cmovI_reg_g, with a fresh
        // flags operand cr linking the two.
11061   match(Set dst (MinI dst src));
11062 
11063   ins_cost(200);
11064   expand %{
11065     rFlagsReg cr;
11066     compI_rReg(cr, dst, src);
11067     cmovI_reg_g(dst, src, cr);
11068   %}
11069 %}
11070 
11071 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
11072 %{
        // Helper with no match rule; it is instantiated by the expand block
        // of maxI_rReg.  Conditional move (opcode 0x0F 0x4C) of src into dst
        // that reads the flags; dst is both used and defined.
11073   effect(USE_DEF dst, USE src, USE cr);
11074 
11075   format %{ "cmovllt $dst, $src\t# max" %}
11076   opcode(0x0F, 0x4C);
11077   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
11078   ins_pipe(pipe_cmov_reg);
11079 %}
11080 
11081 
11082 instruct maxI_rReg(rRegI dst, rRegI src)
11083 %{
        // MaxI of dst and src, result in dst.  Not encoded directly: expands
        // into compI_rReg(dst, src) followed by cmovI_reg_l, with a fresh
        // flags operand cr linking the two.
11084   match(Set dst (MaxI dst src));
11085 
11086   ins_cost(200);
11087   expand %{
11088     rFlagsReg cr;
11089     compI_rReg(cr, dst, src);
11090     cmovI_reg_l(dst, src, cr);
11091   %}
11092 %}
11093 
11094 // ============================================================================
11095 // Branch Instructions
11096 
11097 // Jump Direct - Label defines a relative address from JMP+1
11098 instruct jmpDir(label labl)
11099 %{
        // Unconditional branch for the Goto node.  The declared size of 5
        // bytes goes with the long-form jump requested by the false argument.
11100   match(Goto);
11101   effect(USE labl);
11102 
11103   ins_cost(300);
11104   format %{ "jmp     $labl" %}
11105   size(5);
11106   ins_encode %{
11107     Label* L = $labl$$label;
11108     __ jmp(*L, false); // Always long jump
11109   %}
11110   ins_pipe(pipe_jmp);
11111 %}
11112 
11113 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11114 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
11115 %{
        // Conditional branch for an If node on signed flags.  The condition
        // code comes from the cmpOp operand; declared size is 6 bytes for the
        // long-form jcc.
11116   match(If cop cr);
11117   effect(USE labl);
11118 
11119   ins_cost(300);
11120   format %{ "j$cop     $labl" %}
11121   size(6);
11122   ins_encode %{
11123     Label* L = $labl$$label;
11124     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11125   %}
11126   ins_pipe(pipe_jcc);
11127 %}
11128 
11129 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11130 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
11131 %{
        // Same encoding as jmpCon, but matches the CountedLoopEnd node
        // (the back-branch test of a counted loop) on signed flags.
11132   match(CountedLoopEnd cop cr);
11133   effect(USE labl);
11134 
11135   ins_cost(300);
11136   format %{ "j$cop     $labl\t# loop end" %}
11137   size(6);
11138   ins_encode %{
11139     Label* L = $labl$$label;
11140     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11141   %}
11142   ins_pipe(pipe_jcc);
11143 %}
11144 
11145 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11146 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // CountedLoopEnd branch on unsigned flags (cmpOpU / rFlagsRegU);
        // long-form jcc, declared size 6 bytes.
11147   match(CountedLoopEnd cop cmp);
11148   effect(USE labl);
11149 
11150   ins_cost(300);
11151   format %{ "j$cop,u   $labl\t# loop end" %}
11152   size(6);
11153   ins_encode %{
11154     Label* L = $labl$$label;
11155     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11156   %}
11157   ins_pipe(pipe_jcc);
11158 %}
11159 
11160 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand pair.
        // Same encoding as jmpLoopEndU but with a lower cost (200 vs 300).
11161   match(CountedLoopEnd cop cmp);
11162   effect(USE labl);
11163 
11164   ins_cost(200);
11165   format %{ "j$cop,u   $labl\t# loop end" %}
11166   size(6);
11167   ins_encode %{
11168     Label* L = $labl$$label;
11169     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11170   %}
11171   ins_pipe(pipe_jcc);
11172 %}
11173 
11174 // Jump Direct Conditional - using unsigned comparison
11175 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Conditional branch for an If node on unsigned flags; long-form jcc
        // with a declared size of 6 bytes.
11176   match(If cop cmp);
11177   effect(USE labl);
11178 
11179   ins_cost(300);
11180   format %{ "j$cop,u  $labl" %}
11181   size(6);
11182   ins_encode %{
11183     Label* L = $labl$$label;
11184     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11185   %}
11186   ins_pipe(pipe_jcc);
11187 %}
11188 
11189 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF / rFlagsRegUCF operand pair.  Same
        // encoding as jmpConU but with a lower cost (200 vs 300).
11190   match(If cop cmp);
11191   effect(USE labl);
11192 
11193   ins_cost(200);
11194   format %{ "j$cop,u  $labl" %}
11195   size(6);
11196   ins_encode %{
11197     Label* L = $labl$$label;
11198     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11199   %}
11200   ins_pipe(pipe_jcc);
11201 %}
11202 
11203 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for cmpOpUCF2 conditions, emitted as two branches:
        //  - notEqual: branch to the label on parity, then again on notEqual;
        //  - equal:    a short parity branch skips over the equal branch.
        // Any other condition code reaches ShouldNotReachHere.  No size() is
        // declared for this rule.
        // NOTE(review): parity presumably signals an unordered floating-point
        // compare -- confirm against the definition of cmpOpUCF2.
11204   match(If cop cmp);
11205   effect(USE labl);
11206 
11207   ins_cost(200);
11208   format %{ $$template
11209     if ($cop$$cmpcode == Assembler::notEqual) {
11210       $$emit$$"jp,u   $labl\n\t"
11211       $$emit$$"j$cop,u   $labl"
11212     } else {
11213       $$emit$$"jp,u   done\n\t"
11214       $$emit$$"j$cop,u   $labl\n\t"
11215       $$emit$$"done:"
11216     }
11217   %}
11218   ins_encode %{
11219     Label* l = $labl$$label;
11220     if ($cop$$cmpcode == Assembler::notEqual) {
11221       __ jcc(Assembler::parity, *l, false);
11222       __ jcc(Assembler::notEqual, *l, false);
11223     } else if ($cop$$cmpcode == Assembler::equal) {
11224       Label done;
11225       __ jccb(Assembler::parity, done);
11226       __ jcc(Assembler::equal, *l, false);
11227       __ bind(done);
11228     } else {
11229        ShouldNotReachHere();
11230     }
11231   %}
11232   ins_pipe(pipe_jcc);
11233 %}
11234 
11235 // ============================================================================
11236 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
11237 // superklass array for an instance of the superklass.  Set a hidden
11238 // internal cache on a hit (cache is checked with exposed code in
11239 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
11240 // encoding ALSO sets flags.
11241 
11242 instruct partialSubtypeCheck(rdi_RegP result,
11243                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11244                              rFlagsReg cr)
11245 %{
        // Value-producing form of the slow subtype check: matches
        // PartialSubtypeCheck and leaves its result in rdi.  rcx and the flags
        // are KILLed.  opcode(0x1) selects the variant of
        // enc_PartialSubtypeCheck that clears rdi on a hit (see the opcode
        // comment); the _vs_Zero rule uses opcode(0x0) instead.
        // The format text below only documents the emitted sequence.
11246   match(Set result (PartialSubtypeCheck sub super));
11247   effect(KILL rcx, KILL cr);
11248 
11249   ins_cost(1100);  // slightly larger than the next version
11250   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11251             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11252             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11253             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
11254             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
11255             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11256             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
11257     "miss:\t" %}
11258 
11259   opcode(0x1); // Force a XOR of RDI
11260   ins_encode(enc_PartialSubtypeCheck());
11261   ins_pipe(pipe_slow);
11262 %}
11263 
11264 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
11265                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
11266                                      immP0 zero,
11267                                      rdi_RegP result)
11268 %{
        // Flags-only form of the slow subtype check: matches a CmpP of
        // PartialSubtypeCheck against null and defines only the flags.
        // rcx and rdi (result) are KILLed.  opcode(0x0) selects the variant of
        // enc_PartialSubtypeCheck that skips clearing rdi (see the opcode
        // comment).  Cost 1000 makes it slightly cheaper than the
        // value-producing rule.
11269   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
11270   effect(KILL rcx, KILL result);
11271 
11272   ins_cost(1000);
11273   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
11274             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
11275             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
11276             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
11277             "jne,s   miss\t\t# Missed: flags nz\n\t"
11278             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
11279     "miss:\t" %}
11280 
11281   opcode(0x0); // No need to XOR RDI
11282   ins_encode(enc_PartialSubtypeCheck());
11283   ins_pipe(pipe_slow);
11284 %}
11285 
11286 // ============================================================================
11287 // Branch Instructions -- short offset versions
11288 //
11289 // These instructions are used to replace jumps of a long offset (the default
11290 // match) with jumps of a shorter offset.  These instructions are all tagged
11291 // with the ins_short_branch attribute, which causes the ADLC to suppress the
11292 // match rules in general matching.  Instead, the ADLC generates a conversion
11293 // method in the MachNode which can be used to do in-place replacement of the
11294 // long variant with the shorter variant.  The compiler will determine if a
11295 // branch can be taken by the is_short_branch_offset() predicate in the machine
11296 // specific code section of the file.
11297 
11298 // Jump Direct - Label defines a relative address from JMP+1
11299 instruct jmpDir_short(label labl) %{
        // Short-offset replacement for jmpDir (tagged ins_short_branch):
        // emits jmpb, declared size 2 bytes.
11300   match(Goto);
11301   effect(USE labl);
11302 
11303   ins_cost(300);
11304   format %{ "jmp,s   $labl" %}
11305   size(2);
11306   ins_encode %{
11307     Label* L = $labl$$label;
11308     __ jmpb(*L);
11309   %}
11310   ins_pipe(pipe_jmp);
11311   ins_short_branch(1);
11312 %}
11313 
11314 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11315 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpCon (tagged ins_short_branch):
        // emits jccb, declared size 2 bytes.
11316   match(If cop cr);
11317   effect(USE labl);
11318 
11319   ins_cost(300);
11320   format %{ "j$cop,s   $labl" %}
11321   size(2);
11322   ins_encode %{
11323     Label* L = $labl$$label;
11324     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11325   %}
11326   ins_pipe(pipe_jcc);
11327   ins_short_branch(1);
11328 %}
11329 
11330 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11331 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpLoopEnd (tagged ins_short_branch):
        // emits jccb, declared size 2 bytes.
11332   match(CountedLoopEnd cop cr);
11333   effect(USE labl);
11334 
11335   ins_cost(300);
11336   format %{ "j$cop,s   $labl\t# loop end" %}
11337   size(2);
11338   ins_encode %{
11339     Label* L = $labl$$label;
11340     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11341   %}
11342   ins_pipe(pipe_jcc);
11343   ins_short_branch(1);
11344 %}
11345 
11346 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11347 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpLoopEndU (tagged ins_short_branch):
        // emits jccb, declared size 2 bytes.
11348   match(CountedLoopEnd cop cmp);
11349   effect(USE labl);
11350 
11351   ins_cost(300);
11352   format %{ "j$cop,us  $labl\t# loop end" %}
11353   size(2);
11354   ins_encode %{
11355     Label* L = $labl$$label;
11356     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11357   %}
11358   ins_pipe(pipe_jcc);
11359   ins_short_branch(1);
11360 %}
11361 
11362 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpLoopEndUCF (tagged
        // ins_short_branch): emits jccb, declared size 2 bytes.  Note the cost
        // here is 300 while the long form declares 200.
11363   match(CountedLoopEnd cop cmp);
11364   effect(USE labl);
11365 
11366   ins_cost(300);
11367   format %{ "j$cop,us  $labl\t# loop end" %}
11368   size(2);
11369   ins_encode %{
11370     Label* L = $labl$$label;
11371     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11372   %}
11373   ins_pipe(pipe_jcc);
11374   ins_short_branch(1);
11375 %}
11376 
11377 // Jump Direct Conditional - using unsigned comparison
11378 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpConU (tagged ins_short_branch):
        // emits jccb, declared size 2 bytes.
11379   match(If cop cmp);
11380   effect(USE labl);
11381 
11382   ins_cost(300);
11383   format %{ "j$cop,us  $labl" %}
11384   size(2);
11385   ins_encode %{
11386     Label* L = $labl$$label;
11387     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11388   %}
11389   ins_pipe(pipe_jcc);
11390   ins_short_branch(1);
11391 %}
11392 
11393 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF (tagged ins_short_branch):
        // emits jccb, declared size 2 bytes.  Note the cost here is 300 while
        // the long form declares 200.
11394   match(If cop cmp);
11395   effect(USE labl);
11396 
11397   ins_cost(300);
11398   format %{ "j$cop,us  $labl" %}
11399   size(2);
11400   ins_encode %{
11401     Label* L = $labl$$label;
11402     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
11403   %}
11404   ins_pipe(pipe_jcc);
11405   ins_short_branch(1);
11406 %}
11407 
11408 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF2 (tagged ins_short_branch).
        // Both paths emit exactly two jccb instructions, which goes with the
        // declared size of 4 bytes:
        //  - notEqual: parity branch and notEqual branch both go to the label;
        //  - equal:    a parity branch skips over the equal branch.
        // Any other condition code reaches ShouldNotReachHere.
11409   match(If cop cmp);
11410   effect(USE labl);
11411 
11412   ins_cost(300);
11413   format %{ $$template
11414     if ($cop$$cmpcode == Assembler::notEqual) {
11415       $$emit$$"jp,u,s   $labl\n\t"
11416       $$emit$$"j$cop,u,s   $labl"
11417     } else {
11418       $$emit$$"jp,u,s   done\n\t"
11419       $$emit$$"j$cop,u,s  $labl\n\t"
11420       $$emit$$"done:"
11421     }
11422   %}
11423   size(4);
11424   ins_encode %{
11425     Label* l = $labl$$label;
11426     if ($cop$$cmpcode == Assembler::notEqual) {
11427       __ jccb(Assembler::parity, *l);
11428       __ jccb(Assembler::notEqual, *l);
11429     } else if ($cop$$cmpcode == Assembler::equal) {
11430       Label done;
11431       __ jccb(Assembler::parity, done);
11432       __ jccb(Assembler::equal, *l);
11433       __ bind(done);
11434     } else {
11435        ShouldNotReachHere();
11436     }
11437   %}
11438   ins_pipe(pipe_jcc);
11439   ins_short_branch(1);
11440 %}
11441 
11442 // ============================================================================
11443 // inlined locking and unlocking
11444 
11445 instruct cmpFastLock(rFlagsReg cr,
11446                      rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
11447 %{
        // Inlined monitor enter: matches FastLock and defines the flags.
        // box (rbx) is USE_KILL; tmp (rax) and scr are temporaries.  The code
        // itself comes from the Fast_Lock encoding class.
11448   match(Set cr (FastLock object box));
11449   effect(TEMP tmp, TEMP scr, USE_KILL box);
11450 
11451   ins_cost(300);
11452   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
11453   ins_encode(Fast_Lock(object, box, tmp, scr));
11454   ins_pipe(pipe_slow);
11455 %}
11456 
11457 instruct cmpFastUnlock(rFlagsReg cr,
11458                        rRegP object, rax_RegP box, rRegP tmp)
11459 %{
        // Inlined monitor exit: matches FastUnlock and defines the flags.
        // box (rax) is USE_KILL and tmp is a temporary.  The code itself comes
        // from the Fast_Unlock encoding class.
11460   match(Set cr (FastUnlock object box));
11461   effect(TEMP tmp, USE_KILL box);
11462 
11463   ins_cost(300);
11464   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
11465   ins_encode(Fast_Unlock(object, box, tmp));
11466   ins_pipe(pipe_slow);
11467 %}
11468 
11469 
11470 // ============================================================================
11471 // Safepoint Instructions
11472 instruct safePoint_poll(rFlagsReg cr)
11473 %{
        // Safepoint poll used when the polling page is not "far": a testl of
        // rax against the polling page address, emitted through an
        // AddressLiteral carrying the poll_type relocation.  Kills the flags.
11474   predicate(!Assembler::is_polling_page_far());
11475   match(SafePoint);
11476   effect(KILL cr);
11477 
11478   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
11479             "# Safepoint: poll for GC" %}
11480   ins_cost(125);
11481   ins_encode %{
11482     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
11483     __ testl(rax, addr);
11484   %}
11485   ins_pipe(ialu_reg_mem);
11486 %}
11487 
11488 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
11489 %{
        // Safepoint poll used when the polling page is "far": the page address
        // arrives in a register operand and the testl reads through it at
        // offset 0.  The poll_type relocation is recorded just before the
        // instruction.  Kills the flags.
11490   predicate(Assembler::is_polling_page_far());
11491   match(SafePoint poll);
11492   effect(KILL cr, USE poll);
11493 
11494   format %{ "testl  rax, [$poll]\t"
11495             "# Safepoint: poll for GC" %}
11496   ins_cost(125);
11497   ins_encode %{
11498     __ relocate(relocInfo::poll_type);
11499     __ testl(rax, Address($poll$$Register, 0));
11500   %}
11501   ins_pipe(ialu_reg_mem);
11502 %}
11503 
11504 // ============================================================================
11505 // Procedure Call/Return Instructions
11506 // Call Java Static Instruction
11507 // Note: If this code changes, the corresponding ret_addr_offset() and
11508 //       compute_padding() functions will have to be adjusted.
11509 instruct CallStaticJavaDirect(method meth) %{
        // Static Java call for CallStaticJava nodes that are not method-handle
        // invokes (see predicate).  Opcode 0xE8, followed by the call_epilog
        // encoding; the instruction is aligned to 4.
11510   match(CallStaticJava);
11511   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
11512   effect(USE meth);
11513 
11514   ins_cost(300);
11515   format %{ "call,static " %}
11516   opcode(0xE8); /* E8 cd */
11517   ins_encode(Java_Static_Call(meth), call_epilog);
11518   ins_pipe(pipe_slow);
11519   ins_alignment(4);
11520 %}
11521 
11522 // Call Java Static Instruction (method handle version)
11523 // Note: If this code changes, the corresponding ret_addr_offset() and
11524 //       compute_padding() functions will have to be adjusted.
11525 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
        // Static Java call for CallStaticJava nodes that ARE method-handle
        // invokes (complement of CallStaticJavaDirect).  The call is wrapped
        // in the preserve_SP / restore_SP encodings; the extra rbp operand is
        // declared for that purpose (see the comment below).
11526   match(CallStaticJava);
11527   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
11528   effect(USE meth);
11529   // RBP is saved by all callees (for interpreter stack correction).
11530   // We use it here for a similar purpose, in {preserve,restore}_SP.
11531 
11532   ins_cost(300);
11533   format %{ "call,static/MethodHandle " %}
11534   opcode(0xE8); /* E8 cd */
11535   ins_encode(preserve_SP,
11536              Java_Static_Call(meth),
11537              restore_SP,
11538              call_epilog);
11539   ins_pipe(pipe_slow);
11540   ins_alignment(4);
11541 %}
11542 
11543 // Call Java Dynamic Instruction
11544 // Note: If this code changes, the corresponding ret_addr_offset() and
11545 //       compute_padding() functions will have to be adjusted.
11546 instruct CallDynamicJavaDirect(method meth)
11547 %{
        // Dynamic (virtual/interface) Java call for CallDynamicJava nodes.
        // Per the format, rax is loaded with Universe::non_oop_word() before
        // the call; both steps come from the Java_Dynamic_Call encoding.
11548   match(CallDynamicJava);
11549   effect(USE meth);
11550 
11551   ins_cost(300);
11552   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
11553             "call,dynamic " %}
11554   opcode(0xE8); /* E8 cd */
11555   ins_encode(Java_Dynamic_Call(meth), call_epilog);
11556   ins_pipe(pipe_slow);
11557   ins_alignment(4);
11558 %}
11559 
11560 // Call Runtime Instruction
11561 instruct CallRuntimeDirect(method meth)
11562 %{
        // Call into the runtime for CallRuntime nodes; opcode 0xE8 via the
        // Java_To_Runtime encoding.  No call_epilog and no alignment here.
11563   match(CallRuntime);
11564   effect(USE meth);
11565 
11566   ins_cost(300);
11567   format %{ "call,runtime " %}
11568   opcode(0xE8); /* E8 cd */
11569   ins_encode(Java_To_Runtime(meth));
11570   ins_pipe(pipe_slow);
11571 %}
11572 
11573 // Call runtime without safepoint
11574 instruct CallLeafDirect(method meth)
11575 %{
        // Leaf runtime call for CallLeaf nodes; same opcode and encoding
        // class as CallRuntimeDirect, only the matched node differs.
11576   match(CallLeaf);
11577   effect(USE meth);
11578 
11579   ins_cost(300);
11580   format %{ "call_leaf,runtime " %}
11581   opcode(0xE8); /* E8 cd */
11582   ins_encode(Java_To_Runtime(meth));
11583   ins_pipe(pipe_slow);
11584 %}
11585 
11586 // Call runtime without safepoint
11587 instruct CallLeafNoFPDirect(method meth)
11588 %{
        // Leaf runtime call for CallLeafNoFP nodes; same opcode and encoding
        // class as CallLeafDirect, only the matched node differs.
11589   match(CallLeafNoFP);
11590   effect(USE meth);
11591 
11592   ins_cost(300);
11593   format %{ "call_leaf_nofp,runtime " %}
11594   opcode(0xE8); /* E8 cd */
11595   ins_encode(Java_To_Runtime(meth));
11596   ins_pipe(pipe_slow);
11597 %}
11598 
11599 // Return Instruction
11600 // Remove the return address & jump to it.
11601 // Notice: We always emit a nop after a ret to make sure there is room
11602 // for safepoint patching
      // NOTE(review): the encoding below lists only OpcP with opcode 0xC3; no
      // nop is visible in this rule, so the notice above may be stale or may
      // refer to code emitted elsewhere -- confirm.
11603 instruct Ret()
11604 %{
        // Matches the Return node and emits the single-byte ret opcode.
11605   match(Return);
11606 
11607   format %{ "ret" %}
11608   opcode(0xC3);
11609   ins_encode(OpcP);
11610   ins_pipe(pipe_jmp);
11611 %}
11612 
11613 // Tail Call; Jump from runtime stub to Java code.
11614 // Also known as an 'interprocedural jump'.
11615 // Target of jump will eventually return to caller.
11616 // TailJump below removes the return address.
      //
      // Operands:
      //   jump_target - register holding the jump destination (no_rbp_RegP
      //                 operand class).
      //   method_oop  - method oop, constrained to the rbx_RegP operand class;
      //                 the format text notes that rbx holds it.
11617 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
11618 %{
11619   match(TailCall jump_target method_oop);
11620 
11621   ins_cost(300);
11622   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
11623   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Only jump_target takes part in the encoding; method_oop is not
        // referenced here.
        // NOTE(review): REX_reg / OpcP / reg_opc presumably emit the REX
        // prefix, the primary opcode and the ModRM byte -- confirm against
        // the encode class definitions.
11624   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
11625   ins_pipe(pipe_jmp);
11626 %}
11627 
11628 // Tail Jump; remove the return address; jump to target.
11629 // TailCall above leaves the return address around.
      //
      // Operands:
      //   jump_target - register holding the jump destination (no_rbp_RegP
      //                 operand class).
      //   ex_oop      - exception oop, constrained to the rax_RegP operand
      //                 class.
11630 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
11631 %{
11632   match(TailJump jump_target ex_oop);
11633 
11634   ins_cost(300);
        // Per the format text, the return address is popped into rdx before
        // the jump, so rdx is overwritten by this instruction.
11635   format %{ "popq    rdx\t# pop return address\n\t"
11636             "jmp     $jump_target" %}
11637   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // The pop is emitted as a literal opcode byte; the rest of the
        // encoding is the same sequence used by TailCalljmpInd.
11638   ins_encode(Opcode(0x5a), // popq rdx
11639              REX_reg(jump_target), OpcP, reg_opc(jump_target));
11640   ins_pipe(pipe_jmp);
11641 %}
11642 
11643 // Create exception oop: created by stack-crawling runtime code.
11644 // Created exception is now available to this handler, and is setup
11645 // just prior to jumping to this handler.  No code emitted.
      //
      // Operand:
      //   ex_oop - result of the CreateEx node, constrained to the rax_RegP
      //            operand class.
11646 instruct CreateException(rax_RegP ex_oop)
11647 %{
11648   match(Set ex_oop (CreateEx));
11649 
        // Declared size is zero and the encoding list is empty, matching the
        // "no code emitted" note above.
11650   size(0);
11651   // use the following format syntax
11652   format %{ "# exception oop is in rax; no code emitted" %}
11653   ins_encode();
11654   ins_pipe(empty);
11655 %}
11656 
11657 // Rethrow exception:
11658 // The exception oop will come in the first argument position.
11659 // Then JUMP (not call) to the rethrow stub code.
      //
      // Matches the ideal Rethrow node and takes no operands; the encoding is
      // supplied by the enc_rethrow encode class (not visible in this section).
11660 instruct RethrowException()
11661 %{
11662   match(Rethrow);
11663 
11664   // use the following format syntax
11665   format %{ "jmp     rethrow_stub" %}
11666   ins_encode(enc_rethrow);
11667   ins_pipe(pipe_jmp);
11668 %}
11669 
11670 
11671 //----------PEEPHOLE RULES-----------------------------------------------------
11672 // These must follow all instruction definitions as they use the names
11673 // defined in the instructions definitions.
11674 //
11675 // peepmatch ( root_instr_name [preceding_instruction]* );
11676 //
11677 // peepconstraint %{
11678 // (instruction_number.operand_name relational_op instruction_number.operand_name
11679 //  [, ...] );
11680 // // instruction numbers are zero-based using left to right order in peepmatch
11681 //
11682 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
11683 // // provide an instruction_number.operand_name for each operand that appears
11684 // // in the replacement instruction's match rule
11685 //
11686 // ---------VM FLAGS---------------------------------------------------------
11687 //
11688 // All peephole optimizations can be turned off using -XX:-OptoPeephole
11689 //
11690 // Each peephole rule is given an identifying number starting with zero and
11691 // increasing by one in the order seen by the parser.  An individual peephole
11692 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
11693 // on the command-line.
11694 //
11695 // ---------CURRENT LIMITATIONS----------------------------------------------
11696 //
11697 // Only match adjacent instructions in same basic block
11698 // Only equality constraints
11699 // Only constraints between operands, not (0.dest_reg == RAX_enc)
11700 // Only one replacement instruction
11701 //
11702 // ---------EXAMPLE----------------------------------------------------------
11703 //
11704 // // pertinent parts of existing instructions in architecture description
11705 // instruct movI(rRegI dst, rRegI src)
11706 // %{
11707 //   match(Set dst (CopyI src));
11708 // %}
11709 //
11710 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
11711 // %{
11712 //   match(Set dst (AddI dst src));
11713 //   effect(KILL cr);
11714 // %}
11715 //
11716 // // Change (inc mov) to lea
11717 // peephole %{
11718 //   // increment preceded by register-register move
11719 //   peepmatch ( incI_rReg movI );
11720 //   // require that the destination register of the increment
11721 //   // match the destination register of the move
11722 //   peepconstraint ( 0.dst == 1.dst );
11723 //   // construct a replacement instruction that sets
11724 //   // the destination to ( move's source register + one )
11725 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
11726 // %}
11727 //
11728 
11729 // Implementation no longer uses movX instructions since
11730 // machine-independent system no longer uses CopyX nodes.
11731 //
11732 // peephole
11733 // %{
11734 //   peepmatch (incI_rReg movI);
11735 //   peepconstraint (0.dst == 1.dst);
11736 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11737 // %}
11738 
11739 // peephole
11740 // %{
11741 //   peepmatch (decI_rReg movI);
11742 //   peepconstraint (0.dst == 1.dst);
11743 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11744 // %}
11745 
11746 // peephole
11747 // %{
11748 //   peepmatch (addI_rReg_imm movI);
11749 //   peepconstraint (0.dst == 1.dst);
11750 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
11751 // %}
11752 
11753 // peephole
11754 // %{
11755 //   peepmatch (incL_rReg movL);
11756 //   peepconstraint (0.dst == 1.dst);
11757 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11758 // %}
11759 
11760 // peephole
11761 // %{
11762 //   peepmatch (decL_rReg movL);
11763 //   peepconstraint (0.dst == 1.dst);
11764 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11765 // %}
11766 
11767 // peephole
11768 // %{
11769 //   peepmatch (addL_rReg_imm movL);
11770 //   peepconstraint (0.dst == 1.dst);
11771 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
11772 // %}
11773 
11774 // peephole
11775 // %{
11776 //   peepmatch (addP_rReg_imm movP);
11777 //   peepconstraint (0.dst == 1.dst);
11778 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
11779 // %}
11780 
11781 // // Change load of spilled value to only a spill
11782 // instruct storeI(memory mem, rRegI src)
11783 // %{
11784 //   match(Set mem (StoreI mem src));
11785 // %}
11786 //
11787 // instruct loadI(rRegI dst, memory mem)
11788 // %{
11789 //   match(Set dst (LoadI mem));
11790 // %}
11791 //
11792 
11793 peephole
11794 %{
        // Change a load of a just-spilled int value into only the spill.
        // Instruction 0 is the root (loadI); instruction 1 is the storeI that
        // precedes it.
11795   peepmatch (loadI storeI);
        // The load must write the register the store read from, and both must
        // use the same memory operand.
11796   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeI built from the store's operands.
11797   peepreplace (storeI(1.mem 1.mem 1.src));
11798 %}
11799 
11800 peephole
11801 %{
        // Long variant of the rule: change a load of a just-spilled value into
        // only the spill.  Instruction 0 is the root (loadL); instruction 1 is
        // the storeL that precedes it.
11802   peepmatch (loadL storeL);
        // The load must write the register the store read from, and both must
        // use the same memory operand.
11803   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeL built from the store's operands.
11804   peepreplace (storeL(1.mem 1.mem 1.src));
11805 %}
11806 
11807 //----------SMARTSPILL RULES---------------------------------------------------
11808 // These must follow all instruction definitions as they use the names
11809 // defined in the instructions definitions.