1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VMReg );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
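// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.
// NOTE(review): in the reg_defs below RSI and RDI are SOE only in the
// C-convention column (and only under _WIN64); their register save type is
// SOC.  The last sentence above looks stale -- confirm.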
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP, R12 and R15
// (R15 is the TLS pointer, see ptr_r15_reg; R12 is presumably reserved
// elsewhere -- TODO confirm)
reg_class ptr_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R13, R13_H,
                  R14, R14_H);
 317 
// Class for all pointer registers except RAX, RSP, R12 and R15
reg_class ptr_no_rax_reg(RDX, RDX_H,
                         RBP, RBP_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 331 
// Class for all pointer registers except RBP, RSP, R12 and R15
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
// Class for all pointer registers except RAX, RBX, RSP, R12 and R15
reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
                             RBP, RBP_H,
                             RDI, RDI_H,
                             RSI, RSI_H,
                             RCX, RCX_H,
                             R8,  R8_H,
                             R9,  R9_H,
                             R10, R10_H,
                             R11, R11_H,
                             R13, R13_H,
                             R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP, R12 and R15)
reg_class long_reg(RAX, RAX_H,
                   RDX, RDX_H,
                   RBP, RBP_H,
                   RDI, RDI_H,
                   RSI, RSI_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
                   R13, R13_H,
                   R14, R14_H);
 393 
// Class for all long registers except RAX, RDX (and RSP, R12, R15)
reg_class long_no_rax_rdx_reg(RBP, RBP_H,
                              RDI, RDI_H,
                              RSI, RSI_H,
                              RCX, RCX_H,
                              RBX, RBX_H,
                              R8,  R8_H,
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
                              R13, R13_H,
                              R14, R14_H);
 406 
// Class for all long registers except RCX (and RSP, R12, R15)
reg_class long_no_rcx_reg(RBP, RBP_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RAX, RAX_H,
                          RDX, RDX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 420 
// Class for all long registers except RAX (and RSP, R12, R15)
reg_class long_no_rax_reg(RBP, RBP_H,
                          RDX, RDX_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP, R12 and R15)
// Only the low halves are listed; the *_H halves are not part of int classes.
reg_class int_reg(RAX,
                  RDX,
                  RBP,
                  RDI,
                  RSI,
                  RCX,
                  RBX,
                  R8,
                  R9,
                  R10,
                  R11,
                  R13,
                  R14);
 458 
// Class for all int registers except RCX (and RSP, R12, R15)
reg_class int_no_rcx_reg(RAX,
                         RDX,
                         RBP,
                         RDI,
                         RSI,
                         RBX,
                         R8,
                         R9,
                         R10,
                         R11,
                         R13,
                         R14);
 472 
// Class for all int registers except RAX, RDX (and RSP, R12, R15)
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R13,
                             R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 // !!!!! Special hack to get all types of calls to specify the byte offset
 555 //       from the start of the call to the point where the return address
 556 //       will point.
 557 int MachCallStaticJavaNode::ret_addr_offset()
 558 {
 559   return 5; // 5 bytes from start of call to where return address points
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset()
 563 {
 564   return 15; // 15 bytes from start of call to where return address points
 565 }
 566 
 567 // In os_cpu .ad file
 568 // int MachCallRuntimeNode::ret_addr_offset()
 569 
 570 // Indicate if the safepoint node needs the polling page as an input.
 571 // Since amd64 does not have absolute addressing but RIP-relative
 572 // addressing and the polling page is within 2G, it doesn't.
 573 bool SafePointNode::needs_polling_address_input()
 574 {
 575   return false;
 576 }
 577 
 578 //
 579 // Compute padding required for nodes which need alignment
 580 //
 581 
 582 // The address of the call instruction needs to be 4-byte aligned to
 583 // ensure that it does not span a cache line so that it can be patched.
 584 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 585 {
 586   current_offset += 1; // skip call opcode byte
 587   return round_to(current_offset, alignment_required()) - current_offset;
 588 }
 589 
 590 // The address of the call instruction needs to be 4-byte aligned to
 591 // ensure that it does not span a cache line so that it can be patched.
 592 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 593 {
 594   current_offset += 11; // skip movq instruction + call opcode byte
 595   return round_to(current_offset, alignment_required()) - current_offset;
 596 }
 597 
 598 #ifndef PRODUCT
 599 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 600 {
 601   st->print("INT3");
 602 }
 603 #endif
 604 
 605 // EMIT_RM()
 606 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 607 {
 608   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 609   *(cbuf.code_end()) = c;
 610   cbuf.set_code_end(cbuf.code_end() + 1);
 611 }
 612 
 613 // EMIT_CC()
 614 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 615 {
 616   unsigned char c = (unsigned char) (f1 | f2);
 617   *(cbuf.code_end()) = c;
 618   cbuf.set_code_end(cbuf.code_end() + 1);
 619 }
 620 
 621 // EMIT_OPCODE()
 622 void emit_opcode(CodeBuffer &cbuf, int code)
 623 {
 624   *(cbuf.code_end()) = (unsigned char) code;
 625   cbuf.set_code_end(cbuf.code_end() + 1);
 626 }
 627 
 628 // EMIT_OPCODE() w/ relocation information
 629 void emit_opcode(CodeBuffer &cbuf,
 630                  int code, relocInfo::relocType reloc, int offset, int format)
 631 {
 632   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8()
 637 void emit_d8(CodeBuffer &cbuf, int d8)
 638 {
 639   *(cbuf.code_end()) = (unsigned char) d8;
 640   cbuf.set_code_end(cbuf.code_end() + 1);
 641 }
 642 
 643 // EMIT_D16()
 644 void emit_d16(CodeBuffer &cbuf, int d16)
 645 {
 646   *((short *)(cbuf.code_end())) = d16;
 647   cbuf.set_code_end(cbuf.code_end() + 2);
 648 }
 649 
 650 // EMIT_D32()
 651 void emit_d32(CodeBuffer &cbuf, int d32)
 652 {
 653   *((int *)(cbuf.code_end())) = d32;
 654   cbuf.set_code_end(cbuf.code_end() + 4);
 655 }
 656 
 657 // EMIT_D64()
 658 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 659 {
 660   *((int64_t*) (cbuf.code_end())) = d64;
 661   cbuf.set_code_end(cbuf.code_end() + 8);
 662 }
 663 
 664 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 665 void emit_d32_reloc(CodeBuffer& cbuf,
 666                     int d32,
 667                     relocInfo::relocType reloc,
 668                     int format)
 669 {
 670   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 671   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 672 
 673   *((int*) (cbuf.code_end())) = d32;
 674   cbuf.set_code_end(cbuf.code_end() + 4);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf,
 679                     int d32,
 680                     RelocationHolder const& rspec,
 681                     int format)
 682 {
 683 #ifdef ASSERT
 684   if (rspec.reloc()->type() == relocInfo::oop_type &&
 685       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 686     assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
 687   }
 688 #endif
 689   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 690 
 691   *((int* )(cbuf.code_end())) = d32;
 692   cbuf.set_code_end(cbuf.code_end() + 4);
 693 }
 694 
 695 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 696   address next_ip = cbuf.code_end() + 4;
 697   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 698                  external_word_Relocation::spec(addr),
 699                  RELOC_DISP32);
 700 }
 701 
 702 
 703 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 704 void emit_d64_reloc(CodeBuffer& cbuf,
 705                     int64_t d64,
 706                     relocInfo::relocType reloc,
 707                     int format)
 708 {
 709   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 710 
 711   *((int64_t*) (cbuf.code_end())) = d64;
 712   cbuf.set_code_end(cbuf.code_end() + 8);
 713 }
 714 
 715 // emit 64 bit value and construct relocation entry from RelocationHolder
 716 void emit_d64_reloc(CodeBuffer& cbuf,
 717                     int64_t d64,
 718                     RelocationHolder const& rspec,
 719                     int format)
 720 {
 721 #ifdef ASSERT
 722   if (rspec.reloc()->type() == relocInfo::oop_type &&
 723       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 724     assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
 725            "cannot embed non-perm oops in code");
 726   }
 727 #endif
 728   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 729 
 730   *((int64_t*) (cbuf.code_end())) = d64;
 731   cbuf.set_code_end(cbuf.code_end() + 8);
 732 }
 733 
 734 // Access stack slot for load or store
 735 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 736 {
 737   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 738   if (-0x80 <= disp && disp < 0x80) {
 739     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 740     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 741     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 742   } else {
 743     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 744     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 745     emit_d32(cbuf, disp);     // Displacement // R/M byte
 746   }
 747 }
 748 
 749    // rRegI ereg, memory mem) %{    // emit_reg_mem
 750 void encode_RegMem(CodeBuffer &cbuf,
 751                    int reg,
 752                    int base, int index, int scale, int disp, bool disp_is_oop)
 753 {
 754   assert(!disp_is_oop, "cannot have disp");
 755   int regenc = reg & 7;
 756   int baseenc = base & 7;
 757   int indexenc = index & 7;
 758 
 759   // There is no index & no scale, use form without SIB byte
 760   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 761     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 762     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 763       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 764     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 765       // If 8-bit displacement, mode 0x1
 766       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 767       emit_d8(cbuf, disp);
 768     } else {
 769       // If 32-bit displacement
 770       if (base == -1) { // Special flag for absolute address
 771         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 772         if (disp_is_oop) {
 773           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 774         } else {
 775           emit_d32(cbuf, disp);
 776         }
 777       } else {
 778         // Normal base + offset
 779         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 780         if (disp_is_oop) {
 781           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 782         } else {
 783           emit_d32(cbuf, disp);
 784         }
 785       }
 786     }
 787   } else {
 788     // Else, encode with the SIB byte
 789     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 790     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 791       // If no displacement
 792       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 793       emit_rm(cbuf, scale, indexenc, baseenc);
 794     } else {
 795       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 796         // If 8-bit displacement, mode 0x1
 797         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 798         emit_rm(cbuf, scale, indexenc, baseenc);
 799         emit_d8(cbuf, disp);
 800       } else {
 801         // If 32-bit displacement
 802         if (base == 0x04 ) {
 803           emit_rm(cbuf, 0x2, regenc, 0x4);
 804           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 805         } else {
 806           emit_rm(cbuf, 0x2, regenc, 0x4);
 807           emit_rm(cbuf, scale, indexenc, baseenc); // *
 808         }
 809         if (disp_is_oop) {
 810           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 811         } else {
 812           emit_d32(cbuf, disp);
 813         }
 814       }
 815     }
 816   }
 817 }
 818 
 819 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 820 {
 821   if (dstenc != srcenc) {
 822     if (dstenc < 8) {
 823       if (srcenc >= 8) {
 824         emit_opcode(cbuf, Assembler::REX_B);
 825         srcenc -= 8;
 826       }
 827     } else {
 828       if (srcenc < 8) {
 829         emit_opcode(cbuf, Assembler::REX_R);
 830       } else {
 831         emit_opcode(cbuf, Assembler::REX_RB);
 832         srcenc -= 8;
 833       }
 834       dstenc -= 8;
 835     }
 836 
 837     emit_opcode(cbuf, 0x8B);
 838     emit_rm(cbuf, 0x3, dstenc, srcenc);
 839   }
 840 }
 841 
 842 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 843   if( dst_encoding == src_encoding ) {
 844     // reg-reg copy, use an empty encoding
 845   } else {
 846     MacroAssembler _masm(&cbuf);
 847 
 848     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 849   }
 850 }
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq\trbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                   "# Safepoint: poll for GC");
1002     st->print("\t");
1003   }
1004 }
1005 #endif
1006 
1007 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008 {
1009   Compile* C = ra_->C;
1010   int framesize = C->frame_slots() << LogBytesPerInt;
1011   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012   // Remove word for return adr already pushed
1013   // and RBP
1014   framesize -= 2*wordSize;
1015 
1016   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017 
1018   if (framesize) {
1019     emit_opcode(cbuf, Assembler::REX_W);
1020     if (framesize < 0x80) {
1021       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023       emit_d8(cbuf, framesize);
1024     } else {
1025       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027       emit_d32(cbuf, framesize);
1028     }
1029   }
1030 
1031   // popq rbp
1032   emit_opcode(cbuf, 0x58 | RBP_enc);
1033 
1034   if (do_polling() && C->is_method_compilation()) {
1035     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036     // XXX reg_mem doesn't support RIP-relative addressing yet
1037     cbuf.set_inst_mark();
1038     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039     emit_opcode(cbuf, 0x85); // testl
1040     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041     // cbuf.inst_mark() is beginning of instruction
1042     emit_d32_reloc(cbuf, os::get_polling_page());
1043 //                    relocInfo::poll_return_type,
1044   }
1045 }
1046 
1047 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048 {
1049   Compile* C = ra_->C;
1050   int framesize = C->frame_slots() << LogBytesPerInt;
1051   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052   // Remove word for return adr already pushed
1053   // and RBP
1054   framesize -= 2*wordSize;
1055 
1056   uint size = 0;
1057 
1058   if (do_polling() && C->is_method_compilation()) {
1059     size += 6;
1060   }
1061 
1062   // count popq rbp
1063   size++;
1064 
1065   if (framesize) {
1066     if (framesize < 0x80) {
1067       size += 4;
1068     } else if (framesize) {
1069       size += 7;
1070     }
1071   }
1072 
1073   return size;
1074 }
1075 
1076 int MachEpilogNode::reloc() const
1077 {
1078   return 2; // a large enough number
1079 }
1080 
1081 const Pipeline* MachEpilogNode::pipeline() const
1082 {
1083   return MachNode::pipeline_class();
1084 }
1085 
1086 int MachEpilogNode::safepoint_offset() const
1087 {
1088   return 0;
1089 }
1090 
1091 //=============================================================================
1092 
// Register class of one half of a spill-copy operand, as computed by
// rc_class() below.
enum RC {
  rc_bad   = 0,  // not a valid OptoReg
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1099 
1100 static enum RC rc_class(OptoReg::Name reg)
1101 {
1102   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103 
1104   if (OptoReg::is_stack(reg)) return rc_stack;
1105 
1106   VMReg r = OptoReg::as_VMReg(reg);
1107 
1108   if (r->is_Register()) return rc_int;
1109 
1110   assert(r->is_XMMRegister(), "must be");
1111   return rc_float;
1112 }
1113 
1114 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                        PhaseRegAlloc* ra_,
1116                                        bool do_size,
1117                                        outputStream* st) const
1118 {
1119 
1120   // Get registers to move
1121   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123   OptoReg::Name dst_second = ra_->get_reg_second(this);
1124   OptoReg::Name dst_first = ra_->get_reg_first(this);
1125 
1126   enum RC src_second_rc = rc_class(src_second);
1127   enum RC src_first_rc = rc_class(src_first);
1128   enum RC dst_second_rc = rc_class(dst_second);
1129   enum RC dst_first_rc = rc_class(dst_first);
1130 
1131   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132          "must move at least 1 register" );
1133 
1134   if (src_first == dst_first && src_second == dst_second) {
1135     // Self copy, no move
1136     return 0;
1137   } else if (src_first_rc == rc_stack) {
1138     // mem ->
1139     if (dst_first_rc == rc_stack) {
1140       // mem -> mem
1141       assert(src_second != dst_first, "overlap");
1142       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144         // 64-bit
1145         int src_offset = ra_->reg2offset(src_first);
1146         int dst_offset = ra_->reg2offset(dst_first);
1147         if (cbuf) {
1148           emit_opcode(*cbuf, 0xFF);
1149           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150 
1151           emit_opcode(*cbuf, 0x8F);
1152           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153 
1154 #ifndef PRODUCT
1155         } else if (!do_size) {
1156           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                      "popq    [rsp + #%d]",
1158                      src_offset,
1159                      dst_offset);
1160 #endif
1161         }
1162         return
1163           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165       } else {
1166         // 32-bit
1167         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169         // No pushl/popl, so:
1170         int src_offset = ra_->reg2offset(src_first);
1171         int dst_offset = ra_->reg2offset(dst_first);
1172         if (cbuf) {
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x89);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179           emit_opcode(*cbuf, 0x8B);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, src_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, 0x89);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, dst_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, Assembler::REX_W);
1192           emit_opcode(*cbuf, 0x8B);
1193           emit_opcode(*cbuf, 0x44);
1194           emit_opcode(*cbuf, 0x24);
1195           emit_opcode(*cbuf, 0xF8);
1196 
1197 #ifndef PRODUCT
1198         } else if (!do_size) {
1199           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                      "movl    rax, [rsp + #%d]\n\t"
1201                      "movl    [rsp + #%d], rax\n\t"
1202                      "movq    rax, [rsp - #8]",
1203                      src_offset,
1204                      dst_offset);
1205 #endif
1206         }
1207         return
1208           5 + // movq
1209           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211           5; // movq
1212       }
1213     } else if (dst_first_rc == rc_int) {
1214       // mem -> gpr
1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217         // 64-bit
1218         int offset = ra_->reg2offset(src_first);
1219         if (cbuf) {
1220           if (Matcher::_regEncode[dst_first] < 8) {
1221             emit_opcode(*cbuf, Assembler::REX_W);
1222           } else {
1223             emit_opcode(*cbuf, Assembler::REX_WR);
1224           }
1225           emit_opcode(*cbuf, 0x8B);
1226           encode_RegMem(*cbuf,
1227                         Matcher::_regEncode[dst_first],
1228                         RSP_enc, 0x4, 0, offset,
1229                         false);
1230 #ifndef PRODUCT
1231         } else if (!do_size) {
1232           st->print("movq    %s, [rsp + #%d]\t# spill",
1233                      Matcher::regName[dst_first],
1234                      offset);
1235 #endif
1236         }
1237         return
1238           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239       } else {
1240         // 32-bit
1241         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243         int offset = ra_->reg2offset(src_first);
1244         if (cbuf) {
1245           if (Matcher::_regEncode[dst_first] >= 8) {
1246             emit_opcode(*cbuf, Assembler::REX_R);
1247           }
1248           emit_opcode(*cbuf, 0x8B);
1249           encode_RegMem(*cbuf,
1250                         Matcher::_regEncode[dst_first],
1251                         RSP_enc, 0x4, 0, offset,
1252                         false);
1253 #ifndef PRODUCT
1254         } else if (!do_size) {
1255           st->print("movl    %s, [rsp + #%d]\t# spill",
1256                      Matcher::regName[dst_first],
1257                      offset);
1258 #endif
1259         }
1260         return
1261           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262           ((Matcher::_regEncode[dst_first] < 8)
1263            ? 3
1264            : 4); // REX
1265       }
1266     } else if (dst_first_rc == rc_float) {
1267       // mem-> xmm
1268       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270         // 64-bit
1271         int offset = ra_->reg2offset(src_first);
1272         if (cbuf) {
1273           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274           if (Matcher::_regEncode[dst_first] >= 8) {
1275             emit_opcode(*cbuf, Assembler::REX_R);
1276           }
1277           emit_opcode(*cbuf, 0x0F);
1278           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279           encode_RegMem(*cbuf,
1280                         Matcher::_regEncode[dst_first],
1281                         RSP_enc, 0x4, 0, offset,
1282                         false);
1283 #ifndef PRODUCT
1284         } else if (!do_size) {
1285           st->print("%s  %s, [rsp + #%d]\t# spill",
1286                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                      Matcher::regName[dst_first],
1288                      offset);
1289 #endif
1290         }
1291         return
1292           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293           ((Matcher::_regEncode[dst_first] < 8)
1294            ? 5
1295            : 6); // REX
1296       } else {
1297         // 32-bit
1298         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300         int offset = ra_->reg2offset(src_first);
1301         if (cbuf) {
1302           emit_opcode(*cbuf, 0xF3);
1303           if (Matcher::_regEncode[dst_first] >= 8) {
1304             emit_opcode(*cbuf, Assembler::REX_R);
1305           }
1306           emit_opcode(*cbuf, 0x0F);
1307           emit_opcode(*cbuf, 0x10);
1308           encode_RegMem(*cbuf,
1309                         Matcher::_regEncode[dst_first],
1310                         RSP_enc, 0x4, 0, offset,
1311                         false);
1312 #ifndef PRODUCT
1313         } else if (!do_size) {
1314           st->print("movss   %s, [rsp + #%d]\t# spill",
1315                      Matcher::regName[dst_first],
1316                      offset);
1317 #endif
1318         }
1319         return
1320           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321           ((Matcher::_regEncode[dst_first] < 8)
1322            ? 5
1323            : 6); // REX
1324       }
1325     }
1326   } else if (src_first_rc == rc_int) {
1327     // gpr ->
1328     if (dst_first_rc == rc_stack) {
1329       // gpr -> mem
1330       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332         // 64-bit
1333         int offset = ra_->reg2offset(dst_first);
1334         if (cbuf) {
1335           if (Matcher::_regEncode[src_first] < 8) {
1336             emit_opcode(*cbuf, Assembler::REX_W);
1337           } else {
1338             emit_opcode(*cbuf, Assembler::REX_WR);
1339           }
1340           emit_opcode(*cbuf, 0x89);
1341           encode_RegMem(*cbuf,
1342                         Matcher::_regEncode[src_first],
1343                         RSP_enc, 0x4, 0, offset,
1344                         false);
1345 #ifndef PRODUCT
1346         } else if (!do_size) {
1347           st->print("movq    [rsp + #%d], %s\t# spill",
1348                      offset,
1349                      Matcher::regName[src_first]);
1350 #endif
1351         }
1352         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353       } else {
1354         // 32-bit
1355         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357         int offset = ra_->reg2offset(dst_first);
1358         if (cbuf) {
1359           if (Matcher::_regEncode[src_first] >= 8) {
1360             emit_opcode(*cbuf, Assembler::REX_R);
1361           }
1362           emit_opcode(*cbuf, 0x89);
1363           encode_RegMem(*cbuf,
1364                         Matcher::_regEncode[src_first],
1365                         RSP_enc, 0x4, 0, offset,
1366                         false);
1367 #ifndef PRODUCT
1368         } else if (!do_size) {
1369           st->print("movl    [rsp + #%d], %s\t# spill",
1370                      offset,
1371                      Matcher::regName[src_first]);
1372 #endif
1373         }
1374         return
1375           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376           ((Matcher::_regEncode[src_first] < 8)
1377            ? 3
1378            : 4); // REX
1379       }
1380     } else if (dst_first_rc == rc_int) {
1381       // gpr -> gpr
1382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384         // 64-bit
1385         if (cbuf) {
1386           if (Matcher::_regEncode[dst_first] < 8) {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_W);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WB);
1391             }
1392           } else {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_WR);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WRB);
1397             }
1398           }
1399           emit_opcode(*cbuf, 0x8B);
1400           emit_rm(*cbuf, 0x3,
1401                   Matcher::_regEncode[dst_first] & 7,
1402                   Matcher::_regEncode[src_first] & 7);
1403 #ifndef PRODUCT
1404         } else if (!do_size) {
1405           st->print("movq    %s, %s\t# spill",
1406                      Matcher::regName[dst_first],
1407                      Matcher::regName[src_first]);
1408 #endif
1409         }
1410         return 3; // REX
1411       } else {
1412         // 32-bit
1413         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415         if (cbuf) {
1416           if (Matcher::_regEncode[dst_first] < 8) {
1417             if (Matcher::_regEncode[src_first] >= 8) {
1418               emit_opcode(*cbuf, Assembler::REX_B);
1419             }
1420           } else {
1421             if (Matcher::_regEncode[src_first] < 8) {
1422               emit_opcode(*cbuf, Assembler::REX_R);
1423             } else {
1424               emit_opcode(*cbuf, Assembler::REX_RB);
1425             }
1426           }
1427           emit_opcode(*cbuf, 0x8B);
1428           emit_rm(*cbuf, 0x3,
1429                   Matcher::_regEncode[dst_first] & 7,
1430                   Matcher::_regEncode[src_first] & 7);
1431 #ifndef PRODUCT
1432         } else if (!do_size) {
1433           st->print("movl    %s, %s\t# spill",
1434                      Matcher::regName[dst_first],
1435                      Matcher::regName[src_first]);
1436 #endif
1437         }
1438         return
1439           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440           ? 2
1441           : 3; // REX
1442       }
1443     } else if (dst_first_rc == rc_float) {
1444       // gpr -> xmm
1445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447         // 64-bit
1448         if (cbuf) {
1449           emit_opcode(*cbuf, 0x66);
1450           if (Matcher::_regEncode[dst_first] < 8) {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_W);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WB);
1455             }
1456           } else {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_WR);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WRB);
1461             }
1462           }
1463           emit_opcode(*cbuf, 0x0F);
1464           emit_opcode(*cbuf, 0x6E);
1465           emit_rm(*cbuf, 0x3,
1466                   Matcher::_regEncode[dst_first] & 7,
1467                   Matcher::_regEncode[src_first] & 7);
1468 #ifndef PRODUCT
1469         } else if (!do_size) {
1470           st->print("movdq   %s, %s\t# spill",
1471                      Matcher::regName[dst_first],
1472                      Matcher::regName[src_first]);
1473 #endif
1474         }
1475         return 5; // REX
1476       } else {
1477         // 32-bit
1478         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480         if (cbuf) {
1481           emit_opcode(*cbuf, 0x66);
1482           if (Matcher::_regEncode[dst_first] < 8) {
1483             if (Matcher::_regEncode[src_first] >= 8) {
1484               emit_opcode(*cbuf, Assembler::REX_B);
1485             }
1486           } else {
1487             if (Matcher::_regEncode[src_first] < 8) {
1488               emit_opcode(*cbuf, Assembler::REX_R);
1489             } else {
1490               emit_opcode(*cbuf, Assembler::REX_RB);
1491             }
1492           }
1493           emit_opcode(*cbuf, 0x0F);
1494           emit_opcode(*cbuf, 0x6E);
1495           emit_rm(*cbuf, 0x3,
1496                   Matcher::_regEncode[dst_first] & 7,
1497                   Matcher::_regEncode[src_first] & 7);
1498 #ifndef PRODUCT
1499         } else if (!do_size) {
1500           st->print("movdl   %s, %s\t# spill",
1501                      Matcher::regName[dst_first],
1502                      Matcher::regName[src_first]);
1503 #endif
1504         }
1505         return
1506           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507           ? 4
1508           : 5; // REX
1509       }
1510     }
1511   } else if (src_first_rc == rc_float) {
1512     // xmm ->
1513     if (dst_first_rc == rc_stack) {
1514       // xmm -> mem
1515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517         // 64-bit
1518         int offset = ra_->reg2offset(dst_first);
1519         if (cbuf) {
1520           emit_opcode(*cbuf, 0xF2);
1521           if (Matcher::_regEncode[src_first] >= 8) {
1522               emit_opcode(*cbuf, Assembler::REX_R);
1523           }
1524           emit_opcode(*cbuf, 0x0F);
1525           emit_opcode(*cbuf, 0x11);
1526           encode_RegMem(*cbuf,
1527                         Matcher::_regEncode[src_first],
1528                         RSP_enc, 0x4, 0, offset,
1529                         false);
1530 #ifndef PRODUCT
1531         } else if (!do_size) {
1532           st->print("movsd   [rsp + #%d], %s\t# spill",
1533                      offset,
1534                      Matcher::regName[src_first]);
1535 #endif
1536         }
1537         return
1538           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539           ((Matcher::_regEncode[src_first] < 8)
1540            ? 5
1541            : 6); // REX
1542       } else {
1543         // 32-bit
1544         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546         int offset = ra_->reg2offset(dst_first);
1547         if (cbuf) {
1548           emit_opcode(*cbuf, 0xF3);
1549           if (Matcher::_regEncode[src_first] >= 8) {
1550               emit_opcode(*cbuf, Assembler::REX_R);
1551           }
1552           emit_opcode(*cbuf, 0x0F);
1553           emit_opcode(*cbuf, 0x11);
1554           encode_RegMem(*cbuf,
1555                         Matcher::_regEncode[src_first],
1556                         RSP_enc, 0x4, 0, offset,
1557                         false);
1558 #ifndef PRODUCT
1559         } else if (!do_size) {
1560           st->print("movss   [rsp + #%d], %s\t# spill",
1561                      offset,
1562                      Matcher::regName[src_first]);
1563 #endif
1564         }
1565         return
1566           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567           ((Matcher::_regEncode[src_first] < 8)
1568            ? 5
1569            : 6); // REX
1570       }
1571     } else if (dst_first_rc == rc_int) {
1572       // xmm -> gpr
1573       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575         // 64-bit
1576         if (cbuf) {
1577           emit_opcode(*cbuf, 0x66);
1578           if (Matcher::_regEncode[dst_first] < 8) {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_W);
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583             }
1584           } else {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WRB);
1589             }
1590           }
1591           emit_opcode(*cbuf, 0x0F);
1592           emit_opcode(*cbuf, 0x7E);
1593           emit_rm(*cbuf, 0x3,
1594                   Matcher::_regEncode[dst_first] & 7,
1595                   Matcher::_regEncode[src_first] & 7);
1596 #ifndef PRODUCT
1597         } else if (!do_size) {
1598           st->print("movdq   %s, %s\t# spill",
1599                      Matcher::regName[dst_first],
1600                      Matcher::regName[src_first]);
1601 #endif
1602         }
1603         return 5; // REX
1604       } else {
1605         // 32-bit
1606         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608         if (cbuf) {
1609           emit_opcode(*cbuf, 0x66);
1610           if (Matcher::_regEncode[dst_first] < 8) {
1611             if (Matcher::_regEncode[src_first] >= 8) {
1612               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613             }
1614           } else {
1615             if (Matcher::_regEncode[src_first] < 8) {
1616               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617             } else {
1618               emit_opcode(*cbuf, Assembler::REX_RB);
1619             }
1620           }
1621           emit_opcode(*cbuf, 0x0F);
1622           emit_opcode(*cbuf, 0x7E);
1623           emit_rm(*cbuf, 0x3,
1624                   Matcher::_regEncode[dst_first] & 7,
1625                   Matcher::_regEncode[src_first] & 7);
1626 #ifndef PRODUCT
1627         } else if (!do_size) {
1628           st->print("movdl   %s, %s\t# spill",
1629                      Matcher::regName[dst_first],
1630                      Matcher::regName[src_first]);
1631 #endif
1632         }
1633         return
1634           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635           ? 4
1636           : 5; // REX
1637       }
1638     } else if (dst_first_rc == rc_float) {
1639       // xmm -> xmm
1640       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642         // 64-bit
1643         if (cbuf) {
1644           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645           if (Matcher::_regEncode[dst_first] < 8) {
1646             if (Matcher::_regEncode[src_first] >= 8) {
1647               emit_opcode(*cbuf, Assembler::REX_B);
1648             }
1649           } else {
1650             if (Matcher::_regEncode[src_first] < 8) {
1651               emit_opcode(*cbuf, Assembler::REX_R);
1652             } else {
1653               emit_opcode(*cbuf, Assembler::REX_RB);
1654             }
1655           }
1656           emit_opcode(*cbuf, 0x0F);
1657           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658           emit_rm(*cbuf, 0x3,
1659                   Matcher::_regEncode[dst_first] & 7,
1660                   Matcher::_regEncode[src_first] & 7);
1661 #ifndef PRODUCT
1662         } else if (!do_size) {
1663           st->print("%s  %s, %s\t# spill",
1664                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                      Matcher::regName[dst_first],
1666                      Matcher::regName[src_first]);
1667 #endif
1668         }
1669         return
1670           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671           ? 4
1672           : 5; // REX
1673       } else {
1674         // 32-bit
1675         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677         if (cbuf) {
1678           if (!UseXmmRegToRegMoveAll)
1679             emit_opcode(*cbuf, 0xF3);
1680           if (Matcher::_regEncode[dst_first] < 8) {
1681             if (Matcher::_regEncode[src_first] >= 8) {
1682               emit_opcode(*cbuf, Assembler::REX_B);
1683             }
1684           } else {
1685             if (Matcher::_regEncode[src_first] < 8) {
1686               emit_opcode(*cbuf, Assembler::REX_R);
1687             } else {
1688               emit_opcode(*cbuf, Assembler::REX_RB);
1689             }
1690           }
1691           emit_opcode(*cbuf, 0x0F);
1692           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693           emit_rm(*cbuf, 0x3,
1694                   Matcher::_regEncode[dst_first] & 7,
1695                   Matcher::_regEncode[src_first] & 7);
1696 #ifndef PRODUCT
1697         } else if (!do_size) {
1698           st->print("%s  %s, %s\t# spill",
1699                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                      Matcher::regName[dst_first],
1701                      Matcher::regName[src_first]);
1702 #endif
1703         }
1704         return
1705           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708       }
1709     }
1710   }
1711 
1712   assert(0," foo ");
1713   Unimplemented();
1714 
1715   return 0;
1716 }
1717 
1718 #ifndef PRODUCT
1719 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720 {
1721   implementation(NULL, ra_, false, st);
1722 }
1723 #endif
1724 
1725 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726 {
1727   implementation(&cbuf, ra_, false, NULL);
1728 }
1729 
1730 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731 {
1732   return implementation(NULL, ra_, true, NULL);
1733 }
1734 
1735 //=============================================================================
1736 #ifndef PRODUCT
1737 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738 {
1739   st->print("nop \t# %d bytes pad for loops and calls", _count);
1740 }
1741 #endif
1742 
1743 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744 {
1745   MacroAssembler _masm(&cbuf);
1746   __ nop(_count);
1747 }
1748 
1749 uint MachNopNode::size(PhaseRegAlloc*) const
1750 {
1751   return _count;
1752 }
1753 
1754 
1755 //=============================================================================
1756 #ifndef PRODUCT
1757 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758 {
1759   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1760   int reg = ra_->get_reg_first(this);
1761   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1762             Matcher::regName[reg], offset);
1763 }
1764 #endif
1765 
1766 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767 {
1768   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1769   int reg = ra_->get_encode(this);
1770   if (offset >= 0x80) {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d32(cbuf, offset);
1776   } else {
1777     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1780     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781     emit_d8(cbuf, offset);
1782   }
1783 }
1784 
1785 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786 {
1787   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788   return (offset < 0x80) ? 5 : 8; // REX
1789 }
1790 
1791 //=============================================================================
1792 
1793 // emit call stub, compiled java to interpreter
1794 void emit_java_to_interp(CodeBuffer& cbuf)
1795 {
1796   // Stub is fixed up when the corresponding call is converted from
1797   // calling compiled code to calling interpreted code.
1798   // movq rbx, 0
1799   // jmp -5 # to self
1800 
1801   address mark = cbuf.inst_mark();  // get mark within main instrs section
1802 
1803   // Note that the code buffer's inst_mark is always relative to insts.
1804   // That's why we must use the macroassembler to generate a stub.
1805   MacroAssembler _masm(&cbuf);
1806 
1807   address base =
1808   __ start_a_stub(Compile::MAX_stubs_size);
1809   if (base == NULL)  return;  // CodeBuffer::expand failed
1810   // static stub relocation stores the instruction address of the call
1811   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812   // static stub relocation also tags the methodOop in the code-stream.
1813   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814   // This is recognized as unresolved by relocs/nativeinst/ic code
1815   __ jump(RuntimeAddress(__ pc()));
1816 
1817   // Update current stubs pointer and restore code_end.
1818   __ end_a_stub();
1819 }
1820 
1821 // size of call stub, compiled java to interpretor
1822 uint size_java_to_interp()
1823 {
1824   return 15;  // movq (1+1+8); jmp (1+4)
1825 }
1826 
1827 // relocation entries for call stub, compiled java to interpretor
1828 uint reloc_java_to_interp()
1829 {
1830   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831 }
1832 
1833 //=============================================================================
1834 #ifndef PRODUCT
1835 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836 {
1837   if (UseCompressedOops) {
1838     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839     if (Universe::narrow_oop_shift() != 0) {
1840       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1841     }
1842     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1843   } else {
1844     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846   }
1847   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848   st->print_cr("\tnop");
1849   if (!OptoBreakpoint) {
1850     st->print_cr("\tnop");
1851   }
1852 }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 {
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   uint code_size = cbuf.code_size();
1860 #endif
1861   if (UseCompressedOops) {
1862     masm.load_klass(rscratch1, j_rarg0);
1863     masm.cmpptr(rax, rscratch1);
1864   } else {
1865     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   }
1867 
1868   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1869 
1870   /* WARNING these NOPs are critical so that verified entry point is properly
1871      aligned for patching by NativeJump::patch_verified_entry() */
1872   int nops_cnt = 1;
1873   if (!OptoBreakpoint) {
1874     // Leave space for int3
1875      nops_cnt += 1;
1876   }
1877   if (UseCompressedOops) {
1878     // ??? divisible by 4 is aligned?
1879     nops_cnt += 1;
1880   }
1881   masm.nop(nops_cnt);
1882 
1883   assert(cbuf.code_size() - code_size == size(ra_),
1884          "checking code size of inline cache node");
1885 }
1886 
1887 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888 {
1889   if (UseCompressedOops) {
1890     if (Universe::narrow_oop_shift() == 0) {
1891       return OptoBreakpoint ? 15 : 16;
1892     } else {
1893       return OptoBreakpoint ? 19 : 20;
1894     }
1895   } else {
1896     return OptoBreakpoint ? 11 : 12;
1897   }
1898 }
1899 
1900 
1901 //=============================================================================
1902 uint size_exception_handler()
1903 {
1904   // NativeCall instruction size is the same as NativeJump.
1905   // Note that this value is also credited (in output.cpp) to
1906   // the size of the code section.
1907   return NativeJump::instruction_size;
1908 }
1909 
1910 // Emit exception handler code.
1911 int emit_exception_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's inst_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_exception_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1923   __ end_a_stub();
1924   return offset;
1925 }
1926 
1927 uint size_deopt_handler()
1928 {
1929   // three 5 byte instructions
1930   return 15;
1931 }
1932 
1933 // Emit deopt handler code.
1934 int emit_deopt_handler(CodeBuffer& cbuf)
1935 {
1936 
1937   // Note that the code buffer's inst_mark is always relative to insts.
1938   // That's why we must use the macroassembler to generate a handler.
1939   MacroAssembler _masm(&cbuf);
1940   address base =
1941   __ start_a_stub(size_deopt_handler());
1942   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943   int offset = __ offset();
1944   address the_pc = (address) __ pc();
1945   Label next;
1946   // push a "the_pc" on the stack without destroying any registers
1947   // as they all may be live.
1948 
1949   // push address of "next"
1950   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951   __ bind(next);
1952   // adjust it so it matches "the_pc"
1953   __ subptr(Address(rsp, 0), __ offset() - offset);
1954   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1956   __ end_a_stub();
1957   return offset;
1958 }
1959 
1960 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1961   int mark = cbuf.insts()->mark_off();
1962   MacroAssembler _masm(&cbuf);
1963   address double_address = __ double_constant(x);
1964   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1965   emit_d32_reloc(cbuf,
1966                  (int) (double_address - cbuf.code_end() - 4),
1967                  internal_word_Relocation::spec(double_address),
1968                  RELOC_DISP32);
1969 }
1970 
1971 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1972   int mark = cbuf.insts()->mark_off();
1973   MacroAssembler _masm(&cbuf);
1974   address float_address = __ float_constant(x);
1975   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1976   emit_d32_reloc(cbuf,
1977                  (int) (float_address - cbuf.code_end() - 4),
1978                  internal_word_Relocation::spec(float_address),
1979                  RELOC_DISP32);
1980 }
1981 
1982 
1983 int Matcher::regnum_to_fpu_offset(int regnum)
1984 {
1985   return regnum - 32; // The FP registers are in the second chunk
1986 }
1987 
1988 // This is UltraSparc specific, true just means we have fast l2f conversion
1989 const bool Matcher::convL2FSupported(void) {
1990   return true;
1991 }
1992 
1993 // Vector width in bytes
1994 const uint Matcher::vector_width_in_bytes(void) {
1995   return 8;
1996 }
1997 
1998 // Vector ideal reg
1999 const uint Matcher::vector_ideal_reg(void) {
2000   return Op_RegD;
2001 }
2002 
2003 // Is this branch offset short enough that a short branch can be used?
2004 //
2005 // NOTE: If the platform does not provide any short branch variants, then
2006 //       this method should return false for offset 0.
2007 bool Matcher::is_short_branch_offset(int rule, int offset) {
2008   // the short version of jmpConUCF2 contains multiple branches,
2009   // making the reach slightly less
2010   if (rule == jmpConUCF2_rule)
2011     return (-126 <= offset && offset <= 125);
2012   return (-128 <= offset && offset <= 127);
2013 }
2014 
2015 const bool Matcher::isSimpleConstant64(jlong value) {
2016   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2017   //return value == (int) value;  // Cf. storeImmL and immL32.
2018 
2019   // Probably always true, even if a temp register is required.
2020   return true;
2021 }
2022 
// The count passed (in ecx) to rep stosq for the ClearArray node is in
// words, not in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: eight longs, expressed in bytes.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Otherwise the double is split into 2 integer pieces
// and moved piece-by-piece.  This only happens when passing doubles into
// C code, as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// No-op on amd64: the body is intentionally empty.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = true;
2058 
2059 // Return whether or not this register is ever used as an argument.
2060 // This function is used on startup to build the trampoline stubs in
2061 // generateOptoStub.  Registers not mentioned will be killed by the VM
2062 // call in the trampoline, and arguments in those registers not be
2063 // available to the callee.
2064 bool Matcher::can_be_java_arg(int reg)
2065 {
2066   return
2067     reg ==  RDI_num || reg ==  RDI_H_num ||
2068     reg ==  RSI_num || reg ==  RSI_H_num ||
2069     reg ==  RDX_num || reg ==  RDX_H_num ||
2070     reg ==  RCX_num || reg ==  RCX_H_num ||
2071     reg ==   R8_num || reg ==   R8_H_num ||
2072     reg ==   R9_num || reg ==   R9_H_num ||
2073     reg ==  R12_num || reg ==  R12_H_num ||
2074     reg == XMM0_num || reg == XMM0_H_num ||
2075     reg == XMM1_num || reg == XMM1_H_num ||
2076     reg == XMM2_num || reg == XMM2_H_num ||
2077     reg == XMM3_num || reg == XMM3_H_num ||
2078     reg == XMM4_num || reg == XMM4_H_num ||
2079     reg == XMM5_num || reg == XMM5_H_num ||
2080     reg == XMM6_num || reg == XMM6_H_num ||
2081     reg == XMM7_num || reg == XMM7_H_num;
2082 }
2083 
2084 bool Matcher::is_spillable_arg(int reg)
2085 {
2086   return can_be_java_arg(reg);
2087 }
2088 
2089 // Register for DIVI projection of divmodI
2090 RegMask Matcher::divI_proj_mask() {
2091   return INT_RAX_REG_mask;
2092 }
2093 
2094 // Register for MODI projection of divmodI
2095 RegMask Matcher::modI_proj_mask() {
2096   return INT_RDX_REG_mask;
2097 }
2098 
2099 // Register for DIVL projection of divmodL
2100 RegMask Matcher::divL_proj_mask() {
2101   return LONG_RAX_REG_mask;
2102 }
2103 
2104 // Register for MODL projection of divmodL
2105 RegMask Matcher::modL_proj_mask() {
2106   return LONG_RDX_REG_mask;
2107 }
2108 
2109 static Address build_address(int b, int i, int s, int d) {
2110   Register index = as_Register(i);
2111   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2112   if (index == rsp) {
2113     index = noreg;
2114     scale = Address::no_scale;
2115   }
2116   Address addr(as_Register(b), index, scale, d);
2117   return addr;
2118 }
2119 
2120 %}
2121 
2122 //----------ENCODING BLOCK-----------------------------------------------------
2123 // This block specifies the encoding classes used by the compiler to
2124 // output byte streams.  Encoding classes are parameterized macros
2125 // used by Machine Instruction Nodes in order to generate the bit
2126 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2130 // which returns its register number when queried.  CONST_INTER causes
2131 // an operand to generate a function which returns the value of the
2132 // constant when queried.  MEMORY_INTER causes an operand to generate
2133 // four functions which return the Base Register, the Index Register,
2134 // the Scale Value, and the Offset Value of the operand when queried.
2135 // COND_INTER causes an operand to generate six functions which return
2136 // the encoding code (ie - encoding bits for the instruction)
2137 // associated with each basic boolean condition for a conditional
2138 // instruction.
2139 //
2140 // Instructions specify two basic values for encoding.  Again, a
2141 // function is available to check if the constant displacement is an
2142 // oop. They use the ins_encode keyword to specify their encoding
2143 // classes (which must be a sequence of enc_class names, and their
2144 // parameters, specified in the encoding block), and they use the
2145 // opcode keyword to specify, in order, their primary, secondary, and
2146 // tertiary opcode.  Only the opcode sections which a particular
2147 // instruction needs for encoding need to be specified.
2148 encode %{
2149   // Build emit functions for each basic byte or larger field in the
2150   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2151   // from C++ code in the enc_class source block.  Emit functions will
2152   // live in the main source block for now.  In future, we can
2153   // generalize this by adding a syntax that specifies the sizes of
2154   // fields in an order, so that the adlc can build the emit functions
2155   // automagically
2156 
2157   // Emit primary opcode
2158   enc_class OpcP
2159   %{
2160     emit_opcode(cbuf, $primary);
2161   %}
2162 
2163   // Emit secondary opcode
2164   enc_class OpcS
2165   %{
2166     emit_opcode(cbuf, $secondary);
2167   %}
2168 
2169   // Emit tertiary opcode
2170   enc_class OpcT
2171   %{
2172     emit_opcode(cbuf, $tertiary);
2173   %}
2174 
2175   // Emit opcode directly
2176   enc_class Opcode(immI d8)
2177   %{
2178     emit_opcode(cbuf, $d8$$constant);
2179   %}
2180 
2181   // Emit size prefix
2182   enc_class SizePrefix
2183   %{
2184     emit_opcode(cbuf, 0x66);
2185   %}
2186 
2187   enc_class reg(rRegI reg)
2188   %{
2189     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2190   %}
2191 
2192   enc_class reg_reg(rRegI dst, rRegI src)
2193   %{
2194     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2195   %}
2196 
2197   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2198   %{
2199     emit_opcode(cbuf, $opcode$$constant);
2200     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2201   %}
2202 
  // Flag fix-up sequence.  When the parity flag is clear the whole sequence
  // is skipped.  Otherwise the flags are pushed, the saved image is ANDed
  // with 0xffffff2b (which clears bits 2, 4, 6 and 7, i.e. PF, AF, ZF and
  // SF) and the result is popped back into the flags register.
  // NOTE(review): PF set presumably marks an unordered floating-point
  // compare -- confirm against the instructions that use this class.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // Displacement 0x0A skips pushfq (1) + andq (8) + popfq (1) and lands
    // on the trailing nop.
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    // REX.W, opcode 0x81, ModRM 0x24, SIB 0x24, then the 32-bit immediate.
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2225 
  // Turn the current flags into an integer in $dst.  $dst is first loaded
  // with -1; if either the parity or the below branch is taken that value is
  // kept.  Otherwise setne/movzbl leave 1 (not equal) or 0 (equal) in $dst.
  // The branch displacements are hand-counted, so they grow by two bytes
  // when setne and movzbl each need a REX prefix (dstenc >= 4).
  enc_class cmpfp3(rRegI dst)
  %{
    int dstenc = $dst$$reg;

    // movl $dst, -1
    // (REX_B selects the upper eight registers; opcode is 0xB8 + low bits)
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jp,s done
    // skips jb (2) + setne (3 or 4) + movzbl (3 or 4)
    emit_opcode(cbuf, 0x7A);
    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);

    // jb,s done
    // skips setne (3 or 4) + movzbl (3 or 4)
    emit_opcode(cbuf, 0x72);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // A prefix is emitted for every encoding of 4 and above: a plain REX
    // for 4..7 and REX_B for 8..15.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // $dst is both the reg and the r/m operand, hence REX_RB for 8..15.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2261 
  // Prologue of the 32-bit signed divide/remainder sequence.  It handles the
  // min_int / -1 special case inline and emits everything up to, but not
  // including, the idivl itself, which the user of this class must emit
  // right after it (the final je displacement already accounts for it).
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // skips xor (2) + cmp (3, or 4 with REX_B) + je (2)
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // skips cltd (1) + the idiv emitted by the user (2, or 3 with REX_B)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
2325 
  // Prologue of the 64-bit signed divide/remainder sequence.  It handles the
  // min_long / -1 special case inline and emits everything up to, but not
  // including, the idivq itself, which the user of this class must emit
  // right after it (the final je displacement already accounts for it).
  // Every instruction that names $div carries a REX prefix either way, so
  // the branch displacements do not depend on the divisor register.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    // skips xor (2) + cmp (4) + je (2)
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    // skips cqto (2) + the idiv emitted by the user (3)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2396 
2397   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2398   enc_class OpcSE(immI imm)
2399   %{
2400     // Emit primary opcode and set sign-extend bit
2401     // Check for 8-bit immediate, and set sign extend bit in opcode
2402     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2403       emit_opcode(cbuf, $primary | 0x02);
2404     } else {
2405       // 32-bit immediate
2406       emit_opcode(cbuf, $primary);
2407     }
2408   %}
2409 
2410   enc_class OpcSErm(rRegI dst, immI imm)
2411   %{
2412     // OpcSEr/m
2413     int dstenc = $dst$$reg;
2414     if (dstenc >= 8) {
2415       emit_opcode(cbuf, Assembler::REX_B);
2416       dstenc -= 8;
2417     }
2418     // Emit primary opcode and set sign-extend bit
2419     // Check for 8-bit immediate, and set sign extend bit in opcode
2420     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2421       emit_opcode(cbuf, $primary | 0x02);
2422     } else {
2423       // 32-bit immediate
2424       emit_opcode(cbuf, $primary);
2425     }
2426     // Emit r/m byte with secondary opcode, after primary opcode.
2427     emit_rm(cbuf, 0x3, $secondary, dstenc);
2428   %}
2429 
2430   enc_class OpcSErm_wide(rRegL dst, immI imm)
2431   %{
2432     // OpcSEr/m
2433     int dstenc = $dst$$reg;
2434     if (dstenc < 8) {
2435       emit_opcode(cbuf, Assembler::REX_W);
2436     } else {
2437       emit_opcode(cbuf, Assembler::REX_WB);
2438       dstenc -= 8;
2439     }
2440     // Emit primary opcode and set sign-extend bit
2441     // Check for 8-bit immediate, and set sign extend bit in opcode
2442     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2443       emit_opcode(cbuf, $primary | 0x02);
2444     } else {
2445       // 32-bit immediate
2446       emit_opcode(cbuf, $primary);
2447     }
2448     // Emit r/m byte with secondary opcode, after primary opcode.
2449     emit_rm(cbuf, 0x3, $secondary, dstenc);
2450   %}
2451 
2452   enc_class Con8or32(immI imm)
2453   %{
2454     // Check for 8-bit immediate, and set sign extend bit in opcode
2455     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2456       $$$emit8$imm$$constant;
2457     } else {
2458       // 32-bit immediate
2459       $$$emit32$imm$$constant;
2460     }
2461   %}
2462 
2463   enc_class Lbl(label labl)
2464   %{
2465     // JMP, CALL
2466     Label* l = $labl$$label;
2467     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2468   %}
2469 
2470   enc_class LblShort(label labl)
2471   %{
2472     // JMP, CALL
2473     Label* l = $labl$$label;
2474     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2475     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2476     emit_d8(cbuf, disp);
2477   %}
2478 
2479   enc_class opc2_reg(rRegI dst)
2480   %{
2481     // BSWAP
2482     emit_cc(cbuf, $secondary, $dst$$reg);
2483   %}
2484 
2485   enc_class opc3_reg(rRegI dst)
2486   %{
2487     // BSWAP
2488     emit_cc(cbuf, $tertiary, $dst$$reg);
2489   %}
2490 
2491   enc_class reg_opc(rRegI div)
2492   %{
2493     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2494     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2495   %}
2496 
2497   enc_class Jcc(cmpOp cop, label labl)
2498   %{
2499     // JCC
2500     Label* l = $labl$$label;
2501     $$$emit8$primary;
2502     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2503     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2504   %}
2505 
2506   enc_class JccShort (cmpOp cop, label labl)
2507   %{
2508   // JCC
2509     Label *l = $labl$$label;
2510     emit_cc(cbuf, $primary, $cop$$cmpcode);
2511     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2512     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2513     emit_d8(cbuf, disp);
2514   %}
2515 
2516   enc_class enc_cmov(cmpOp cop)
2517   %{
2518     // CMOV
2519     $$$emit8$primary;
2520     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2521   %}
2522 
2523   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2524   %{
2525     // Invert sense of branch from sense of cmov: the branch skips the register move emitted below.
2526     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); // condition code with its low bit flipped
2527     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2528                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2529                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // byte length of the move below: 0x0F + opcode + ModRM, +1 for 0xF3 (movss only), +1 for REX
2530     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2531     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2532     if ($dst$$reg < 8) {
2533       if ($src$$reg >= 8) {
2534         emit_opcode(cbuf, Assembler::REX_B);
2535       }
2536     } else {
2537       if ($src$$reg < 8) {
2538         emit_opcode(cbuf, Assembler::REX_R);
2539       } else {
2540         emit_opcode(cbuf, Assembler::REX_RB);
2541       }
2542     }
2543     emit_opcode(cbuf, 0x0F);
2544     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2545     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // ModRM: mod = 11b, reg = dst, r/m = src
2546   %}
2547 
2548   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2549   %{
2550     // Invert sense of branch from sense of cmov: the branch skips the register move emitted below.
2551     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); // condition code with its low bit flipped
2552     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // byte length of the move below: prefix + 0x0F + opcode + ModRM, +1 for REX
2553
2554     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2555     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); // a one-byte prefix is emitted in both variants
2556     if ($dst$$reg < 8) {
2557       if ($src$$reg >= 8) {
2558         emit_opcode(cbuf, Assembler::REX_B);
2559       }
2560     } else {
2561       if ($src$$reg < 8) {
2562         emit_opcode(cbuf, Assembler::REX_R);
2563       } else {
2564         emit_opcode(cbuf, Assembler::REX_RB);
2565       }
2566     }
2567     emit_opcode(cbuf, 0x0F);
2568     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2569     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // ModRM: mod = 11b, reg = dst, r/m = src
2570   %}
2571 
2572   enc_class enc_PartialSubtypeCheck()
2573   %{
2574     Register Rrdi = as_Register(RDI_enc); // result register
2575     Register Rrax = as_Register(RAX_enc); // super class
2576     Register Rrcx = as_Register(RCX_enc); // killed
2577     Register Rrsi = as_Register(RSI_enc); // sub class
2578     Label miss;
2579     const bool set_cond_codes = true;
2580     // Slow-path subtype check on fixed registers; the miss label is bound at the end of this encoding.
2581     MacroAssembler _masm(&cbuf);
2582     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2583                                      NULL, &miss,
2584                                      set_cond_codes);
2585     if ($primary) { // non-zero $primary: clear the result register on the path that did not branch to miss
2586       __ xorptr(Rrdi, Rrdi);
2587     }
2588     __ bind(miss);
2589   %}
2590 
2591   enc_class Java_To_Interpreter(method meth)
2592   %{
2593     // CALL Java_To_Interpreter
2594     // This is the instruction starting address for relocation info.
2595     cbuf.set_inst_mark();
2596     $$$emit8$primary;
2597     // CALL directly to the runtime
2598     emit_d32_reloc(cbuf,
2599                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4), // target minus the address just past the 4 displacement bytes
2600                    runtime_call_Relocation::spec(),
2601                    RELOC_DISP32);
2602   %}
2603 
2604   enc_class Java_Static_Call(method meth)
2605   %{
2606     // JAVA STATIC CALL
2607     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2608     // determine who we intended to call.
2609     cbuf.set_inst_mark();
2610     $$$emit8$primary;
2611
2612     // Target minus the address just past the 4 displacement bytes about to be emitted.
2613     const int disp = (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4);
2614
2615     if (_method) {
2616       // A method is attached: the relocation kind depends on _optimized_virtual.
2617       if (_optimized_virtual) {
2618         emit_d32_reloc(cbuf, disp,
2619                        opt_virtual_call_Relocation::spec(), RELOC_DISP32);
2620       } else {
2621         emit_d32_reloc(cbuf, disp,
2622                        static_call_Relocation::spec(), RELOC_DISP32);
2623       }
2624       // Emit stub for static call
2625       emit_java_to_interp(cbuf);
2626     } else {
2627       // No method attached: relocate as a runtime call; no stub is emitted.
2628       emit_d32_reloc(cbuf, disp,
2629                      runtime_call_Relocation::spec(), RELOC_DISP32);
2630     }
2631
2632   %}
2633 
2634   enc_class Java_Dynamic_Call(method meth)
2635   %{
2636     // JAVA DYNAMIC CALL
2637     // !!!!!
2638     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2639     // emit_call_dynamic_prologue( cbuf );
2640     cbuf.set_inst_mark();
2641
2642     // movq rax, -1
2643     emit_opcode(cbuf, Assembler::REX_W);
2644     emit_opcode(cbuf, 0xB8 | RAX_enc);
2645     emit_d64_reloc(cbuf,
2646                    (int64_t) Universe::non_oop_word(), // the 64-bit immediate actually emitted for the placeholder
2647                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2648     address virtual_call_oop_addr = cbuf.inst_mark(); // mark set before the movq above; handed to the call's relocation below
2649     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2650     // who we intended to call.
2651     cbuf.set_inst_mark(); // new mark for the call instruction itself
2652     $$$emit8$primary;
2653     emit_d32_reloc(cbuf,
2654                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4), // target minus the address just past the 4 displacement bytes
2655                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2656                    RELOC_DISP32);
2657   %}
2658 
2659   enc_class Java_Compiled_Call(method meth)
2660   %{
2661     // JAVA COMPILED CALL
2662     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2663
2664     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2665     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2666     // The 8-bit displacement form is only used for -0x80 <= disp < 0x80; anything else takes the 32-bit form.
2667     // callq *disp(%rax)
2668     cbuf.set_inst_mark();
2669     $$$emit8$primary;
2670     if (-0x80 <= disp && disp < 0x80) {
2671       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2672       emit_d8(cbuf, disp); // Displacement
2673     } else {
2674       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2675       emit_d32(cbuf, disp); // Displacement
2676     }
2677   %}
2678 
2679   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2680   %{
2681     // SAL, SAR, SHR by an 8-bit immediate: [REX.B] $primary ModRM(11 $secondary dst) imm8
2682     const int enc = $dst$$reg;
2683     const bool high = (enc >= 8); // encodings 8 and above need REX.B
2684     if (high) {
2685       emit_opcode(cbuf, Assembler::REX_B);
2686     }
2687     $$$emit8$primary;
2688     emit_rm(cbuf, 0x3, $secondary, high ? enc - 8 : enc);
2689     $$$emit8$shift$$constant;
2690   %}
2691 
2692   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2693   %{
2694     // SAL, SAR, SHR by an 8-bit immediate, wide form:
2695     // REX.W (REX.WB for encodings 8 and above) $primary ModRM(11 $secondary dst) imm8
2696     const int enc = $dst$$reg;
2697     const bool high = (enc >= 8);
2698
2699     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
2700     $$$emit8$primary;
2701     emit_rm(cbuf, 0x3, $secondary, high ? enc - 8 : enc);
2702     $$$emit8$shift$$constant;
2703   %}
2706 
2707   enc_class load_immI(rRegI dst, immI src)
2708   %{
2709     // [REX.B] (0xB8 | low three bits of dst) followed by the 32-bit immediate.
2710     const int enc = $dst$$reg;
2711     if (enc >= 8) {
2712       emit_opcode(cbuf, Assembler::REX_B);
2713     }
2714     emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2715     $$$emit32$src$$constant;
2716   %}
2717 
2718   enc_class load_immL(rRegL dst, immL src)
2719   %{
2720     // REX.W (REX.WB for encodings 8 and above), 0xB8 | low three bits of dst,
2721     // then the 64-bit immediate.
2722     const int enc = $dst$$reg;
2723     const bool high = (enc >= 8);
2724
2725     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
2726     emit_opcode(cbuf, 0xB8 | (high ? enc - 8 : enc));
2727     emit_d64(cbuf, $src$$constant);
2728   %}
2730 
2731   enc_class load_immUL32(rRegL dst, immUL32 src)
2732   %{
2733     // same as load_immI, but this time we care about zeroes in the high word
2734     // Bytes: [REX.B] (0xB8 | low three bits of dst) followed by the 32-bit immediate.
2735     const int enc = $dst$$reg;
2736     if (enc >= 8) {
2737       emit_opcode(cbuf, Assembler::REX_B);
2738     }
2739     emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2740     $$$emit32$src$$constant;
2741   %}
2742 
2743   enc_class load_immL32(rRegL dst, immL32 src)
2744   %{
2745     // REX.W (REX.WB for encodings 8 and above), 0xC7, ModRM(11 000 dst),
2746     // then the 32-bit immediate.
2747     const int enc = $dst$$reg;
2748     const bool high = (enc >= 8);
2749
2750     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
2751     emit_opcode(cbuf, 0xC7);
2752     emit_rm(cbuf, 0x03, 0x00, high ? enc - 8 : enc);
2753     $$$emit32$src$$constant;
2754   %}
2756 
2757   enc_class load_immP31(rRegP dst, immP32 src)
2758   %{
2759     // same as load_immI, but this time we care about zeroes in the high word
2760     // Bytes: [REX.B] (0xB8 | low three bits of dst) followed by the 32-bit immediate.
2761     const int enc = $dst$$reg;
2762     if (enc >= 8) {
2763       emit_opcode(cbuf, Assembler::REX_B);
2764     }
2765     emit_opcode(cbuf, 0xB8 | (enc >= 8 ? enc - 8 : enc));
2766     $$$emit32$src$$constant;
2767   %}
2768 
2769   enc_class load_immP(rRegP dst, immP src)
2770   %{
2771     // Pointer constant load: REX.W (REX.WB for encodings 8 and above), 0xB8 | low three
2772     // bits of dst, then the 64-bit immediate (with an oop relocation when the constant is an oop).
2773     const int enc = $dst$$reg;
2774     const bool high = (enc >= 8);
2775
2776     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
2777     emit_opcode(cbuf, 0xB8 | (high ? enc - 8 : enc));
2778
2779     // This next line should be generated from ADLC
2780     if ($src->constant_is_oop()) {
2781       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2782     } else {
2783       emit_d64(cbuf, $src$$constant);
2784     }
2785   %}
2786 
2787   enc_class load_immF(regF dst, immF con)
2788   %{ // Emits only a ModRM byte and then hands the float constant to emit_float_constant; no opcode or prefix bytes are produced here.
2789     // XXX reg_mem doesn't support RIP-relative addressing yet
2790     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2791     emit_float_constant(cbuf, $con$$constant);
2792   %}
2793 
2794   enc_class load_immD(regD dst, immD con)
2795   %{ // Emits only a ModRM byte and then hands the double constant to emit_double_constant; no opcode or prefix bytes are produced here.
2796     // XXX reg_mem doesn't support RIP-relative addressing yet
2797     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798     emit_double_constant(cbuf, $con$$constant);
2799   %}
2800 
2801   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2802     emit_opcode(cbuf, 0xF3); // prefix byte comes first, ahead of any REX byte
2803     if ($dst$$reg >= 8) {
2804       emit_opcode(cbuf, Assembler::REX_R); // dst sits in the ModRM reg field, so a high encoding needs the R bit
2805     }
2806     emit_opcode(cbuf, 0x0F);
2807     emit_opcode(cbuf, 0x10);
2808     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2809     emit_float_constant(cbuf, $con$$constant);
2810   %}
2811 
2812   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2813     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2814     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); // prefix byte selects between the two forms
2815     if ($dst$$reg >= 8) {
2816       emit_opcode(cbuf, Assembler::REX_R); // dst sits in the ModRM reg field, so a high encoding needs the R bit
2817     }
2818     emit_opcode(cbuf, 0x0F);
2819     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); // opcode byte must pair with the prefix chosen above
2820     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2821     emit_double_constant(cbuf, $con$$constant);
2822   %}
2823 
2824   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2825   enc_class enc_copy(rRegI dst, rRegI src)
2826   %{
2827     encode_copy(cbuf, $dst$$reg, $src$$reg); // all of the work is delegated to encode_copy, which receives both register encodings
2828   %}
2829 
2830   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2831   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2832     encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); // all of the work is delegated to encode_CopyXD, which receives both register encodings
2833   %}
2834 
2835   enc_class enc_copy_always(rRegI dst, rRegI src)
2836   %{
2837     // Unconditional register move: [REX] 0x8B ModRM(11 dst src).
2838     // An encoding of 8 or above sets the matching REX bit (R for dst, B for src)
2839     // and is reduced by 8 before it goes into the ModRM byte.
2840     const int src_enc = $src$$reg;
2841     const int dst_enc = $dst$$reg;
2842     const bool src_high = (src_enc >= 8);
2843     const bool dst_high = (dst_enc >= 8);
2844
2845     if (dst_high && src_high) {
2846       emit_opcode(cbuf, Assembler::REX_RB);
2847     } else if (dst_high) {
2848       emit_opcode(cbuf, Assembler::REX_R);
2849     } else if (src_high) {
2850       emit_opcode(cbuf, Assembler::REX_B);
2851     }
2852
2853     emit_opcode(cbuf, 0x8B);
2854     emit_rm(cbuf, 0x3,
2855             dst_high ? dst_enc - 8 : dst_enc,
2856             src_high ? src_enc - 8 : src_enc);
2857   %}
2858 
2859   enc_class enc_copy_wide(rRegL dst, rRegL src)
2860   %{
2861     // Wide register move; emits nothing when dst and src have the same encoding.
2862     // Otherwise: REX.W plus R (dst >= 8) and B (src >= 8), 0x8B, ModRM(11 dst src).
2863     const int src_enc = $src$$reg;
2864     const int dst_enc = $dst$$reg;
2865
2866     if (dst_enc != src_enc) {
2867       const bool src_high = (src_enc >= 8);
2868       const bool dst_high = (dst_enc >= 8);
2869
2870       if (dst_high) {
2871         emit_opcode(cbuf, src_high ? Assembler::REX_WRB : Assembler::REX_WR);
2872       } else {
2873         emit_opcode(cbuf, src_high ? Assembler::REX_WB : Assembler::REX_W);
2874       }
2875
2876       emit_opcode(cbuf, 0x8B);
2877       emit_rm(cbuf, 0x3,
2878               dst_high ? dst_enc - 8 : dst_enc,
2879               src_high ? src_enc - 8 : src_enc);
2880     }
2881   %}
2885 
2886   enc_class Con32(immI src)
2887   %{
2888     // Output immediate: the constant of src is written through the $$$emit32 macro.
2889     $$$emit32$src$$constant;
2890   %}
2891 
2892   enc_class Con64(immL src)
2893   %{
2894     // Output immediate: the constant of src is written as a 64-bit value into the code buffer.
2895     emit_d64(cbuf, $src$$constant);
2896   %}
2897 
2898   enc_class Con32F_as_bits(immF src)
2899   %{
2900     // Output Float immediate bits: the float constant goes through jint_cast and the
2901     // resulting jint is written as a 32-bit value.
2902     const jfloat value = $src$$constant;
2903     emit_d32(cbuf, jint_cast(value));
2904   %}
2905 
2906   enc_class Con16(immI src)
2907   %{
2908     // Output immediate: the constant of src is written through the $$$emit16 macro.
2909     $$$emit16$src$$constant;
2910   %}
2911 
2912   // How is this different from Con32??? XXX  (Con32 goes through the $$$emit32 macro; this one calls emit_d32 directly.)
2913   enc_class Con_d32(immI src)
2914   %{
2915     emit_d32(cbuf,$src$$constant);
2916   %}
2917 
2918   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2919     // Output immediate memory reference
2920     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); // ModRM: mod = 00b, reg field = encoding of t1 (not masked with & 7), r/m = 101b
2921     emit_d32(cbuf, 0x00); // 32-bit field holding zero
2922   %}
2923 
2924   enc_class jump_enc(rRegL switch_val, rRegI dest) %{ // Table jump: dest receives the table address, then jmp uses Address(dest, switch_val, times_1)
2925     MacroAssembler masm(&cbuf);
2926
2927     Register switch_reg = as_Register($switch_val$$reg);
2928     Register dest_reg   = as_Register($dest$$reg);
2929     address table_base  = masm.address_table_constant(_index2label); // table is built from this node's _index2label
2930
2931     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2932     // to do that and the compiler is using that register as one it can allocate.
2933     // So we build it all by hand.
2934     // Address index(noreg, switch_reg, Address::times_1);
2935     // ArrayAddress dispatch(table, index);
2936
2937     Address dispatch(dest_reg, switch_reg, Address::times_1);
2938
2939     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg must hold the table address before the jmp uses it as base
2940     masm.jmp(dispatch);
2941   %}
2942 
2943   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ // Table jump with a scaled index and a constant offset
2944     MacroAssembler masm(&cbuf);
2945
2946     Register switch_reg = as_Register($switch_val$$reg);
2947     Register dest_reg   = as_Register($dest$$reg);
2948     address table_base  = masm.address_table_constant(_index2label); // table is built from this node's _index2label
2949
2950     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2951     // to do that and the compiler is using that register as one it can allocate.
2952     // So we build it all by hand.
2953     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2954     // ArrayAddress dispatch(table, index);
2955
2956     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant); // shift is used as the scale factor, offset as the displacement
2957
2958     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg must hold the table address before the jmp uses it as base
2959     masm.jmp(dispatch);
2960   %}
2961 
2962   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ // Table jump with a scaled index and no constant offset
2963     MacroAssembler masm(&cbuf);
2964
2965     Register switch_reg = as_Register($switch_val$$reg);
2966     Register dest_reg   = as_Register($dest$$reg);
2967     address table_base  = masm.address_table_constant(_index2label); // table is built from this node's _index2label
2968
2969     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2970     // to do that and the compiler is using that register as one it can allocate.
2971     // So we build it all by hand.
2972     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2973     // ArrayAddress dispatch(table, index);
2974
2975     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant); // shift is used as the scale factor
2976     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg must hold the table address before the jmp uses it as base
2977     masm.jmp(dispatch);
2978
2979   %}
2980 
2981   enc_class lock_prefix()
2982   %{
2983     if (os::is_MP()) { // the prefix byte is emitted only when os::is_MP() returns true
2984       emit_opcode(cbuf, 0xF0); // lock
2985     }
2986   %}
2987 
2988   enc_class REX_mem(memory mem)
2989   %{
2990     // REX prefix driven purely by a memory operand: the B bit is requested when the
2991     // base encoding is 8 or above, the X bit when the index encoding is 8 or above.
2992     // Nothing is emitted when both encodings are below 8.
2993     if ($mem$$base >= 8 && $mem$$index >= 8) {
2994       emit_opcode(cbuf, Assembler::REX_XB);
2995     } else if ($mem$$base >= 8) {
2996       emit_opcode(cbuf, Assembler::REX_B);
2997     } else if ($mem$$index >= 8) {
2998       emit_opcode(cbuf, Assembler::REX_X);
2999     }
3000   %}
3002 
3003   enc_class REX_mem_wide(memory mem)
3004   %{
3005     // Wide variant of REX_mem: a prefix from the REX_W family is always emitted;
3006     // B is added when the base encoding is 8 or above, X when the index encoding is.
3007     if ($mem$$base >= 8) {
3008       emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WXB : Assembler::REX_WB);
3009     } else {
3010       emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WX : Assembler::REX_W);
3011     }
3012   %}
3019 
3020   // for byte regs: encodings 4..7 get a bare REX prefix, encodings 8 and above get REX_B
3021   enc_class REX_breg(rRegI reg)
3022   %{
3023     if ($reg$$reg >= 8)      { emit_opcode(cbuf, Assembler::REX_B); }
3024     else if ($reg$$reg >= 4) { emit_opcode(cbuf, Assembler::REX); }
3025     // encodings below 4 get no prefix
3026   %}
3027 
3028   // for byte regs: src is the byte register, so a src encoding of 4..7 forces a bare
3029   // REX prefix even when neither the R nor the B bit is needed
3030   enc_class REX_reg_breg(rRegI dst, rRegI src)
3031   %{
3032     if ($dst$$reg >= 8 && $src$$reg >= 8) {
3033       emit_opcode(cbuf, Assembler::REX_RB);
3034     } else if ($dst$$reg >= 8) {
3035       emit_opcode(cbuf, Assembler::REX_R);
3036     } else if ($src$$reg >= 8) {
3037       emit_opcode(cbuf, Assembler::REX_B);
3038     } else if ($src$$reg >= 4) {
3039       emit_opcode(cbuf, Assembler::REX);
3040     }
3041   %}
3043 
3044   // for byte regs: reg is the byte register paired with a memory operand.
3045   enc_class REX_breg_mem(rRegI reg, memory mem)
3046   %{
3047     // Each encoding of 8 or above contributes one bit: reg -> R, base -> B, index -> X.
3048     // When none of those bits is needed, a reg encoding of 4..7 still gets a bare REX.
3049     if ($reg$$reg >= 8) {
3050       if ($mem$$base >= 8) {
3051         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_RXB : Assembler::REX_RB);
3052       } else {
3053         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_RX : Assembler::REX_R);
3054       }
3055     } else if ($mem$$base >= 8) {
3056       emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_XB : Assembler::REX_B);
3057     } else if ($mem$$index >= 8) {
3058       emit_opcode(cbuf, Assembler::REX_X);
3059     } else if ($reg$$reg >= 4) {
3060       emit_opcode(cbuf, Assembler::REX);
3061     }
3062   %}
3077 
3078   enc_class REX_reg(rRegI reg)
3079   %{
3080     if ($reg$$reg >= 8) { // encodings below 8 get no prefix from this encoding class
3081       emit_opcode(cbuf, Assembler::REX_B);
3082     }
3083   %}
3084 
3085   enc_class REX_reg_wide(rRegI reg)
3086   %{
3087     // A prefix from the REX_W family is always emitted; the B bit is added when the
3088     // register encoding is 8 or above.
3089     emit_opcode(cbuf, $reg$$reg >= 8 ? Assembler::REX_WB : Assembler::REX_W);
3090   %}
3093 
3094   enc_class REX_reg_reg(rRegI dst, rRegI src)
3095   %{
3096     // REX prefix for a register-register form: dst drives the R bit and src the B bit,
3097     // each when its encoding is 8 or above.  No prefix when both are below 8.
3098     if ($dst$$reg >= 8 && $src$$reg >= 8) {
3099       emit_opcode(cbuf, Assembler::REX_RB);
3100     } else if ($dst$$reg >= 8) {
3101       emit_opcode(cbuf, Assembler::REX_R);
3102     } else if ($src$$reg >= 8) {
3103       emit_opcode(cbuf, Assembler::REX_B);
3104     }
3105   %}
3108 
3109   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3110   %{
3111     // Wide variant of REX_reg_reg: a prefix from the REX_W family is always emitted;
3112     // R is added when dst's encoding is 8 or above, B when src's encoding is.
3113     if ($dst$$reg >= 8) {
3114       emit_opcode(cbuf, $src$$reg >= 8 ? Assembler::REX_WRB : Assembler::REX_WR);
3115     } else {
3116       emit_opcode(cbuf, $src$$reg >= 8 ? Assembler::REX_WB : Assembler::REX_W);
3117     }
3118   %}
3125 
3126   enc_class REX_reg_mem(rRegI reg, memory mem)
3127   %{
3128     // REX prefix for a register operand paired with a memory operand.  Each encoding
3129     // of 8 or above contributes one bit: reg -> R, base -> B, index -> X.
3130     // When all three encodings are below 8 nothing is emitted.
3131     if ($reg$$reg >= 8) {
3132       if ($mem$$base >= 8) {
3133         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_RXB : Assembler::REX_RB);
3134       } else {
3135         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_RX : Assembler::REX_R);
3136       }
3137     } else if ($mem$$base >= 8) {
3138       emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_XB : Assembler::REX_B);
3139     } else if ($mem$$index >= 8) {
3140       emit_opcode(cbuf, Assembler::REX_X);
3141     }
3142   %}
3156 
3157   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3158   %{
3159     // Wide variant of REX_reg_mem: a prefix from the REX_W family is always emitted.
3160     // Each encoding of 8 or above adds one bit: reg -> R, base -> B, index -> X.
3161     if ($reg$$reg >= 8) {
3162       if ($mem$$base >= 8) {
3163         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WRXB : Assembler::REX_WRB);
3164       } else {
3165         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WRX : Assembler::REX_WR);
3166       }
3167     } else {
3168       if ($mem$$base >= 8) {
3169         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WXB : Assembler::REX_WB);
3170       } else {
3171         emit_opcode(cbuf, $mem$$index >= 8 ? Assembler::REX_WX : Assembler::REX_W);
3172       }
3173     }
3174   %}
3189 
3190   enc_class reg_mem(rRegI ereg, memory mem)
3191   %{
3192     // Forwards the register encoding and every component of the memory operand
3193     // (base, index, scale, displacement, displacement-is-oop flag) to encode_RegMem.
3194     // High registers handle in encode_RegMem
3195     encode_RegMem(cbuf,
3196                   $ereg$$reg,
3197                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3198                   $mem->disp_is_oop());
3199   %}
3202 
3203   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3204   %{
3205     // Same shape as reg_mem, except that the constant rm_opcode takes the place of
3206     // a register encoding in the call to encode_RegMem.
3207     // High registers handle in encode_RegMem
3208     // disp-as-oop when working with static globals
3209     encode_RegMem(cbuf,
3210                   $rm_opcode$$constant,
3211                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
3212                   $mem->disp_is_oop());
3213   %}
3219 
3220   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3221   %{
3222     // Calls encode_RegMem for the address [src0 + src1]: src0 is the base and the
3223     // constant src1 is the displacement, with no index and no scale.
3224     encode_RegMem(cbuf,
3225                   $dst$$reg,
3226                   $src0$$reg,       // base; 0xFFFFFFFF indicates no base
3227                   0x04,             // 0x04 indicates no index
3228                   0x00,             // 0x00 indicates no scale
3229                   $src1$$constant,  // 0x00 indicates no displacement
3230                   false);           // the displacement is not an oop
3231   %}
3231 
3232   enc_class neg_reg(rRegI dst)
3233   %{
3234     // NEG $dst: [REX_B] 0xF7 ModRM(11 011 dst)
3235     const int enc = $dst$$reg;
3236     const bool high = (enc >= 8);
3237     if (high) {
3238       emit_opcode(cbuf, Assembler::REX_B);
3239     }
3240     emit_opcode(cbuf, 0xF7);
3241     emit_rm(cbuf, 0x3, 0x03, high ? enc - 8 : enc);
3242   %}
3243 
3244   enc_class neg_reg_wide(rRegI dst)
3245   %{
3246     // NEG $dst, wide form: REX_W (REX_WB for encodings 8 and above), 0xF7,
3247     // ModRM(11 011 dst)
3248     const int enc = $dst$$reg;
3249     const bool high = (enc >= 8);
3250
3251     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
3252     emit_opcode(cbuf, 0xF7);
3253     emit_rm(cbuf, 0x3, 0x03, high ? enc - 8 : enc);
3254   %}
3257 
3258   enc_class setLT_reg(rRegI dst)
3259   %{
3260     // SETLT $dst: [REX] 0x0F 0x9C ModRM(11 000 dst).
3261     // Encodings 4..7 get a bare REX prefix, encodings 8 and above get REX_B.
3262     const int enc = $dst$$reg;
3263     const bool high = (enc >= 8);
3264
3265     if (enc >= 4) {
3266       emit_opcode(cbuf, high ? Assembler::REX_B : Assembler::REX);
3267     }
3268     emit_opcode(cbuf, 0x0F);
3269     emit_opcode(cbuf, 0x9C);
3270     emit_rm(cbuf, 0x3, 0x0, high ? enc - 8 : enc);
3271   %}
3272 
3273   enc_class setNZ_reg(rRegI dst)
3274   %{
3275     // SETNZ $dst: [REX] 0x0F 0x95 ModRM(11 000 dst).
3276     // Encodings 4..7 get a bare REX prefix, encodings 8 and above get REX_B.
3277     const int enc = $dst$$reg;
3278     const bool high = (enc >= 8);
3279
3280     if (enc >= 4) {
3281       emit_opcode(cbuf, high ? Assembler::REX_B : Assembler::REX);
3282     }
3283     emit_opcode(cbuf, 0x0F);
3284     emit_opcode(cbuf, 0x95);
3285     emit_rm(cbuf, 0x3, 0x0, high ? enc - 8 : enc);
3286   %}
3287 
3288   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3289                        rcx_RegI tmp)
3290   %{
3291     // cadd_cmpLT: emits four instructions in order -- subl p,q; sbbl tmp,tmp; andl tmp,y; addl p,tmp
3292
3293     int tmpReg = $tmp$$reg;
3294
3295     int penc = $p$$reg;
3296     int qenc = $q$$reg;
3297     int yenc = $y$$reg;
3298
3299     // subl $p,$q
3300     if (penc < 8) {
3301       if (qenc >= 8) {
3302         emit_opcode(cbuf, Assembler::REX_B);
3303       }
3304     } else {
3305       if (qenc < 8) {
3306         emit_opcode(cbuf, Assembler::REX_R);
3307       } else {
3308         emit_opcode(cbuf, Assembler::REX_RB);
3309       }
3310     }
3311     emit_opcode(cbuf, 0x2B);
3312     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3313
3314     // sbbl $tmp, $tmp
3315     emit_opcode(cbuf, 0x1B);
3316     emit_rm(cbuf, 0x3, tmpReg, tmpReg); // no REX and no "& 7": relies on tmp (declared rcx_RegI) having an encoding below 8
3317
3318     // andl $tmp, $y
3319     if (yenc >= 8) {
3320       emit_opcode(cbuf, Assembler::REX_B); // y is in the r/m field
3321     }
3322     emit_opcode(cbuf, 0x23);
3323     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3324
3325     // addl $p,$tmp
3326     if (penc >= 8) {
3327         emit_opcode(cbuf, Assembler::REX_R); // p is in the reg field
3328     }
3329     emit_opcode(cbuf, 0x03);
3330     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3331   %}
3332 
3333   // Compare the longs and set -1, 0, or 1 into dst
3334   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3335   %{
3336     int src1enc = $src1$$reg;
3337     int src2enc = $src2$$reg;
3338     int dstenc = $dst$$reg;
3339
3340     // cmpq $src1, $src2
3341     if (src1enc < 8) {
3342       if (src2enc < 8) {
3343         emit_opcode(cbuf, Assembler::REX_W);
3344       } else {
3345         emit_opcode(cbuf, Assembler::REX_WB);
3346       }
3347     } else {
3348       if (src2enc < 8) {
3349         emit_opcode(cbuf, Assembler::REX_WR);
3350       } else {
3351         emit_opcode(cbuf, Assembler::REX_WRB);
3352       }
3353     }
3354     emit_opcode(cbuf, 0x3B);
3355     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3356
3357     // movl $dst, -1
3358     if (dstenc >= 8) {
3359       emit_opcode(cbuf, Assembler::REX_B);
3360     }
3361     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3362     emit_d32(cbuf, -1);
3363
3364     // jl,s done
3365     emit_opcode(cbuf, 0x7C);
3366     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne + movzbl below: 3 bytes each, plus one prefix byte each when dstenc >= 4
3367
3368     // setne $dst
3369     if (dstenc >= 4) {
3370       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B); // byte-register form: encodings 4..7 get a bare REX
3371     }
3372     emit_opcode(cbuf, 0x0F);
3373     emit_opcode(cbuf, 0x95);
3374     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3375
3376     // movzbl $dst, $dst
3377     if (dstenc >= 4) {
3378       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); // dst fills both the reg and r/m fields, hence R and B together
3379     }
3380     emit_opcode(cbuf, 0x0F);
3381     emit_opcode(cbuf, 0xB6);
3382     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3383   %}
3384 
3385   enc_class Push_ResultXD(regD dst) %{ // FSTP to [rsp], load that slot into xmm dst, then add 8 to rsp
3386     int dstenc = $dst$$reg;
3387
3388     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3389
3390     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3391     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3392     if (dstenc >= 8) {
3393       emit_opcode(cbuf, Assembler::REX_R); // dst is in the ModRM reg field
3394     }
3395     emit_opcode  (cbuf, 0x0F );
3396     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3397     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false); // base = rsp, index 0x4, scale 0, displacement 0, not an oop
3398
3399     // add rsp,8
3400     emit_opcode(cbuf, Assembler::REX_W);
3401     emit_opcode(cbuf,0x83);
3402     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3403     emit_d8(cbuf,0x08);
3404   %}
3405 
3406   enc_class Push_SrcXD(regD src) %{ // subtract 8 from rsp, store xmm src to [rsp], then fldd from that slot
3407     int srcenc = $src$$reg;
3408
3409     // subq rsp,#8
3410     emit_opcode(cbuf, Assembler::REX_W);
3411     emit_opcode(cbuf, 0x83);
3412     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3413     emit_d8(cbuf, 0x8);
3414
3415     // movsd [rsp],src
3416     emit_opcode(cbuf, 0xF2);
3417     if (srcenc >= 8) {
3418       emit_opcode(cbuf, Assembler::REX_R); // src is in the ModRM reg field
3419     }
3420     emit_opcode(cbuf, 0x0F);
3421     emit_opcode(cbuf, 0x11);
3422     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // base = rsp, index 0x4, scale 0, displacement 0, not an oop
3423
3424     // fldd [rsp]
3425     emit_opcode(cbuf, 0x66); // NOTE(review): 0x66 prefix ahead of the 0xDD load -- confirm it is intentional
3426     emit_opcode(cbuf, 0xDD);
3427     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3428   %}
3429 
3430 
3431   enc_class movq_ld(regD dst, memory mem) %{ // movq from the memory operand into xmm dst, emitted through MacroAssembler
3432     MacroAssembler _masm(&cbuf);
3433     __ movq($dst$$XMMRegister, $mem$$Address);
3434   %}
3435 
3436   enc_class movq_st(memory mem, regD src) %{ // movq from xmm src into the memory operand, emitted through MacroAssembler
3437     MacroAssembler _masm(&cbuf);
3438     __ movq($mem$$Address, $src$$XMMRegister);
3439   %}
3440 
3441   enc_class pshufd_8x8(regF dst, regF src) %{ // copy src into dst, then punpcklbw dst,dst and pshuflw dst,dst,0x00
3442     MacroAssembler _masm(&cbuf);
3443
3444     encode_CopyXD(cbuf, $dst$$reg, $src$$reg); // after this copy, both following instructions operate on dst only
3445     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3446     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3447   %}
3448 
3449   enc_class pshufd_4x16(regF dst, regF src) %{ // a single pshuflw dst,src with mode 0x00 (despite the pshufd name)
3450     MacroAssembler _masm(&cbuf);
3451
3452     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3453   %}
3454 
3455   enc_class pshufd(regD dst, regD src, int mode) %{ // pshufd dst,src with the caller-supplied mode
3456     MacroAssembler _masm(&cbuf);
3457
3458     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3459   %}
3460 
3461   enc_class pxor(regD dst, regD src) %{ // pxor dst,src emitted through MacroAssembler
3462     MacroAssembler _masm(&cbuf);
3463
3464     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3465   %}
3466 
3467   enc_class mov_i2x(regD dst, rRegI src) %{ // movdl from general register src into xmm dst
3468     MacroAssembler _masm(&cbuf);
3469
3470     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3471   %}
3472 
3473   // obj: object to lock
3474   // box: box address (header location) -- killed
3475   // tmp: rax -- killed
3476   // scr: rbx -- killed
3477   //
3478   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3479   // from i486.ad.  See that file for comments.
3480   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3481   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3482 
3483 
3484   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3485   %{
         // Emits the inline monitor-enter sequence through a MacroAssembler
         // bound to cbuf.  Which code shape is emitted is decided at
         // code-generation time from the EmitSync flag:
         //   EmitSync & 1 : store unused_mark() into the box and compare rsp
         //                  with NULL_WORD; no lock attempt is emitted.
         //   EmitSync & 2 : optional biased-locking entry, then a CAS of the
         //                  box address into the object's mark word, with a
         //                  recursive stack-lock check when the CAS fails.
         //   otherwise    : the same stack-lock sequence plus a separate
         //                  path for mark words that have bit 0x02 set.
         // NOTE(review): the outcome is presumably reported to the consumer
         // through the condition flags (ZF) left by the last emitted
         // instruction -- confirm against the instruct using this encoding.
3486     Register objReg = as_Register((int)$obj$$reg);
3487     Register boxReg = as_Register((int)$box$$reg);
3488     Register tmpReg = as_Register($tmp$$reg);
3489     Register scrReg = as_Register($scr$$reg);
3490     MacroAssembler masm(&cbuf);
3491 
3492     // Verify uniqueness of register assignments -- necessary but not sufficient
3493     assert (objReg != boxReg && objReg != tmpReg &&
3494             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3495 
         // Optional statistics: count every entry into this sequence.
3496     if (_counters != NULL) {
3497       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3498     }
3499     if (EmitSync & 1) {
3500         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3501         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3502         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3503     } else
3504     if (EmitSync & 2) {
3505         Label DONE_LABEL;
3506         if (UseBiasedLocking) {
3507            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3508           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3509         }
3510         // QQQ was movl...
             // tmpReg = mark word with bit 0 set; that value is saved in the
             // box and is also the expected value for the CAS below.
3511         masm.movptr(tmpReg, 0x1);
3512         masm.orptr(tmpReg, Address(objReg, 0));
3513         masm.movptr(Address(boxReg, 0), tmpReg);
3514         if (os::is_MP()) {
3515           masm.lock();
3516         }
3517         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3518         masm.jcc(Assembler::equal, DONE_LABEL);
3519 
3520         // Recursive locking
             // (mark - rsp) masked with (7 - page_size): assuming a
             // power-of-two page size the mask keeps the low three bits and
             // every bit at or above the page size.  The masked value is what
             // gets stored into the box.
3521         masm.subptr(tmpReg, rsp);
3522         masm.andptr(tmpReg, 7 - os::vm_page_size());
3523         masm.movptr(Address(boxReg, 0), tmpReg);
3524 
3525         masm.bind(DONE_LABEL);
3526         masm.nop(); // avoid branch to branch
3527     } else {
3528         Label DONE_LABEL, IsInflated, Egress;
3529 
3530         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3531         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3532         masm.jcc   (Assembler::notZero, IsInflated) ; 
3533          
3534         // it's stack-locked, biased or neutral
3535         // TODO: optimize markword triage order to reduce the number of
3536         // conditional branches in the most common cases.
3537         // Beware -- there's a subtle invariant that fetch of the markword
3538         // at [FETCH], below, will never observe a biased encoding (*101b).
3539         // If this invariant is not held we'll suffer exclusion (safety) failure.
3540 
3541         if (UseBiasedLocking && !UseOptoBiasInlining) {
3542           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3543           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3544         }
3545 
3546         // was q will it destroy high?
             // tmpReg = mark | 1 is stored in the box and used as the CAS
             // comparand (cmpxchg compares memory against rax, which is tmp).
3547         masm.orl   (tmpReg, 1) ; 
3548         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3549         if (os::is_MP()) { masm.lock(); } 
3550         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3551         if (_counters != NULL) {
3552            masm.cond_inc32(Assembler::equal,
3553                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3554         }
3555         masm.jcc   (Assembler::equal, DONE_LABEL);
3556 
3557         // Recursive locking
             // Same (mark - rsp) & (7 - page_size) computation as in the
             // EmitSync & 2 variant; the result is stored into the box.
3558         masm.subptr(tmpReg, rsp);
3559         masm.andptr(tmpReg, 7 - os::vm_page_size());
3560         masm.movptr(Address(boxReg, 0), tmpReg);
3561         if (_counters != NULL) {
3562            masm.cond_inc32(Assembler::equal,
3563                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3564         }
3565         masm.jmp   (DONE_LABEL) ;
3566 
3567         masm.bind  (IsInflated) ;
3568         // It's inflated
3569 
3570         // TODO: someday avoid the ST-before-CAS penalty by
3571         // relocating (deferring) the following ST.
3572         // We should also think about trying a CAS without having
3573         // fetched _owner.  If the CAS is successful we may
3574         // avoid an RTO->RTS upgrade on the $line.
3575         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3576         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3577 
             // boxReg takes over the mark-word value so tmpReg (rax) can hold
             // the owner field / CAS comparand.  The "-2" on the field offsets
             // presumably cancels the 0x02 tag bit tested above -- confirm
             // against the markOop encoding.
3578         masm.mov    (boxReg, tmpReg) ; 
3579         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3580         masm.testptr(tmpReg, tmpReg) ;   
3581         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3582 
3583         // It's inflated and appears unlocked
             // tmpReg is zero here, so the CAS installs r15_thread only if the
             // owner field is still zero.
3584         if (os::is_MP()) { masm.lock(); } 
3585         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3586         // Intentional fall-through into DONE_LABEL ...
3587 
3588         masm.bind  (DONE_LABEL) ;
3589         masm.nop   () ;                 // avoid jmp to jmp
3590     }
3591   %}
3592 
3593   // obj: object to unlock
3594   // box: box address (displaced header location), killed
3595   // RBX: killed tmp; cannot be obj nor box
3596   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3597   %{
         // Emits the inline monitor-exit sequence through a MacroAssembler
         // bound to cbuf.  The code shape is chosen at code-generation time
         // from the EmitSync flag:
         //   EmitSync & 4 : only a compare of rsp with 0 is emitted.
         //   EmitSync & 8 : optional biased-locking exit, recursive check on
         //                  the box contents, then a CAS that writes the box
         //                  contents back into the object's mark word.
         //   otherwise    : additionally handles mark words with bit 0x02 set
         //                  via the ObjectMonitor fields.
         // The inline comments below describe ZF=1 as success and ZF=0 as
         // failure at DONE_LABEL.
3598 
3599     Register objReg = as_Register($obj$$reg);
3600     Register boxReg = as_Register($box$$reg);
3601     Register tmpReg = as_Register($tmp$$reg);
3602     MacroAssembler masm(&cbuf);
3603 
3604     if (EmitSync & 4) { 
3605        masm.cmpptr(rsp, 0) ; 
3606     } else
3607     if (EmitSync & 8) {
3608        Label DONE_LABEL;
3609        if (UseBiasedLocking) {
3610          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3611        }
3612 
3613        // Check whether the displaced header is 0
3614        //(=> recursive unlock)
3615        masm.movptr(tmpReg, Address(boxReg, 0));
3616        masm.testptr(tmpReg, tmpReg);
3617        masm.jcc(Assembler::zero, DONE_LABEL);
3618 
3619        // If not recursive lock, reset the header to displaced header
3620        if (os::is_MP()) {
3621          masm.lock();
3622        }
3623        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3624        masm.bind(DONE_LABEL);
3625        masm.nop(); // avoid branch to branch
3626     } else {
3627        Label DONE_LABEL, Stacked, CheckSucc ;
3628 
3629        if (UseBiasedLocking && !UseOptoBiasInlining) {
3630          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3631        }
3632         
            // tmpReg = mark word.  A zero box word exits immediately with
            // ZF=1; otherwise bit 0x02 of the mark selects the monitor path
            // or the Stacked path.
3633        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3634        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3635        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3636        masm.testl (tmpReg, 0x02) ; 
3637        masm.jcc   (Assembler::zero, Stacked) ; 
3638         
3639        // It's inflated
            // boxReg = (owner ^ current thread) | recursions; non-zero leaves
            // ZF=0 and exits.  Then (cxq | EntryList) != 0 diverts to
            // CheckSucc; otherwise the owner field is cleared.  The "-2" on
            // the field offsets presumably cancels the 0x02 tag bit in tmpReg
            // -- confirm against the markOop encoding.
3640        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3641        masm.xorptr(boxReg, r15_thread) ; 
3642        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3643        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
3644        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3645        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3646        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3647        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3648        masm.jmp   (DONE_LABEL) ; 
3649         
            // CheckSucc is bound exactly once: here when EmitSync bit 65536
            // is clear, or just before DONE_LABEL when it is set.
3650        if ((EmitSync & 65536) == 0) { 
3651          Label LSuccess, LGoSlowPath ;
3652          masm.bind  (CheckSucc) ;
3653          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3654          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3655 
3656          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3657          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3658          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3659          // are all faster when the write buffer is populated.
3660          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661          if (os::is_MP()) {
3662             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3663          }
              // Re-check succ after the owner store (and the locked add on
              // MP); if it is still non-zero the exit is treated as done.
3664          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3665          masm.jcc   (Assembler::notZero, LSuccess) ;
3666 
              // Otherwise CAS the owner field from NULL (in rax) back to the
              // current thread; a failed CAS branches to LSuccess.
3667          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3668          if (os::is_MP()) { masm.lock(); }
3669          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3670          masm.jcc   (Assembler::notEqual, LSuccess) ;
3671          // Intentional fall-through into slow-path
3672 
3673          masm.bind  (LGoSlowPath) ;
3674          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3675          masm.jmp   (DONE_LABEL) ;
3676 
3677          masm.bind  (LSuccess) ;
3678          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3679          masm.jmp   (DONE_LABEL) ;
3680        }
3681 
3682        masm.bind  (Stacked) ; 
3683        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3684        if (os::is_MP()) { masm.lock(); } 
3685        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3686 
3687        if (EmitSync & 65536) {
3688           masm.bind (CheckSucc) ;
3689        }
3690        masm.bind(DONE_LABEL);
3691        if (EmitSync & 32768) {
3692           masm.nop();                      // avoid branch to branch
3693        }
3694     }
3695   %}
3696 
3697   enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3698                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline comparison of two java.lang.String objects held
         // in rdi (str1) and rsi (str2).  Every general register is
         // hard-coded; only the two XMM temporaries come from the operands.
         // The value left in rcx is either
         //   - (char from str1) - (char from str2) at the first mismatch, or
         //   - count(str1) - count(str2) when the strings agree up to the
         //     shorter length.
         // The length difference is kept on the stack for the duration of
         // the sequence, so every exit either pops it or adjusts rsp by 8.
3699     Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3700           POP_LABEL, DONE_LABEL, CONT_LABEL,
3701           WHILE_HEAD_LABEL;
3702     MacroAssembler masm(&cbuf);
3703 
3704     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3705     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3706 
3707     // Get the first character position in both strings
3708     //         [8] char array, [12] offset, [16] count
3709     int value_offset  = java_lang_String::value_offset_in_bytes();
3710     int offset_offset = java_lang_String::offset_offset_in_bytes();
3711     int count_offset  = java_lang_String::count_offset_in_bytes();
3712     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3713 
         // rax = address of the first char of str2 (rsi),
         // rbx = address of the first char of str1 (rdi); rcx is scratch.
3714     masm.load_heap_oop(rax, Address(rsi, value_offset));
3715     masm.movl(rcx, Address(rsi, offset_offset));
3716     masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
3717     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3718     masm.movl(rcx, Address(rdi, offset_offset));
3719     masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3720 
3721     // Compute the minimum of the string lengths(rsi) and the
3722     // difference of the string lengths (stack)
3723 
3724     // do the conditional move stuff
         // rdi = count1 - count2 (pushed); the cmov consumes the flags of
         // the subl, replacing rsi (count2) with rcx (count1) when the
         // difference is <= 0.
3725     masm.movl(rdi, Address(rdi, count_offset));
3726     masm.movl(rsi, Address(rsi, count_offset));
3727     masm.movl(rcx, rdi);
3728     masm.subl(rdi, rsi);
3729     masm.push(rdi);
3730     masm.cmov(Assembler::lessEqual, rsi, rcx);
3731 
3732     // Is the minimum length zero?
3733     masm.bind(RCX_GOOD_LABEL);
3734     masm.testl(rsi, rsi);
3735     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3736 
3737     // Load first characters
3738     masm.load_unsigned_short(rcx, Address(rbx, 0));
3739     masm.load_unsigned_short(rdi, Address(rax, 0));
3740 
3741     // Compare first characters
3742     masm.subl(rcx, rdi);
3743     masm.jcc(Assembler::notZero,  POP_LABEL);
3744     masm.decrementl(rsi);
3745     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3746 
3747     {
3748       // Check after comparing first character to see if strings are equivalent
3749       Label LSkip2;
3750       // Check if the strings start at same location
3751       masm.cmpptr(rbx, rax);
3752       masm.jccb(Assembler::notEqual, LSkip2);
3753 
3754       // Check if the length difference is zero (from stack)
3755       masm.cmpl(Address(rsp, 0), 0x0);
3756       masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3757 
3758       // Strings might not be equivalent
3759       masm.bind(LSkip2);
3760     }
3761 
3762     // Advance to next character
3763     masm.addptr(rax, 2);
3764     masm.addptr(rbx, 2);
3765 
3766     if (UseSSE42Intrinsics) {
3767       // With SSE4.2, use double quad vector compare
3768       Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3769       // Setup to compare 16-byte vectors
3770       masm.movl(rdi, rsi);
3771       masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3772       masm.andl(rdi, 0x00000007); // rdi holds the tail count
3773       masm.testl(rsi, rsi);
3774       masm.jccb(Assembler::zero, COMPARE_TAIL);
3775 
           // Point rax/rbx past the vector-covered region and count rsi
           // from -n up to zero.
3776       masm.lea(rax, Address(rax, rsi, Address::times_2));
3777       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3778       masm.negptr(rsi);
3779 
3780       masm.bind(COMPARE_VECTORS);
3781       masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
3782       masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
3783       masm.pxor(tmp1Reg, tmp2Reg);
3784       masm.ptest(tmp1Reg, tmp1Reg);
3785       masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3786       masm.addptr(rsi, 8);
3787       masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3788       masm.jmpb(COMPARE_TAIL);
3789 
3790       // Mismatched characters in the vectors
           // Rebase rax/rbx to the start of the mismatching vector and set
           // the tail count to 8 so the scalar loop rescans it.
3791       masm.bind(VECTOR_NOT_EQUAL);
3792       masm.lea(rax, Address(rax, rsi, Address::times_2));
3793       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3794       masm.movl(rdi, 8);
3795 
3796       // Compare tail (< 8 chars), or rescan last vectors to
3797       // find 1st mismatched characters
3798       masm.bind(COMPARE_TAIL);
3799       masm.testl(rdi, rdi);
3800       masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3801       masm.movl(rsi, rdi);
3802       // Fallthru to tail compare
3803     }
3804 
3805     // Shift RAX and RBX to the end of the arrays, negate min
3806     masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3807     masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3808     masm.negptr(rsi);
3809 
3810     // Compare the rest of the characters
3811     masm.bind(WHILE_HEAD_LABEL);
3812     masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3813     masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3814     masm.subl(rcx, rdi);
3815     masm.jccb(Assembler::notZero, POP_LABEL);
3816     masm.increment(rsi);
3817     masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3818 
3819     // Strings are equal up to min length.  Return the length difference.
3820     masm.bind(LENGTH_DIFF_LABEL);
3821     masm.pop(rcx);
3822     masm.jmpb(DONE_LABEL);
3823 
3824     // Discard the stored length difference
3825     masm.bind(POP_LABEL);
3826     masm.addptr(rsp, 8);
3827 
3828     // That's it
3829     masm.bind(DONE_LABEL);
3830   %}
3831 
3832  enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
3833                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
3834     // SSE4.2 version
         // Emits an inline substring search: rsi holds the string, rdi the
         // substring, and the result is left in rbx (-1 on the not-found
         // paths, otherwise (match address - string start) >> 1).
         // pcmpestri is emitted unconditionally here; there is no
         // UseSSE42Intrinsics test inside this encoding.
         // Per the Intel SDM, pcmpestri takes its operand lengths implicitly
         // from rax (first operand) and rdx (second operand) and returns the
         // index in rcx, which is why those registers are hard-coded below.
         // Three words are pushed (string start, substr start, substr
         // count); CLEANUP removes them with a single rsp adjustment of 24.
3835     Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3836           SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3837     MacroAssembler masm(&cbuf);
3838 
3839     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3840 
3841     // Get the first character position in both strings
3842     //         [8] char array, [12] offset, [16] count
3843     int value_offset  = java_lang_String::value_offset_in_bytes();
3844     int offset_offset = java_lang_String::offset_offset_in_bytes();
3845     int count_offset  = java_lang_String::count_offset_in_bytes();
3846     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3847 
3848     // Get counts for string and substr
3849     masm.movl(rdx, Address(rsi, count_offset));
3850     masm.movl(rax, Address(rdi, count_offset));
3851     // Check for substr count > string count
         // Nothing has been pushed yet, so RET_NEG_ONE jumps straight to
         // DONE and bypasses CLEANUP.
3852     masm.cmpl(rax, rdx);
3853     masm.jcc(Assembler::greater, RET_NEG_ONE);
3854 
3855     // Start the indexOf operation
3856     // Get start addr of string
3857     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3858     masm.movl(rcx, Address(rsi, offset_offset));
3859     masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3860     masm.push(rsi);
3861 
3862     // Get start addr of substr
3863     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3864     masm.movl(rcx, Address(rdi, offset_offset));
3865     masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3866     masm.push(rdi);
3867     masm.push(rax);
3868     masm.jmpb(PREP_FOR_SCAN);
3869 
3870     // Substr count saved at sp
3871     // Substr saved at sp+8
3872     // String saved at sp+16
3873 
3874     // Prep to load substr for scan
         // Reached from SCAN_SUBSTR when a candidate fails: restore the
         // substring pointer and count from the stack and resume scanning.
3875     masm.bind(LOAD_SUBSTR);
3876     masm.movptr(rdi, Address(rsp, 8));
3877     masm.movl(rax, Address(rsp, 0));
3878 
3879     // Load substr
         // The add/sub pair pre-compensates for the adjustments made at the
         // top of SCAN_TO_SUBSTR on the first iteration.
3880     masm.bind(PREP_FOR_SCAN);
3881     masm.movdqu(tmp1Reg, Address(rdi, 0));
3882     masm.addq(rdx, 8);    // prime the loop
3883     masm.subptr(rsi, 16);
3884 
3885     // Scan string for substr in 16-byte vectors
         // Per the Intel SDM, imm8 0x0d selects unsigned 16-bit elements
         // with the "equal ordered" (substring) aggregation.
3886     masm.bind(SCAN_TO_SUBSTR);
3887     masm.subq(rdx, 8);
3888     masm.addptr(rsi, 16);
3889     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3890     masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
3891     masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
3892 
3893     // Fallthru: found a potential substr
3894 
3895     //Make sure string is still long enough
3896     masm.subl(rdx, rcx);
3897     masm.cmpl(rdx, rax);
3898     masm.jccb(Assembler::negative, RET_NOT_FOUND);
3899     // Compute start addr of substr
         // rbx remembers the candidate's address for the final offset
         // computation.
3900     masm.lea(rsi, Address(rsi, rcx, Address::times_2));
3901     masm.movptr(rbx, rsi);
3902 
3903     // Compare potential substr
3904     masm.addq(rdx, 8);        // prime the loop
3905     masm.addq(rax, 8);
3906     masm.subptr(rsi, 16);
3907     masm.subptr(rdi, 16);
3908 
3909     // Scan 16-byte vectors of string and substr
3910     masm.bind(SCAN_SUBSTR);
3911     masm.subq(rax, 8);
3912     masm.subq(rdx, 8);
3913     masm.addptr(rsi, 16);
3914     masm.addptr(rdi, 16);
3915     masm.movdqu(tmp1Reg, Address(rdi, 0));
3916     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3917     masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
3918     masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
3919 
3920     // Compute substr offset
         // (candidate address - string start) is a byte distance; the shift
         // by 1 converts it to a char index.
3921     masm.movptr(rsi, Address(rsp, 16));
3922     masm.subptr(rbx, rsi);
3923     masm.shrl(rbx, 1);
3924     masm.jmpb(CLEANUP);
3925 
3926     masm.bind(RET_NEG_ONE);
3927     masm.movl(rbx, -1);
3928     masm.jmpb(DONE);
3929 
3930     masm.bind(RET_NOT_FOUND);
3931     masm.movl(rbx, -1);
3932 
         // Drop the three saved words (3 * 8 bytes).
3933     masm.bind(CLEANUP);
3934     masm.addptr(rsp, 24);
3935 
3936     masm.bind(DONE);
3937   %}
3938 
3939   enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3940                               rbx_RegI tmp3, rcx_RegI tmp4, rax_RegI result) %{
         // Emits an inline equality test of two java.lang.String objects
         // held in rdi (str1) and rsi (str2).  rax receives 1 when the
         // strings are equal and 0 otherwise.  The general registers are
         // hard-coded (rbx, rcx scratch; rax result); only the two XMM
         // temporaries are taken from the operands.
         // The sixth operand is named tmp4 so that $tmp2 below refers
         // unambiguously to the regD operand; operands are bound by
         // position, so users of this encoding are unaffected.
3941     Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3942     MacroAssembler masm(&cbuf);
3943 
3944     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3945     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3946 
3947     int value_offset  = java_lang_String::value_offset_in_bytes();
3948     int offset_offset = java_lang_String::offset_offset_in_bytes();
3949     int count_offset  = java_lang_String::count_offset_in_bytes();
3950     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3951 
3952     // does source == target string?
3953     masm.cmpptr(rdi, rsi);
3954     masm.jcc(Assembler::equal, RET_TRUE);
3955 
3956     // get and compare counts
         // Different counts -> false; equal counts of zero -> true.
3957     masm.movl(rcx, Address(rdi, count_offset));
3958     masm.movl(rax, Address(rsi, count_offset));
3959     masm.cmpl(rcx, rax);
3960     masm.jcc(Assembler::notEqual, RET_FALSE);
3961     masm.testl(rax, rax);
3962     masm.jcc(Assembler::zero, RET_TRUE);
3963 
3964     // get source string offset and value
3965     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3966     masm.movl(rax, Address(rsi, offset_offset));
3967     masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset));
3968 
3969     // get compare string offset and value
3970     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3971     masm.movl(rax, Address(rdi, offset_offset));
3972     masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset));
3973 
3974     // Set byte count
         // Chars are two bytes each; rcx and rax both start as the byte count.
3975     masm.shll(rcx, 1);
3976     masm.movl(rax, rcx);
3977 
3978     if (UseSSE42Intrinsics) {
3979       // With SSE4.2, use double quad vector compare
3980       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3981       // Compare 16-byte vectors
3982       masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
3983       masm.andl(rax, 0x0000000e);  // tail count (in bytes)
3984       masm.testl(rcx, rcx);
3985       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Advance both pointers past the vector region and count rcx
           // from -n up to zero.
3986       masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3987       masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3988       masm.negptr(rcx);
3989 
3990       masm.bind(COMPARE_WIDE_VECTORS);
3991       masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3992       masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
3993       masm.pxor(tmp1Reg, tmp2Reg);
3994       masm.ptest(tmp1Reg, tmp1Reg);
3995       masm.jccb(Assembler::notZero, RET_FALSE);
3996       masm.addptr(rcx, 16);
3997       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3998       masm.bind(COMPARE_TAIL);
3999       masm.movl(rcx, rax);
4000       // Fallthru to tail compare
4001     }
4002 
4003     // Compare 4-byte vectors
4004     masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
4005     masm.andl(rax, 0x00000002);  // tail char (in bytes)
4006     masm.testl(rcx, rcx);
4007     masm.jccb(Assembler::zero, COMPARE_CHAR);
4008     masm.lea(rdi, Address(rdi, rcx, Address::times_1));
4009     masm.lea(rsi, Address(rsi, rcx, Address::times_1));
4010     masm.negptr(rcx);
4011 
4012     masm.bind(COMPARE_VECTORS);
4013     masm.movl(rbx, Address(rdi, rcx, Address::times_1));
4014     masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
4015     masm.jccb(Assembler::notEqual, RET_FALSE);
4016     masm.addptr(rcx, 4);
4017     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4018 
4019     // Compare trailing char (final 2 bytes), if any
4020     masm.bind(COMPARE_CHAR);
4021     masm.testl(rax, rax);
4022     masm.jccb(Assembler::zero, RET_TRUE);
4023     masm.load_unsigned_short(rbx, Address(rdi, 0));
4024     masm.load_unsigned_short(rcx, Address(rsi, 0));
4025     masm.cmpl(rbx, rcx);
4026     masm.jccb(Assembler::notEqual, RET_FALSE);
4027 
4028     masm.bind(RET_TRUE);
4029     masm.movl(rax, 1);   // return true
4030     masm.jmpb(DONE);
4031 
4032     masm.bind(RET_FALSE);
4033     masm.xorl(rax, rax); // return false
4034 
4035     masm.bind(DONE);
4036   %}
4037 
4038   enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
4039                              rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline equality test of two arrays whose element base
         // offset is that of T_CHAR.  resultReg receives 1 when the arrays
         // are the same reference or have equal length and contents, and 0
         // otherwise (including when exactly one reference is null).
         // ary1Reg, ary2Reg, tmp3Reg and tmp4Reg are all overwritten.
4040     Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4041     MacroAssembler masm(&cbuf);
4042 
4043     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
4044     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
4045     Register ary1Reg      = as_Register($ary1$$reg);
4046     Register ary2Reg      = as_Register($ary2$$reg);
4047     Register tmp3Reg      = as_Register($tmp3$$reg);
4048     Register tmp4Reg      = as_Register($tmp4$$reg);
4049     Register resultReg    = as_Register($result$$reg);
4050 
4051     int length_offset  = arrayOopDesc::length_offset_in_bytes();
4052     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4053 
4054     // Check the input args
         // Same reference (including both null) -> true; one null -> false.
4055     masm.cmpq(ary1Reg, ary2Reg);
4056     masm.jcc(Assembler::equal, TRUE_LABEL);
4057     masm.testq(ary1Reg, ary1Reg);
4058     masm.jcc(Assembler::zero, FALSE_LABEL);
4059     masm.testq(ary2Reg, ary2Reg);
4060     masm.jcc(Assembler::zero, FALSE_LABEL);
4061 
4062     // Check the lengths
4063     masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
4064     masm.movl(resultReg, Address(ary2Reg, length_offset));
4065     masm.cmpl(tmp4Reg, resultReg);
4066     masm.jcc(Assembler::notEqual, FALSE_LABEL);
4067     masm.testl(resultReg, resultReg);
4068     masm.jcc(Assembler::zero, TRUE_LABEL);
4069 
4070     //load array address
4071     masm.lea(ary1Reg, Address(ary1Reg, base_offset));
4072     masm.lea(ary2Reg, Address(ary2Reg, base_offset));
4073 
4074     //set byte count
         // Length is doubled to get bytes; tmp4Reg and resultReg both start
         // as the byte count.
4075     masm.shll(tmp4Reg, 1);
4076     masm.movl(resultReg,tmp4Reg);
4077 
4078     if (UseSSE42Intrinsics){
4079       // With SSE4.2, use double quad vector compare
4080       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4081       // Compare 16-byte vectors
4082       masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
4083       masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
4084       masm.testl(tmp4Reg, tmp4Reg);
4085       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Advance both pointers past the vector region and count tmp4Reg
           // from -n up to zero.
4086       masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4087       masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4088       masm.negptr(tmp4Reg);
4089 
4090       masm.bind(COMPARE_WIDE_VECTORS);
4091       masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4092       masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4093       masm.pxor(tmp1Reg, tmp2Reg);
4094       masm.ptest(tmp1Reg, tmp1Reg);
4095 
4096       masm.jccb(Assembler::notZero, FALSE_LABEL);
4097       masm.addptr(tmp4Reg, 16);
4098       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4099       masm.bind(COMPARE_TAIL);
4100       masm.movl(tmp4Reg, resultReg);
4101       // Fallthru to tail compare
4102     }
4103 
4104    // Compare 4-byte vectors
4105     masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
4106     masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
4107     masm.testl(tmp4Reg, tmp4Reg); //if tmp4 == 0, only compare char
4108     masm.jccb(Assembler::zero, COMPARE_CHAR);
4109     masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4110     masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4111     masm.negptr(tmp4Reg);
4112 
4113     masm.bind(COMPARE_VECTORS);
4114     masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4115     masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4116     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4117     masm.addptr(tmp4Reg, 4);
4118     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4119 
4120     // Compare trailing char (final 2 bytes), if any
4121     masm.bind(COMPARE_CHAR);
4122     masm.testl(resultReg, resultReg);
4123     masm.jccb(Assembler::zero, TRUE_LABEL);
4124     masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
4125     masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
4126     masm.cmpl(tmp3Reg, tmp4Reg);
4127     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4128 
4129     masm.bind(TRUE_LABEL);
4130     masm.movl(resultReg, 1);   // return true
4131     masm.jmpb(DONE);
4132 
4133     masm.bind(FALSE_LABEL);
4134     masm.xorl(resultReg, resultReg); // return false
4135 
4136     // That's it
4137     masm.bind(DONE);
4138   %}
4139 
4140   enc_class enc_rethrow()
4141   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to
         // OptoRuntime::rethrow_stub().  The displacement is measured from
         // the end of the instruction, hence the "- 4" for the 32-bit field
         // that is still to be written; it carries a runtime-call
         // relocation.
4142     cbuf.set_inst_mark();
4143     emit_opcode(cbuf, 0xE9); // jmp entry
4144     emit_d32_reloc(cbuf,
4145                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
4146                    runtime_call_Relocation::spec(),
4147                    RELOC_DISP32);
4148   %}
4149 
4150   enc_class absF_encoding(regF dst)
4151   %{
         // Emits "andps dst, [rip + disp32]" (0F 54 /r) by hand, with the
         // memory operand addressing StubRoutines::x86::float_sign_mask().
         // Registers with encoding 8 and above get a REX.R prefix and the
         // encoding is reduced by 8 for the ModRM reg field.
4152     int dstenc = $dst$$reg;
4153     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
4154 
4155     cbuf.set_inst_mark();
4156     if (dstenc >= 8) {
4157       emit_opcode(cbuf, Assembler::REX_R);
4158       dstenc -= 8;
4159     }
4160     // XXX reg_mem doesn't support RIP-relative addressing yet
4161     emit_opcode(cbuf, 0x0F);
4162     emit_opcode(cbuf, 0x54);
4163     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4164     emit_d32_reloc(cbuf, signmask_address);
4165   %}
4166 
4167   enc_class absD_encoding(regD dst)
4168   %{
         // Emits "andpd dst, [rip + disp32]" (66 0F 54 /r) by hand, with the
         // memory operand addressing StubRoutines::x86::double_sign_mask().
         // The 0x66 prefix is written before the optional REX.R prefix;
         // registers with encoding 8 and above have the encoding reduced by
         // 8 for the ModRM reg field.
4169     int dstenc = $dst$$reg;
4170     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
4171 
4172     cbuf.set_inst_mark();
4173     emit_opcode(cbuf, 0x66);
4174     if (dstenc >= 8) {
4175       emit_opcode(cbuf, Assembler::REX_R);
4176       dstenc -= 8;
4177     }
4178     // XXX reg_mem doesn't support RIP-relative addressing yet
4179     emit_opcode(cbuf, 0x0F);
4180     emit_opcode(cbuf, 0x54);
4181     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4182     emit_d32_reloc(cbuf, signmask_address);
4183   %}
4184 
4185   enc_class negF_encoding(regF dst)
4186   %{
         // Emits "xorps dst, [rip + disp32]" (0F 57 /r) by hand, with the
         // memory operand addressing StubRoutines::x86::float_sign_flip().
         // Registers with encoding 8 and above get a REX.R prefix and the
         // encoding is reduced by 8 for the ModRM reg field.
4187     int dstenc = $dst$$reg;
4188     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
4189 
4190     cbuf.set_inst_mark();
4191     if (dstenc >= 8) {
4192       emit_opcode(cbuf, Assembler::REX_R);
4193       dstenc -= 8;
4194     }
4195     // XXX reg_mem doesn't support RIP-relative addressing yet
4196     emit_opcode(cbuf, 0x0F);
4197     emit_opcode(cbuf, 0x57);
4198     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4199     emit_d32_reloc(cbuf, signflip_address);
4200   %}
4201 
4202   enc_class negD_encoding(regD dst)
4203   %{
         // Emits "xorpd dst, [rip + disp32]" (66 0F 57 /r) by hand, with the
         // memory operand addressing StubRoutines::x86::double_sign_flip().
         // The 0x66 prefix is written before the optional REX.R prefix;
         // registers with encoding 8 and above have the encoding reduced by
         // 8 for the ModRM reg field.
4204     int dstenc = $dst$$reg;
4205     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
4206 
4207     cbuf.set_inst_mark();
4208     emit_opcode(cbuf, 0x66);
4209     if (dstenc >= 8) {
4210       emit_opcode(cbuf, Assembler::REX_R);
4211       dstenc -= 8;
4212     }
4213     // XXX reg_mem doesn't support RIP-relative addressing yet
4214     emit_opcode(cbuf, 0x0F);
4215     emit_opcode(cbuf, 0x57);
4216     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4217     emit_d32_reloc(cbuf, signflip_address);
4218   %}
4219 
4220   enc_class f2i_fixup(rRegI dst, regF src)
4221   %{
         // Emits, as raw bytes, a check-and-repair sequence for a
         // float-to-int result already in dst:
         //   cmpl dst, 0x80000000 ; jne done
         //   subq rsp, 8 ; movss [rsp], src ; call f2i_fixup stub ; popq dst
         // NOTE(review): 0x80000000 is presumably the sentinel the preceding
         // conversion produces for inputs it cannot represent -- confirm at
         // the instruct that uses this encoding.
4222     int dstenc = $dst$$reg;
4223     int srcenc = $src$$reg;
4224 
4225     // cmpl $dst, #0x80000000
4226     if (dstenc >= 8) {
4227       emit_opcode(cbuf, Assembler::REX_B);
4228     }
4229     emit_opcode(cbuf, 0x81);
4230     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4231     emit_d32(cbuf, 0x80000000);
4232 
4233     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, or 6 with REX) + call (5) + popq
         // (1, or 2 with REX) = 15, 16 or 17.  Any change to the
         // instructions below must keep these constants in step.
4234     emit_opcode(cbuf, 0x75);
4235     if (srcenc < 8 && dstenc < 8) {
4236       emit_d8(cbuf, 0xF);
4237     } else if (srcenc >= 8 && dstenc >= 8) {
4238       emit_d8(cbuf, 0x11);
4239     } else {
4240       emit_d8(cbuf, 0x10);
4241     }
4242 
4243     // subq rsp, #8
4244     emit_opcode(cbuf, Assembler::REX_W);
4245     emit_opcode(cbuf, 0x83);
4246     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4247     emit_d8(cbuf, 8);
4248 
4249     // movss [rsp], $src
4250     emit_opcode(cbuf, 0xF3);
4251     if (srcenc >= 8) {
4252       emit_opcode(cbuf, Assembler::REX_R);
4253     }
4254     emit_opcode(cbuf, 0x0F);
4255     emit_opcode(cbuf, 0x11);
4256     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4257 
4258     // call f2i_fixup
         // rel32 is measured from the end of the call instruction, hence
         // the "- 4" for the displacement field that is still to be written.
4259     cbuf.set_inst_mark();
4260     emit_opcode(cbuf, 0xE8);
4261     emit_d32_reloc(cbuf,
4262                    (int)
4263                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
4264                    runtime_call_Relocation::spec(),
4265                    RELOC_DISP32);
4266 
4267     // popq $dst
         // Reloads dst from the stack slot and restores rsp.
4268     if (dstenc >= 8) {
4269       emit_opcode(cbuf, Assembler::REX_B);
4270     }
4271     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4272 
4273     // done:
4274   %}
4275 
4276   enc_class f2l_fixup(rRegL dst, regF src)
4277   %{
         // Emits, as raw bytes, a check-and-repair sequence for a
         // float-to-long result already in dst:
         //   cmpq against the 64-bit constant at double_sign_flip() ; jne done
         //   subq rsp, 8 ; movss [rsp], src ; call f2l_fixup stub ; popq dst
         // The comparison constant is read from memory (RIP-relative) at
         // StubRoutines::x86::double_sign_flip(); the existing comment below
         // gives its value as 0x8000000000000000.
4278     int dstenc = $dst$$reg;
4279     int srcenc = $src$$reg;
4280     address const_address = (address) StubRoutines::x86::double_sign_flip();
4281 
4282     // cmpq $dst, [0x8000000000000000]
4283     cbuf.set_inst_mark();
4284     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4285     emit_opcode(cbuf, 0x39);
4286     // XXX reg_mem doesn't support RIP-relative addressing yet
4287     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4288     emit_d32_reloc(cbuf, const_address);
4289 
4290 
4291     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, or 6 with REX) + call (5) + popq
         // (1, or 2 with REX) = 15, 16 or 17.  Any change to the
         // instructions below must keep these constants in step.
4292     emit_opcode(cbuf, 0x75);
4293     if (srcenc < 8 && dstenc < 8) {
4294       emit_d8(cbuf, 0xF);
4295     } else if (srcenc >= 8 && dstenc >= 8) {
4296       emit_d8(cbuf, 0x11);
4297     } else {
4298       emit_d8(cbuf, 0x10);
4299     }
4300 
4301     // subq rsp, #8
4302     emit_opcode(cbuf, Assembler::REX_W);
4303     emit_opcode(cbuf, 0x83);
4304     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4305     emit_d8(cbuf, 8);
4306 
4307     // movss [rsp], $src
4308     emit_opcode(cbuf, 0xF3);
4309     if (srcenc >= 8) {
4310       emit_opcode(cbuf, Assembler::REX_R);
4311     }
4312     emit_opcode(cbuf, 0x0F);
4313     emit_opcode(cbuf, 0x11);
4314     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4315 
4316     // call f2l_fixup
         // rel32 is measured from the end of the call instruction, hence
         // the "- 4" for the displacement field that is still to be written.
4317     cbuf.set_inst_mark();
4318     emit_opcode(cbuf, 0xE8);
4319     emit_d32_reloc(cbuf,
4320                    (int)
4321                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
4322                    runtime_call_Relocation::spec(),
4323                    RELOC_DISP32);
4324 
4325     // popq $dst
         // Reloads dst from the stack slot and restores rsp.
4326     if (dstenc >= 8) {
4327       emit_opcode(cbuf, Assembler::REX_B);
4328     }
4329     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4330 
4331     // done:
4332   %}
4333 
4334   enc_class d2i_fixup(rRegI dst, regD src)
4335   %{
         // Emits a conditional fix-up for an int result held in $dst:
         //   - compare $dst with the immediate 0x80000000;
         //   - if they differ, branch over the rest ("done");
         //   - otherwise spill $src (double-precision) to a freshly reserved
         //     8-byte stack slot, call the d2i_fixup stub, and pop the slot
         //     into $dst.
         // NOTE(review): presumably emitted right after the double->int
         // convert instruction -- confirm at the instruct that uses this.
4336     int dstenc = $dst$$reg;
4337     int srcenc = $src$$reg;
4338 
4339     // cmpl $dst, #0x80000000
         // 32-bit compare, so only a REX.B prefix is needed, and only when
         // $dst is r8-r15 (encoding >= 8).
4340     if (dstenc >= 8) {
4341       emit_opcode(cbuf, Assembler::REX_B);
4342     }
4343     emit_opcode(cbuf, 0x81);
4344     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4345     emit_d32(cbuf, 0x80000000);
4346 
4347     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5, or 6 with a REX prefix when srcenc >= 8)
         // + call (5) + popq (1, or 2 with a REX prefix when dstenc >= 8),
         // i.e. 15, 16 or 17 bytes.  Any change to the code below must keep
         // these constants in sync.
4348     emit_opcode(cbuf, 0x75);
4349     if (srcenc < 8 && dstenc < 8) {
4350       emit_d8(cbuf, 0xF);
4351     } else if (srcenc >= 8 && dstenc >= 8) {
4352       emit_d8(cbuf, 0x11);
4353     } else {
4354       emit_d8(cbuf, 0x10);
4355     }
4356 
4357     // subq rsp, #8
4358     emit_opcode(cbuf, Assembler::REX_W);
4359     emit_opcode(cbuf, 0x83);
4360     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4361     emit_d8(cbuf, 8);
4362 
4363     // movsd [rsp], $src
4364     emit_opcode(cbuf, 0xF2);
4365     if (srcenc >= 8) {
4366       emit_opcode(cbuf, Assembler::REX_R);
4367     }
4368     emit_opcode(cbuf, 0x0F);
4369     emit_opcode(cbuf, 0x11);
4370     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4371 
4372     // call d2i_fixup
         // The rel32 operand is computed relative to the end of the 4-byte
         // displacement (code_end() is taken after the 0xE8 opcode byte).
4373     cbuf.set_inst_mark();
4374     emit_opcode(cbuf, 0xE8);
4375     emit_d32_reloc(cbuf,
4376                    (int)
4377                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
4378                    runtime_call_Relocation::spec(),
4379                    RELOC_DISP32);
4380 
4381     // popq $dst
4382     if (dstenc >= 8) {
4383       emit_opcode(cbuf, Assembler::REX_B);
4384     }
4385     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4386 
4387     // done:
4388   %}
4389 
4390   enc_class d2l_fixup(rRegL dst, regD src)
4391   %{
         // Emits a conditional fix-up for a long result held in $dst:
         //   - compare $dst with the 64-bit constant stored at
         //     StubRoutines::x86::double_sign_flip();
         //   - if they differ, branch over the rest ("done");
         //   - otherwise spill $src (double-precision) to a freshly reserved
         //     8-byte stack slot, call the d2l_fixup stub, and pop the slot
         //     into $dst.
         // NOTE(review): presumably emitted right after the double->long
         // convert instruction -- confirm at the instruct that uses this.
4392     int dstenc = $dst$$reg;
4393     int srcenc = $src$$reg;
4394     address const_address = (address) StubRoutines::x86::double_sign_flip();
4395 
4396     // cmpq $dst, [0x8000000000000000]
4397     cbuf.set_inst_mark();
         // REX.W always; REX_WR additionally when $dst is r8-r15 (encoding >= 8).
4398     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4399     emit_opcode(cbuf, 0x39);
4400     // XXX reg_mem doesn't support RIP-relative addressing yet
4401     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4402     emit_d32_reloc(cbuf, const_address);
4403 
4404 
4405     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5, or 6 with a REX prefix when srcenc >= 8)
         // + call (5) + popq (1, or 2 with a REX prefix when dstenc >= 8),
         // i.e. 15, 16 or 17 bytes.  Any change to the code below must keep
         // these constants in sync.
4406     emit_opcode(cbuf, 0x75);
4407     if (srcenc < 8 && dstenc < 8) {
4408       emit_d8(cbuf, 0xF);
4409     } else if (srcenc >= 8 && dstenc >= 8) {
4410       emit_d8(cbuf, 0x11);
4411     } else {
4412       emit_d8(cbuf, 0x10);
4413     }
4414 
4415     // subq rsp, #8
4416     emit_opcode(cbuf, Assembler::REX_W);
4417     emit_opcode(cbuf, 0x83);
4418     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4419     emit_d8(cbuf, 8);
4420 
4421     // movsd [rsp], $src
4422     emit_opcode(cbuf, 0xF2);
4423     if (srcenc >= 8) {
4424       emit_opcode(cbuf, Assembler::REX_R);
4425     }
4426     emit_opcode(cbuf, 0x0F);
4427     emit_opcode(cbuf, 0x11);
4428     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4429 
4430     // call d2l_fixup
         // The rel32 operand is computed relative to the end of the 4-byte
         // displacement (code_end() is taken after the 0xE8 opcode byte).
4431     cbuf.set_inst_mark();
4432     emit_opcode(cbuf, 0xE8);
4433     emit_d32_reloc(cbuf,
4434                    (int)
4435                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4436                    runtime_call_Relocation::spec(),
4437                    RELOC_DISP32);
4438 
4439     // popq $dst
4440     if (dstenc >= 8) {
4441       emit_opcode(cbuf, Assembler::REX_B);
4442     }
4443     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4444 
4445     // done:
4446   %}
4447 
4448   // Safepoint Poll.  This polls the safepoint page, and causes an
4449   // exception if it is not readable. Unfortunately, it kills
4450   // RFLAGS in the process.
4451   enc_class enc_safepoint_poll
4452   %{
4453     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4454     // XXX reg_mem doesn't support RIP-relative addressing yet
         // Mark the start of the instruction and attach the poll_type
         // relocation to that address before any bytes are emitted.
4455     cbuf.set_inst_mark();
4456     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4457     emit_opcode(cbuf, 0x85); // testl
4458     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4459     // cbuf.inst_mark() is beginning of instruction
         // The 32-bit displacement addresses os::get_polling_page().
4460     emit_d32_reloc(cbuf, os::get_polling_page());
4461     // (relocInfo::poll_type is registered via cbuf.relocate above, not here)
4462   %}
4463 %}
4464 
4465 
4466 
4467 //----------FRAME--------------------------------------------------------------
4468 // Definition of frame structure and management information.
4469 //
4470 //  S T A C K   L A Y O U T    Allocators stack-slot number
4471 //                             |   (to get allocators register number
4472 //  G  Owned by    |        |  v    add OptoReg::stack0())
4473 //  r   CALLER     |        |
4474 //  o     |        +--------+      pad to even-align allocators stack-slot
4475 //  w     V        |  pad0  |        numbers; owned by CALLER
4476 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4477 //  h     ^        |   in   |  5
4478 //        |        |  args  |  4   Holes in incoming args owned by SELF
4479 //  |     |        |        |  3
4480 //  |     |        +--------+
4481 //  V     |        | old out|      Empty on Intel, window on Sparc
4482 //        |    old |preserve|      Must be even aligned.
4483 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4484 //        |        |   in   |  3   area for Intel ret address
4485 //     Owned by    |preserve|      Empty on Sparc.
4486 //       SELF      +--------+
4487 //        |        |  pad2  |  2   pad to align old SP
4488 //        |        +--------+  1
4489 //        |        | locks  |  0
4490 //        |        +--------+----> OptoReg::stack0(), even aligned
4491 //        |        |  pad1  | 11   pad to align new SP
4492 //        |        +--------+
4493 //        |        |        | 10
4494 //        |        | spills |  9   spills
4495 //        V        |        |  8   (pad0 slot for callee)
4496 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4497 //        ^        |  out   |  7
4498 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4499 //     Owned by    +--------+
4500 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4501 //        |    new |preserve|      Must be even-aligned.
4502 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4503 //        |        |        |
4504 //
4505 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4506 //         known from SELF's arguments and the Java calling convention.
4507 //         Region 6-7 is determined per call site.
4508 // Note 2: If the calling convention leaves holes in the incoming argument
4509 //         area, those holes are owned by SELF.  Holes in the outgoing area
4510 //         are owned by the CALLEE.  Holes should not be necessary in the
4511 //         incoming area, as the Java calling convention is completely under
4512 //         the control of the AD file.  Doubles can be sorted and packed to
4513 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4514 //         varargs C calling conventions.
4515 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4516 //         even aligned with pad0 as needed.
4517 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4518 //         region 6-11 is even aligned; it may be padded out more so that
4519 //         the region from SP to FP meets the minimum stack alignment.
4520 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4521 //         alignment.  Region 11, pad1, may be dynamically extended so that
4522 //         SP meets the minimum alignment.
4523 
4524 frame
4525 %{
       // Frame description consumed by ADLC: stack direction, the registers
       // reserved by the compiled-code/interpreter calling convention, stack
       // slot accounting, the return-address location, and the calling
       // convention / return value hooks.
4526   // What direction does stack grow in (assumed to be same for C & Java)
4527   stack_direction(TOWARDS_LOW);
4528 
4529   // These registers define part of the calling convention
4530   // between compiled code and the interpreter.
4531   inline_cache_reg(RAX);                // Inline Cache Register
4532   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4533                                         // calling interpreter
4534 
4535   // Optional: name the operand used by cisc-spilling to access
4536   // [stack_pointer + offset]
4537   cisc_spilling_operand_name(indOffset32);
4538 
4539   // Number of stack slots consumed by locking an object
4540   sync_stack_slots(2);
4541 
4542   // Compiled code's Frame Pointer
4543   frame_pointer(RSP);
4544 
4545   // Interpreter stores its frame pointer in a register which is
4546   // stored to the stack by I2CAdaptors.
4547   // I2CAdaptors convert from interpreted java to compiled java.
4548   interpreter_frame_pointer(RBP);
4549 
4550   // Stack alignment requirement
4551   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4552 
4553   // Number of stack slots between incoming argument block and the start of
4554   // a new frame.  The PROLOG must add this many slots to the stack.  The
4555   // EPILOG must remove this many slots.  amd64 needs two slots for
4556   // return address.
       // NOTE(review): the value below is 4 (plus 2 when VerifyStackAtCalls is
       // set), not the 2 mentioned above -- confirm what the additional two
       // slots hold and update the comment accordingly.
4557   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4558 
4559   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4560   // for calls to C.  Supports the var-args backing area for register parms.
4561   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4562 
4563   // The after-PROLOG location of the return address.  Location of
4564   // return address specifies a type (REG or STACK) and a number
4565   // representing the register number (i.e. - use a register name) or
4566   // stack slot.
4567   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4568   // Otherwise, it is above the locks and verification slot and alignment word
4569   return_addr(STACK - 2 +
4570               round_to(2 + 2 * VerifyStackAtCalls +
4571                        Compile::current()->fixed_slots(),
4572                        WordsPerLong * 2));
4573 
4574   // Body of function which returns an integer array locating
4575   // arguments either in registers or in stack slots.  Passed an array
4576   // of ideal registers called "sig" and a "length" count.  Stack-slot
4577   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4578   // arguments for a CALLEE.  Incoming stack arguments are
4579   // automatically biased by the preserve_stack_slots field above.
4580 
4581   calling_convention
4582   %{
4583     // No difference between ingoing/outgoing just pass false
4584     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4585   %}
4586 
4587   c_calling_convention
4588   %{
4589     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
4590     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4591   %}
4592 
4593   // Location of compiled Java return values.  Same as C for now.
4594   return_value
4595   %{
         // Looks up the (hi, lo) register pair for ideal_reg in two parallel
         // tables indexed by the ideal register type.
         // NOTE(review): the assert admits only Op_RegI..Op_RegL, yet the
         // tables carry a row labelled Op_RegN below Op_RegI; confirm whether
         // narrow-oop returns are meant to be accepted here.
4596     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4597            "only return normal values");
4598 
4599     static const int lo[Op_RegL + 1] = {
4600       0,
4601       0,
4602       RAX_num,  // Op_RegN
4603       RAX_num,  // Op_RegI
4604       RAX_num,  // Op_RegP
4605       XMM0_num, // Op_RegF
4606       XMM0_num, // Op_RegD
4607       RAX_num   // Op_RegL
4608     };
         // Upper halves: OptoReg::Bad where the table assigns no second half.
4609     static const int hi[Op_RegL + 1] = {
4610       0,
4611       0,
4612       OptoReg::Bad, // Op_RegN
4613       OptoReg::Bad, // Op_RegI
4614       RAX_H_num,    // Op_RegP
4615       OptoReg::Bad, // Op_RegF
4616       XMM0_H_num,   // Op_RegD
4617       RAX_H_num     // Op_RegL
4618     };
         // Guards against a new ideal register type being added without a
         // matching table row.
4619     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4620     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4621   %}
4622 %}
4623 
4624 //----------ATTRIBUTES---------------------------------------------------------
4625 //----------Operand Attributes-------------------------------------------------
4626 op_attrib op_cost(0);        // Required cost attribute
     // NOTE(review): the parenthesized values in this section are presumably
     // the defaults used when an operand or instruction does not set the
     // attribute itself (operands below do set op_cost explicitly) -- confirm
     // against the ADLC documentation.
4627 
4628 //----------Instruction Attributes---------------------------------------------
4629 ins_attrib ins_cost(100);       // Required cost attribute
4630 ins_attrib ins_size(8);         // Required size attribute (in bits)
4631 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4632 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4633                                 // a non-matching short branch variant
4634                                 // of some long branch?
4635 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4636                                 // be a power of 2) specifies the
4637                                 // alignment that some part of the
4638                                 // instruction (not necessarily the
4639                                 // start) requires.  If > 1, a
4640                                 // compute_padding() function must be
4641                                 // provided for the instruction
4642 
4643 //----------OPERANDS-----------------------------------------------------------
4644 // Operand definitions must precede instruction definitions for correct parsing
4645 // in the ADLC because operands constitute user defined types which are used in
4646 // instruction definitions.
4647 
4648 //----------Simple Operands----------------------------------------------------
4649 // Immediate Operands
4650 // Integer Immediate: matches any ConI node (no predicate); op_cost 10.
4651 operand immI()
4652 %{
4653   match(ConI);
4654 
4655   op_cost(10);
4656   format %{ %}
4657   interface(CONST_INTER);
4658 %}
4659 
4660 // Constant for test vs zero: a ConI whose value is exactly 0; op_cost 0.
4661 operand immI0()
4662 %{
4663   predicate(n->get_int() == 0);
4664   match(ConI);
4665 
4666   op_cost(0);
4667   format %{ %}
4668   interface(CONST_INTER);
4669 %}
4670 
4671 // Constant for increment: a ConI whose value is exactly 1; op_cost 0.
4672 operand immI1()
4673 %{
4674   predicate(n->get_int() == 1);
4675   match(ConI);
4676 
4677   op_cost(0);
4678   format %{ %}
4679   interface(CONST_INTER);
4680 %}
4681 
4682 // Constant for decrement: a ConI whose value is exactly -1; op_cost 0.
4683 operand immI_M1()
4684 %{
4685   predicate(n->get_int() == -1);
4686   match(ConI);
4687 
4688   op_cost(0);
4689   format %{ %}
4690   interface(CONST_INTER);
4691 %}
4692 
4693 // Valid scale values for addressing modes: a ConI in the range [0, 3].
     // No op_cost is given here, so the op_attrib default applies.
4694 operand immI2()
4695 %{
4696   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4697   match(ConI);
4698 
4699   format %{ %}
4700   interface(CONST_INTER);
4701 %}
4702 
4703 operand immI8()
4704 %{
       // Integer immediate that fits in a signed 8-bit field:
       // -0x80 <= value < 0x80.
4705   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4706   match(ConI);
4707 
4708   op_cost(5);
4709   format %{ %}
4710   interface(CONST_INTER);
4711 %}
4712 
4713 operand immI16()
4714 %{
       // Integer immediate that fits in a signed 16-bit field:
       // -32768 <= value <= 32767.
4715   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4716   match(ConI);
4717 
4718   op_cost(10);
4719   format %{ %}
4720   interface(CONST_INTER);
4721 %}
4722 
4723 // Constant for long shifts: a ConI whose value is exactly 32.
4724 operand immI_32()
4725 %{
4726   predicate( n->get_int() == 32 );
4727   match(ConI);
4728 
4729   op_cost(0);
4730   format %{ %}
4731   interface(CONST_INTER);
4732 %}
4733 
4734 // Constant for long shifts: a ConI whose value is exactly 64.
4735 operand immI_64()
4736 %{
4737   predicate( n->get_int() == 64 );
4738   match(ConI);
4739 
4740   op_cost(0);
4741   format %{ %}
4742   interface(CONST_INTER);
4743 %}
4744 
4745 // Pointer Immediate: matches any ConP node (no predicate); op_cost 10.
4746 operand immP()
4747 %{
4748   match(ConP);
4749 
4750   op_cost(10);
4751   format %{ %}
4752   interface(CONST_INTER);
4753 %}
4754 
4755 // NULL Pointer Immediate: a ConP whose pointer value is 0; op_cost 5.
4756 operand immP0()
4757 %{
4758   predicate(n->get_ptr() == 0);
4759   match(ConP);
4760 
4761   op_cost(5);
4762   format %{ %}
4763   interface(CONST_INTER);
4764 %}
4765 
4766 // Narrow Pointer Immediate: matches any ConN node (no predicate); op_cost 10.
4767 operand immN() %{
4768   match(ConN);
4769 
4770   op_cost(10);
4771   format %{ %}
4772   interface(CONST_INTER);
4773 %}
4774 
4775 // NULL Narrow Pointer Immediate: a ConN whose narrow constant is 0; op_cost 5.
4776 operand immN0() %{
4777   predicate(n->get_narrowcon() == 0);
4778   match(ConN);
4779 
4780   op_cost(5);
4781   format %{ %}
4782   interface(CONST_INTER);
4783 %}
4784 
4785 operand immP31()
4786 %{
       // Pointer immediate whose type is not an oop pointer and whose value
       // has no bits set at or above bit 31 (get_ptr() >> 31 == 0).
4787   predicate(!n->as_Type()->type()->isa_oopptr()
4788             && (n->get_ptr() >> 31) == 0);
4789   match(ConP);
4790 
4791   op_cost(5);
4792   format %{ %}
4793   interface(CONST_INTER);
4794 %}
4795 
4796 
4797 // Long Immediate: matches any ConL node (no predicate); op_cost 20.
4798 operand immL()
4799 %{
4800   match(ConL);
4801 
4802   op_cost(20);
4803   format %{ %}
4804   interface(CONST_INTER);
4805 %}
4806 
4807 // Long Immediate 8-bit: a ConL with -0x80 <= value < 0x80; op_cost 5.
4808 operand immL8()
4809 %{
4810   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4811   match(ConL);
4812 
4813   op_cost(5);
4814   format %{ %}
4815   interface(CONST_INTER);
4816 %}
4817 
4818 // Long Immediate 32-bit unsigned: a ConL whose value is unchanged by a
     // round trip through (unsigned int); op_cost 10.
4819 operand immUL32()
4820 %{
4821   predicate(n->get_long() == (unsigned int) (n->get_long()));
4822   match(ConL);
4823 
4824   op_cost(10);
4825   format %{ %}
4826   interface(CONST_INTER);
4827 %}
4828 
4829 // Long Immediate 32-bit signed: a ConL whose value is unchanged by a
     // round trip through (int); op_cost 15.
4830 operand immL32()
4831 %{
4832   predicate(n->get_long() == (int) (n->get_long()));
4833   match(ConL);
4834 
4835   op_cost(15);
4836   format %{ %}
4837   interface(CONST_INTER);
4838 %}
4839 
4840 // Long Immediate zero: a ConL whose value is exactly 0; op_cost 10.
4841 operand immL0()
4842 %{
4843   predicate(n->get_long() == 0L);
4844   match(ConL);
4845 
4846   op_cost(10);
4847   format %{ %}
4848   interface(CONST_INTER);
4849 %}
4850 
4851 // Constant for increment: a ConL whose value is exactly 1.
4852 operand immL1()
4853 %{
4854   predicate(n->get_long() == 1);
4855   match(ConL);
4856 
4857   format %{ %}
4858   interface(CONST_INTER);
4859 %}
4860 
4861 // Constant for decrement: a ConL whose value is exactly -1.
4862 operand immL_M1()
4863 %{
4864   predicate(n->get_long() == -1);
4865   match(ConL);
4866 
4867   format %{ %}
4868   interface(CONST_INTER);
4869 %}
4870 
4871 // Long Immediate: the value 10 (a ConL whose value is exactly 10).
4872 operand immL10()
4873 %{
4874   predicate(n->get_long() == 10);
4875   match(ConL);
4876 
4877   format %{ %}
4878   interface(CONST_INTER);
4879 %}
4880 
4881 // Long immediate from 0 to 127 (a ConL with 0 <= value < 0x80).
4882 // Used for a shorter form of long mul by 10.
4883 operand immL_127()
4884 %{
4885   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4886   match(ConL);
4887 
4888   op_cost(10);
4889   format %{ %}
4890   interface(CONST_INTER);
4891 %}
4892 
4893 // Long Immediate: low 32-bit mask (a ConL equal to 0xFFFFFFFF); op_cost 20.
4894 operand immL_32bits()
4895 %{
4896   predicate(n->get_long() == 0xFFFFFFFFL);
4897   match(ConL);
4898   op_cost(20);
4899 
4900   format %{ %}
4901   interface(CONST_INTER);
4902 %}
4903 
4904 // Float Immediate zero: a ConF for which jint_cast(value) == 0.
     // NOTE(review): jint_cast is presumably a bitwise reinterpretation, in
     // which case only +0.0f matches (not -0.0f) -- confirm.
4905 operand immF0()
4906 %{
4907   predicate(jint_cast(n->getf()) == 0);
4908   match(ConF);
4909 
4910   op_cost(5);
4911   format %{ %}
4912   interface(CONST_INTER);
4913 %}
4914 
4915 // Float Immediate: matches any ConF node (no predicate); op_cost 15.
4916 operand immF()
4917 %{
4918   match(ConF);
4919 
4920   op_cost(15);
4921   format %{ %}
4922   interface(CONST_INTER);
4923 %}
4924 
4925 // Double Immediate zero: a ConD for which jlong_cast(value) == 0.
     // NOTE(review): jlong_cast is presumably a bitwise reinterpretation, in
     // which case only +0.0 matches (not -0.0) -- confirm.
4926 operand immD0()
4927 %{
4928   predicate(jlong_cast(n->getd()) == 0);
4929   match(ConD);
4930 
4931   op_cost(5);
4932   format %{ %}
4933   interface(CONST_INTER);
4934 %}
4935 
4936 // Double Immediate: matches any ConD node (no predicate); op_cost 15.
4937 operand immD()
4938 %{
4939   match(ConD);
4940 
4941   op_cost(15);
4942   format %{ %}
4943   interface(CONST_INTER);
4944 %}
4945 
4946 // Immediates for special shifts (sign extend)
4947 
4948 // Constant for the special (sign-extend) shifts: a ConI equal to 16.
4949 operand immI_16()
4950 %{
4951   predicate(n->get_int() == 16);
4952   match(ConI);
4953 
4954   format %{ %}
4955   interface(CONST_INTER);
4956 %}
4957 
4958 operand immI_24()
4959 %{
       // Integer constant whose value is exactly 24.
4960   predicate(n->get_int() == 24);
4961   match(ConI);
4962 
4963   format %{ %}
4964   interface(CONST_INTER);
4965 %}
4966 
4967 // Constant for byte-wide masking: a ConI whose value is exactly 255.
4968 operand immI_255()
4969 %{
4970   predicate(n->get_int() == 255);
4971   match(ConI);
4972 
4973   format %{ %}
4974   interface(CONST_INTER);
4975 %}
4976 
4977 // Constant for short-wide masking: a ConI whose value is exactly 65535.
4978 operand immI_65535()
4979 %{
4980   predicate(n->get_int() == 65535);
4981   match(ConI);
4982 
4983   format %{ %}
4984   interface(CONST_INTER);
4985 %}
4986 
4987 // Constant for byte-wide masking: a ConL whose value is exactly 255.
4988 operand immL_255()
4989 %{
4990   predicate(n->get_long() == 255);
4991   match(ConL);
4992 
4993   format %{ %}
4994   interface(CONST_INTER);
4995 %}
4996 
4997 // Constant for short-wide masking: a ConL whose value is exactly 65535.
4998 operand immL_65535()
4999 %{
5000   predicate(n->get_long() == 65535);
5001   match(ConL);
5002 
5003   format %{ %}
5004   interface(CONST_INTER);
5005 %}
5006 
5007 // Register Operands
5008 // Integer Register: allocated in register class int_reg.  Besides RegI it
     // also matches the fixed-register integer operands listed below.
5009 operand rRegI()
5010 %{
5011   constraint(ALLOC_IN_RC(int_reg));
5012   match(RegI);
5013 
5014   match(rax_RegI);
5015   match(rbx_RegI);
5016   match(rcx_RegI);
5017   match(rdx_RegI);
5018   match(rdi_RegI);
5019 
5020   format %{ %}
5021   interface(REG_INTER);
5022 %}
5023 
5024 // Special Registers: integer operand allocated in class int_rax_reg.
5025 operand rax_RegI()
5026 %{
5027   constraint(ALLOC_IN_RC(int_rax_reg));
5028   match(RegI);
5029   match(rRegI);
5030 
5031   format %{ "RAX" %}
5032   interface(REG_INTER);
5033 %}
5034 
5035 // Special Registers: integer operand allocated in class int_rbx_reg.
5036 operand rbx_RegI()
5037 %{
5038   constraint(ALLOC_IN_RC(int_rbx_reg));
5039   match(RegI);
5040   match(rRegI);
5041 
5042   format %{ "RBX" %}
5043   interface(REG_INTER);
5044 %}
5045 
5046 operand rcx_RegI()
5047 %{
       // Integer operand allocated in class int_rcx_reg.
5048   constraint(ALLOC_IN_RC(int_rcx_reg));
5049   match(RegI);
5050   match(rRegI);
5051 
5052   format %{ "RCX" %}
5053   interface(REG_INTER);
5054 %}
5055 
5056 operand rdx_RegI()
5057 %{
       // Integer operand allocated in class int_rdx_reg.
5058   constraint(ALLOC_IN_RC(int_rdx_reg));
5059   match(RegI);
5060   match(rRegI);
5061 
5062   format %{ "RDX" %}
5063   interface(REG_INTER);
5064 %}
5065 
5066 operand rdi_RegI()
5067 %{
       // Integer operand allocated in class int_rdi_reg.
5068   constraint(ALLOC_IN_RC(int_rdi_reg));
5069   match(RegI);
5070   match(rRegI);
5071 
5072   format %{ "RDI" %}
5073   interface(REG_INTER);
5074 %}
5075 
5076 operand no_rcx_RegI()
5077 %{
       // Integer operand allocated in class int_no_rcx_reg; matches every
       // fixed-register integer operand except rcx_RegI.
5078   constraint(ALLOC_IN_RC(int_no_rcx_reg));
5079   match(RegI);
5080   match(rax_RegI);
5081   match(rbx_RegI);
5082   match(rdx_RegI);
5083   match(rdi_RegI);
5084 
5085   format %{ %}
5086   interface(REG_INTER);
5087 %}
5088 
5089 operand no_rax_rdx_RegI()
5090 %{
       // Integer operand allocated in class int_no_rax_rdx_reg; matches the
       // fixed-register integer operands other than rax_RegI and rdx_RegI.
5091   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5092   match(RegI);
5093   match(rbx_RegI);
5094   match(rcx_RegI);
5095   match(rdi_RegI);
5096 
5097   format %{ %}
5098   interface(REG_INTER);
5099 %}
5100 
5101 // Pointer Register: allocated in class any_reg.  Matches RegP, rRegP and
     // every fixed-register pointer operand listed below, including r15_RegP.
5102 operand any_RegP()
5103 %{
5104   constraint(ALLOC_IN_RC(any_reg));
5105   match(RegP);
5106   match(rax_RegP);
5107   match(rbx_RegP);
5108   match(rdi_RegP);
5109   match(rsi_RegP);
5110   match(rbp_RegP);
5111   match(r15_RegP);
5112   match(rRegP);
5113 
5114   format %{ %}
5115   interface(REG_INTER);
5116 %}
5117 
5118 operand rRegP()
5119 %{
       // General pointer register operand, allocated in class ptr_reg.  It
       // also matches the fixed-register pointer operands listed below.
5120   constraint(ALLOC_IN_RC(ptr_reg));
5121   match(RegP);
5122   match(rax_RegP);
5123   match(rbx_RegP);
5124   match(rdi_RegP);
5125   match(rsi_RegP);
5126   match(rbp_RegP);
5127   match(r15_RegP);  // See Q&A below about r15_RegP.
5128 
5129   format %{ %}
5130   interface(REG_INTER);
5131 %}
5132 
5133 operand rRegN() %{
       // Narrow-pointer (RegN) register operand; it shares the int_reg
       // register class with rRegI.
5134   constraint(ALLOC_IN_RC(int_reg));
5135   match(RegN);
5136 
5137   format %{ %}
5138   interface(REG_INTER);
5139 %}
5140 
5141 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5142 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5143 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
5144 // The output of an instruction is controlled by the allocator, which respects
5145 // register class masks, not match rules.  Unless an instruction mentions
5146 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5147 // by the allocator as an input.
5148 
5149 operand no_rax_RegP()
5150 %{
       // Pointer operand allocated in class ptr_no_rax_reg; matches RegP and
       // the rbx/rsi/rdi fixed-register pointer operands.
5151   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
5152   match(RegP);
5153   match(rbx_RegP);
5154   match(rsi_RegP);
5155   match(rdi_RegP);
5156 
5157   format %{ %}
5158   interface(REG_INTER);
5159 %}
5160 
5161 operand no_rbp_RegP()
5162 %{
       // Pointer operand allocated in class ptr_no_rbp_reg; matches RegP and
       // the rbx/rsi/rdi fixed-register pointer operands.
       // NOTE(review): rax_RegP is not listed among the matches although the
       // class name only excludes RBP -- confirm this is intentional.
5163   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
5164   match(RegP);
5165   match(rbx_RegP);
5166   match(rsi_RegP);
5167   match(rdi_RegP);
5168 
5169   format %{ %}
5170   interface(REG_INTER);
5171 %}
5172 
5173 operand no_rax_rbx_RegP()
5174 %{
       // Pointer operand allocated in class ptr_no_rax_rbx_reg; matches RegP
       // and the rsi/rdi fixed-register pointer operands.
5175   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
5176   match(RegP);
5177   match(rsi_RegP);
5178   match(rdi_RegP);
5179 
5180   format %{ %}
5181   interface(REG_INTER);
5182 %}
5183 
5184 // Special Registers
5185 // Return a pointer value: pointer operand allocated in class ptr_rax_reg.
5186 operand rax_RegP()
5187 %{
5188   constraint(ALLOC_IN_RC(ptr_rax_reg));
5189   match(RegP);
5190   match(rRegP);
5191 
5192   format %{ %}
5193   interface(REG_INTER);
5194 %}
5195 
5196 // Special Registers
5197 // Return a compressed pointer value: RegN operand allocated in class
     // int_rax_reg (the same class rax_RegI uses).
5198 operand rax_RegN()
5199 %{
5200   constraint(ALLOC_IN_RC(int_rax_reg));
5201   match(RegN);
5202   match(rRegN);
5203 
5204   format %{ %}
5205   interface(REG_INTER);
5206 %}
5207 
5208 // Used in AtomicAdd: pointer operand allocated in class ptr_rbx_reg.
5209 operand rbx_RegP()
5210 %{
5211   constraint(ALLOC_IN_RC(ptr_rbx_reg));
5212   match(RegP);
5213   match(rRegP);
5214 
5215   format %{ %}
5216   interface(REG_INTER);
5217 %}
5218 
5219 operand rsi_RegP()
5220 %{
       // Pointer operand allocated in class ptr_rsi_reg.
5221   constraint(ALLOC_IN_RC(ptr_rsi_reg));
5222   match(RegP);
5223   match(rRegP);
5224 
5225   format %{ %}
5226   interface(REG_INTER);
5227 %}
5228 
5229 // Used in rep stosq: pointer operand allocated in class ptr_rdi_reg.
5230 operand rdi_RegP()
5231 %{
5232   constraint(ALLOC_IN_RC(ptr_rdi_reg));
5233   match(RegP);
5234   match(rRegP);
5235 
5236   format %{ %}
5237   interface(REG_INTER);
5238 %}
5239 
5240 operand rbp_RegP()
5241 %{
       // Pointer operand allocated in class ptr_rbp_reg.
5242   constraint(ALLOC_IN_RC(ptr_rbp_reg));
5243   match(RegP);
5244   match(rRegP);
5245 
5246   format %{ %}
5247   interface(REG_INTER);
5248 %}
5249 
5250 operand r15_RegP()
5251 %{
       // Pointer operand allocated in class ptr_r15_reg.  The Q&A comment
       // earlier in this file describes r15 as the read-only TLS register.
5252   constraint(ALLOC_IN_RC(ptr_r15_reg));
5253   match(RegP);
5254   match(rRegP);
5255 
5256   format %{ %}
5257   interface(REG_INTER);
5258 %}
5259 
5260 operand rRegL()
5261 %{
       // General long register operand, allocated in class long_reg; also
       // matches the rax_RegL and rdx_RegL fixed-register operands.
5262   constraint(ALLOC_IN_RC(long_reg));
5263   match(RegL);
5264   match(rax_RegL);
5265   match(rdx_RegL);
5266 
5267   format %{ %}
5268   interface(REG_INTER);
5269 %}
5270 
5271 // Special Registers: long operand allocated in class long_no_rax_rdx_reg.
5272 operand no_rax_rdx_RegL()
5273 %{
5274   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5275   match(RegL);
5276   match(rRegL);
5277 
5278   format %{ %}
5279   interface(REG_INTER);
5280 %}
5281 
5282 operand no_rax_RegL()
5283 %{
       // Long operand that additionally matches rdx_RegL.
       // NOTE(review): the constraint below is long_no_rax_rdx_reg (the same
       // class no_rax_rdx_RegL uses), which appears to exclude RDX even
       // though rdx_RegL is matched -- confirm whether a class excluding only
       // RAX was intended.
5284   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5285   match(RegL);
5286   match(rRegL);
5287   match(rdx_RegL);
5288 
5289   format %{ %}
5290   interface(REG_INTER);
5291 %}
5292 
5293 operand no_rcx_RegL()
5294 %{
       // Long operand allocated in class long_no_rcx_reg.
5295   constraint(ALLOC_IN_RC(long_no_rcx_reg));
5296   match(RegL);
5297   match(rRegL);
5298 
5299   format %{ %}
5300   interface(REG_INTER);
5301 %}
5302 
5303 operand rax_RegL()
5304 %{
       // Long operand allocated in class long_rax_reg.
5305   constraint(ALLOC_IN_RC(long_rax_reg));
5306   match(RegL);
5307   match(rRegL);
5308 
5309   format %{ "RAX" %}
5310   interface(REG_INTER);
5311 %}
5312 
5313 operand rcx_RegL()
5314 %{
       // Long operand allocated in class long_rcx_reg.
5315   constraint(ALLOC_IN_RC(long_rcx_reg));
5316   match(RegL);
5317   match(rRegL);
5318 
5319   format %{ %}
5320   interface(REG_INTER);
5321 %}
5322 
5323 operand rdx_RegL()
5324 %{
       // Long operand allocated in class long_rdx_reg.
5325   constraint(ALLOC_IN_RC(long_rdx_reg));
5326   match(RegL);
5327   match(rRegL);
5328 
5329   format %{ %}
5330   interface(REG_INTER);
5331 %}
5332 
5333 // Flags register, used as output of compare instructions.  Allocated in
     // class int_flags; printed as "RFLAGS".
5334 operand rFlagsReg()
5335 %{
5336   constraint(ALLOC_IN_RC(int_flags));
5337   match(RegFlags);
5338 
5339   format %{ "RFLAGS" %}
5340   interface(REG_INTER);
5341 %}
5342 
5343 // Flags register, used as output of FLOATING POINT compare instructions.
     // Same register class (int_flags) as rFlagsReg; printed as "RFLAGS_U".
5344 operand rFlagsRegU()
5345 %{
5346   constraint(ALLOC_IN_RC(int_flags));
5347   match(RegFlags);
5348 
5349   format %{ "RFLAGS_U" %}
5350   interface(REG_INTER);
5351 %}
5352 
5353 operand rFlagsRegUCF() %{
       // Flags operand in class int_flags, printed as "RFLAGS_U_CF".
       // The predicate is the constant false.
       // NOTE(review): presumably this keeps the matcher from ever selecting
       // the operand by itself -- confirm how instructions reference it.
5354   constraint(ALLOC_IN_RC(int_flags));
5355   match(RegFlags);
5356   predicate(false);
5357 
5358   format %{ "RFLAGS_U_CF" %}
5359   interface(REG_INTER);
5360 %}
5361 
5362 // Float register operands: RegF values allocated in class float_reg.
5363 operand regF()
5364 %{
5365   constraint(ALLOC_IN_RC(float_reg));
5366   match(RegF);
5367 
5368   format %{ %}
5369   interface(REG_INTER);
5370 %}
5371 
5372 // Double register operands: RegD values allocated in class double_reg.
5373 operand regD() 
5374 %{
5375   constraint(ALLOC_IN_RC(double_reg));
5376   match(RegD);
5377 
5378   format %{ %}
5379   interface(REG_INTER);
5380 %}
5381 
5382 
5383 //----------Memory Operands----------------------------------------------------
5384 // Direct Memory Operand
5385 // operand direct(immP addr)
5386 // %{
5387 //   match(addr);
5388 
5389 //   format %{ "[$addr]" %}
5390 //   interface(MEMORY_INTER) %{
5391 //     base(0xFFFFFFFF);
5392 //     index(0x4);
5393 //     scale(0x0);
5394 //     disp($addr);
5395 //   %}
5396 // %}
5397 
5398 // Indirect Memory Operand: address is the pointer register itself, [$reg].
5399 operand indirect(any_RegP reg)
5400 %{
5401   constraint(ALLOC_IN_RC(ptr_reg));
5402   match(reg);
5403 
5404   format %{ "[$reg]" %}
5405   interface(MEMORY_INTER) %{
5406     base($reg);
         // NOTE(review): 0x4 presumably encodes "no index register" -- confirm.
5407     index(0x4);
5408     scale(0x0);
5409     disp(0x0);
5410   %}
5411 %}
5412 
5413 // Indirect Memory Plus Short Offset Operand: [$reg + $off], where $off is
     // an immL8 constant.
5414 operand indOffset8(any_RegP reg, immL8 off)
5415 %{
5416   constraint(ALLOC_IN_RC(ptr_reg));
5417   match(AddP reg off);
5418 
5419   format %{ "[$reg + $off (8-bit)]" %}
5420   interface(MEMORY_INTER) %{
5421     base($reg);
         // NOTE(review): 0x4 presumably encodes "no index register" -- confirm.
5422     index(0x4);
5423     scale(0x0);
5424     disp($off);
5425   %}
5426 %}
5427 
5428 // Indirect Memory Plus Long Offset Operand: [$reg + $off], where $off is
     // an immL32 constant.  The frame section names this operand as the
     // cisc-spilling operand.
5429 operand indOffset32(any_RegP reg, immL32 off)
5430 %{
5431   constraint(ALLOC_IN_RC(ptr_reg));
5432   match(AddP reg off);
5433 
5434   format %{ "[$reg + $off (32-bit)]" %}
5435   interface(MEMORY_INTER) %{
5436     base($reg);
         // NOTE(review): 0x4 presumably encodes "no index register" -- confirm.
5437     index(0x4);
5438     scale(0x0);
5439     disp($off);
5440   %}
5441 %}
5442 
5443 // Indirect Memory Plus Index Register Plus Offset Operand:
     // [$reg + $lreg + $off] with an unscaled long index and an immL32 offset.
5444 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5445 %{
5446   constraint(ALLOC_IN_RC(ptr_reg));
5447   match(AddP (AddP reg lreg) off);
5448 
5449   op_cost(10);
5450   format %{"[$reg + $off + $lreg]" %}
5451   interface(MEMORY_INTER) %{
5452     base($reg);
5453     index($lreg);
5454     scale(0x0);
5455     disp($off);
5456   %}
5457 %}
5458 
5459 // Indirect Memory Plus Index Register Operand: [$reg + $lreg], no offset.
5460 operand indIndex(any_RegP reg, rRegL lreg)
5461 %{
5462   constraint(ALLOC_IN_RC(ptr_reg));
5463   match(AddP reg lreg);
5464 
5465   op_cost(10);
5466   format %{"[$reg + $lreg]" %}
5467   interface(MEMORY_INTER) %{
5468     base($reg);
5469     index($lreg);
5470     scale(0x0);
5471     disp(0x0);
5472   %}
5473 %}
5474 
5475 // Indirect Memory Times Scale Plus Index Register:
     // [$reg + ($lreg << $scale)], where $scale is an immI2 constant (0..3).
5476 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5477 %{
5478   constraint(ALLOC_IN_RC(ptr_reg));
5479   match(AddP reg (LShiftL lreg scale));
5480 
5481   op_cost(10);
5482   format %{"[$reg + $lreg << $scale]" %}
5483   interface(MEMORY_INTER) %{
5484     base($reg);
5485     index($lreg);
5486     scale($scale);
5487     disp(0x0);
5488   %}
5489 %}
5490 
5491 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [$reg + ($lreg << $scale) + $off]
5492 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5493 %{
5494   constraint(ALLOC_IN_RC(ptr_reg));
5495   match(AddP (AddP reg (LShiftL lreg scale)) off);
5496 
5497   op_cost(10);
5498   format %{"[$reg + $off + $lreg << $scale]" %}
5499   interface(MEMORY_INTER) %{
5500     base($reg);
5501     index($lreg);
5502     scale($scale);  // shift count taken from the matched LShiftL
5503     disp($off);     // immL32 offset
5504   %}
5505 %}
5506 
5507 // Indirect Memory Plus Scaled Positive Int Index Plus Offset Operand: [$reg + (ConvI2L($idx) << $scale) + $off]
5508 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5509 %{
5510   constraint(ALLOC_IN_RC(ptr_reg));
5511   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // long type's lower bound must be non-negative; presumably this node is the ConvI2L of idx -- confirm
5512   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5513 
5514   op_cost(10);
5515   format %{"[$reg + $off + $idx << $scale]" %}
5516   interface(MEMORY_INTER) %{
5517     base($reg);
5518     index($idx);    // the int register is used directly as the index
5519     scale($scale);  // shift count taken from the matched LShiftL
5520     disp($off);     // immL32 offset
5521   %}
5522 %}
5523 
5524 // Indirect Narrow Oop Plus Offset Operand: [R12 + ($reg << 3) + $off]
5525 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5526 // so we can't free r12 even when Universe::narrow_oop_base() == NULL.
5527 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5528   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5529   constraint(ALLOC_IN_RC(ptr_reg));
5530   match(AddP (DecodeN reg) off);
5531 
5532   op_cost(10);
5533   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5534   interface(MEMORY_INTER) %{
5535     base(0xc); // R12
5536     index($reg);  // the narrow oop register is the index
5537     scale(0x3);   // fixed shift of 3, as shown in the format string
5538     disp($off);   // immL32 offset
5539   %}
5540 %}
5541 
5542 // Indirect Memory Operand through a narrow oop register: matches (DecodeN reg) when narrow_oop_shift() == 0
5543 operand indirectNarrow(rRegN reg)
5544 %{
5545   predicate(Universe::narrow_oop_shift() == 0);
5546   constraint(ALLOC_IN_RC(ptr_reg));
5547   match(DecodeN reg);
5548 
5549   format %{ "[$reg]" %}
5550   interface(MEMORY_INTER) %{
5551     base($reg);  // narrow register used directly as the base; presumably valid because no shift is needed -- confirm
5552     index(0x4);  // No Index
5553     scale(0x0);  // No Scale
5554     disp(0x0);   // No Offset
5555   %}
5556 %}
5557 
5558 // Indirect Memory Plus Short Offset Operand, narrow oop base: [$reg + 8-bit displacement] when narrow_oop_shift() == 0
5559 operand indOffset8Narrow(rRegN reg, immL8 off)
5560 %{
5561   predicate(Universe::narrow_oop_shift() == 0);
5562   constraint(ALLOC_IN_RC(ptr_reg));
5563   match(AddP (DecodeN reg) off);
5564 
5565   format %{ "[$reg + $off (8-bit)]" %}
5566   interface(MEMORY_INTER) %{
5567     base($reg);  // narrow register used directly as the base
5568     index(0x4);  // No Index
5569     scale(0x0);  // No Scale
5570     disp($off);  // immL8 offset
5571   %}
5572 %}
5573 
5574 // Indirect Memory Plus Long Offset Operand, narrow oop base: [$reg + 32-bit displacement] when narrow_oop_shift() == 0
5575 operand indOffset32Narrow(rRegN reg, immL32 off)
5576 %{
5577   predicate(Universe::narrow_oop_shift() == 0);
5578   constraint(ALLOC_IN_RC(ptr_reg));
5579   match(AddP (DecodeN reg) off);
5580 
5581   format %{ "[$reg + $off (32-bit)]" %}
5582   interface(MEMORY_INTER) %{
5583     base($reg);  // narrow register used directly as the base
5584     index(0x4);  // No Index
5585     scale(0x0);  // No Scale
5586     disp($off);  // immL32 offset
5587   %}
5588 %}
5589 
5590 // Indirect Memory Plus Index Register Plus Offset Operand, narrow oop base: [$reg + $lreg + $off] when narrow_oop_shift() == 0
5591 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5592 %{
5593   predicate(Universe::narrow_oop_shift() == 0);
5594   constraint(ALLOC_IN_RC(ptr_reg));
5595   match(AddP (AddP (DecodeN reg) lreg) off);
5596 
5597   op_cost(10);
5598   format %{"[$reg + $off + $lreg]" %}
5599   interface(MEMORY_INTER) %{
5600     base($reg);  // narrow register used directly as the base
5601     index($lreg);
5602     scale(0x0);  // No Scale
5603     disp($off);  // immL32 offset
5604   %}
5605 %}
5606 
5607 // Indirect Memory Plus Index Register Operand, narrow oop base: [$reg + $lreg], no offset, when narrow_oop_shift() == 0
5608 operand indIndexNarrow(rRegN reg, rRegL lreg)
5609 %{
5610   predicate(Universe::narrow_oop_shift() == 0);
5611   constraint(ALLOC_IN_RC(ptr_reg));
5612   match(AddP (DecodeN reg) lreg);
5613 
5614   op_cost(10);
5615   format %{"[$reg + $lreg]" %}
5616   interface(MEMORY_INTER) %{
5617     base($reg);  // narrow register used directly as the base
5618     index($lreg);
5619     scale(0x0);  // No Scale
5620     disp(0x0);   // No Offset
5621   %}
5622 %}
5623 
5624 // Indirect Memory Plus Scaled Index Register Operand, narrow oop base: [$reg + ($lreg << $scale)] when narrow_oop_shift() == 0
5625 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5626 %{
5627   predicate(Universe::narrow_oop_shift() == 0);
5628   constraint(ALLOC_IN_RC(ptr_reg));
5629   match(AddP (DecodeN reg) (LShiftL lreg scale));
5630 
5631   op_cost(10);
5632   format %{"[$reg + $lreg << $scale]" %}
5633   interface(MEMORY_INTER) %{
5634     base($reg);     // narrow register used directly as the base
5635     index($lreg);
5636     scale($scale);  // shift count taken from the matched LShiftL
5637     disp(0x0);      // No Offset
5638   %}
5639 %}
5640 
5641 // Indirect Memory Plus Scaled Index Register Plus Offset Operand, narrow oop base: [$reg + ($lreg << $scale) + $off] when narrow_oop_shift() == 0
5642 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5643 %{
5644   predicate(Universe::narrow_oop_shift() == 0);
5645   constraint(ALLOC_IN_RC(ptr_reg));
5646   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5647 
5648   op_cost(10);
5649   format %{"[$reg + $off + $lreg << $scale]" %}
5650   interface(MEMORY_INTER) %{
5651     base($reg);     // narrow register used directly as the base
5652     index($lreg);
5653     scale($scale);  // shift count taken from the matched LShiftL
5654     disp($off);     // immL32 offset
5655   %}
5656 %}
5657 
5658 // Indirect Memory Plus Scaled Positive Int Index Plus Offset Operand, narrow oop base: [$reg + (ConvI2L($idx) << $scale) + $off]
5659 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5660 %{
5661   constraint(ALLOC_IN_RC(ptr_reg));
5662   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // needs shift == 0 and a non-negative lower bound on the long type; presumably that node is the ConvI2L of idx -- confirm
5663   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5664 
5665   op_cost(10);
5666   format %{"[$reg + $off + $idx << $scale]" %}
5667   interface(MEMORY_INTER) %{
5668     base($reg);     // narrow register used directly as the base
5669     index($idx);    // the int register is used directly as the index
5670     scale($scale);  // shift count taken from the matched LShiftL
5671     disp($off);     // immL32 offset
5672   %}
5673 %}
5674 
5675 
5676 //----------Special Memory Operands--------------------------------------------
5677 // Stack Slot Operand - This operand is used for loading and storing temporary
5678 //                      values on the stack where a match requires a value to
5679 //                      flow through memory.  Each variant below addresses [RSP + $reg].
5680 operand stackSlotP(sRegP reg)  // stack slot holding an sRegP value
5681 %{
5682   constraint(ALLOC_IN_RC(stack_slots));
5683   // No match rule because this operand is only generated in matching
5684 
5685   format %{ "[$reg]" %}
5686   interface(MEMORY_INTER) %{
5687     base(0x4);   // RSP
5688     index(0x4);  // No Index
5689     scale(0x0);  // No Scale
5690     disp($reg);  // Stack Offset
5691   %}
5692 %}
5693 
5694 operand stackSlotI(sRegI reg)  // stack slot holding an sRegI value, addressed as [RSP + $reg]
5695 %{
5696   constraint(ALLOC_IN_RC(stack_slots));
5697   // No match rule because this operand is only generated in matching
5698 
5699   format %{ "[$reg]" %}
5700   interface(MEMORY_INTER) %{
5701     base(0x4);   // RSP
5702     index(0x4);  // No Index
5703     scale(0x0);  // No Scale
5704     disp($reg);  // Stack Offset
5705   %}
5706 %}
5707 
5708 operand stackSlotF(sRegF reg)  // stack slot holding an sRegF value, addressed as [RSP + $reg]
5709 %{
5710   constraint(ALLOC_IN_RC(stack_slots));
5711   // No match rule because this operand is only generated in matching
5712 
5713   format %{ "[$reg]" %}
5714   interface(MEMORY_INTER) %{
5715     base(0x4);   // RSP
5716     index(0x4);  // No Index
5717     scale(0x0);  // No Scale
5718     disp($reg);  // Stack Offset
5719   %}
5720 %}
5721 
5722 operand stackSlotD(sRegD reg)  // stack slot holding an sRegD value, addressed as [RSP + $reg]
5723 %{
5724   constraint(ALLOC_IN_RC(stack_slots));
5725   // No match rule because this operand is only generated in matching
5726 
5727   format %{ "[$reg]" %}
5728   interface(MEMORY_INTER) %{
5729     base(0x4);   // RSP
5730     index(0x4);  // No Index
5731     scale(0x0);  // No Scale
5732     disp($reg);  // Stack Offset
5733   %}
5734 %}
5735 operand stackSlotL(sRegL reg)  // stack slot holding an sRegL value, addressed as [RSP + $reg]
5736 %{
5737   constraint(ALLOC_IN_RC(stack_slots));
5738   // No match rule because this operand is only generated in matching
5739 
5740   format %{ "[$reg]" %}
5741   interface(MEMORY_INTER) %{
5742     base(0x4);   // RSP
5743     index(0x4);  // No Index
5744     scale(0x0);  // No Scale
5745     disp($reg);  // Stack Offset
5746   %}
5747 %}
5748 
5749 //----------Conditional Branch Operands----------------------------------------
5750 // Comparison Op  - This is the operation of the comparison, and is limited to
5751 //                  the following set of codes:
5752 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5753 //
5754 // Other attributes of the comparison, such as unsignedness, are specified
5755 // by the comparison instruction that sets a condition code flags register.
5756 // That result is represented by a flags operand whose subtype is appropriate
5757 // to the unsignedness (etc.) of the comparison.
5758 //
5759 // Later, the instruction which matches both the Comparison Op (a Bool) and
5760 // the flags (produced by the Cmp) specifies the coding of the comparison op
5761 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5762 
5763 // Comparison Code: maps each Bool test to a (condition code, mnemonic suffix) pair using the l/ge/le/g forms
5764 operand cmpOp()
5765 %{
5766   match(Bool);
5767 
5768   format %{ "" %}
5769   interface(COND_INTER) %{
5770     equal(0x4, "e");
5771     not_equal(0x5, "ne");
5772     less(0xC, "l");
5773     greater_equal(0xD, "ge");
5774     less_equal(0xE, "le");
5775     greater(0xF, "g");
5776   %}
5777 %}
5778 
5779 // Comparison Code, unsigned compare.  Used by FP also, with
5780 // C2 (unordered) turned into GT or LT already.  The other bits
5781 // C0 and C3 are turned into Carry & Zero flags.
5782 operand cmpOpU()
5783 %{
5784   match(Bool);
5785 
5786   format %{ "" %}
5787   interface(COND_INTER) %{
5788     equal(0x4, "e");
5789     not_equal(0x5, "ne");
5790     less(0x2, "b");            // below
5791     greater_equal(0x3, "nb");  // not below
5792     less_equal(0x6, "be");     // below or equal
5793     greater(0x7, "nbe");       // not below or equal
5794   %}
5795 %}
5796 
5797 
5798 // Floating comparisons that don't require any fixup for the unordered case (the predicate admits only lt/ge/le/gt tests)
5799 operand cmpOpUCF() %{
5800   match(Bool);
5801   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5802             n->as_Bool()->_test._test == BoolTest::ge ||
5803             n->as_Bool()->_test._test == BoolTest::le ||
5804             n->as_Bool()->_test._test == BoolTest::gt);
5805   format %{ "" %}
5806   interface(COND_INTER) %{
5807     equal(0x4, "e");
5808     not_equal(0x5, "ne");
5809     less(0x2, "b");            // same codes as cmpOpU
5810     greater_equal(0x3, "nb");
5811     less_equal(0x6, "be");
5812     greater(0x7, "nbe");
5813   %}
5814 %}
5815 
5816 
5817 // Floating comparisons that can be fixed up with extra conditional jumps (the predicate admits only ne/eq tests)
5818 operand cmpOpUCF2() %{
5819   match(Bool);
5820   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5821             n->as_Bool()->_test._test == BoolTest::eq);
5822   format %{ "" %}
5823   interface(COND_INTER) %{
5824     equal(0x4, "e");
5825     not_equal(0x5, "ne");
5826     less(0x2, "b");            // same codes as cmpOpU
5827     greater_equal(0x3, "nb");
5828     less_equal(0x6, "be");
5829     greater(0x7, "nbe");
5830   %}
5831 %}
5832 
5833 
5834 //----------OPERAND CLASSES----------------------------------------------------
5835 // Operand Classes are groups of operands that are used to simplify
5836 // instruction definitions by not requiring the AD writer to specify separate
5837 // instructions for every form of operand when the instruction accepts
5838 // multiple operand types with the same basic encoding and format.  The classic
5839 // case of this is memory operands.
5840 
5841 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5842                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5843                indCompressedOopOffset,
5844                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5845                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5846                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5847 
5848 //----------PIPELINE-----------------------------------------------------------
5849 // Rules which define the behavior of the target architecture's pipeline.
5850 pipeline %{
5851 
5852 //----------ATTRIBUTES---------------------------------------------------------
5853 attributes %{
5854   variable_size_instructions;        // Variable size instructions
5855   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5856   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5857   instruction_fetch_unit_size = 16;  // The processor fetches one line
5858   instruction_fetch_units = 1;       // of 16 bytes
5859 
5860   // List of nop instructions
5861   nops( MachNop );
5862 %}
5863 
5864 //----------RESOURCES----------------------------------------------------------
5865 // Resources are the functional units available to the machine
5866 
5867 // Generic P2/P3 pipeline
5868 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5869 // 3 instructions decoded per cycle.
5870 // 2 load/store ops per cycle (NOTE(review): three MSn units are declared below -- confirm), 1 branch, 1 FPU,
5871 // 3 ALU op, only ALU0 handles mul instructions.
5872 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5873            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5874            BR, FPU,
5875            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5876 
5877 //----------PIPELINE DESCRIPTION-----------------------------------------------
5878 // Pipeline Description specifies the stages in the machine's pipeline
5879 
5880 // Generic P2/P3 pipeline
5881 pipe_desc(S0, S1, S2, S3, S4, S5);
5882 
5883 //----------PIPELINE CLASSES---------------------------------------------------
5884 // Pipeline Classes describe the stages in which input and output are
5885 // referenced by the hardware pipeline.
5886 
5887 // Naming convention: ialu or fpu
5888 // Then: _reg
5889 // Then: _reg if there is a 2nd register
5890 // Then: _long if it's a pair of instructions implementing a long
5891 // Then: _fat if it requires the big decoder
5892 //   Or: _mem if it requires the big decoder and a memory unit.
5893 
5894 // Integer ALU reg operation
5895 pipe_class ialu_reg(rRegI dst)
5896 %{
5897     single_instruction;
5898     dst    : S4(write);
5899     dst    : S3(read);
5900     DECODE : S0;        // any decoder
5901     ALU    : S3;        // any alu
5902 %}
5903 
5904 // Long ALU reg operation
5905 pipe_class ialu_reg_long(rRegL dst)
5906 %{
5907     instruction_count(2);
5908     dst    : S4(write);
5909     dst    : S3(read);
5910     DECODE : S0(2);     // any 2 decoders
5911     ALU    : S3(2);     // any 2 alus
5912 %}
5913 
5914 // Integer ALU reg operation using big decoder
5915 pipe_class ialu_reg_fat(rRegI dst)
5916 %{
5917     single_instruction;
5918     dst    : S4(write);
5919     dst    : S3(read);
5920     D0     : S0;        // big decoder only
5921     ALU    : S3;        // any alu
5922 %}
5923 
5924 // Long ALU reg operation using big decoder
5925 pipe_class ialu_reg_long_fat(rRegL dst)
5926 %{
5927     instruction_count(2);
5928     dst    : S4(write);
5929     dst    : S3(read);
5930     D0     : S0(2);     // big decoder only; twice
5931     ALU    : S3(2);     // any 2 alus
5932 %}
5933 
5934 // Integer ALU reg-reg operation
5935 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5936 %{
5937     single_instruction;
5938     dst    : S4(write);
5939     src    : S3(read);
5940     DECODE : S0;        // any decoder
5941     ALU    : S3;        // any alu
5942 %}
5943 
5944 // Long ALU reg-reg operation
5945 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5946 %{
5947     instruction_count(2);
5948     dst    : S4(write);
5949     src    : S3(read);
5950     DECODE : S0(2);     // any 2 decoders
5951     ALU    : S3(2);     // any 2 alus
5952 %}
5953 
5954 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as `memory`, not a register -- confirm)
5955 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5956 %{
5957     single_instruction;
5958     dst    : S4(write);
5959     src    : S3(read);
5960     D0     : S0;        // big decoder only
5961     ALU    : S3;        // any alu
5962 %}
5963 
5964 // Long ALU reg-reg operation using big decoder
5965 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5966 %{
5967     instruction_count(2);
5968     dst    : S4(write);
5969     src    : S3(read);
5970     D0     : S0(2);     // big decoder only; twice
5971     ALU    : S3(2);     // any 2 alus
5972 %}
5973 
5974 // Integer ALU reg-mem operation
5975 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5976 %{
5977     single_instruction;
5978     dst    : S5(write);
5979     mem    : S3(read);
5980     D0     : S0;        // big decoder only
5981     ALU    : S4;        // any alu
5982     MEM    : S3;        // any mem
5983 %}
5984 
5985 // Integer mem operation (prefetch)
5986 pipe_class ialu_mem(memory mem)
5987 %{
5988     single_instruction;
5989     mem    : S3(read);
5990     D0     : S0;        // big decoder only
5991     MEM    : S3;        // any mem
5992 %}
5993 
5994 // Integer Store to Memory
5995 pipe_class ialu_mem_reg(memory mem, rRegI src)
5996 %{
5997     single_instruction;
5998     mem    : S3(read);
5999     src    : S5(read);
6000     D0     : S0;        // big decoder only
6001     ALU    : S4;        // any alu
6002     MEM    : S3;        // any mem
6003 %}
6004 
6005 // // Long Store to Memory
6006 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6007 // %{
6008 //     instruction_count(2);
6009 //     mem    : S3(read);
6010 //     src    : S5(read);
6011 //     D0     : S0(2);          // big decoder only; twice
6012 //     ALU    : S4(2);     // any 2 alus
6013 //     MEM    : S3(2);  // Both mems
6014 // %}
6015 
6016 // Integer Store Immediate to Memory (no register source operand)
6017 pipe_class ialu_mem_imm(memory mem)
6018 %{
6019     single_instruction;
6020     mem    : S3(read);
6021     D0     : S0;        // big decoder only
6022     ALU    : S4;        // any alu
6023     MEM    : S3;        // any mem
6024 %}
6025 
6026 // Integer ALU0 reg-reg operation
6027 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6028 %{
6029     single_instruction;
6030     dst    : S4(write);
6031     src    : S3(read);
6032     D0     : S0;        // Big decoder only
6033     ALU0   : S3;        // only alu0
6034 %}
6035 
6036 // Integer ALU0 reg-mem operation
6037 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6038 %{
6039     single_instruction;
6040     dst    : S5(write);
6041     mem    : S3(read);
6042     D0     : S0;        // big decoder only
6043     ALU0   : S4;        // ALU0 only
6044     MEM    : S3;        // any mem
6045 %}
6046 
6047 // Integer ALU reg-reg operation that writes the flags register
6048 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6049 %{
6050     single_instruction;
6051     cr     : S4(write);
6052     src1   : S3(read);
6053     src2   : S3(read);
6054     DECODE : S0;        // any decoder
6055     ALU    : S3;        // any alu
6056 %}
6057 
6058 // Integer ALU reg-imm operation that writes the flags register
6059 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6060 %{
6061     single_instruction;
6062     cr     : S4(write);
6063     src1   : S3(read);
6064     DECODE : S0;        // any decoder
6065     ALU    : S3;        // any alu
6066 %}
6067 
6068 // Integer ALU reg-mem operation that writes the flags register
6069 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6070 %{
6071     single_instruction;
6072     cr     : S4(write);
6073     src1   : S3(read);
6074     src2   : S3(read);
6075     D0     : S0;        // big decoder only
6076     ALU    : S4;        // any alu
6077     MEM    : S3;        // any mem
6078 %}
6079 
6080 // Four-instruction reg-reg-reg sequence; presumably the compare-less-than idiom, going by the name -- confirm
6081 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6082 %{
6083     instruction_count(4);
6084     y      : S4(read);
6085     q      : S3(read);
6086     p      : S3(read);
6087     DECODE : S0(4);     // any decoder; 4 decode slots
6088 %}
6089 
6090 // Conditional move reg-reg
6091 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6092 %{
6093     single_instruction;
6094     dst    : S4(write);
6095     src    : S3(read);
6096     cr     : S3(read);
6097     DECODE : S0;        // any decoder
6098 %}
6099 
6100 // Conditional move reg-mem
6101 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6102 %{
6103     single_instruction;
6104     dst    : S4(write);
6105     src    : S3(read);
6106     cr     : S3(read);
6107     DECODE : S0;        // any decoder
6108     MEM    : S3;        // any mem
6109 %}
6110 
6111 // Conditional move reg-reg long
6112 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6113 %{
6114     single_instruction;
6115     dst    : S4(write);
6116     src    : S3(read);
6117     cr     : S3(read);
6118     DECODE : S0(2);     // any 2 decoders (NOTE(review): class is declared single_instruction -- confirm)
6119 %}
6120 
6121 // XXX
6122 // // Conditional move double reg-reg
6123 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
6124 // %{
6125 //     single_instruction;
6126 //     dst    : S4(write);
6127 //     src    : S3(read);
6128 //     cr     : S3(read);
6129 //     DECODE : S0;     // any decoder
6130 // %}
6131 
6132 // Float reg operation (single register operand, read only)
6133 pipe_class fpu_reg(regD dst)
6134 %{
6135     instruction_count(2);
6136     dst    : S3(read);
6137     DECODE : S0(2);     // any 2 decoders
6138     FPU    : S3;
6139 %}
6140 
6141 // Float reg-reg operation
6142 pipe_class fpu_reg_reg(regD dst, regD src)
6143 %{
6144     instruction_count(2);
6145     dst    : S4(write);
6146     src    : S3(read);
6147     DECODE : S0(2);     // any 2 decoders
6148     FPU    : S3;
6149 %}
6150 
6151 // Float reg-reg-reg operation
6152 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6153 %{
6154     instruction_count(3);
6155     dst    : S4(write);
6156     src1   : S3(read);
6157     src2   : S3(read);
6158     DECODE : S0(3);     // any 3 decoders
6159     FPU    : S3(2);
6160 %}
6161 
6162 // Float reg-reg-reg-reg operation
6163 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6164 %{
6165     instruction_count(4);
6166     dst    : S4(write);
6167     src1   : S3(read);
6168     src2   : S3(read);
6169     src3   : S3(read);
6170     DECODE : S0(4);     // any decoder; 4 decode slots
6171     FPU    : S3(2);
6172 %}
6173 
6174 // Float reg-mem-reg-reg operation
6175 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6176 %{
6177     instruction_count(4);
6178     dst    : S4(write);
6179     src1   : S3(read);
6180     src2   : S3(read);
6181     src3   : S3(read);
6182     DECODE : S1(3);     // any 3 decoders
6183     D0     : S0;        // Big decoder only
6184     FPU    : S3(2);
6185     MEM    : S3;        // any mem
6186 %}
6187 
6188 // Float reg-mem operation
6189 pipe_class fpu_reg_mem(regD dst, memory mem)
6190 %{
6191     instruction_count(2);
6192     dst    : S5(write);
6193     mem    : S3(read);
6194     D0     : S0;        // big decoder only
6195     DECODE : S1;        // any decoder for FPU POP
6196     FPU    : S4;
6197     MEM    : S3;        // any mem
6198 %}
6199 
6200 // Float reg-reg-mem operation
6201 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6202 %{
6203     instruction_count(3);
6204     dst    : S5(write);
6205     src1   : S3(read);
6206     mem    : S3(read);
6207     D0     : S0;        // big decoder only
6208     DECODE : S1(2);     // any decoder for FPU POP
6209     FPU    : S4;
6210     MEM    : S3;        // any mem
6211 %}
6212 
6213 // Float mem-reg operation
6214 pipe_class fpu_mem_reg(memory mem, regD src)
6215 %{
6216     instruction_count(2);
6217     src    : S5(read);
6218     mem    : S3(read);
6219     DECODE : S0;        // any decoder for FPU PUSH
6220     D0     : S1;        // big decoder only
6221     FPU    : S4;
6222     MEM    : S3;        // any mem
6223 %}
6224 
6225 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation
6226 %{
6227     instruction_count(3);
6228     src1   : S3(read);
6229     src2   : S3(read);
6230     mem    : S3(read);
6231     DECODE : S0(2);     // any decoder for FPU PUSH
6232     D0     : S1;        // big decoder only
6233     FPU    : S4;
6234     MEM    : S3;        // any mem
6235 %}
6236 
6237 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation
6238 %{
6239     instruction_count(3);
6240     src1   : S3(read);
6241     src2   : S3(read);
6242     mem    : S4(read);
6243     DECODE : S0;        // any decoder for FPU PUSH
6244     D0     : S0(2);     // big decoder only
6245     FPU    : S4;
6246     MEM    : S3(2);     // any mem
6247 %}
6248 
6249 pipe_class fpu_mem_mem(memory dst, memory src1)  // Float mem-mem operation; no FPU resource is listed
6250 %{
6251     instruction_count(2);
6252     src1   : S3(read);
6253     dst    : S4(read);
6254     D0     : S0(2);     // big decoder only
6255     MEM    : S3(2);     // any mem
6256 %}
6257 
6258 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float mem-mem-mem operation
6259 %{
6260     instruction_count(3);
6261     src1   : S3(read);
6262     src2   : S3(read);
6263     dst    : S4(read);
6264     D0     : S0(3);     // big decoder only
6265     FPU    : S4;
6266     MEM    : S3(3);     // any mem
6267 %}
6268 
6269 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float mem-reg operation; presumably with a constant operand, going by the name -- confirm
6270 %{
6271     instruction_count(3);
6272     src1   : S4(read);
6273     mem    : S4(read);
6274     DECODE : S0;        // any decoder for FPU PUSH
6275     D0     : S0(2);     // big decoder only
6276     FPU    : S4;
6277     MEM    : S3(2);     // any mem
6278 %}
6279 
6280 // Float load constant
6281 pipe_class fpu_reg_con(regD dst)
6282 %{
6283     instruction_count(2);
6284     dst    : S5(write);
6285     D0     : S0;        // big decoder only for the load
6286     DECODE : S1;        // any decoder for FPU POP
6287     FPU    : S4;
6288     MEM    : S3;        // any mem
6289 %}
6290 
6291 // Float load constant combined with a register source
6292 pipe_class fpu_reg_reg_con(regD dst, regD src)
6293 %{
6294     instruction_count(3);
6295     dst    : S5(write);
6296     src    : S3(read);
6297     D0     : S0;        // big decoder only for the load
6298     DECODE : S1(2);     // any decoder for FPU POP
6299     FPU    : S4;
6300     MEM    : S3;        // any mem
6301 %}
6302 
6303 // UnConditional branch
6304 pipe_class pipe_jmp(label labl)
6305 %{
6306     single_instruction;
6307     BR   : S3;
6308 %}
6309 
6310 // Conditional branch
6311 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6312 %{
6313     single_instruction;
6314     cr    : S1(read);
6315     BR    : S3;
6316 %}
6317 
6318 // Allocation idiom (serialized, fixed latency of 6)
6319 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6320 %{
6321     instruction_count(1); force_serialization;
6322     fixed_latency(6);
6323     heap_ptr : S3(read);
6324     DECODE   : S0(3);
6325     D0       : S2;
6326     MEM      : S3;
6327     ALU      : S3(2);
6328     dst      : S5(write);
6329     BR       : S5;
6330 %}
6331 
6332 // Generic big/slow expanded idiom (10 instructions, multiple bundles, fixed latency of 100)
6333 pipe_class pipe_slow()
6334 %{
6335     instruction_count(10); multiple_bundles; force_serialization;
6336     fixed_latency(100);
6337     D0  : S0(2);
6338     MEM : S3(2);
6339 %}
6340 
6341 // The real do-nothing guy
6342 pipe_class empty()
6343 %{
6344     instruction_count(0);
6345 %}
6346 
6347 // Define the class for the Nop node
6348 define
6349 %{
6350    MachNop = empty;
6351 %}
6352 
6353 %}
6354 
6355 //----------INSTRUCTIONS-------------------------------------------------------
6356 //
6357 // match      -- States which machine-independent subtree may be replaced
6358 //               by this instruction.
6359 // ins_cost   -- The estimated cost of this instruction is used by instruction
6360 //               selection to identify a minimum cost tree of machine
6361 //               instructions that matches a tree of machine-independent
6362 //               instructions.
6363 // format     -- A string providing the disassembly for this instruction.
6364 //               The value of an instruction's operand may be inserted
6365 //               by referring to it with a '$' prefix.
6366 // opcode     -- Three instruction opcodes may be provided.  These are referred
6367 //               to within an encode class as $primary, $secondary, and $tertiary
6368 //               respectively.  The primary opcode is commonly used to
6369 //               indicate the type of machine instruction, while secondary
6370 //               and tertiary are often used for prefix options or addressing
6371 //               modes.
6372 // ins_encode -- A list of encode classes with parameters. The encode class
6373 //               name must have been defined in an 'enc_class' specification
6374 //               in the encode section of the architecture description.
6375 
6376 
6377 //----------Load/Store/Move Instructions---------------------------------------
6378 //----------Load Instructions--------------------------------------------------
6379 
6380 // Load Byte (8 bit signed) into an int register
6381 instruct loadB(rRegI dst, memory mem)
6382 %{
6383   match(Set dst (LoadB mem));
6384 
6385   ins_cost(125);
6386   format %{ "movsbl  $dst, $mem\t# byte" %}
6387 
6388   ins_encode %{
6389     __ movsbl($dst$$Register, $mem$$Address);  // sign-extending byte load
6390   %}
6391 
6392   ins_pipe(ialu_reg_mem);
6393 %}
6394 
6395 // Load Byte (8 bit signed) into Long Register; folds the ConvI2L into the load
6396 instruct loadB2L(rRegL dst, memory mem)
6397 %{
6398   match(Set dst (ConvI2L (LoadB mem)));
6399 
6400   ins_cost(125);
6401   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6402 
6403   ins_encode %{
6404     __ movsbq($dst$$Register, $mem$$Address);  // sign-extending byte load into a long register
6405   %}
6406 
6407   ins_pipe(ialu_reg_mem);
6408 %}
6409 
6410 // Load Unsigned Byte (8 bit UNsigned) into an int register
6411 instruct loadUB(rRegI dst, memory mem)
6412 %{
6413   match(Set dst (LoadUB mem));
6414 
6415   ins_cost(125);
6416   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6417 
6418   ins_encode %{
6419     __ movzbl($dst$$Register, $mem$$Address);  // zero-extending byte load
6420   %}
6421 
6422   ins_pipe(ialu_reg_mem);
6423 %}
6424 
6425 // Load Unsigned Byte (8 bit UNsigned) into Long Register; folds the ConvI2L into the load
6426 instruct loadUB2L(rRegL dst, memory mem)
6427 %{
6428   match(Set dst (ConvI2L (LoadUB mem)));
6429 
6430   ins_cost(125);
6431   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6432 
6433   ins_encode %{
6434     __ movzbq($dst$$Register, $mem$$Address);  // zero-extending byte load into a long register
6435   %}
6436 
6437   ins_pipe(ialu_reg_mem);
6438 %}
6439 
6440 // Load Short (16 bit signed) into an int register
6441 instruct loadS(rRegI dst, memory mem)
6442 %{
6443   match(Set dst (LoadS mem));
6444 
6445   ins_cost(125);
6446   format %{ "movswl $dst, $mem\t# short" %}
6447 
6448   ins_encode %{
6449     __ movswl($dst$$Register, $mem$$Address);  // sign-extending 16-bit load
6450   %}
6451 
6452   ins_pipe(ialu_reg_mem);
6453 %}
6454 
6455 // Load Short (16 bit signed) into Long Register; folds the ConvI2L into the load
6456 instruct loadS2L(rRegL dst, memory mem)
6457 %{
6458   match(Set dst (ConvI2L (LoadS mem)));
6459 
6460   ins_cost(125);
6461   format %{ "movswq $dst, $mem\t# short -> long" %}
6462 
6463   ins_encode %{
6464     __ movswq($dst$$Register, $mem$$Address);  // sign-extending 16-bit load into a long register
6465   %}
6466 
6467   ins_pipe(ialu_reg_mem);
6468 %}
6469 
6470 // Load Unsigned Short/Char (16 bit UNsigned) into an int register
6471 instruct loadUS(rRegI dst, memory mem)
6472 %{
6473   match(Set dst (LoadUS mem));
6474 
6475   ins_cost(125);
6476   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6477 
6478   ins_encode %{
6479     __ movzwl($dst$$Register, $mem$$Address);  // zero-extending 16-bit load
6480   %}
6481 
6482   ins_pipe(ialu_reg_mem);
6483 %}
6484 
6485 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register; folds the ConvI2L into the load
6486 instruct loadUS2L(rRegL dst, memory mem)
6487 %{
6488   match(Set dst (ConvI2L (LoadUS mem)));
6489 
6490   ins_cost(125);
6491   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6492 
6493   ins_encode %{
6494     __ movzwq($dst$$Register, $mem$$Address);  // zero-extending 16-bit load into a long register
6495   %}
6496 
6497   ins_pipe(ialu_reg_mem);
6498 %}
6499 
6500 // Load Integer (32 bit) into an int register
6501 instruct loadI(rRegI dst, memory mem)
6502 %{
6503   match(Set dst (LoadI mem));
6504 
6505   ins_cost(125);
6506   format %{ "movl    $dst, $mem\t# int" %}
6507 
6508   ins_encode %{
6509     __ movl($dst$$Register, $mem$$Address);  // plain 32-bit load
6510   %}
6511 
6512   ins_pipe(ialu_reg_mem);
6513 %}
6514 
6515 // Load Integer into Long Register; folds the ConvI2L into the load
6516 instruct loadI2L(rRegL dst, memory mem)
6517 %{
6518   match(Set dst (ConvI2L (LoadI mem)));
6519 
6520   ins_cost(125);
6521   format %{ "movslq  $dst, $mem\t# int -> long" %}
6522 
6523   ins_encode %{
6524     __ movslq($dst$$Register, $mem$$Address);  // sign-extending 32-bit load into a long register
6525   %}
6526 
6527   ins_pipe(ialu_reg_mem);
6528 %}
6529 
6530 // Load Unsigned Integer into Long Register
     // Matches the LoadUI2L ideal node. The destination is a long register
     // but the emitted instruction is a 32-bit movl; on x86-64 a 32-bit
     // register write clears the upper 32 bits, giving the zero extension.
6531 instruct loadUI2L(rRegL dst, memory mem)
6532 %{
6533   match(Set dst (LoadUI2L mem));
6534 
6535   ins_cost(125);
6536   format %{ "movl    $dst, $mem\t# uint -> long" %}
6537 
6538   ins_encode %{
6539     __ movl($dst$$Register, $mem$$Address);
6540   %}
6541 
6542   ins_pipe(ialu_reg_mem);
6543 %}
6544 
6545 // Load Long
     // Matches a plain LoadL and emits a 64-bit movq from memory into a long
     // register.
6546 instruct loadL(rRegL dst, memory mem)
6547 %{
6548   match(Set dst (LoadL mem));
6549 
6550   ins_cost(125);
6551   format %{ "movq    $dst, $mem\t# long" %}
6552 
6553   ins_encode %{
6554     __ movq($dst$$Register, $mem$$Address);
6555   %}
6556 
6557   ins_pipe(ialu_reg_mem); // XXX
6558 %}
6559 
6560 // Load Range
     // Matches the LoadRange ideal node and loads it as a 32-bit value into
     // an int register (format: movl).
6561 instruct loadRange(rRegI dst, memory mem)
6562 %{
6563   match(Set dst (LoadRange mem));
6564 
6565   ins_cost(125); // XXX
6566   format %{ "movl    $dst, $mem\t# range" %}
       // Byte-level encoding: primary opcode 0x8B (MOV r, r/m). The enc_classes
       // are listed in emission order; their definitions are outside this
       // section (presumably REX prefix, opcode byte, then ModRM/SIB/disp).
6567   opcode(0x8B);
6568   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6569   ins_pipe(ialu_reg_mem);
6570 %}
6571 
6572 // Load Pointer
     // Matches a plain LoadP and loads a full-width pointer (format: movq)
     // into a pointer register.
6573 instruct loadP(rRegP dst, memory mem)
6574 %{
6575   match(Set dst (LoadP mem));
6576 
6577   ins_cost(125); // XXX
6578   format %{ "movq    $dst, $mem\t# ptr" %}
       // Same primary opcode 0x8B as the 32-bit loads; the "_wide" REX helper is
       // what distinguishes this encoding (presumably by setting REX.W).
6579   opcode(0x8B);
6580   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6581   ins_pipe(ialu_reg_mem); // XXX
6582 %}
6583 
6584 // Load Compressed Pointer
     // Matches a LoadN and loads the compressed pointer as a 32-bit value
     // (movl) into a narrow-oop register; no decoding is done here.
6585 instruct loadN(rRegN dst, memory mem)
6586 %{
6587    match(Set dst (LoadN mem));
6588 
6589    ins_cost(125); // XXX
6590    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6591    ins_encode %{
6592      __ movl($dst$$Register, $mem$$Address);
6593    %}
6594    ins_pipe(ialu_reg_mem); // XXX
6595 %}
6596 
6597 
6598 // Load Klass Pointer
     // Matches a LoadKlass and loads a full-width klass pointer (format:
     // movq) into a pointer register.
6599 instruct loadKlass(rRegP dst, memory mem)
6600 %{
6601   match(Set dst (LoadKlass mem));
6602 
6603   ins_cost(125); // XXX
6604   format %{ "movq    $dst, $mem\t# class" %}
       // Primary opcode 0x8B (MOV r, r/m) with the "_wide" REX helper, the same
       // enc_class sequence used by loadP.
6605   opcode(0x8B);
6606   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6607   ins_pipe(ialu_reg_mem); // XXX
6608 %}
6609 
6610 // Load narrow Klass Pointer
     // Matches a LoadNKlass and loads the compressed klass pointer as a
     // 32-bit value (movl) into a narrow-oop register.
6611 instruct loadNKlass(rRegN dst, memory mem)
6612 %{
6613   match(Set dst (LoadNKlass mem));
6614 
6615   ins_cost(125); // XXX
6616   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6617   ins_encode %{
6618     __ movl($dst$$Register, $mem$$Address);
6619   %}
6620   ins_pipe(ialu_reg_mem); // XXX
6621 %}
6622 
6623 // Load Float
     // Matches a LoadF and loads a single-precision value from memory into
     // an XMM float register (format: movss).
6624 instruct loadF(regF dst, memory mem)
6625 %{
6626   match(Set dst (LoadF mem));
6627 
6628   ins_cost(145); // XXX
6629   format %{ "movss   $dst, $mem\t# float" %}
       // Three opcode bytes F3 0F 10. OpcP (the F3 prefix byte) is emitted
       // before the REX helper, then OpcS/OpcT supply 0F 10.
6630   opcode(0xF3, 0x0F, 0x10);
6631   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6632   ins_pipe(pipe_slow); // XXX
6633 %}
6634 
6635 // Load Double
     // Variant selected when UseXmmLoadAndClearUpper is off: loads the double
     // with movlpd (opcode bytes 66 0F 12). The complementary predicate is on
     // loadD, so exactly one of the two rules is eligible for a LoadD.
6636 instruct loadD_partial(regD dst, memory mem)
6637 %{
6638   predicate(!UseXmmLoadAndClearUpper);
6639   match(Set dst (LoadD mem));
6640 
6641   ins_cost(145); // XXX
6642   format %{ "movlpd  $dst, $mem\t# double" %}
6643   opcode(0x66, 0x0F, 0x12);
6644   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6645   ins_pipe(pipe_slow); // XXX
6646 %}
6647 
6648 instruct loadD(regD dst, memory mem)
6649 %{
       // Load Double, variant selected when UseXmmLoadAndClearUpper is on:
       // uses movsd (opcode bytes F2 0F 10). loadD_partial carries the
       // opposite predicate.
6650   predicate(UseXmmLoadAndClearUpper);
6651   match(Set dst (LoadD mem));
6652 
6653   ins_cost(145); // XXX
6654   format %{ "movsd   $dst, $mem\t# double" %}
6655   opcode(0xF2, 0x0F, 0x10);
6656   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6657   ins_pipe(pipe_slow); // XXX
6658 %}
6659 
6660 // Load Aligned Packed Byte to XMM register
     // Matches a Load8B vector load; the 8 packed bytes are moved with one
     // MOVQ through the shared movq_ld enc_class (defined elsewhere).
6661 instruct loadA8B(regD dst, memory mem) %{
6662   match(Set dst (Load8B mem));
6663   ins_cost(125);
6664   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6665   ins_encode( movq_ld(dst, mem));
6666   ins_pipe( pipe_slow );
6667 %}
6668 
6669 // Load Aligned Packed Short to XMM register
     // Matches a Load4S vector load; uses the same movq_ld encoding as the
     // other 64-bit packed loads.
6670 instruct loadA4S(regD dst, memory mem) %{
6671   match(Set dst (Load4S mem));
6672   ins_cost(125);
6673   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6674   ins_encode( movq_ld(dst, mem));
6675   ins_pipe( pipe_slow );
6676 %}
6677 
6678 // Load Aligned Packed Char to XMM register
     // Matches a Load4C vector load; uses the same movq_ld encoding as the
     // other 64-bit packed loads.
6679 instruct loadA4C(regD dst, memory mem) %{
6680   match(Set dst (Load4C mem));
6681   ins_cost(125);
6682   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6683   ins_encode( movq_ld(dst, mem));
6684   ins_pipe( pipe_slow );
6685 %}
6686 
6687 // Load Aligned Packed Integer to XMM register
     // Matches a Load2I vector load; uses the same movq_ld encoding as the
     // other 64-bit packed loads. (The rule name does not follow the
     // loadA<n><type> pattern of its neighbours.)
6688 instruct load2IU(regD dst, memory mem) %{
6689   match(Set dst (Load2I mem));
6690   ins_cost(125);
6691   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6692   ins_encode( movq_ld(dst, mem));
6693   ins_pipe( pipe_slow );
6694 %}
6695 
6696 // Load Aligned Packed Single to XMM
     // Matches a Load2F vector load; same movq_ld encoding as the integer
     // packed loads but with cost 145 rather than 125.
6697 instruct loadA2F(regD dst, memory mem) %{
6698   match(Set dst (Load2F mem));
6699   ins_cost(145);
6700   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6701   ins_encode( movq_ld(dst, mem));
6702   ins_pipe( pipe_slow );
6703 %}
6704 
6705 // Load Effective Address
     // The match rule has no Load node: it matches the indOffset8 address
     // operand itself and materializes the computed address in a pointer
     // register with leaq (opcode 0x8D).
6706 instruct leaP8(rRegP dst, indOffset8 mem)
6707 %{
6708   match(Set dst mem);
6709 
6710   ins_cost(110); // XXX
6711   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6712   opcode(0x8D);
6713   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6714   ins_pipe(ialu_reg_reg_fat);
6715 %}
6716 
6717 instruct leaP32(rRegP dst, indOffset32 mem)
6718 %{
       // Load effective address for an indOffset32 address operand: same leaq
       // encoding as leaP8, differing only in the operand class matched.
6719   match(Set dst mem);
6720 
6721   ins_cost(110);
6722   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6723   opcode(0x8D);
6724   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6725   ins_pipe(ialu_reg_reg_fat);
6726 %}
6727 
6728 // instruct leaPIdx(rRegP dst, indIndex mem)
6729 // %{
6730 //   match(Set dst mem);
6731 
6732 //   ins_cost(110);
6733 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6734 //   opcode(0x8D);
6735 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6736 //   ins_pipe(ialu_reg_reg_fat);
6737 // %}
6738 
6739 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6740 %{
       // Load effective address for an indIndexOffset address operand
       // (operand class defined elsewhere); emitted as leaq, opcode 0x8D.
6741   match(Set dst mem);
6742 
6743   ins_cost(110);
6744   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6745   opcode(0x8D);
6746   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6747   ins_pipe(ialu_reg_reg_fat);
6748 %}
6749 
6750 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6751 %{
       // Load effective address for an indIndexScale address operand
       // (operand class defined elsewhere); emitted as leaq, opcode 0x8D.
6752   match(Set dst mem);
6753 
6754   ins_cost(110);
6755   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6756   opcode(0x8D);
6757   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6758   ins_pipe(ialu_reg_reg_fat);
6759 %}
6760 
6761 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6762 %{
       // Load effective address for an indIndexScaleOffset address operand
       // (operand class defined elsewhere); emitted as leaq, opcode 0x8D.
6763   match(Set dst mem);
6764 
6765   ins_cost(110);
6766   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6767   opcode(0x8D);
6768   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6769   ins_pipe(ialu_reg_reg_fat);
6770 %}
6771 
6772 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6773 %{
       // Load effective address for an indPosIndexScaleOffset address operand
       // (operand class defined elsewhere); emitted as leaq, opcode 0x8D.
6774   match(Set dst mem);
6775 
6776   ins_cost(110);
6777   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6778   opcode(0x8D);
6779   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6780   ins_pipe(ialu_reg_reg_fat);
6781 %}
6782 
6783 // Load Effective Address which uses Narrow (32-bits) oop
     // Only eligible when compressed oops are enabled with a non-zero shift;
     // the shift == 0 case is covered by the lea*Narrow rules that follow.
6784 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6785 %{
6786   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6787   match(Set dst mem);
6788 
6789   ins_cost(110);
6790   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6791   opcode(0x8D);
6792   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6793   ins_pipe(ialu_reg_reg_fat);
6794 %}
6795 
6796 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6797 %{
       // Load effective address for the indOffset8Narrow operand class. Only
       // eligible when the narrow-oop shift is zero; emitted as leaq (0x8D).
6798   predicate(Universe::narrow_oop_shift() == 0);
6799   match(Set dst mem);
6800 
6801   ins_cost(110); // XXX
6802   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6803   opcode(0x8D);
6804   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6805   ins_pipe(ialu_reg_reg_fat);
6806 %}
6807 
6808 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6809 %{
       // Load effective address for the indOffset32Narrow operand class. Only
       // eligible when the narrow-oop shift is zero; emitted as leaq (0x8D).
6810   predicate(Universe::narrow_oop_shift() == 0);
6811   match(Set dst mem);
6812 
6813   ins_cost(110);
6814   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6815   opcode(0x8D);
6816   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6817   ins_pipe(ialu_reg_reg_fat);
6818 %}
6819 
6820 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6821 %{
       // Load effective address for the indIndexOffsetNarrow operand class.
       // Only eligible when the narrow-oop shift is zero; emitted as leaq.
6822   predicate(Universe::narrow_oop_shift() == 0);
6823   match(Set dst mem);
6824 
6825   ins_cost(110);
6826   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6827   opcode(0x8D);
6828   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6829   ins_pipe(ialu_reg_reg_fat);
6830 %}
6831 
6832 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6833 %{
       // Load effective address for the indIndexScaleNarrow operand class.
       // Only eligible when the narrow-oop shift is zero; emitted as leaq.
6834   predicate(Universe::narrow_oop_shift() == 0);
6835   match(Set dst mem);
6836 
6837   ins_cost(110);
6838   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6839   opcode(0x8D);
6840   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6841   ins_pipe(ialu_reg_reg_fat);
6842 %}
6843 
6844 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6845 %{
       // Load effective address for the indIndexScaleOffsetNarrow operand
       // class. Only eligible when the narrow-oop shift is zero; emitted as
       // leaq.
6846   predicate(Universe::narrow_oop_shift() == 0);
6847   match(Set dst mem);
6848 
6849   ins_cost(110);
6850   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6851   opcode(0x8D);
6852   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6853   ins_pipe(ialu_reg_reg_fat);
6854 %}
6855 
6856 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6857 %{
       // Load effective address for the indPosIndexScaleOffsetNarrow operand
       // class. Only eligible when the narrow-oop shift is zero; emitted as
       // leaq.
6858   predicate(Universe::narrow_oop_shift() == 0);
6859   match(Set dst mem);
6860 
6861   ins_cost(110);
6862   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6863   opcode(0x8D);
6864   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6865   ins_pipe(ialu_reg_reg_fat);
6866 %}
6867 
6868 instruct loadConI(rRegI dst, immI src)
6869 %{
       // Load an arbitrary int constant into a register. The bytes are produced
       // by the load_immI enc_class (defined elsewhere); the format shows a
       // movl with an immediate. No ins_cost is given here, unlike the
       // cheaper zero-constant rule loadConI0.
6870   match(Set dst src);
6871 
6872   format %{ "movl    $dst, $src\t# int" %}
6873   ins_encode(load_immI(dst, src));
6874   ins_pipe(ialu_reg_fat); // XXX
6875 %}
6876 
6877 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6878 %{
       // Load int constant zero by xor-ing the register with itself
       // (opcode 0x33). XOR writes the condition flags, hence the cr operand
       // and the KILL effect.
6879   match(Set dst src);
6880   effect(KILL cr);
6881 
6882   ins_cost(50);
6883   format %{ "xorl    $dst, $dst\t# int" %}
6884   opcode(0x33); /* + rd */
6885   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6886   ins_pipe(ialu_reg);
6887 %}
6888 
6889 instruct loadConL(rRegL dst, immL src)
6890 %{
       // Load an arbitrary long constant via the load_immL enc_class (defined
       // elsewhere). Cost 150 makes this the most expensive of the long
       // constant rules, so loadConL0 (50), loadConUL32 (60) and loadConL32
       // (70) are preferred when their operand classes match.
6891   match(Set dst src);
6892 
6893   ins_cost(150);
6894   format %{ "movq    $dst, $src\t# long" %}
6895   ins_encode(load_immL(dst, src));
6896   ins_pipe(ialu_reg);
6897 %}
6898 
6899 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6900 %{
       // Load long constant zero. The format and the non-wide REX helper show a
       // 32-bit xorl; on x86-64 a 32-bit register write also clears the upper
       // 32 bits, so the whole long register ends up zero. XOR writes the
       // flags, hence KILL cr.
6901   match(Set dst src);
6902   effect(KILL cr);
6903 
6904   ins_cost(50);
6905   format %{ "xorl    $dst, $dst\t# long" %}
6906   opcode(0x33); /* + rd */
6907   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6908   ins_pipe(ialu_reg); // XXX
6909 %}
6910 
6911 instruct loadConUL32(rRegL dst, immUL32 src)
6912 %{
       // Load a long constant of operand class immUL32 (per the format, an
       // unsigned 32-bit value) using a 32-bit movl, encoded by load_immUL32
       // (defined elsewhere). Cost 60, cheaper than the generic loadConL.
6913   match(Set dst src);
6914 
6915   ins_cost(60);
6916   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6917   ins_encode(load_immUL32(dst, src));
6918   ins_pipe(ialu_reg);
6919 %}
6920 
6921 instruct loadConL32(rRegL dst, immL32 src)
6922 %{
       // Load a long constant of operand class immL32 (per the format, a value
       // representable in 32 bits) with a movq, encoded by load_immL32
       // (defined elsewhere). Cost 70, cheaper than the generic loadConL.
6923   match(Set dst src);
6924 
6925   ins_cost(70);
6926   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6927   ins_encode(load_immL32(dst, src));
6928   ins_pipe(ialu_reg);
6929 %}
6930 
6931 instruct loadConP(rRegP dst, immP src)
6932 %{
       // Load an arbitrary pointer constant via the load_immP enc_class
       // (defined elsewhere). No explicit ins_cost; the NULL and 31-bit
       // special cases below carry costs 50 and 60.
6933   match(Set dst src);
6934 
6935   format %{ "movq    $dst, $src\t# ptr" %}
6936   ins_encode(load_immP(dst, src));
6937   ins_pipe(ialu_reg_fat); // XXX
6938 %}
6939 
6940 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6941 %{
       // Load the NULL pointer constant by xor-ing the register with itself.
       // As in loadConL0, a 32-bit xorl is enough because the 32-bit write
       // clears the upper half of the register. XOR writes the flags, hence
       // KILL cr.
6942   match(Set dst src);
6943   effect(KILL cr);
6944 
6945   ins_cost(50);
6946   format %{ "xorl    $dst, $dst\t# ptr" %}
6947   opcode(0x33); /* + rd */
6948   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6949   ins_pipe(ialu_reg);
6950 %}
6951 
6952 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6953 %{
       // Load a pointer constant of operand class immP31 (per the format, a
       // positive value that fits in 32 bits) using a 32-bit movl, encoded by
       // load_immP31 (defined elsewhere).
       // NOTE(review): cr is declared killed although the format shows a plain
       // movl, which does not write flags -- confirm against load_immP31
       // whether the KILL is required or merely conservative.
6954   match(Set dst src);
6955   effect(KILL cr);
6956 
6957   ins_cost(60);
6958   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6959   ins_encode(load_immP31(dst, src));
6960   ins_pipe(ialu_reg);
6961 %}
6962 
6963 instruct loadConF(regF dst, immF src)
6964 %{
       // Load a float constant into an XMM register. The format's "[$src]"
       // indicates the constant is read from memory with movss; the actual
       // bytes come from the load_conF enc_class (defined elsewhere).
6965   match(Set dst src);
6966   ins_cost(125);
6967 
6968   format %{ "movss   $dst, [$src]" %}
6969   ins_encode(load_conF(dst, src));
6970   ins_pipe(pipe_slow);
6971 %}
6972 
6973 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL constant by xor-ing the register with itself.
       // XOR writes the condition flags, hence the cr operand and KILL effect.
       // The format prints "$dst, $dst" to match what the encoder emits; $src
       // is the immN0 constant and is never an operand of the instruction.
6974   match(Set dst src);
6975   effect(KILL cr);
6976   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6977   ins_encode %{
6978     __ xorq($dst$$Register, $dst$$Register);
6979   %}
6980   ins_pipe(ialu_reg);
6981 %}
6982 
6983 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-NULL compressed oop constant into a narrow-oop register
       // via set_narrow_oop. A NULL constant is treated as a fatal internal
       // error here (ShouldNotReachHere); the immN0 rule loadConN0 handles
       // the NULL case.
6984   match(Set dst src);
6985 
6986   ins_cost(125);
6987   format %{ "movl    $dst, $src\t# compressed ptr" %}
6988   ins_encode %{
6989     address con = (address)$src$$constant;
6990     if (con == NULL) {
6991       ShouldNotReachHere();
6992     } else {
6993       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6994     }
6995   %}
6996   ins_pipe(ialu_reg_fat); // XXX
6997 %}
6998 
6999 instruct loadConF0(regF dst, immF0 src)
7000 %{
       // Load float constant 0.0 by xor-ing the XMM register with itself
       // (xorps, opcode bytes 0F 57). Cost 100 undercuts the memory-based
       // loadConF (125). No flags operand is declared for this rule.
7001   match(Set dst src);
7002   ins_cost(100);
7003 
7004   format %{ "xorps   $dst, $dst\t# float 0.0" %}
7005   opcode(0x0F, 0x57);
7006   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
7007   ins_pipe(pipe_slow);
7008 %}
7009 
7010 // Use the same format since predicate() can not be used here.
     // Load a double constant into an XMM register. The format always prints
     // movsd; the instruction actually emitted is decided inside the
     // load_conD enc_class (defined elsewhere).
7011 instruct loadConD(regD dst, immD src)
7012 %{
7013   match(Set dst src);
7014   ins_cost(125);
7015 
7016   format %{ "movsd   $dst, [$src]" %}
7017   ins_encode(load_conD(dst, src));
7018   ins_pipe(pipe_slow);
7019 %}
7020 
7021 instruct loadConD0(regD dst, immD0 src)
7022 %{
       // Load double constant 0.0 by xor-ing the XMM register with itself
       // (xorpd, opcode bytes 66 0F 57). OpcP (the 0x66 prefix byte) is
       // emitted before the REX helper. Cost 100 undercuts loadConD (125).
7023   match(Set dst src);
7024   ins_cost(100);
7025 
7026   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
7027   opcode(0x66, 0x0F, 0x57);
7028   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
7029   ins_pipe(pipe_slow);
7030 %}
7031 
7032 instruct loadSSI(rRegI dst, stackSlotI src)
7033 %{
       // Reload an int from a stack slot into a register: 32-bit MOV r, r/m
       // (opcode 0x8B) with the stack-slot operand used as the memory operand.
7034   match(Set dst src);
7035 
7036   ins_cost(125);
7037   format %{ "movl    $dst, $src\t# int stk" %}
7038   opcode(0x8B);
7039   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7040   ins_pipe(ialu_reg_mem);
7041 %}
7042 
7043 instruct loadSSL(rRegL dst, stackSlotL src)
7044 %{
       // Reload a long from a stack slot into a register: MOV r, r/m (opcode
       // 0x8B) using the "_wide" REX helper for the 64-bit form.
7045   match(Set dst src);
7046 
7047   ins_cost(125);
7048   format %{ "movq    $dst, $src\t# long stk" %}
7049   opcode(0x8B);
7050   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7051   ins_pipe(ialu_reg_mem);
7052 %}
7053 
7054 instruct loadSSP(rRegP dst, stackSlotP src)
7055 %{
       // Reload a pointer from a stack slot into a register; identical
       // enc_class sequence to loadSSL (opcode 0x8B, "_wide" REX helper).
7056   match(Set dst src);
7057 
7058   ins_cost(125);
7059   format %{ "movq    $dst, $src\t# ptr stk" %}
7060   opcode(0x8B);
7061   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7062   ins_pipe(ialu_reg_mem);
7063 %}
7064 
7065 instruct loadSSF(regF dst, stackSlotF src)
7066 %{
       // Reload a float from a stack slot into an XMM register with movss
       // (opcode bytes F3 0F 10), the same byte sequence loadF uses.
7067   match(Set dst src);
7068 
7069   ins_cost(125);
7070   format %{ "movss   $dst, $src\t# float stk" %}
7071   opcode(0xF3, 0x0F, 0x10);
7072   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
7073   ins_pipe(pipe_slow); // XXX
7074 %}
7075 
7076 // Use the same format since predicate() can not be used here.
     // Reload a double from a stack slot into an XMM register. The format
     // always prints movsd; the instruction is chosen by the assembler's
     // movdbl helper (not visible here). The slot is addressed as rsp plus
     // the operand's displacement.
7077 instruct loadSSD(regD dst, stackSlotD src)
7078 %{
7079   match(Set dst src);
7080 
7081   ins_cost(125);
7082   format %{ "movsd   $dst, $src\t# double stk" %}
7083   ins_encode  %{
7084     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7085   %}
7086   ins_pipe(pipe_slow); // XXX
7087 %}
7088 
7089 // Prefetch instructions.
7090 // Must be safe to execute with invalid address (cannot fault).
7091 
7092 instruct prefetchr( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 3: opcode 0F 0D with
       // ModRM reg field /0. The rule has no destination; it matches a bare
       // PrefetchRead node.
7093   predicate(ReadPrefetchInstr==3);
7094   match(PrefetchRead mem);
7095   ins_cost(125);
7096 
7097   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
7098   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
7099   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7100   ins_pipe(ialu_mem);
7101 %}
7102 
7103 instruct prefetchrNTA( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 0: PREFETCHNTA,
       // opcode 0F 18 with ModRM reg field /0.
7104   predicate(ReadPrefetchInstr==0);
7105   match(PrefetchRead mem);
7106   ins_cost(125);
7107 
7108   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
7109   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7110   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7111   ins_pipe(ialu_mem);
7112 %}
7113 
7114 instruct prefetchrT0( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 1: PREFETCHT0,
       // opcode 0F 18 with ModRM reg field /1.
7115   predicate(ReadPrefetchInstr==1);
7116   match(PrefetchRead mem);
7117   ins_cost(125);
7118 
7119   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
7120   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7121   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7122   ins_pipe(ialu_mem);
7123 %}
7124 
7125 instruct prefetchrT2( memory mem ) %{
       // Read prefetch selected when ReadPrefetchInstr == 2: PREFETCHT2,
       // opcode 0F 18 with ModRM reg field /3.
7126   predicate(ReadPrefetchInstr==2);
7127   match(PrefetchRead mem);
7128   ins_cost(125);
7129 
7130   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
7131   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7132   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7133   ins_pipe(ialu_mem);
7134 %}
7135 
7136 instruct prefetchw( memory mem ) %{
       // Write prefetch selected when AllocatePrefetchInstr == 3: PREFETCHW,
       // opcode 0F 0D with ModRM reg field /1. Note that the write-prefetch
       // rules key off AllocatePrefetchInstr, not ReadPrefetchInstr.
7137   predicate(AllocatePrefetchInstr==3);
7138   match(PrefetchWrite mem);
7139   ins_cost(125);
7140 
7141   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
7142   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
7143   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7144   ins_pipe(ialu_mem);
7145 %}
7146 
7147 instruct prefetchwNTA( memory mem ) %{
       // Write prefetch selected when AllocatePrefetchInstr == 0: PREFETCHNTA,
       // opcode 0F 18 with ModRM reg field /0 (same bytes as prefetchrNTA).
7148   predicate(AllocatePrefetchInstr==0);
7149   match(PrefetchWrite mem);
7150   ins_cost(125);
7151 
7152   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
7153   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7154   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7155   ins_pipe(ialu_mem);
7156 %}
7157 
7158 instruct prefetchwT0( memory mem ) %{
       // Write prefetch selected when AllocatePrefetchInstr == 1: PREFETCHT0,
       // opcode 0F 18 with ModRM reg field /1 (same bytes as prefetchrT0).
7159   predicate(AllocatePrefetchInstr==1);
7160   match(PrefetchWrite mem);
7161   ins_cost(125);
7162 
7163   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
7164   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
7165   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7166   ins_pipe(ialu_mem);
7167 %}
7168 
7169 instruct prefetchwT2( memory mem ) %{
       // Write prefetch selected when AllocatePrefetchInstr == 2: PREFETCHT2,
       // opcode 0F 18 with ModRM reg field /3 (same bytes as prefetchrT2).
7170   predicate(AllocatePrefetchInstr==2);
7171   match(PrefetchWrite mem);
7172   ins_cost(125);
7173 
7174   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
7175   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
7176   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7177   ins_pipe(ialu_mem);
7178 %}
7179 
7180 //----------Store Instructions-------------------------------------------------
7181 
7182 // Store Byte
     // Stores the low byte of an int register to memory: MOV r/m8, r8
     // (opcode 0x88).
7183 instruct storeB(memory mem, rRegI src)
7184 %{
7185   match(Set mem (StoreB mem src));
7186 
7187   ins_cost(125); // XXX
7188   format %{ "movb    $mem, $src\t# byte" %}
7189   opcode(0x88);
       // Uses the byte-register REX helper (REX_breg_mem) rather than the
       // REX_reg_mem used by wider stores -- presumably so the low byte of any
       // register is addressable; confirm against the enc_class definition.
7190   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
7191   ins_pipe(ialu_mem_reg);
7192 %}
7193 
7194 // Store Char/Short
     // Stores the low 16 bits of an int register to memory: MOV r/m, r
     // (opcode 0x89) preceded by the SizePrefix enc_class (presumably the
     // 0x66 operand-size prefix that selects the 16-bit form).
7195 instruct storeC(memory mem, rRegI src)
7196 %{
7197   match(Set mem (StoreC mem src));
7198 
7199   ins_cost(125); // XXX
7200   format %{ "movw    $mem, $src\t# char/short" %}
7201   opcode(0x89);
7202   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7203   ins_pipe(ialu_mem_reg);
7204 %}
7205 
7206 // Store Integer
     // Stores an int register to memory: 32-bit MOV r/m, r (opcode 0x89).
7207 instruct storeI(memory mem, rRegI src)
7208 %{
7209   match(Set mem (StoreI mem src));
7210 
7211   ins_cost(125); // XXX
7212   format %{ "movl    $mem, $src\t# int" %}
7213   opcode(0x89);
7214   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7215   ins_pipe(ialu_mem_reg);
7216 %}
7217 
7218 // Store Long
     // Stores a long register to memory: MOV r/m, r (opcode 0x89) using the
     // "_wide" REX helper for the 64-bit form.
7219 instruct storeL(memory mem, rRegL src)
7220 %{
7221   match(Set mem (StoreL mem src));
7222 
7223   ins_cost(125); // XXX
7224   format %{ "movq    $mem, $src\t# long" %}
7225   opcode(0x89);
7226   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7227   ins_pipe(ialu_mem_reg); // XXX
7228 %}
7229 
7230 // Store Pointer
     // Stores a pointer register to memory with a 64-bit MOV (opcode 0x89).
     // The source operand class is any_RegP rather than rRegP (presumably a
     // wider register set; the class is defined elsewhere).
7231 instruct storeP(memory mem, any_RegP src)
7232 %{
7233   match(Set mem (StoreP mem src));
7234 
7235   ins_cost(125); // XXX
7236   format %{ "movq    $mem, $src\t# ptr" %}
7237   opcode(0x89);
7238   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7239   ins_pipe(ialu_mem_reg);
7240 %}
7241 
7242 instruct storeImmP0(memory mem, immP0 zero)
7243 %{
       // Store a NULL pointer by writing R12 to memory instead of encoding an
       // immediate. Only eligible with compressed oops and a NULL narrow-oop
       // base; per the format string R12 is the heap-base register, which then
       // holds zero. Cost 125 undercuts the immediate form storeImmP (150).
7244   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7245   match(Set mem (StoreP mem zero));
7246 
7247   ins_cost(125); // XXX
7248   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
7249   ins_encode %{
7250     __ movq($mem$$Address, r12);
7251   %}
7252   ins_pipe(ialu_mem_reg);
7253 %}
7254 
7255 // Store NULL Pointer, mark word, or other simple pointer constant.
     // Accepts only immP31 constants and stores them with MOV r/m64, imm32
     // (opcode C7 /0, "_wide" REX helper); the 4-byte immediate is emitted by
     // Con32.
7256 instruct storeImmP(memory mem, immP31 src)
7257 %{
7258   match(Set mem (StoreP mem src));
7259 
7260   ins_cost(150); // XXX
7261   format %{ "movq    $mem, $src\t# ptr" %}
7262   opcode(0xC7); /* C7 /0 */
7263   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7264   ins_pipe(ialu_mem_imm);
7265 %}
7266 
7267 // Store Compressed Pointer
     // Stores a narrow-oop register to memory as a 32-bit value (movl); the
     // value is written as-is, with no encoding step here.
7268 instruct storeN(memory mem, rRegN src)
7269 %{
7270   match(Set mem (StoreN mem src));
7271 
7272   ins_cost(125); // XXX
7273   format %{ "movl    $mem, $src\t# compressed ptr" %}
7274   ins_encode %{
7275     __ movl($mem$$Address, $src$$Register);
7276   %}
7277   ins_pipe(ialu_mem_reg);
7278 %}
7279 
7280 instruct storeImmN0(memory mem, immN0 zero)
7281 %{
       // Store a compressed NULL by writing the low 32 bits of R12 to memory.
       // Only eligible when the narrow-oop base is NULL; per the format string
       // R12 is the heap-base register, which then holds zero. Unlike the
       // other *0 store rules, the predicate does not test UseCompressedOops.
7282   predicate(Universe::narrow_oop_base() == NULL);
7283   match(Set mem (StoreN mem zero));
7284 
7285   ins_cost(125); // XXX
7286   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7287   ins_encode %{
7288     __ movl($mem$$Address, r12);
7289   %}
7290   ins_pipe(ialu_mem_reg);
7291 %}
7292 
7293 instruct storeImmN(memory mem, immN src)
7294 %{
       // Store a compressed oop constant directly to memory. A NULL constant is
       // written as a literal 32-bit zero; any other constant goes through
       // set_narrow_oop on the destination address.
7295   match(Set mem (StoreN mem src));
7296 
7297   ins_cost(150); // XXX
7298   format %{ "movl    $mem, $src\t# compressed ptr" %}
7299   ins_encode %{
7300     address con = (address)$src$$constant;
7301     if (con == NULL) {
7302       __ movl($mem$$Address, (int32_t)0);
7303     } else {
7304       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7305     }
7306   %}
7307   ins_pipe(ialu_mem_imm);
7308 %}
7309 
7310 // Store Integer Immediate
     // Zero case: stores the low 32 bits of R12 instead of an immediate. Only
     // eligible with compressed oops and a NULL narrow-oop base; per the
     // format string R12 is the heap-base register, which then holds zero.
     // Cost 125 undercuts the immediate form storeImmI (150).
7311 instruct storeImmI0(memory mem, immI0 zero)
7312 %{
7313   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7314   match(Set mem (StoreI mem zero));
7315 
7316   ins_cost(125); // XXX
7317   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7318   ins_encode %{
7319     __ movl($mem$$Address, r12);
7320   %}
7321   ins_pipe(ialu_mem_reg);
7322 %}
7323 
7324 instruct storeImmI(memory mem, immI src)
7325 %{
       // Store an int constant to memory: MOV r/m32, imm32 (opcode C7 /0); the
       // 4-byte immediate is emitted by Con32.
7326   match(Set mem (StoreI mem src));
7327 
7328   ins_cost(150);
7329   format %{ "movl    $mem, $src\t# int" %}
7330   opcode(0xC7); /* C7 /0 */
7331   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7332   ins_pipe(ialu_mem_imm);
7333 %}
7334 
7335 // Store Long Immediate
     // Zero case: stores all 64 bits of R12 instead of an immediate. Only
     // eligible with compressed oops and a NULL narrow-oop base; per the
     // format string R12 is the heap-base register, which then holds zero.
     // Cost 125 undercuts the immediate form storeImmL (150).
7336 instruct storeImmL0(memory mem, immL0 zero)
7337 %{
7338   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7339   match(Set mem (StoreL mem zero));
7340 
7341   ins_cost(125); // XXX
7342   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7343   ins_encode %{
7344     __ movq($mem$$Address, r12);
7345   %}
7346   ins_pipe(ialu_mem_reg);
7347 %}
7348 
7349 instruct storeImmL(memory mem, immL32 src)
7350 %{
       // Store a long constant to memory: MOV r/m64, imm32 (opcode C7 /0 with
       // the "_wide" REX helper). Only immL32 constants are accepted because
       // the instruction carries a 4-byte immediate (emitted by Con32), which
       // the CPU sign-extends to 64 bits.
7351   match(Set mem (StoreL mem src));
7352 
7353   ins_cost(150);
7354   format %{ "movq    $mem, $src\t# long" %}
7355   opcode(0xC7); /* C7 /0 */
7356   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7357   ins_pipe(ialu_mem_imm);
7358 %}
7359 
7360 // Store Short/Char Immediate
     // Zero case: stores the low 16 bits of R12 instead of an immediate. Only
     // eligible with compressed oops and a NULL narrow-oop base; per the
     // format string R12 is the heap-base register, which then holds zero.
7361 instruct storeImmC0(memory mem, immI0 zero)
7362 %{
7363   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7364   match(Set mem (StoreC mem zero));
7365 
7366   ins_cost(125); // XXX
7367   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7368   ins_encode %{
7369     __ movw($mem$$Address, r12);
7370   %}
7371   ins_pipe(ialu_mem_reg);
7372 %}
7373 
7374 instruct storeImmI16(memory mem, immI16 src)
7375 %{
       // Store a 16-bit constant to memory. Only eligible when UseStoreImmI16
       // is set. Same C7 /0 opcode as the 32-bit immediate store, preceded by
       // SizePrefix and followed by a 2-byte immediate (Con16).
7376   predicate(UseStoreImmI16);
7377   match(Set mem (StoreC mem src));
7378 
7379   ins_cost(150);
7380   format %{ "movw    $mem, $src\t# short/char" %}
7381   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7382   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7383   ins_pipe(ialu_mem_imm);
7384 %}
7385 
7386 // Store Byte Immediate
     // Zero case: stores the low byte of R12 instead of an immediate. Only
     // eligible with compressed oops and a NULL narrow-oop base; per the
     // format string R12 is the heap-base register, which then holds zero.
     // Cost 125 undercuts the immediate form storeImmB (150).
7387 instruct storeImmB0(memory mem, immI0 zero)
7388 %{
7389   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7390   match(Set mem (StoreB mem zero));
7391 
7392   ins_cost(125); // XXX
       // The listing comment says "byte": this rule matches StoreB and emits
       // movb, so the "short/char" label copied from storeImmC0 was wrong.
7393   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7394   ins_encode %{
7395     __ movb($mem$$Address, r12);
7396   %}
7397   ins_pipe(ialu_mem_reg);
7398 %}
7399 
7400 instruct storeImmB(memory mem, immI8 src)
7401 %{
       // Store a byte constant to memory: MOV r/m8, imm8 (opcode C6 /0). The
       // immediate is emitted by Con8or32 (defined elsewhere; with an immI8
       // operand it presumably emits a single byte -- confirm).
7402   match(Set mem (StoreB mem src));
7403 
7404   ins_cost(150); // XXX
7405   format %{ "movb    $mem, $src\t# byte" %}
7406   opcode(0xC6); /* C6 /0 */
7407   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7408   ins_pipe(ialu_mem_imm);
7409 %}
7410 
7411 // Store Aligned Packed Byte XMM register to memory
     // Matches a Store8B vector store; the 8 packed bytes are written with
     // one MOVQ through the shared movq_st enc_class (defined elsewhere).
7412 instruct storeA8B(memory mem, regD src) %{
7413   match(Set mem (Store8B mem src));
7414   ins_cost(145);
7415   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7416   ins_encode( movq_st(mem, src));
7417   ins_pipe( pipe_slow );
7418 %}
7419 
7420 // Store Aligned Packed Char/Short XMM register to memory
     // Matches a Store4C vector store; uses the same movq_st encoding as the
     // other 64-bit packed stores.
7421 instruct storeA4C(memory mem, regD src) %{
7422   match(Set mem (Store4C mem src));
7423   ins_cost(145);
7424   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7425   ins_encode( movq_st(mem, src));
7426   ins_pipe( pipe_slow );
7427 %}
7428 
7429 // Store Aligned Packed Integer XMM register to memory
     // Matches a Store2I vector store; uses the same movq_st encoding as the
     // other 64-bit packed stores.
7430 instruct storeA2I(memory mem, regD src) %{
7431   match(Set mem (Store2I mem src));
7432   ins_cost(145);
7433   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7434   ins_encode( movq_st(mem, src));
7435   ins_pipe( pipe_slow );
7436 %}
7437 
7438 // Store CMS card-mark Immediate
     // Register form of the zero card mark: stores the low byte of R12
     // instead of an immediate. Only eligible with compressed oops and a NULL
     // narrow-oop base; per the format string R12 is the heap-base register,
     // which then holds zero. Cost 125 undercuts storeImmCM0 (150).
7439 instruct storeImmCM0_reg(memory mem, immI0 zero)
7440 %{
7441   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7442   match(Set mem (StoreCM mem zero));
7443 
7444   ins_cost(125); // XXX
7445   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7446   ins_encode %{
7447     __ movb($mem$$Address, r12);
7448   %}
7449   ins_pipe(ialu_mem_reg);
7450 %}
7451 
7452 instruct storeImmCM0(memory mem, immI0 src)
7453 %{
       // Immediate form of the zero card-mark store: MOV r/m8, imm8
       // (opcode C6 /0) with the constant emitted by Con8or32. It has no
       // predicate, so it is always eligible for a StoreCM of zero.
7454   match(Set mem (StoreCM mem src));
7455 
7456   ins_cost(150); // XXX
7457   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7458   opcode(0xC6); /* C6 /0 */
7459   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7460   ins_pipe(ialu_mem_imm);
7461 %}
7462 
7463 // Store Aligned Packed Single Float XMM register to memory
     // Matches a Store2F vector store; uses the same movq_st encoding as the
     // integer packed stores.
7464 instruct storeA2F(memory mem, regD src) %{
7465   match(Set mem (Store2F mem src));
7466   ins_cost(145);
7467   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7468   ins_encode( movq_st(mem, src));
7469   ins_pipe( pipe_slow );
7470 %}
7471 
7472 // Store Float
     // Stores an XMM float register to memory with movss (opcode bytes
     // F3 0F 11, the store direction of the F3 0F 10 load used by loadF).
7473 instruct storeF(memory mem, regF src)
7474 %{
7475   match(Set mem (StoreF mem src));
7476 
7477   ins_cost(95); // XXX
7478   format %{ "movss   $mem, $src\t# float" %}
7479   opcode(0xF3, 0x0F, 0x11);
7480   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7481   ins_pipe(pipe_slow); // XXX
7482 %}
7483 
7484 // Store immediate Float value (it is faster than store from XMM register)
     // Zero case: writes float 0.0 as the low 32 bits of R12 via an integer
     // movl. Only eligible with compressed oops and a NULL narrow-oop base;
     // per the format string R12 is the heap-base register, which then holds
     // zero. Cost 25 is the lowest of the float store rules.
7485 instruct storeF0(memory mem, immF0 zero)
7486 %{
7487   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7488   match(Set mem (StoreF mem zero));
7489 
7490   ins_cost(25); // XXX
7491   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7492   ins_encode %{
7493     __ movl($mem$$Address, r12);
7494   %}
7495   ins_pipe(ialu_mem_reg);
7496 %}
7497 
7498 instruct storeF_imm(memory mem, immF src)
7499 %{
       // Store a float constant using an integer MOV r/m32, imm32
       // (opcode C7 /0). The immediate comes from Con32F_as_bits, which by its
       // name emits the float's raw 32-bit pattern (defined elsewhere).
       // Cost 50 undercuts the register form storeF (95).
7500   match(Set mem (StoreF mem src));
7501 
7502   ins_cost(50);
7503   format %{ "movl    $mem, $src\t# float" %}
7504   opcode(0xC7); /* C7 /0 */
7505   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7506   ins_pipe(ialu_mem_imm);
7507 %}
7508 
7509 // Store Double
     // Stores an XMM double register to memory with movsd (opcode bytes
     // F2 0F 11).
7510 instruct storeD(memory mem, regD src)
7511 %{
7512   match(Set mem (StoreD mem src));
7513 
7514   ins_cost(95); // XXX
7515   format %{ "movsd   $mem, $src\t# double" %}
7516   opcode(0xF2, 0x0F, 0x11);
7517   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7518   ins_pipe(pipe_slow); // XXX
7519 %}
7520 
7521 // Store immediate double 0.0 (it is faster than store from XMM register)
     // Immediate form: 64-bit MOV r/m64, imm32 (opcode C7 /0, "_wide" REX
     // helper). Its predicate is the exact negation of storeD0's, so this
     // rule applies when R12 cannot be used as a zero source.
     // NOTE(review): the immediate is emitted through Con32F_as_bits although
     // the operand is a double; presumably fine because only 0.0 (immD0) is
     // accepted -- confirm against the enc_class.
7522 instruct storeD0_imm(memory mem, immD0 src)
7523 %{
7524   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7525   match(Set mem (StoreD mem src));
7526 
7527   ins_cost(50);
7528   format %{ "movq    $mem, $src\t# double 0." %}
7529   opcode(0xC7); /* C7 /0 */
7530   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7531   ins_pipe(ialu_mem_imm);
7532 %}
7533 
7534 instruct storeD0(memory mem, immD0 zero)
7535 %{
       // Store double 0.0 by writing all 64 bits of R12 with an integer movq.
       // Only eligible with compressed oops and a NULL narrow-oop base; per the
       // format string R12 is the heap-base register, which then holds zero.
       // storeD0_imm carries the complementary predicate.
7536   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7537   match(Set mem (StoreD mem zero));
7538 
7539   ins_cost(25); // XXX
7540   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7541   ins_encode %{
7542     __ movq($mem$$Address, r12);
7543   %}
7544   ins_pipe(ialu_mem_reg);
7545 %}
7546 
7547 instruct storeSSI(stackSlotI dst, rRegI src)
7548 %{
       // Spill an int register to a stack slot: 32-bit MOV r/m, r (opcode
       // 0x89) with the stack-slot operand used as the memory operand.
7549   match(Set dst src);
7550 
7551   ins_cost(100);
7552   format %{ "movl    $dst, $src\t# int stk" %}
7553   opcode(0x89);
7554   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7555   ins_pipe( ialu_mem_reg );
7556 %}
7557 
7558 instruct storeSSL(stackSlotL dst, rRegL src)
7559 %{
       // Spill a long register to a stack slot: MOV r/m, r (opcode 0x89) using
       // the "_wide" REX helper for the 64-bit form.
7560   match(Set dst src);
7561 
7562   ins_cost(100);
7563   format %{ "movq    $dst, $src\t# long stk" %}
7564   opcode(0x89);
7565   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7566   ins_pipe(ialu_mem_reg);
7567 %}
7568 
7569 instruct storeSSP(stackSlotP dst, rRegP src)
7570 %{
       // Spill a pointer register to a stack slot; identical enc_class
       // sequence to storeSSL (opcode 0x89, "_wide" REX helper).
7571   match(Set dst src);
7572 
7573   ins_cost(100);
7574   format %{ "movq    $dst, $src\t# ptr stk" %}
7575   opcode(0x89);
7576   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7577   ins_pipe(ialu_mem_reg);
7578 %}
7579 
7580 instruct storeSSF(stackSlotF dst, regF src)
7581 %{
       // Spill an XMM float register to a stack slot with movss (opcode bytes
       // F3 0F 11), the same byte sequence storeF uses.
7582   match(Set dst src);
7583 
7584   ins_cost(95); // XXX
7585   format %{ "movss   $dst, $src\t# float stk" %}
7586   opcode(0xF3, 0x0F, 0x11);
7587   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7588   ins_pipe(pipe_slow); // XXX
7589 %}
7590 
7591 instruct storeSSD(stackSlotD dst, regD src)
7592 %{
       // Spill an XMM double register to a stack slot with movsd (opcode bytes
       // F2 0F 11), the same byte sequence storeD uses.
7593   match(Set dst src);
7594 
7595   ins_cost(95); // XXX
7596   format %{ "movsd   $dst, $src\t# double stk" %}
7597   opcode(0xF2, 0x0F, 0x11);
7598   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7599   ins_pipe(pipe_slow); // XXX
7600 %}
7601 
7602 //----------BSWAP Instructions-------------------------------------------------
// In-place byte reversal of an int register (ReverseBytesI -> bswapl).
// dst is both the input and the result of the match rule.
7603 instruct bytes_reverse_int(rRegI dst) %{
7604   match(Set dst (ReverseBytesI dst));
7605 
7606   format %{ "bswapl  $dst" %}
7607   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7608   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7609   ins_pipe( ialu_reg );
7610 %}
7611 
// In-place byte reversal of a long register (ReverseBytesL -> bswapq).
// Same opcode bytes as the int form, encoded through the wide REX helper.
7612 instruct bytes_reverse_long(rRegL dst) %{
7613   match(Set dst (ReverseBytesL dst));
7614 
7615   format %{ "bswapq  $dst" %}
7616 
7617   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7618   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7619   ins_pipe( ialu_reg);
7620 %}
7621 
// Fused int load + byte reversal: a load into dst (opcode 0x8B) followed by
// a byte swap of dst (0x0F 0xC8+reg), emitted as one matched instruction.
7622 instruct loadI_reversed(rRegI dst, memory src) %{
7623   match(Set dst (ReverseBytesI (LoadI src)));
7624 
7625   format %{ "bswap_movl $dst, $src" %}
7626   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7627   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7628   ins_pipe( ialu_reg_mem );
7629 %}
7630 
// Fused long load + byte reversal: a wide load into dst (opcode 0x8B)
// followed by a wide byte swap of dst (0x0F 0xC8+reg).
7631 instruct loadL_reversed(rRegL dst, memory src) %{
7632   match(Set dst (ReverseBytesL (LoadL src)));
7633 
7634   format %{ "bswap_movq $dst, $src" %}
7635   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7636   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7637   ins_pipe( ialu_reg_mem );
7638 %}
7639 
// Fused byte reversal + int store: byte-swaps src (0x0F 0xC8+reg), then
// stores it to dst (opcode 0x89).
// NOTE(review): the swap is applied to $src itself and no effect on src is
// declared -- confirm src is never expected to keep its original value.
7640 instruct storeI_reversed(memory dst, rRegI src) %{
7641   match(Set dst (StoreI dst (ReverseBytesI  src)));
7642 
7643   format %{ "movl_bswap $dst, $src" %}
7644   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7645   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7646   ins_pipe( ialu_mem_reg );
7647 %}
7648 
// Fused byte reversal + long store: wide byte swap of src (0x0F 0xC8+reg),
// then a wide store to dst (opcode 0x89).
// NOTE(review): the swap is applied to $src itself and no effect on src is
// declared -- confirm src is never expected to keep its original value.
7649 instruct storeL_reversed(memory dst, rRegL src) %{
7650   match(Set dst (StoreL dst (ReverseBytesL  src)));
7651 
7652   format %{ "movq_bswap $dst, $src" %}
7653   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7654   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7655   ins_pipe( ialu_mem_reg );
7656 %}
7657 
7658 
7659 //---------- Population Count Instructions -------------------------------------
7660 
// Population count of an int register (PopCountI -> popcntl).
// Selected only when UsePopCountInstruction is set.
// POPCNT writes the arithmetic flags (ZF reflects a zero source, the other
// status flags are cleared), so the flags register is declared as killed to
// keep the allocator from holding a live condition across this instruction.
7661 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
7662   predicate(UsePopCountInstruction);
7663   match(Set dst (PopCountI src));
       effect(KILL cr);
7664 
7665   format %{ "popcnt  $dst, $src" %}
7666   ins_encode %{
7667     __ popcntl($dst$$Register, $src$$Register);
7668   %}
7669   ins_pipe(ialu_reg);
7670 %}
7671 
// Population count of an int loaded from memory (PopCountI (LoadI) ->
// popcntl with a memory source).  Selected only when UsePopCountInstruction
// is set.
// POPCNT writes the arithmetic flags, so the flags register is declared as
// killed.
7672 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7673   predicate(UsePopCountInstruction);
7674   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7675 
7676   format %{ "popcnt  $dst, $mem" %}
7677   ins_encode %{
7678     __ popcntl($dst$$Register, $mem$$Address);
7679   %}
7680   ins_pipe(ialu_reg);
7681 %}
7682 
7683 // Note: Long.bitCount(long) returns an int.
// Population count of a long register (PopCountL -> popcntq); the result
// operand is therefore an int register.  Selected only when
// UsePopCountInstruction is set.
// POPCNT writes the arithmetic flags, so the flags register is declared as
// killed.
7684 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7685   predicate(UsePopCountInstruction);
7686   match(Set dst (PopCountL src));
       effect(KILL cr);
7687 
7688   format %{ "popcnt  $dst, $src" %}
7689   ins_encode %{
7690     __ popcntq($dst$$Register, $src$$Register);
7691   %}
7692   ins_pipe(ialu_reg);
7693 %}
7694 
7695 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory (PopCountL (LoadL) ->
// popcntq with a memory source); the result operand is an int register.
// Selected only when UsePopCountInstruction is set.
// POPCNT writes the arithmetic flags, so the flags register is declared as
// killed.
7696 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7697   predicate(UsePopCountInstruction);
7698   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7699 
7700   format %{ "popcnt  $dst, $mem" %}
7701   ins_encode %{
7702     __ popcntq($dst$$Register, $mem$$Address);
7703   %}
7704   ins_pipe(ialu_reg);
7705 %}
7706 
7707 
7708 //----------MemBar Instructions-----------------------------------------------
7709 // Memory barrier flavors
7710 
// Acquire barrier: matched but emits no code (size 0, empty encoding).
7711 instruct membar_acquire()
7712 %{
7713   match(MemBarAcquire);
7714   ins_cost(0);
7715 
7716   size(0);
7717   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7718   ins_encode();
7719   ins_pipe(empty);
7720 %}
7721 
// Acquire barrier variant selected when Matcher::prior_fast_lock(n) holds;
// per the format string the preceding FastLock's CMPXCHG makes it redundant,
// so nothing is emitted (size 0, empty encoding).
7722 instruct membar_acquire_lock()
7723 %{
7724   match(MemBarAcquire);
7725   predicate(Matcher::prior_fast_lock(n));
7726   ins_cost(0);
7727 
7728   size(0);
7729   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7730   ins_encode();
7731   ins_pipe(empty);
7732 %}
7733 
// Release barrier: matched but emits no code (size 0, empty encoding).
7734 instruct membar_release()
7735 %{
7736   match(MemBarRelease);
7737   ins_cost(0);
7738 
7739   size(0);
7740   format %{ "MEMBAR-release ! (empty encoding)" %}
7741   ins_encode();
7742   ins_pipe(empty);
7743 %}
7744 
// Release barrier variant selected when Matcher::post_fast_unlock(n) holds;
// per the format string a FastUnlock follows, so nothing is emitted
// (size 0, empty encoding).
7745 instruct membar_release_lock()
7746 %{
7747   match(MemBarRelease);
7748   predicate(Matcher::post_fast_unlock(n));
7749   ins_cost(0);
7750 
7751   size(0);
7752   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7753   ins_encode();
7754   ins_pipe(empty);
7755 %}
7756 
// Volatile (StoreLoad) barrier.  The encoding delegates to
// MacroAssembler::membar(Assembler::StoreLoad); the flags register is
// declared killed.  The format template prints a locked add to [rsp] on MP
// systems and an "empty encoding" note otherwise.
7757 instruct membar_volatile(rFlagsReg cr) %{
7758   match(MemBarVolatile);
7759   effect(KILL cr);
7760   ins_cost(400);
7761 
7762   format %{ 
7763     $$template
7764     if (os::is_MP()) {
7765       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7766     } else {
7767       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7768     }
7769   %}
7770   ins_encode %{
7771     __ membar(Assembler::StoreLoad);
7772   %}
7773   ins_pipe(pipe_slow);
7774 %}
7775 
// Zero-cost volatile barrier chosen when Matcher::post_store_load_barrier(n)
// reports the barrier is unnecessary; emits nothing (size 0, empty encoding).
7776 instruct unnecessary_membar_volatile()
7777 %{
7778   match(MemBarVolatile);
7779   predicate(Matcher::post_store_load_barrier(n));
7780   ins_cost(0);
7781 
7782   size(0);
7783   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7784   ins_encode();
7785   ins_pipe(empty);
7786 %}
7787 
7788 //----------Move Instructions--------------------------------------------------
7789 
// Reinterpret a long as a pointer (CastX2P): a wide register copy.
7790 instruct castX2P(rRegP dst, rRegL src)
7791 %{
7792   match(Set dst (CastX2P src));
7793 
7794   format %{ "movq    $dst, $src\t# long->ptr" %}
7795   ins_encode(enc_copy_wide(dst, src));
7796   ins_pipe(ialu_reg_reg); // XXX
7797 %}
7798 
// Reinterpret a pointer as a long (CastP2X): a wide register copy.
7799 instruct castP2X(rRegL dst, rRegP src)
7800 %{
7801   match(Set dst (CastP2X src));
7802 
7803   format %{ "movq    $dst, $src\t# ptr -> long" %}
7804   ins_encode(enc_copy_wide(dst, src));
7805   ins_pipe(ialu_reg_reg); // XXX
7806 %}
7807 
7808 
7809 // Convert oop pointer into compressed form
// This variant is selected when the node's pointer type is not NotNull.
// src is copied into dst first (unless they already share a register) and
// the compression is then done in place on dst.  Flags are declared killed.
7810 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7811   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7812   match(Set dst (EncodeP src));
7813   effect(KILL cr);
7814   format %{ "encode_heap_oop $dst,$src" %}
7815   ins_encode %{
7816     Register s = $src$$Register;
7817     Register d = $dst$$Register;
7818     if (s != d) {
7819       __ movq(d, s);
7820     }
7821     __ encode_heap_oop(d);
7822   %}
7823   ins_pipe(ialu_reg_long);
7824 %}
7825 
// Compress an oop whose type is known to be NotNull; uses the two-register
// encode_heap_oop_not_null form directly.  Flags are declared killed.
7826 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7827   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7828   match(Set dst (EncodeP src));
7829   effect(KILL cr);
7830   format %{ "encode_heap_oop_not_null $dst,$src" %}
7831   ins_encode %{
7832     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7833   %}
7834   ins_pipe(ialu_reg_long);
7835 %}
7836 
// Decompress a narrow oop whose type is neither NotNull nor Constant.
// src is copied into dst first (unless they already share a register) and
// the decode is then done in place on dst.  Flags are declared killed.
7837 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7838   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7839             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7840   match(Set dst (DecodeN src));
7841   effect(KILL cr);
7842   format %{ "decode_heap_oop $dst,$src" %}
7843   ins_encode %{
7844     Register s = $src$$Register;
7845     Register d = $dst$$Register;
7846     if (s != d) {
7847       __ movq(d, s);
7848     }
7849     __ decode_heap_oop(d);
7850   %}
7851   ins_pipe(ialu_reg_long);
7852 %}
7853 
// Decompress a narrow oop whose type is NotNull or Constant.  Picks the
// two-register decode when src and dst differ, otherwise the in-place form.
// Unlike decodeHeapOop, no flags effect is declared here.
// NOTE(review): this relies on decode_heap_oop_not_null leaving the flags
// untouched -- confirm against the MacroAssembler implementation.
7854 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
7855   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7856             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7857   match(Set dst (DecodeN src));
7858   format %{ "decode_heap_oop_not_null $dst,$src" %}
7859   ins_encode %{
7860     Register s = $src$$Register;
7861     Register d = $dst$$Register;
7862     if (s != d) {
7863       __ decode_heap_oop_not_null(d, s);
7864     } else {
7865       __ decode_heap_oop_not_null(d);
7866     }
7867   %}
7868   ins_pipe(ialu_reg_long);
7869 %}
7870 
7871 
7872 //----------Conditional Move---------------------------------------------------
7873 // Jump
7874 // dummy instruction for generating temp registers
// Table jump on (switch_val << shift).  predicate(false) means this rule is
// never selected by the matcher.  dest is a TEMP register that, per the
// format string, holds the table base.
7875 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7876   match(Jump (LShiftL switch_val shift));
7877   ins_cost(350);
7878   predicate(false);
7879   effect(TEMP dest);
7880 
7881   format %{ "leaq    $dest, table_base\n\t"
7882             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7883   ins_encode(jump_enc_offset(switch_val, shift, dest));
7884   ins_pipe(pipe_jmp);
7885   ins_pc_relative(1);
7886 %}
7887 
// Table jump on (switch_val << shift) + offset.  dest is a TEMP register
// that, per the format string, holds the table base.
7888 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7889   match(Jump (AddL (LShiftL switch_val shift) offset));
7890   ins_cost(350);
7891   effect(TEMP dest);
7892 
7893   format %{ "leaq    $dest, table_base\n\t"
7894             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7895   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7896   ins_pipe(pipe_jmp);
7897   ins_pc_relative(1);
7898 %}
7899 
// Table jump indexed directly by switch_val.  dest is a TEMP register that,
// per the format string, holds the table base.
7900 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7901   match(Jump switch_val);
7902   ins_cost(350);
7903   effect(TEMP dest);
7904 
7905   format %{ "leaq    $dest, table_base\n\t"
7906             "jmp     [$dest + $switch_val]\n\t" %}
7907   ins_encode(jump_enc(switch_val, dest));
7908   ins_pipe(pipe_jmp);
7909   ins_pc_relative(1);
7910 %}
7911 
7912 // Conditional move
// Int register-to-register cmov driven by a signed compare (cmpOp/rFlagsReg).
// dst is both the fall-through value and the result; opcode base is 0F 40,
// with the condition supplied through enc_cmov(cop).
7913 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7914 %{
7915   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7916 
7917   ins_cost(200); // XXX
7918   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7919   opcode(0x0F, 0x40);
7920   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7921   ins_pipe(pipe_cmov_reg);
7922 %}
7923 
// Int register-to-register cmov driven by an unsigned compare
// (cmpOpU/rFlagsRegU).  Encoding is identical to the signed form.
7924 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7925   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7926 
7927   ins_cost(200); // XXX
7928   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7929   opcode(0x0F, 0x40);
7930   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7931   ins_pipe(pipe_cmov_reg);
7932 %}
7933 
// Int cmov for the cmpOpUCF/rFlagsRegUCF operand pair; has no encoding of
// its own and simply expands to cmovI_regU.
7934 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7935   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7936   ins_cost(200);
7937   expand %{
7938     cmovI_regU(cop, cr, dst, src);
7939   %}
7940 %}
7941 
7942 // Conditional move
// Int cmov with the source loaded from memory (folds the LoadI), signed
// compare.  Costed higher (250) than the register form (200).
7943 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7944   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7945 
7946   ins_cost(250); // XXX
7947   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7948   opcode(0x0F, 0x40);
7949   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7950   ins_pipe(pipe_cmov_mem);
7951 %}
7952 
7953 // Conditional move
// Int cmov with the source loaded from memory (folds the LoadI), unsigned
// compare.  Encoding is identical to the signed memory form.
7954 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7955 %{
7956   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7957 
7958   ins_cost(250); // XXX
7959   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7960   opcode(0x0F, 0x40);
7961   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7962   ins_pipe(pipe_cmov_mem);
7963 %}
7964 
// Int memory-source cmov for the cmpOpUCF/rFlagsRegUCF operand pair;
// expands to cmovI_memU.
7965 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7966   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7967   ins_cost(250);
7968   expand %{
7969     cmovI_memU(cop, cr, dst, src);
7970   %}
7971 %}
7972 
7973 // Conditional move
// Compressed-pointer (narrow oop) register cmov, signed compare.  Uses the
// same non-wide cmovl encoding as the int form.
7974 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7975 %{
7976   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7977 
7978   ins_cost(200); // XXX
7979   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7980   opcode(0x0F, 0x40);
7981   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7982   ins_pipe(pipe_cmov_reg);
7983 %}
7984 
7985 // Conditional move
// Compressed-pointer (narrow oop) register cmov, unsigned compare.
7986 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7987 %{
7988   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7989 
7990   ins_cost(200); // XXX
7991   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7992   opcode(0x0F, 0x40);
7993   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7994   ins_pipe(pipe_cmov_reg);
7995 %}
7996 
// Compressed-pointer cmov for the cmpOpUCF/rFlagsRegUCF operand pair;
// expands to cmovN_regU.
7997 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7998   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7999   ins_cost(200);
8000   expand %{
8001     cmovN_regU(cop, cr, dst, src);
8002   %}
8003 %}
8004 
8005 // Conditional move
// Pointer register cmov, signed compare; encoded through the wide REX
// helper (cmovq).
8006 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
8007 %{
8008   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8009 
8010   ins_cost(200); // XXX
8011   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
8012   opcode(0x0F, 0x40);
8013   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8014   ins_pipe(pipe_cmov_reg);  // XXX
8015 %}
8016 
8017 // Conditional move
// Pointer register cmov, unsigned compare; encoded through the wide REX
// helper (cmovq).
8018 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
8019 %{
8020   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8021 
8022   ins_cost(200); // XXX
8023   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
8024   opcode(0x0F, 0x40);
8025   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8026   ins_pipe(pipe_cmov_reg); // XXX
8027 %}
8028 
// Pointer cmov for the cmpOpUCF/rFlagsRegUCF operand pair; expands to
// cmovP_regU.
8029 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
8030   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8031   ins_cost(200);
8032   expand %{
8033     cmovP_regU(cop, cr, dst, src);
8034   %}
8035 %}
8036 
8037 // DISABLED: Requires the ADLC to emit a bottom_type call that
8038 // correctly meets the two pointer arguments; one is an incoming
8039 // register but the other is a memory operand.  ALSO appears to
8040 // be buggy with implicit null checks.
8041 //
8042 //// Conditional move
8043 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
8044 //%{
8045 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8046 //  ins_cost(250);
8047 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8048 //  opcode(0x0F,0x40);
8049 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8050 //  ins_pipe( pipe_cmov_mem );
8051 //%}
8052 //
8053 //// Conditional move
8054 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
8055 //%{
8056 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8057 //  ins_cost(250);
8058 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8059 //  opcode(0x0F,0x40);
8060 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8061 //  ins_pipe( pipe_cmov_mem );
8062 //%}
8063 
// Long register cmov, signed compare; encoded through the wide REX helper
// (cmovq).
8064 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
8065 %{
8066   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8067 
8068   ins_cost(200); // XXX
8069   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8070   opcode(0x0F, 0x40);
8071   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8072   ins_pipe(pipe_cmov_reg);  // XXX
8073 %}
8074 
// Long cmov with the source loaded from memory (folds the LoadL), signed
// compare.
// NOTE(review): cost is 200 here while the int memory forms use 250 --
// confirm whether the difference is intentional.
8075 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
8076 %{
8077   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8078 
8079   ins_cost(200); // XXX
8080   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8081   opcode(0x0F, 0x40);
8082   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8083   ins_pipe(pipe_cmov_mem);  // XXX
8084 %}
8085 
// Long register cmov, unsigned compare; encoded through the wide REX helper
// (cmovq).
8086 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
8087 %{
8088   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8089 
8090   ins_cost(200); // XXX
8091   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8092   opcode(0x0F, 0x40);
8093   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8094   ins_pipe(pipe_cmov_reg); // XXX
8095 %}
8096 
// Long cmov for the cmpOpUCF/rFlagsRegUCF operand pair; expands to
// cmovL_regU.
8097 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
8098   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8099   ins_cost(200);
8100   expand %{
8101     cmovL_regU(cop, cr, dst, src);
8102   %}
8103 %}
8104 
// Long cmov with the source loaded from memory (folds the LoadL), unsigned
// compare.
8105 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
8106 %{
8107   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8108 
8109   ins_cost(200); // XXX
8110   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8111   opcode(0x0F, 0x40);
8112   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8113   ins_pipe(pipe_cmov_mem); // XXX
8114 %}
8115 
// Long memory-source cmov for the cmpOpUCF/rFlagsRegUCF operand pair;
// expands to cmovL_memU.
8116 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
8117   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8118   ins_cost(200);
8119   expand %{
8120     cmovL_memU(cop, cr, dst, src);
8121   %}
8122 %}
8123 
// Float conditional move, signed compare.  As the format string shows, this
// is a branch on the negated condition around a movss rather than a cmov.
8124 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
8125 %{
8126   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8127 
8128   ins_cost(200); // XXX
8129   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8130             "movss     $dst, $src\n"
8131     "skip:" %}
8132   ins_encode(enc_cmovf_branch(cop, dst, src));
8133   ins_pipe(pipe_slow);
8134 %}
8135 
8136 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8137 // %{
8138 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8139 
8140 //   ins_cost(200); // XXX
8141 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8142 //             "movss     $dst, $src\n"
8143 //     "skip:" %}
8144 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8145 //   ins_pipe(pipe_slow);
8146 // %}
8147 
// Float conditional move, unsigned compare.  Same branch-around-movss
// encoding (enc_cmovf_branch) as the signed form.
8148 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
8149 %{
8150   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8151 
8152   ins_cost(200); // XXX
8153   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8154             "movss     $dst, $src\n"
8155     "skip:" %}
8156   ins_encode(enc_cmovf_branch(cop, dst, src));
8157   ins_pipe(pipe_slow);
8158 %}
8159 
// Float conditional move for the cmpOpUCF/rFlagsRegUCF operand pair;
// expands to cmovF_regU.
8160 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
8161   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8162   ins_cost(200);
8163   expand %{
8164     cmovF_regU(cop, cr, dst, src);
8165   %}
8166 %}
8167 
// Double conditional move, signed compare.  As the format string shows, this
// is a branch on the negated condition around a movsd rather than a cmov.
8168 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
8169 %{
8170   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8171 
8172   ins_cost(200); // XXX
8173   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8174             "movsd     $dst, $src\n"
8175     "skip:" %}
8176   ins_encode(enc_cmovd_branch(cop, dst, src));
8177   ins_pipe(pipe_slow);
8178 %}
8179 
// Double conditional move, unsigned compare.  Same branch-around-movsd
// encoding (enc_cmovd_branch) as the signed form.
8180 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
8181 %{
8182   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8183 
8184   ins_cost(200); // XXX
8185   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8186             "movsd     $dst, $src\n"
8187     "skip:" %}
8188   ins_encode(enc_cmovd_branch(cop, dst, src));
8189   ins_pipe(pipe_slow);
8190 %}
8191 
// Double conditional move for the cmpOpUCF/rFlagsRegUCF operand pair;
// expands to cmovD_regU.
8192 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
8193   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8194   ins_cost(200);
8195   expand %{
8196     cmovD_regU(cop, cr, dst, src);
8197   %}
8198 %}
8199 
8200 //----------Arithmetic Instructions--------------------------------------------
8201 //----------Addition Instructions----------------------------------------------
8202 
// Int add, register += register (addl, opcode 0x03).  Two-address form:
// dst is both an input and the result.  Flags are declared killed.
8203 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8204 %{
8205   match(Set dst (AddI dst src));
8206   effect(KILL cr);
8207 
8208   format %{ "addl    $dst, $src\t# int" %}
8209   opcode(0x03);
8210   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8211   ins_pipe(ialu_reg_reg);
8212 %}
8213 
// Int add, register += immediate (addl, opcode 0x81 /0).  The immediate is
// emitted by Con8or32, i.e. as either an 8-bit or a 32-bit constant.
// Flags are declared killed.
8214 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8215 %{
8216   match(Set dst (AddI dst src));
8217   effect(KILL cr);
8218 
8219   format %{ "addl    $dst, $src\t# int" %}
8220   opcode(0x81, 0x00); /* /0 id */
8221   ins_encode(OpcSErm(dst, src), Con8or32(src));
8222   ins_pipe( ialu_reg );
8223 %}
8224 
// Int add, register += memory operand (folds the LoadI into addl, opcode
// 0x03).  Flags are declared killed.
8225 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8226 %{
8227   match(Set dst (AddI dst (LoadI src)));
8228   effect(KILL cr);
8229 
8230   ins_cost(125); // XXX
8231   format %{ "addl    $dst, $src\t# int" %}
8232   opcode(0x03);
8233   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8234   ins_pipe(ialu_reg_mem);
8235 %}
8236 
// Int read-modify-write add, memory += register: matches a load/add/store
// of the same address and emits a single addl (opcode 0x01).
// Flags are declared killed.
8237 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8238 %{
8239   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8240   effect(KILL cr);
8241 
8242   ins_cost(150); // XXX
8243   format %{ "addl    $dst, $src\t# int" %}
8244   opcode(0x01); /* Opcode 01 /r */
8245   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8246   ins_pipe(ialu_mem_reg);
8247 %}
8248 
// Int read-modify-write add, memory += immediate (opcode 0x81 /0, with the
// immediate emitted by Con8or32).  Flags are declared killed.
8249 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
8250 %{
8251   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8252   effect(KILL cr);
8253 
8254   ins_cost(125); // XXX
8255   format %{ "addl    $dst, $src\t# int" %}
8256   opcode(0x81); /* Opcode 81 /0 id */
8257   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8258   ins_pipe(ialu_mem_imm);
8259 %}
8260 
// Int increment of a register: matches AddI with the immI1 operand and emits
// incl (FF /0).  Only selectable when UseIncDec is set.  Flags are killed.
8261 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8262 %{
8263   predicate(UseIncDec);
8264   match(Set dst (AddI dst src));
8265   effect(KILL cr);
8266 
8267   format %{ "incl    $dst\t# int" %}
8268   opcode(0xFF, 0x00); // FF /0
8269   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8270   ins_pipe(ialu_reg);
8271 %}
8272 
// Int increment of a memory location: matches load/AddI(immI1)/store on the
// same address and emits incl (FF /0).  Only selectable when UseIncDec is
// set.  Flags are declared killed.
8273 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8274 %{
8275   predicate(UseIncDec);
8276   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8277   effect(KILL cr);
8278 
8279   ins_cost(125); // XXX
8280   format %{ "incl    $dst\t# int" %}
8281   opcode(0xFF); /* Opcode FF /0 */
8282   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8283   ins_pipe(ialu_mem_imm);
8284 %}
8285 
8286 // XXX why does that use AddI
// Int decrement of a register: matches AddI with the immI_M1 operand (the
// rule is written as an add of that constant, not as a SubI) and emits decl
// (FF /1).  Only selectable when UseIncDec is set.  Flags are killed.
8287 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8288 %{
8289   predicate(UseIncDec);
8290   match(Set dst (AddI dst src));
8291   effect(KILL cr);
8292 
8293   format %{ "decl    $dst\t# int" %}
8294   opcode(0xFF, 0x01); // FF /1
8295   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8296   ins_pipe(ialu_reg);
8297 %}
8298 
8299 // XXX why does that use AddI
// Int decrement of a memory location: matches load/AddI(immI_M1)/store on
// the same address and emits decl (FF /1).  Only selectable when UseIncDec
// is set.  Flags are declared killed.
8300 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8301 %{
8302   predicate(UseIncDec);
8303   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8304   effect(KILL cr);
8305 
8306   ins_cost(125); // XXX
8307   format %{ "decl    $dst\t# int" %}
8308   opcode(0xFF); /* Opcode FF /1 */
8309   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8310   ins_pipe(ialu_mem_imm);
8311 %}
8312 
// Three-operand int add of register + immediate via lea: dst is separate
// from src0 and no flags effect is declared.  A 0x67 byte is emitted ahead
// of the REX prefix (shown as "addr32" in the format string).
8313 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8314 %{
8315   match(Set dst (AddI src0 src1));
8316 
8317   ins_cost(110);
8318   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8319   opcode(0x8D); /* 0x8D /r */
8320   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8321   ins_pipe(ialu_reg_reg);
8322 %}
8323 
// Long add, register += register (addq: opcode 0x03 through the wide REX
// helper).  Flags are declared killed.
8324 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8325 %{
8326   match(Set dst (AddL dst src));
8327   effect(KILL cr);
8328 
8329   format %{ "addq    $dst, $src\t# long" %}
8330   opcode(0x03);
8331   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8332   ins_pipe(ialu_reg_reg);
8333 %}
8334 
// Long add, register += immL32 immediate (addq, opcode 0x81 /0; immediate
// emitted by Con8or32).  Flags are declared killed.
8335 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8336 %{
8337   match(Set dst (AddL dst src));
8338   effect(KILL cr);
8339 
8340   format %{ "addq    $dst, $src\t# long" %}
8341   opcode(0x81, 0x00); /* /0 id */
8342   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8343   ins_pipe( ialu_reg );
8344 %}
8345 
// Long add, register += memory operand (folds the LoadL into addq).
// Flags are declared killed.
8346 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8347 %{
8348   match(Set dst (AddL dst (LoadL src)));
8349   effect(KILL cr);
8350 
8351   ins_cost(125); // XXX
8352   format %{ "addq    $dst, $src\t# long" %}
8353   opcode(0x03);
8354   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8355   ins_pipe(ialu_reg_mem);
8356 %}
8357 
// Long read-modify-write add, memory += register: matches load/add/store of
// the same address and emits a single addq (opcode 0x01).
// Flags are declared killed.
8358 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8359 %{
8360   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8361   effect(KILL cr);
8362 
8363   ins_cost(150); // XXX
8364   format %{ "addq    $dst, $src\t# long" %}
8365   opcode(0x01); /* Opcode 01 /r */
8366   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8367   ins_pipe(ialu_mem_reg);
8368 %}
8369 
// Long read-modify-write add, memory += immL32 immediate (opcode 0x81 /0,
// immediate emitted by Con8or32).  Flags are declared killed.
8370 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8371 %{
8372   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8373   effect(KILL cr);
8374 
8375   ins_cost(125); // XXX
8376   format %{ "addq    $dst, $src\t# long" %}
8377   opcode(0x81); /* Opcode 81 /0 id */
8378   ins_encode(REX_mem_wide(dst),
8379              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8380   ins_pipe(ialu_mem_imm);
8381 %}
8382 
// Long increment of a register: matches AddL with the immL1 operand and
// emits incq (FF /0 through the wide REX helper).  Only selectable when
// UseIncDec is set.  Flags are declared killed.
// dst is a long register operand, matching the AddL node it produces and the
// sibling long rules (decL_rReg, addL_rReg*).
8383 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8384 %{
8385   predicate(UseIncDec);
8386   match(Set dst (AddL dst src));
8387   effect(KILL cr);
8388 
8389   format %{ "incq    $dst\t# long" %}
8390   opcode(0xFF, 0x00); // FF /0
8391   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8392   ins_pipe(ialu_reg);
8393 %}
8394 
// Long increment of a memory location: matches load/AddL(immL1)/store on the
// same address and emits incq (FF /0).  Only selectable when UseIncDec is
// set.  Flags are declared killed.
8395 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8396 %{
8397   predicate(UseIncDec);
8398   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8399   effect(KILL cr);
8400 
8401   ins_cost(125); // XXX
8402   format %{ "incq    $dst\t# long" %}
8403   opcode(0xFF); /* Opcode FF /0 */
8404   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8405   ins_pipe(ialu_mem_imm);
8406 %}
8407 
8408 // XXX why does that use AddL
// Long decrement of a register: matches AddL with the immL_M1 operand (the
// rule is written as an add of that constant, not as a SubL) and emits decq
// (FF /1).  Only selectable when UseIncDec is set.  Flags are killed.
8409 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8410 %{
8411   predicate(UseIncDec);
8412   match(Set dst (AddL dst src));
8413   effect(KILL cr);
8414 
8415   format %{ "decq    $dst\t# long" %}
8416   opcode(0xFF, 0x01); // FF /1
8417   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8418   ins_pipe(ialu_reg);
8419 %}
8420 
8421 // XXX why does that use AddL
// Long decrement of a memory location: matches load/AddL(immL_M1)/store on
// the same address and emits decq (FF /1).  Only selectable when UseIncDec
// is set.  Flags are declared killed.
8422 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8423 %{
8424   predicate(UseIncDec);
8425   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8426   effect(KILL cr);
8427 
8428   ins_cost(125); // XXX
8429   format %{ "decq    $dst\t# long" %}
8430   opcode(0xFF); /* Opcode FF /1 */
8431   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8432   ins_pipe(ialu_mem_imm);
8433 %}
8434 
// Three-operand long add of register + immL32 via leaq: dst is separate
// from src0 and no flags effect is declared.
8435 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8436 %{
8437   match(Set dst (AddL src0 src1));
8438 
8439   ins_cost(110);
8440   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8441   opcode(0x8D); /* 0x8D /r */
8442   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8443   ins_pipe(ialu_reg_reg);
8444 %}
8445 
// Pointer add, pointer register += long register (AddP -> addq).
// Flags are declared killed.
8446 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8447 %{
8448   match(Set dst (AddP dst src));
8449   effect(KILL cr);
8450 
8451   format %{ "addq    $dst, $src\t# ptr" %}
8452   opcode(0x03);
8453   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8454   ins_pipe(ialu_reg_reg);
8455 %}
8456 
// Pointer add, pointer register += immL32 immediate (addq, opcode 0x81 /0;
// immediate emitted by Con8or32).  Flags are declared killed.
8457 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8458 %{
8459   match(Set dst (AddP dst src));
8460   effect(KILL cr);
8461 
8462   format %{ "addq    $dst, $src\t# ptr" %}
8463   opcode(0x81, 0x00); /* /0 id */
8464   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8465   ins_pipe( ialu_reg );
8466 %}
8467 
8468 // XXX addP mem ops ????
8469 
// Three-operand pointer add of register + immL32 via leaq: dst is separate
// from src0 and no flags effect is declared.
8470 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8471 %{
8472   match(Set dst (AddP src0 src1));
8473 
8474   ins_cost(110);
8475   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8476   opcode(0x8D); /* 0x8D /r */
8477   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8478   ins_pipe(ialu_reg_reg);
8479 %}
8480 
// CheckCastPP on a pointer register: matched in place on dst and emits no
// code (size 0, empty encoding).
8481 instruct checkCastPP(rRegP dst)
8482 %{
8483   match(Set dst (CheckCastPP dst));
8484 
8485   size(0);
8486   format %{ "# checkcastPP of $dst" %}
8487   ins_encode(/* empty encoding */);
8488   ins_pipe(empty);
8489 %}
8490 
// CastPP on a pointer register: matched in place on dst and emits no code
// (size 0, empty encoding).
8491 instruct castPP(rRegP dst)
8492 %{
8493   match(Set dst (CastPP dst));
8494 
8495   size(0);
8496   format %{ "# castPP of $dst" %}
8497   ins_encode(/* empty encoding */);
8498   ins_pipe(empty);
8499 %}
8500 
// CastII on an int register: matched in place on dst, zero cost, and emits
// no code (size 0, empty encoding).
8501 instruct castII(rRegI dst)
8502 %{
8503   match(Set dst (CastII dst));
8504 
8505   size(0);
8506   format %{ "# castII of $dst" %}
8507   ins_encode(/* empty encoding */);
8508   ins_cost(0);
8509   ins_pipe(empty);
8510 %}
8511 
8512 // LoadP-locked same as a regular LoadP when used with compare-swap
// Encoded as a plain 64-bit load (movq, opcode 0x8B, wide REX helper).
8513 instruct loadPLocked(rRegP dst, memory mem)
8514 %{
8515   match(Set dst (LoadPLocked mem));
8516 
8517   ins_cost(125); // XXX
8518   format %{ "movq    $dst, $mem\t# ptr locked" %}
8519   opcode(0x8B);
8520   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8521   ins_pipe(ialu_reg_mem); // XXX
8522 %}
8523 
8524 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Encoded as a plain 64-bit load (movq, opcode 0x8B, wide REX helper).
8525 instruct loadLLocked(rRegL dst, memory mem)
8526 %{
8527   match(Set dst (LoadLLocked mem));
8528 
8529   ins_cost(125); // XXX
8530   format %{ "movq    $dst, $mem\t# long locked" %}
8531   opcode(0x8B);
8532   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8533   ins_pipe(ialu_reg_mem); // XXX
8534 %}
8535 
8536 // Conditional-store of the updated heap-top.
8537 // Used during allocation of the shared heap.
8538 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8539 
// Conditional pointer store producing flags: a lock-prefixed wide cmpxchg
// (0F B1) of newval against heap_top_ptr, with oldval pinned to rax.
// NOTE(review): unlike storeIConditional/storeLConditional, no
// effect(KILL oldval) is declared here -- confirm that callers never rely on
// rax after a failed exchange.
8540 instruct storePConditional(memory heap_top_ptr,
8541                            rax_RegP oldval, rRegP newval,
8542                            rFlagsReg cr)
8543 %{
8544   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8545  
8546   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8547             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8548   opcode(0x0F, 0xB1);
8549   ins_encode(lock_prefix,
8550              REX_reg_mem_wide(newval, heap_top_ptr),
8551              OpcP, OpcS,
8552              reg_mem(newval, heap_top_ptr));
8553   ins_pipe(pipe_cmpxchg);
8554 %}
8555 
8556 // Conditional-store of an int value.
8557 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register.  oldval is pinned to rax and
// declared killed; the encoding is a lock-prefixed 32-bit cmpxchg (0F B1).
8558 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8559 %{
8560   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8561   effect(KILL oldval);
8562 
8563   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8564   opcode(0x0F, 0xB1);
8565   ins_encode(lock_prefix,
8566              REX_reg_mem(newval, mem),
8567              OpcP, OpcS,
8568              reg_mem(newval, mem));
8569   ins_pipe(pipe_cmpxchg);
8570 %}
8571 
8572 // Conditional-store of a long value.
8573 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register.  oldval is pinned to rax and
// declared killed; the encoding is a lock-prefixed wide cmpxchg (0F B1).
8574 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8575 %{
8576   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8577   effect(KILL oldval);
8578 
8579   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8580   opcode(0x0F, 0xB1);
8581   ins_encode(lock_prefix,
8582              REX_reg_mem_wide(newval, mem),
8583              OpcP, OpcS,
8584              reg_mem(newval, mem));
8585   ins_pipe(pipe_cmpxchg);
8586 %}
8587 
8588 
8589 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
     // Pointer compare-and-swap that yields an int in res: a LOCKed cmpxchgq
     // against rax (oldval), followed by sete/movzbl which leave the outcome as
     // a zero-extended byte in res.  Flags and oldval are declared killed.
8590 instruct compareAndSwapP(rRegI res,
8591                          memory mem_ptr,
8592                          rax_RegP oldval, rRegP newval,
8593                          rFlagsReg cr)
8594 %{
8595   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8596   effect(KILL cr, KILL oldval);
8597 
8598   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8599             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8600             "sete    $res\n\t"
8601             "movzbl  $res, $res" %}
8602   opcode(0x0F, 0xB1);
8603   ins_encode(lock_prefix,
8604              REX_reg_mem_wide(newval, mem_ptr),
8605              OpcP, OpcS,
8606              reg_mem(newval, mem_ptr),
8607              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8608              REX_reg_breg(res, res), // movzbl
8609              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8610   ins_pipe( pipe_cmpxchg );
8611 %}
8612 
8613 instruct compareAndSwapL(rRegI res,
8614                          memory mem_ptr,
8615                          rax_RegL oldval, rRegL newval,
8616                          rFlagsReg cr)
8617 %{
       // Long compare-and-swap yielding an int in res: LOCKed cmpxchgq against
       // rax (oldval), then sete/movzbl leave the outcome zero-extended in res.
       // Flags and oldval are declared killed.
8618   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8619   effect(KILL cr, KILL oldval);
8620 
8621   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8622             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8623             "sete    $res\n\t"
8624             "movzbl  $res, $res" %}
8625   opcode(0x0F, 0xB1);
8626   ins_encode(lock_prefix,
8627              REX_reg_mem_wide(newval, mem_ptr),
8628              OpcP, OpcS,
8629              reg_mem(newval, mem_ptr),
8630              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8631              REX_reg_breg(res, res), // movzbl
8632              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8633   ins_pipe( pipe_cmpxchg );
8634 %}
8635 
8636 instruct compareAndSwapI(rRegI res,
8637                          memory mem_ptr,
8638                          rax_RegI oldval, rRegI newval,
8639                          rFlagsReg cr)
8640 %{
       // Int compare-and-swap yielding an int in res: LOCKed cmpxchgl (non-wide
       // REX) against rax (oldval), then sete/movzbl leave the outcome
       // zero-extended in res.  Flags and oldval are declared killed.
8641   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8642   effect(KILL cr, KILL oldval);
8643 
8644   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8645             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8646             "sete    $res\n\t"
8647             "movzbl  $res, $res" %}
8648   opcode(0x0F, 0xB1);
8649   ins_encode(lock_prefix,
8650              REX_reg_mem(newval, mem_ptr),
8651              OpcP, OpcS,
8652              reg_mem(newval, mem_ptr),
8653              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8654              REX_reg_breg(res, res), // movzbl
8655              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8656   ins_pipe( pipe_cmpxchg );
8657 %}
8658 
8659 
8660 instruct compareAndSwapN(rRegI res,
8661                           memory mem_ptr,
8662                           rax_RegN oldval, rRegN newval,
8663                           rFlagsReg cr) %{
       // Compare-and-swap on rRegN operands, using the 32-bit cmpxchgl form
       // (non-wide REX).  As in the other CompareAndSwap rules, sete/movzbl
       // leave the outcome zero-extended in res; flags and oldval are killed.
8664   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8665   effect(KILL cr, KILL oldval);
8666 
8667   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8668             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8669             "sete    $res\n\t"
8670             "movzbl  $res, $res" %}
8671   opcode(0x0F, 0xB1);
8672   ins_encode(lock_prefix,
8673              REX_reg_mem(newval, mem_ptr),
8674              OpcP, OpcS,
8675              reg_mem(newval, mem_ptr),
8676              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8677              REX_reg_breg(res, res), // movzbl
8678              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8679   ins_pipe( pipe_cmpxchg );
8680 %}
8681 
8682 //----------Subtraction Instructions-------------------------------------------
8683 
8684 // Integer Subtraction Instructions
8685 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8686 %{
       // dst = dst - src with both operands in int registers (SubI).
       // Two-address form: dst is both input and result; flags are killed.
8687   match(Set dst (SubI dst src));
8688   effect(KILL cr);
8689 
8690   format %{ "subl    $dst, $src\t# int" %}
8691   opcode(0x2B);
8692   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8693   ins_pipe(ialu_reg_reg);
8694 %}
8695 
8696 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8697 %{
       // dst = dst - src where src is an int immediate (SubI); flags are killed.
       // Con8or32 presumably picks an 8- or 32-bit immediate encoding -- the
       // enc_class is defined outside this section.
8698   match(Set dst (SubI dst src));
8699   effect(KILL cr);
8700 
8701   format %{ "subl    $dst, $src\t# int" %}
8702   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8703   ins_encode(OpcSErm(dst, src), Con8or32(src));
8704   ins_pipe(ialu_reg);
8705 %}
8706 
8707 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8708 %{
       // dst = dst - LoadI(src): folds the int load into the subtract.
       // Flags are killed; cost 125.
8709   match(Set dst (SubI dst (LoadI src)));
8710   effect(KILL cr);
8711 
8712   ins_cost(125);
8713   format %{ "subl    $dst, $src\t# int" %}
8714   opcode(0x2B);
8715   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8716   ins_pipe(ialu_reg_mem);
8717 %}
8718 
8719 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8720 %{
       // Read-modify-write on memory: StoreI to dst of (LoadI dst) - src, with
       // src in a register.  Note the encoding passes (src, dst): the register
       // is the reg field and the memory operand is the r/m field.
8721   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8722   effect(KILL cr);
8723 
8724   ins_cost(150);
8725   format %{ "subl    $dst, $src\t# int" %}
8726   opcode(0x29); /* Opcode 29 /r */
8727   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8728   ins_pipe(ialu_mem_reg);
8729 %}
8730 
8731 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8732 %{
       // Read-modify-write on memory: StoreI to dst of (LoadI dst) - immediate.
       // The /5 opcode extension is supplied explicitly to RM_opc_mem.
8733   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8734   effect(KILL cr);
8735 
8736   ins_cost(125); // XXX
8737   format %{ "subl    $dst, $src\t# int" %}
8738   opcode(0x81); /* Opcode 81 /5 id */
8739   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8740   ins_pipe(ialu_mem_imm);
8741 %}
8742 
8743 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8744 %{
       // dst = dst - src with both operands in long registers (SubL).
       // Same opcode as subI_rReg but with the wide REX prefix; flags killed.
8745   match(Set dst (SubL dst src));
8746   effect(KILL cr);
8747 
8748   format %{ "subq    $dst, $src\t# long" %}
8749   opcode(0x2B);
8750   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8751   ins_pipe(ialu_reg_reg);
8752 %}
8753 
8754 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8755 %{
       // dst = dst - src where src is a long immediate that fits in 32 bits
       // (immL32); flags are killed.  dst is a long register (rRegL) to agree
       // with the SubL match and the wide subq encoding used below.
8756   match(Set dst (SubL dst src));
8757   effect(KILL cr);
8758 
8759   format %{ "subq    $dst, $src\t# long" %}
8760   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8761   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8762   ins_pipe(ialu_reg);
8763 %}
8764 
8765 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8766 %{
       // dst = dst - LoadL(src): folds the long load into the subtract.
       // Wide REX prefix; flags are killed; cost 125.
8767   match(Set dst (SubL dst (LoadL src)));
8768   effect(KILL cr);
8769 
8770   ins_cost(125);
8771   format %{ "subq    $dst, $src\t# long" %}
8772   opcode(0x2B);
8773   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8774   ins_pipe(ialu_reg_mem);
8775 %}
8776 
8777 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8778 %{
       // Read-modify-write on memory: StoreL to dst of (LoadL dst) - src, with
       // src in a long register.  Encoding passes (src, dst), wide REX.
8779   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8780   effect(KILL cr);
8781 
8782   ins_cost(150);
8783   format %{ "subq    $dst, $src\t# long" %}
8784   opcode(0x29); /* Opcode 29 /r */
8785   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8786   ins_pipe(ialu_mem_reg);
8787 %}
8788 
8789 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8790 %{
       // Read-modify-write on memory: StoreL to dst of (LoadL dst) - immediate,
       // where the immediate is an immL32.  Wide REX; /5 extension passed
       // explicitly to RM_opc_mem.
8791   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8792   effect(KILL cr);
8793 
8794   ins_cost(125); // XXX
8795   format %{ "subq    $dst, $src\t# long" %}
8796   opcode(0x81); /* Opcode 81 /5 id */
8797   ins_encode(REX_mem_wide(dst),
8798              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8799   ins_pipe(ialu_mem_imm);
8800 %}
8801 
8802 // Subtract from a pointer
8803 // XXX hmpf???
     // Matches AddP dst (SubI zero src), i.e. a pointer plus a negated int, and
     // emits it as a single subq.  Flags are killed.
     // NOTE(review): src is an int register (rRegI) but the encoding is the
     // wide 64-bit form -- confirm what the upper half of src is assumed to hold.
8804 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8805 %{
8806   match(Set dst (AddP dst (SubI zero src)));
8807   effect(KILL cr);
8808 
8809   format %{ "subq    $dst, $src\t# ptr - int" %}
8810   opcode(0x2B);
8811   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8812   ins_pipe(ialu_reg_reg);
8813 %}
8814 
8815 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8816 %{
       // dst = 0 - dst (SubI zero dst), emitted as a single negl on the
       // register.  The zero operand exists only for matching; flags killed.
8817   match(Set dst (SubI zero dst));
8818   effect(KILL cr);
8819 
8820   format %{ "negl    $dst\t# int" %}
8821   opcode(0xF7, 0x03);  // Opcode F7 /3
8822   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8823   ins_pipe(ialu_reg);
8824 %}
8825 
8826 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8827 %{
       // In-place negate of an int in memory: StoreI dst (0 - LoadI dst).
       // The /3 extension comes from the secondary opcode value.
       // NOTE(review): ins_pipe is ialu_reg although the operand is memory;
       // other memory read-modify-write rules use ialu_mem_* -- confirm.
8828   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8829   effect(KILL cr);
8830 
8831   format %{ "negl    $dst\t# int" %}
8832   opcode(0xF7, 0x03);  // Opcode F7 /3
8833   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8834   ins_pipe(ialu_reg);
8835 %}
8836 
8837 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8838 %{
       // dst = 0 - dst (SubL zero dst), emitted as a single negq (wide REX).
       // The zero operand exists only for matching; flags are killed.
8839   match(Set dst (SubL zero dst));
8840   effect(KILL cr);
8841 
8842   format %{ "negq    $dst\t# long" %}
8843   opcode(0xF7, 0x03);  // Opcode F7 /3
8844   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8845   ins_pipe(ialu_reg);
8846 %}
8847 
8848 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8849 %{
       // In-place negate of a long in memory: StoreL dst (0 - LoadL dst).
       // NOTE(review): ins_pipe is ialu_reg although the operand is memory;
       // other memory read-modify-write rules use ialu_mem_* -- confirm.
8850   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8851   effect(KILL cr);
8852 
8853   format %{ "negq    $dst\t# long" %}
8854   opcode(0xF7, 0x03);  // Opcode F7 /3
8855   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8856   ins_pipe(ialu_reg);
8857 %}
8858 
8859 
8860 //----------Multiplication/Division Instructions-------------------------------
8861 // Integer Multiplication Instructions
8862 // Multiply Register
8863 
8864 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8865 %{
       // dst = dst * src on int registers (MulI), two-operand imull.
       // Flags are killed; cost 300.
8866   match(Set dst (MulI dst src));
8867   effect(KILL cr);
8868 
8869   ins_cost(300);
8870   format %{ "imull   $dst, $src\t# int" %}
8871   opcode(0x0F, 0xAF);
8872   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8873   ins_pipe(ialu_reg_reg_alu0);
8874 %}
8875 
8876 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8877 %{
       // dst = src * imm (MulI): three-operand imull, so dst is a pure result
       // and need not be the same register as src.  Flags are killed.
8878   match(Set dst (MulI src imm));
8879   effect(KILL cr);
8880 
8881   ins_cost(300);
8882   format %{ "imull   $dst, $src, $imm\t# int" %}
8883   opcode(0x69); /* 69 /r id */
8884   ins_encode(REX_reg_reg(dst, src),
8885              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8886   ins_pipe(ialu_reg_reg_alu0);
8887 %}
8888 
8889 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8890 %{
       // dst = dst * LoadI(src): folds the int load into imull.
       // Flags are killed; cost 350.
8891   match(Set dst (MulI dst (LoadI src)));
8892   effect(KILL cr);
8893 
8894   ins_cost(350);
8895   format %{ "imull   $dst, $src\t# int" %}
8896   opcode(0x0F, 0xAF);
8897   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8898   ins_pipe(ialu_reg_mem_alu0);
8899 %}
8900 
8901 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8902 %{
       // dst = LoadI(src) * imm: three-operand imull with a memory source.
       // Flags are killed.
8903   match(Set dst (MulI (LoadI src) imm));
8904   effect(KILL cr);
8905 
8906   ins_cost(300);
8907   format %{ "imull   $dst, $src, $imm\t# int" %}
8908   opcode(0x69); /* 69 /r id */
8909   ins_encode(REX_reg_mem(dst, src),
8910              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8911   ins_pipe(ialu_reg_mem_alu0);
8912 %}
8913 
8914 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8915 %{
       // dst = dst * src on long registers (MulL), two-operand imulq with the
       // wide REX prefix.  Flags are killed; cost 300.
8916   match(Set dst (MulL dst src));
8917   effect(KILL cr);
8918 
8919   ins_cost(300);
8920   format %{ "imulq   $dst, $src\t# long" %}
8921   opcode(0x0F, 0xAF);
8922   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8923   ins_pipe(ialu_reg_reg_alu0);
8924 %}
8925 
8926 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8927 %{
       // dst = src * imm (MulL) with an immL32 constant: three-operand imulq,
       // so dst need not be the same register as src.  Flags are killed.
8928   match(Set dst (MulL src imm));
8929   effect(KILL cr);
8930 
8931   ins_cost(300);
8932   format %{ "imulq   $dst, $src, $imm\t# long" %}
8933   opcode(0x69); /* 69 /r id */
8934   ins_encode(REX_reg_reg_wide(dst, src),
8935              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8936   ins_pipe(ialu_reg_reg_alu0);
8937 %}
8938 
8939 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8940 %{
       // dst = dst * LoadL(src): folds the long load into imulq (wide REX).
       // Flags are killed; cost 350.
8941   match(Set dst (MulL dst (LoadL src)));
8942   effect(KILL cr);
8943 
8944   ins_cost(350);
8945   format %{ "imulq   $dst, $src\t# long" %}
8946   opcode(0x0F, 0xAF);
8947   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8948   ins_pipe(ialu_reg_mem_alu0);
8949 %}
8950 
8951 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8952 %{
       // dst = LoadL(src) * imm with an immL32 constant: three-operand imulq
       // with a memory source (wide REX).  Flags are killed.
8953   match(Set dst (MulL (LoadL src) imm));
8954   effect(KILL cr);
8955 
8956   ins_cost(300);
8957   format %{ "imulq   $dst, $src, $imm\t# long" %}
8958   opcode(0x69); /* 69 /r id */
8959   ins_encode(REX_reg_mem_wide(dst, src),
8960              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8961   ins_pipe(ialu_reg_mem_alu0);
8962 %}
8963 
8964 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8965 %{
       // MulHiL: one-operand imulq by src with rax as the implicit multiplicand
       // (see format).  The result operand dst is pinned to rdx; rax is used
       // and then clobbered (USE_KILL), so src is restricted to non-rax.
8966   match(Set dst (MulHiL src rax));
8967   effect(USE_KILL rax, KILL cr);
8968 
8969   ins_cost(300);
8970   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8971   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8972   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8973   ins_pipe(ialu_reg_reg_alu0);
8974 %}
8975 
8976 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8977                    rFlagsReg cr)
8978 %{
       // Int division: dividend and quotient live in rax (Set rax); rdx and the
       // flags are killed; the divisor may not be rax or rdx.
       // The format shows a guard that skips idivl when rax == 0x80000000 and
       // div == -1; that sequence is presumably emitted by cdql_enc, whose
       // definition is outside this section.
8979   match(Set rax (DivI rax div));
8980   effect(KILL rdx, KILL cr);
8981 
8982   ins_cost(30*100+10*100); // XXX
8983   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8984             "jne,s   normal\n\t"
8985             "xorl    rdx, rdx\n\t"
8986             "cmpl    $div, -1\n\t"
8987             "je,s    done\n"
8988     "normal: cdql\n\t"
8989             "idivl   $div\n"
8990     "done:"        %}
8991   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8992   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8993   ins_pipe(ialu_reg_reg_alu0);
8994 %}
8995 
8996 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8997                    rFlagsReg cr)
8998 %{
       // Long division: dividend and quotient live in rax (Set rax); rdx and
       // the flags are killed; the divisor may not be rax or rdx.
       // The format shows a guard that skips idivq when rax equals
       // 0x8000000000000000 and div == -1; presumably emitted by cdqq_enc,
       // whose definition is outside this section.
8999   match(Set rax (DivL rax div));
9000   effect(KILL rdx, KILL cr);
9001 
9002   ins_cost(30*100+10*100); // XXX
9003   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9004             "cmpq    rax, rdx\n\t"
9005             "jne,s   normal\n\t"
9006             "xorl    rdx, rdx\n\t"
9007             "cmpq    $div, -1\n\t"
9008             "je,s    done\n"
9009     "normal: cdqq\n\t"
9010             "idivq   $div\n"
9011     "done:"        %}
9012   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9013   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9014   ins_pipe(ialu_reg_reg_alu0);
9015 %}
9016 
9017 // Integer DIVMOD with Register, both quotient and mod results
     // The match has no Set and neither rax nor rdx is killed: only the flags
     // are.  Format and encoding are the same as divI_rReg; the pipe class is
     // pipe_slow.
9018 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9019                              rFlagsReg cr)
9020 %{
9021   match(DivModI rax div);
9022   effect(KILL cr);
9023 
9024   ins_cost(30*100+10*100); // XXX
9025   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9026             "jne,s   normal\n\t"
9027             "xorl    rdx, rdx\n\t"
9028             "cmpl    $div, -1\n\t"
9029             "je,s    done\n"
9030     "normal: cdql\n\t"
9031             "idivl   $div\n"
9032     "done:"        %}
9033   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9034   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9035   ins_pipe(pipe_slow);
9036 %}
9037 
9038 // Long DIVMOD with Register, both quotient and mod results
     // The match has no Set and neither rax nor rdx is killed: only the flags
     // are.  Format and encoding are the same as divL_rReg; the pipe class is
     // pipe_slow.
9039 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9040                              rFlagsReg cr)
9041 %{
9042   match(DivModL rax div);
9043   effect(KILL cr);
9044 
9045   ins_cost(30*100+10*100); // XXX
9046   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9047             "cmpq    rax, rdx\n\t"
9048             "jne,s   normal\n\t"
9049             "xorl    rdx, rdx\n\t"
9050             "cmpq    $div, -1\n\t"
9051             "je,s    done\n"
9052     "normal: cdqq\n\t"
9053             "idivq   $div\n"
9054     "done:"        %}
9055   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9056   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9057   ins_pipe(pipe_slow);
9058 %}
9059 
9060 //----------- DivL-By-Constant-Expansions--------------------------------------
9061 // DivI cases are handled by the compiler
9062 
9063 // Magic constant, reciprocal of 10
     // Helper rule with no match(): it only defines dst (effect DEF) and is
     // instantiated by name from the divL_10 expansion.  The format text prints
     // the same 16-digit constant that load_immL encodes.
9064 instruct loadConL_0x6666666666666667(rRegL dst)
9065 %{
9066   effect(DEF dst);
9067 
9068   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
9069   ins_encode(load_immL(dst, 0x6666666666666667));
9070   ins_pipe(ialu_reg);
9071 %}
9072 
9073 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9074 %{
       // Helper rule with no match(): used by name in the divL_10 expansion.
       // One-operand imulq by src; dst (pinned to rdx) is defined, src is read,
       // rax is used and then clobbered, and the flags are killed.
9075   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
9076 
9077   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
9078   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9079   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9080   ins_pipe(ialu_reg_reg_alu0);
9081 %}
9082 
9083 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
9084 %{
       // Helper rule with no match(): used by name in the divL_10 expansion.
       // Arithmetic right shift of dst by the fixed count 63 (0x3F), in place
       // (USE_DEF); flags are killed.
9085   effect(USE_DEF dst, KILL cr);
9086 
9087   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
9088   opcode(0xC1, 0x7); /* C1 /7 ib */
9089   ins_encode(reg_opc_imm_wide(dst, 0x3F));
9090   ins_pipe(ialu_reg);
9091 %}
9092 
9093 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
9094 %{
       // Helper rule with no match(): used by name in the divL_10 expansion.
       // Arithmetic right shift of dst by the fixed count 2, in place
       // (USE_DEF); flags are killed.
9095   effect(USE_DEF dst, KILL cr);
9096 
9097   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
9098   opcode(0xC1, 0x7); /* C1 /7 ib */
9099   ins_encode(reg_opc_imm_wide(dst, 0x2));
9100   ins_pipe(ialu_reg);
9101 %}
9102 
9103 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
9104 %{
       // Long division by the constant matched by immL10, done without idiv:
       // the rule expands into a constant load, a multiply-high, two arithmetic
       // shifts and a subtract, each a separate helper rule.
       // NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so the expansion
       // overwrites src -- confirm that this is expected by the allocator.
9105   match(Set dst (DivL src div));
9106 
9107   ins_cost((5+8)*100);
9108   expand %{
9109     rax_RegL rax;                     // Killed temp
9110     rFlagsReg cr;                     // Killed
9111     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9112     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9113     sarL_rReg_63(src, cr);            // sarq  src, 63
9114     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9115     subL_rReg(dst, src, cr);          // subq  rdx, src
9116   %}
9117 %}
9118 
9119 //-----------------------------------------------------------------------------
9120 
9121 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9122                    rFlagsReg cr)
9123 %{
       // Int remainder: the result is taken from rdx (Set rdx); rax (the
       // dividend) and the flags are killed.  Format and encoding are the same
       // as divI_rReg, including the guard for rax == 0x80000000 with div == -1.
9124   match(Set rdx (ModI rax div));
9125   effect(KILL rax, KILL cr);
9126 
9127   ins_cost(300); // XXX
9128   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9129             "jne,s   normal\n\t"
9130             "xorl    rdx, rdx\n\t"
9131             "cmpl    $div, -1\n\t"
9132             "je,s    done\n"
9133     "normal: cdql\n\t"
9134             "idivl   $div\n"
9135     "done:"        %}
9136   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9137   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9138   ins_pipe(ialu_reg_reg_alu0);
9139 %}
9140 
9141 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9142                    rFlagsReg cr)
9143 %{
       // Long remainder: the result is taken from rdx (Set rdx); rax (the
       // dividend) and the flags are killed.  Format and encoding are the same
       // as divL_rReg, including the guard for the minimum long with div == -1.
9144   match(Set rdx (ModL rax div));
9145   effect(KILL rax, KILL cr);
9146 
9147   ins_cost(300); // XXX
9148   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9149             "cmpq    rax, rdx\n\t"
9150             "jne,s   normal\n\t"
9151             "xorl    rdx, rdx\n\t"
9152             "cmpq    $div, -1\n\t"
9153             "je,s    done\n"
9154     "normal: cdqq\n\t"
9155             "idivq   $div\n"
9156     "done:"        %}
9157   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9158   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9159   ins_pipe(ialu_reg_reg_alu0);
9160 %}
9161 
9162 // Integer Shift Instructions
9163 // Shift Left by one
     // Int register form: matches LShiftI of dst by an immI1 count, using the
     // D1 /4 encoding that carries no immediate byte.  Flags are killed.
9164 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9165 %{
9166   match(Set dst (LShiftI dst shift));
9167   effect(KILL cr);
9168 
9169   format %{ "sall    $dst, $shift" %}
9170   opcode(0xD1, 0x4); /* D1 /4 */
9171   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9172   ins_pipe(ialu_reg);
9173 %}
9174 
9175 // Shift Left by one
     // Int memory form: StoreI dst (LShiftI (LoadI dst) shift) with an immI1
     // count, performed in place on the memory operand.  Flags are killed.
9176 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9177 %{
9178   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9179   effect(KILL cr);
9180 
9181   format %{ "sall    $dst, $shift\t" %}
9182   opcode(0xD1, 0x4); /* D1 /4 */
9183   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9184   ins_pipe(ialu_mem_imm);
9185 %}
9186 
9187 // Shift Left by 8-bit immediate
     // Int register form: LShiftI of dst by an immI8 count.  The whole
     // instruction is emitted by reg_opc_imm (defined outside this section).
9188 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9189 %{
9190   match(Set dst (LShiftI dst shift));
9191   effect(KILL cr);
9192 
9193   format %{ "sall    $dst, $shift" %}
9194   opcode(0xC1, 0x4); /* C1 /4 ib */
9195   ins_encode(reg_opc_imm(dst, shift));
9196   ins_pipe(ialu_reg);
9197 %}
9198 
9199 // Shift Left by 8-bit immediate
     // Int memory form: StoreI dst (LShiftI (LoadI dst) shift), in place; the
     // count is appended after the ModRM bytes via Con8or32.  Flags are killed.
9200 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9201 %{
9202   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9203   effect(KILL cr);
9204 
9205   format %{ "sall    $dst, $shift" %}
9206   opcode(0xC1, 0x4); /* C1 /4 ib */
9207   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9208   ins_pipe(ialu_mem_imm);
9209 %}
9210 
9211 // Shift Left by variable
     // Int register form: the count operand is pinned to rcx (rcx_RegI) and
     // does not appear in the encoding, which names only dst.  Flags killed.
9212 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9213 %{
9214   match(Set dst (LShiftI dst shift));
9215   effect(KILL cr);
9216 
9217   format %{ "sall    $dst, $shift" %}
9218   opcode(0xD3, 0x4); /* D3 /4 */
9219   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9220   ins_pipe(ialu_reg_reg);
9221 %}
9222 
9223 // Shift Left by variable
     // Int memory form: StoreI dst (LShiftI (LoadI dst) shift), in place, with
     // the count operand pinned to rcx (rcx_RegI).  Flags are killed.
9224 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9225 %{
9226   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9227   effect(KILL cr);
9228 
9229   format %{ "sall    $dst, $shift" %}
9230   opcode(0xD3, 0x4); /* D3 /4 */
9231   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9232   ins_pipe(ialu_mem_reg);
9233 %}
9234 
9235 // Arithmetic shift right by one
     // Int register form: matches RShiftI of dst by an immI1 count, using the
     // D1 /7 encoding that carries no immediate byte.  Flags are killed.
9236 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9237 %{
9238   match(Set dst (RShiftI dst shift));
9239   effect(KILL cr);
9240 
9241   format %{ "sarl    $dst, $shift" %}
9242   opcode(0xD1, 0x7); /* D1 /7 */
9243   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9244   ins_pipe(ialu_reg);
9245 %}
9246 
9247 // Arithmetic shift right by one
     // Int memory form: StoreI dst (RShiftI (LoadI dst) shift) with an immI1
     // count, performed in place on the memory operand.  Flags are killed.
9248 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9249 %{
9250   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9251   effect(KILL cr);
9252 
9253   format %{ "sarl    $dst, $shift" %}
9254   opcode(0xD1, 0x7); /* D1 /7 */
9255   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9256   ins_pipe(ialu_mem_imm);
9257 %}
9258 
9259 // Arithmetic Shift Right by 8-bit immediate
     // Int register form: RShiftI of dst by an immI8 count.  The whole
     // instruction is emitted by reg_opc_imm (defined outside this section).
     // Uses the register pipe class ialu_reg, like salI_rReg_imm and
     // shrI_rReg_imm: no memory operand is involved.
9260 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9261 %{
9262   match(Set dst (RShiftI dst shift));
9263   effect(KILL cr);
9264 
9265   format %{ "sarl    $dst, $shift" %}
9266   opcode(0xC1, 0x7); /* C1 /7 ib */
9267   ins_encode(reg_opc_imm(dst, shift));
9268   ins_pipe(ialu_reg);
9269 %}
9270 
9271 // Arithmetic Shift Right by 8-bit immediate
     // Int memory form: StoreI dst (RShiftI (LoadI dst) shift), in place; the
     // count is appended after the ModRM bytes via Con8or32.  Flags are killed.
9272 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9273 %{
9274   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9275   effect(KILL cr);
9276 
9277   format %{ "sarl    $dst, $shift" %}
9278   opcode(0xC1, 0x7); /* C1 /7 ib */
9279   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9280   ins_pipe(ialu_mem_imm);
9281 %}
9282 
9283 // Arithmetic Shift Right by variable
     // Int register form: the count operand is pinned to rcx (rcx_RegI) and
     // does not appear in the encoding, which names only dst.  Flags killed.
9284 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9285 %{
9286   match(Set dst (RShiftI dst shift));
9287   effect(KILL cr);
9288 
9289   format %{ "sarl    $dst, $shift" %}
9290   opcode(0xD3, 0x7); /* D3 /7 */
9291   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9292   ins_pipe(ialu_reg_reg);
9293 %}
9294 
9295 // Arithmetic Shift Right by variable
     // Int memory form: StoreI dst (RShiftI (LoadI dst) shift), in place, with
     // the count operand pinned to rcx (rcx_RegI).  Flags are killed.
9296 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9297 %{
9298   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9299   effect(KILL cr);
9300 
9301   format %{ "sarl    $dst, $shift" %}
9302   opcode(0xD3, 0x7); /* D3 /7 */
9303   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9304   ins_pipe(ialu_mem_reg);
9305 %}
9306 
9307 // Logical shift right by one
     // Int register form: matches URShiftI of dst by an immI1 count, using the
     // D1 /5 encoding that carries no immediate byte.  Flags are killed.
9308 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9309 %{
9310   match(Set dst (URShiftI dst shift));
9311   effect(KILL cr);
9312 
9313   format %{ "shrl    $dst, $shift" %}
9314   opcode(0xD1, 0x5); /* D1 /5 */
9315   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9316   ins_pipe(ialu_reg);
9317 %}
9318 
9319 // Logical shift right by one
     // Int memory form: StoreI dst (URShiftI (LoadI dst) shift) with an immI1
     // count, performed in place on the memory operand.  Flags are killed.
9320 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9321 %{
9322   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9323   effect(KILL cr);
9324 
9325   format %{ "shrl    $dst, $shift" %}
9326   opcode(0xD1, 0x5); /* D1 /5 */
9327   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9328   ins_pipe(ialu_mem_imm);
9329 %}
9330 
9331 // Logical Shift Right by 8-bit immediate
     // Int register form: URShiftI of dst by an immI8 count.  The whole
     // instruction is emitted by reg_opc_imm (defined outside this section).
9332 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9333 %{
9334   match(Set dst (URShiftI dst shift));
9335   effect(KILL cr);
9336 
9337   format %{ "shrl    $dst, $shift" %}
9338   opcode(0xC1, 0x5); /* C1 /5 ib */
9339   ins_encode(reg_opc_imm(dst, shift));
9340   ins_pipe(ialu_reg);
9341 %}
9342 
9343 // Logical Shift Right by 8-bit immediate
     // Int memory form: StoreI dst (URShiftI (LoadI dst) shift), in place; the
     // count is appended after the ModRM bytes via Con8or32.  Flags are killed.
9344 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9345 %{
9346   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9347   effect(KILL cr);
9348 
9349   format %{ "shrl    $dst, $shift" %}
9350   opcode(0xC1, 0x5); /* C1 /5 ib */
9351   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9352   ins_pipe(ialu_mem_imm);
9353 %}
9354 
9355 // Logical Shift Right by variable
     // Int register form: the count operand is pinned to rcx (rcx_RegI) and
     // does not appear in the encoding, which names only dst.  Flags killed.
9356 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9357 %{
9358   match(Set dst (URShiftI dst shift));
9359   effect(KILL cr);
9360 
9361   format %{ "shrl    $dst, $shift" %}
9362   opcode(0xD3, 0x5); /* D3 /5 */
9363   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9364   ins_pipe(ialu_reg_reg);
9365 %}
9366 
9367 // Logical Shift Right by variable
     // Int memory form: StoreI dst (URShiftI (LoadI dst) shift), in place, with
     // the count operand pinned to rcx (rcx_RegI).  Flags are killed.
9368 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9369 %{
9370   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9371   effect(KILL cr);
9372 
9373   format %{ "shrl    $dst, $shift" %}
9374   opcode(0xD3, 0x5); /* D3 /5 */
9375   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9376   ins_pipe(ialu_mem_reg);
9377 %}
9378 
9379 // Long Shift Instructions
9380 // Shift Left by one
     // Long register form: matches LShiftL of dst by an immI1 count (the count
     // is an int constant even for long shifts).  Wide REX; flags are killed.
9381 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9382 %{
9383   match(Set dst (LShiftL dst shift));
9384   effect(KILL cr);
9385 
9386   format %{ "salq    $dst, $shift" %}
9387   opcode(0xD1, 0x4); /* D1 /4 */
9388   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9389   ins_pipe(ialu_reg);
9390 %}
9391 
9392 // Shift Left by one
     // Long memory form: StoreL dst (LShiftL (LoadL dst) shift) with an immI1
     // count, performed in place.  Wide REX; flags are killed.
9393 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9394 %{
9395   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9396   effect(KILL cr);
9397 
9398   format %{ "salq    $dst, $shift" %}
9399   opcode(0xD1, 0x4); /* D1 /4 */
9400   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9401   ins_pipe(ialu_mem_imm);
9402 %}
9403 
9404 // Shift Left by 8-bit immediate
     // Long register form: LShiftL of dst by an immI8 count.  The whole
     // instruction is emitted by reg_opc_imm_wide (defined outside this section).
9405 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9406 %{
9407   match(Set dst (LShiftL dst shift));
9408   effect(KILL cr);
9409 
9410   format %{ "salq    $dst, $shift" %}
9411   opcode(0xC1, 0x4); /* C1 /4 ib */
9412   ins_encode(reg_opc_imm_wide(dst, shift));
9413   ins_pipe(ialu_reg);
9414 %}
9415 
9416 // Shift Left by 8-bit immediate
     // Long memory form: StoreL dst (LShiftL (LoadL dst) shift), in place; the
     // count is appended after the ModRM bytes via Con8or32.  Wide REX.
9417 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9418 %{
9419   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9420   effect(KILL cr);
9421 
9422   format %{ "salq    $dst, $shift" %}
9423   opcode(0xC1, 0x4); /* C1 /4 ib */
9424   ins_encode(REX_mem_wide(dst), OpcP,
9425              RM_opc_mem(secondary, dst), Con8or32(shift));
9426   ins_pipe(ialu_mem_imm);
9427 %}
9428 
9429 // Shift Left by variable
     // Long register form: the count operand is an int pinned to rcx
     // (rcx_RegI); only dst appears in the encoding.  Wide REX; flags killed.
9430 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9431 %{
9432   match(Set dst (LShiftL dst shift));
9433   effect(KILL cr);
9434 
9435   format %{ "salq    $dst, $shift" %}
9436   opcode(0xD3, 0x4); /* D3 /4 */
9437   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9438   ins_pipe(ialu_reg_reg);
9439 %}
9440 
9441 // Shift Left by variable
     // Long memory form: StoreL dst (LShiftL (LoadL dst) shift), in place, with
     // the count operand pinned to rcx (rcx_RegI).  Wide REX; flags killed.
9442 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9443 %{
9444   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9445   effect(KILL cr);
9446 
9447   format %{ "salq    $dst, $shift" %}
9448   opcode(0xD3, 0x4); /* D3 /4 */
9449   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9450   ins_pipe(ialu_mem_reg);
9451 %}
9452 
9453 // Arithmetic shift right by one
     // Long register form: matches RShiftL of dst by an immI1 count, using the
     // D1 /7 encoding with the wide REX prefix.  Flags are killed.
9454 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9455 %{
9456   match(Set dst (RShiftL dst shift));
9457   effect(KILL cr);
9458 
9459   format %{ "sarq    $dst, $shift" %}
9460   opcode(0xD1, 0x7); /* D1 /7 */
9461   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9462   ins_pipe(ialu_reg);
9463 %}
9464 
9465 // Arithmetic shift right by one
     // Long memory form: StoreL dst (RShiftL (LoadL dst) shift) with an immI1
     // count, performed in place.  Wide REX; flags are killed.
9466 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9467 %{
9468   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9469   effect(KILL cr);
9470 
9471   format %{ "sarq    $dst, $shift" %}
9472   opcode(0xD1, 0x7); /* D1 /7 */
9473   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9474   ins_pipe(ialu_mem_imm);
9475 %}
9476 
9477 // Arithmetic Shift Right by 8-bit immediate
     // Long register form: RShiftL of dst by an immI8 count.  The whole
     // instruction is emitted by reg_opc_imm_wide (defined outside this section).
     // Uses the register pipe class ialu_reg, like salL_rReg_imm and
     // shrL_rReg_imm: no memory operand is involved.
9478 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9479 %{
9480   match(Set dst (RShiftL dst shift));
9481   effect(KILL cr);
9482 
9483   format %{ "sarq    $dst, $shift" %}
9484   opcode(0xC1, 0x7); /* C1 /7 ib */
9485   ins_encode(reg_opc_imm_wide(dst, shift));
9486   ins_pipe(ialu_reg);
9487 %}
9488 
9489 // Arithmetic Shift Right by 8-bit immediate
     // Long memory form: StoreL dst (RShiftL (LoadL dst) shift), in place; the
     // count is appended after the ModRM bytes via Con8or32.  Wide REX.
9490 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9491 %{
9492   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9493   effect(KILL cr);
9494 
9495   format %{ "sarq    $dst, $shift" %}
9496   opcode(0xC1, 0x7); /* C1 /7 ib */
9497   ins_encode(REX_mem_wide(dst), OpcP,
9498              RM_opc_mem(secondary, dst), Con8or32(shift));
9499   ins_pipe(ialu_mem_imm);
9500 %}
9501 
9502 // Arithmetic Shift Right by variable
     // Long register form: the count operand is an int pinned to rcx
     // (rcx_RegI); only dst appears in the encoding.  Wide REX; flags killed.
9503 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9504 %{
9505   match(Set dst (RShiftL dst shift));
9506   effect(KILL cr);
9507 
9508   format %{ "sarq    $dst, $shift" %}
9509   opcode(0xD3, 0x7); /* D3 /7 */
9510   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9511   ins_pipe(ialu_reg_reg);
9512 %}
9513 
9514 // Arithmetic Shift Right by variable
     // Long memory form: StoreL dst (RShiftL (LoadL dst) shift), in place, with
     // the count operand pinned to rcx (rcx_RegI).  Wide REX; flags killed.
9515 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9516 %{
9517   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9518   effect(KILL cr);
9519 
9520   format %{ "sarq    $dst, $shift" %}
9521   opcode(0xD3, 0x7); /* D3 /7 */
9522   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9523   ins_pipe(ialu_mem_reg);
9524 %}
9525 
9526 // Logical shift right by one
     // Long register form: matches URShiftL of dst by an immI1 count, using the
     // D1 /5 encoding with the wide REX prefix.  Flags are killed.
9527 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9528 %{
9529   match(Set dst (URShiftL dst shift));
9530   effect(KILL cr);
9531 
9532   format %{ "shrq    $dst, $shift" %}
9533   opcode(0xD1, 0x5); /* D1 /5 */
9534   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9535   ins_pipe(ialu_reg);
9536 %}
9537 
9538 // Logical shift right by one
     // Long memory form: StoreL dst (URShiftL (LoadL dst) shift) with an immI1
     // count, performed in place.  Wide REX; flags are killed.
9539 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9540 %{
9541   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9542   effect(KILL cr);
9543 
9544   format %{ "shrq    $dst, $shift" %}
9545   opcode(0xD1, 0x5); /* D1 /5 */
9546   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9547   ins_pipe(ialu_mem_imm);
9548 %}
9549 
9550 // Logical Shift Right of a long register by 8-bit immediate
9551 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9552 %{
9553   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (long)
9554   effect(KILL cr);  // flags register is declared clobbered
9555 
9556   format %{ "shrq    $dst, $shift" %}
9557   opcode(0xC1, 0x5); /* C1 /5 ib */
9558   ins_encode(reg_opc_imm_wide(dst, shift));
9559   ins_pipe(ialu_reg);
9560 %}
9561 
9562 
9563 // Logical Shift Right of a long in memory by 8-bit immediate (read-modify-write)
9564 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9565 %{
9566   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load long at dst, shift, store back to dst
9567   effect(KILL cr);  // flags register is declared clobbered
9568 
9569   format %{ "shrq    $dst, $shift" %}
9570   opcode(0xC1, 0x5); /* C1 /5 ib */
9571   ins_encode(REX_mem_wide(dst), OpcP,
9572              RM_opc_mem(secondary, dst), Con8or32(shift));
9573   ins_pipe(ialu_mem_imm);
9574 %}
9575 
9576 // Logical Shift Right of a long register by a variable count (count operand is rcx_RegI)
9577 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9578 %{
9579   match(Set dst (URShiftL dst shift));  // dst = dst >>> shift (long)
9580   effect(KILL cr);  // flags register is declared clobbered
9581 
9582   format %{ "shrq    $dst, $shift" %}
9583   opcode(0xD3, 0x5); /* D3 /5 */
9584   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; it is fixed by its operand class
9585   ins_pipe(ialu_reg_reg);
9586 %}
9587 
9588 // Logical Shift Right of a long in memory by a variable count (read-modify-write)
9589 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9590 %{
9591   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load long at dst, shift, store back to dst
9592   effect(KILL cr);  // flags register is declared clobbered
9593 
9594   format %{ "shrq    $dst, $shift" %}
9595   opcode(0xD3, 0x5); /* D3 /5 */
9596   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // shift is not encoded; it is fixed by its operand class
9597   ins_pipe(ialu_mem_reg);
9598 %}
9599 
9600 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9601 // This idiom is used by the compiler for the i2b bytecode.
9602 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9603 %{
9604   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // dst = (src << 24) >> 24, replaced by one movsbl
9605 
9606   format %{ "movsbl  $dst, $src\t# i2b" %}
9607   opcode(0x0F, 0xBE);
9608   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9609   ins_pipe(ialu_reg_reg);  // no cr operand: this rule declares no flags effect
9610 %}
9611 
9612 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9613 // This idiom is used by the compiler for the i2s bytecode.
9614 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9615 %{
9616   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // dst = (src << 16) >> 16, replaced by one movswl
9617 
9618   format %{ "movswl  $dst, $src\t# i2s" %}
9619   opcode(0x0F, 0xBF);
9620   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9621   ins_pipe(ialu_reg_reg);  // no cr operand: this rule declares no flags effect
9622 %}
9623 
9624 // ROL/ROR instructions
9625 
9626 // ROL expand
9627 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9628   effect(KILL cr, USE_DEF dst);  // no match rule here; rolI_rReg_i1 expands into this. dst is both read and written
9629 
9630   format %{ "roll    $dst" %}
9631   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9632   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9633   ins_pipe(ialu_reg);  // 32-bit rotate left by one
9634 %}
9635 
9636 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9637   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; rolI_rReg_i8 expands into this
9638 
9639   format %{ "roll    $dst, $shift" %}
9640   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9641   ins_encode( reg_opc_imm(dst, shift) );
9642   ins_pipe(ialu_reg);  // 32-bit rotate left by an 8-bit immediate
9643 %}
9644 
9645 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9646 %{
9647   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; the rolI_rReg_Var_* rules expand into this
9648 
9649   format %{ "roll    $dst, $shift" %}
9650   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9651   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; it is fixed by its operand class
9652   ins_pipe(ialu_reg_reg);  // 32-bit rotate left by a variable count; dst comes from the no_rcx_RegI class
9653 %}
9654 // end of ROL expand
9655 
9656 // Rotate Left by one: recognizes (dst << 1) | (dst >>> -1) and expands to rolI_rReg_imm1
9657 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9658 %{
9659   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // both shifts must read the same dst
9660 
9661   expand %{
9662     rolI_rReg_imm1(dst, cr);
9663   %}
9664 %}
9665 
9666 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift), expands to a roll by lshift
9667 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9668 %{
9669   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // the two shift constants must sum to 0 mod 32
9670   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // both shifts must read the same dst
9671 
9672   expand %{
9673     rolI_rReg_imm8(dst, lshift, cr);
9674   %}
9675 %}
9676 
9677 // Rotate Left by variable, "0 - shift" form: (dst << shift) | (dst >>> (0 - shift))
9678 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9679 %{
9680   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));  // expands to rolI_rReg_CL
9681 
9682   expand %{
9683     rolI_rReg_CL(dst, shift, cr);
9684   %}
9685 %}
9686 
9687 // Rotate Left by variable, "32 - shift" form: (dst << shift) | (dst >>> (32 - shift))
9688 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9689 %{
9690   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));  // expands to rolI_rReg_CL
9691 
9692   expand %{
9693     rolI_rReg_CL(dst, shift, cr);
9694   %}
9695 %}
9696 
9697 // ROR expand
9698 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9699 %{
9700   effect(USE_DEF dst, KILL cr);  // no match rule here; rorI_rReg_i1 expands into this. dst is both read and written
9701 
9702   format %{ "rorl    $dst" %}
9703   opcode(0xD1, 0x1); /* D1 /1 */
9704   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9705   ins_pipe(ialu_reg);  // 32-bit rotate right by one
9706 %}
9707 
9708 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9709 %{
9710   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; rorI_rReg_i8 expands into this
9711 
9712   format %{ "rorl    $dst, $shift" %}
9713   opcode(0xC1, 0x1); /* C1 /1 ib */
9714   ins_encode(reg_opc_imm(dst, shift));
9715   ins_pipe(ialu_reg);  // 32-bit rotate right by an 8-bit immediate
9716 %}
9717 
9718 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9719 %{
9720   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; the rorI_rReg_Var_* rules expand into this
9721 
9722   format %{ "rorl    $dst, $shift" %}
9723   opcode(0xD3, 0x1); /* D3 /1 */
9724   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // shift is not encoded; it is fixed by its operand class
9725   ins_pipe(ialu_reg_reg);  // 32-bit rotate right by a variable count; dst comes from the no_rcx_RegI class
9726 %}
9727 // end of ROR expand
9728 
9729 // Rotate Right by one: recognizes (dst >>> 1) | (dst << -1) and expands to rorI_rReg_imm1
9730 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9731 %{
9732   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // both shifts must read the same dst
9733 
9734   expand %{
9735     rorI_rReg_imm1(dst, cr);
9736   %}
9737 %}
9738 
9739 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift), expands to a rorl by rshift
9740 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9741 %{
9742   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // the two shift constants must sum to 0 mod 32
9743   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // both shifts must read the same dst
9744 
9745   expand %{
9746     rorI_rReg_imm8(dst, rshift, cr);
9747   %}
9748 %}
9749 
9750 // Rotate Right by variable, "0 - shift" form: (dst >>> shift) | (dst << (0 - shift))
9751 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9752 %{
9753   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));  // expands to rorI_rReg_CL
9754 
9755   expand %{
9756     rorI_rReg_CL(dst, shift, cr);
9757   %}
9758 %}
9759 
9760 // Rotate Right by variable, "32 - shift" form: (dst >>> shift) | (dst << (32 - shift))
9761 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9762 %{
9763   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));  // expands to rorI_rReg_CL
9764 
9765   expand %{
9766     rorI_rReg_CL(dst, shift, cr);
9767   %}
9768 %}
9769 
9770 // for long rotate
9771 // ROL expand
9772 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9773   effect(USE_DEF dst, KILL cr);  // no match rule here; rolL_rReg_i1 expands into this. dst is both read and written
9774 
9775   format %{ "rolq    $dst" %}
9776   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9777   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9778   ins_pipe(ialu_reg);  // 64-bit rotate left by one
9779 %}
9780 
9781 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9782   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; rolL_rReg_i8 expands into this
9783 
9784   format %{ "rolq    $dst, $shift" %}
9785   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9786   ins_encode( reg_opc_imm_wide(dst, shift) );
9787   ins_pipe(ialu_reg);  // 64-bit rotate left by an 8-bit immediate
9788 %}
9789 
9790 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9791 %{
9792   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; the rolL_rReg_Var_* rules expand into this
9793 
9794   format %{ "rolq    $dst, $shift" %}
9795   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9796   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; it is fixed by its operand class
9797   ins_pipe(ialu_reg_reg);  // 64-bit rotate left by a variable count; dst comes from the no_rcx_RegL class
9798 %}
9799 // end of ROL expand
9800 
9801 // Rotate Left (long) by one: recognizes (dst << 1) | (dst >>> -1) and expands to rolL_rReg_imm1
9802 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9803 %{
9804   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // both shifts must read the same dst
9805 
9806   expand %{
9807     rolL_rReg_imm1(dst, cr);
9808   %}
9809 %}
9810 
9811 // Rotate Left (long) by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift), expands to a rolq by lshift
9812 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9813 %{
9814   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // the two shift constants must sum to 0 mod 64
9815   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // both shifts must read the same dst
9816 
9817   expand %{
9818     rolL_rReg_imm8(dst, lshift, cr);
9819   %}
9820 %}
9821 
9822 // Rotate Left (long) by variable, "0 - shift" form: (dst << shift) | (dst >>> (0 - shift))
9823 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9824 %{
9825   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));  // expands to rolL_rReg_CL
9826 
9827   expand %{
9828     rolL_rReg_CL(dst, shift, cr);
9829   %}
9830 %}
9831 
9832 // Rotate Left (long) by variable, "64 - shift" form: (dst << shift) | (dst >>> (64 - shift))
9833 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9834 %{
9835   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));  // expands to rolL_rReg_CL
9836 
9837   expand %{
9838     rolL_rReg_CL(dst, shift, cr);
9839   %}
9840 %}
9841 
9842 // ROR expand
9843 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9844 %{
9845   effect(USE_DEF dst, KILL cr);  // no match rule here; rorL_rReg_i1 expands into this. dst is both read and written
9846 
9847   format %{ "rorq    $dst" %}
9848   opcode(0xD1, 0x1); /* D1 /1 */
9849   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9850   ins_pipe(ialu_reg);  // 64-bit rotate right by one
9851 %}
9852 
9853 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9854 %{
9855   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; rorL_rReg_i8 expands into this
9856 
9857   format %{ "rorq    $dst, $shift" %}
9858   opcode(0xC1, 0x1); /* C1 /1 ib */
9859   ins_encode(reg_opc_imm_wide(dst, shift));
9860   ins_pipe(ialu_reg);  // 64-bit rotate right by an 8-bit immediate
9861 %}
9862 
9863 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9864 %{
9865   effect(USE_DEF dst, USE shift, KILL cr);  // no match rule here; the rorL_rReg_Var_* rules expand into this
9866 
9867   format %{ "rorq    $dst, $shift" %}
9868   opcode(0xD3, 0x1); /* D3 /1 */
9869   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // shift is not encoded; it is fixed by its operand class
9870   ins_pipe(ialu_reg_reg);  // 64-bit rotate right by a variable count; dst comes from the no_rcx_RegL class
9871 %}
9872 // end of ROR expand
9873 
9874 // Rotate Right (long) by one: recognizes (dst >>> 1) | (dst << -1) and expands to rorL_rReg_imm1
9875 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9876 %{
9877   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // both shifts must read the same dst
9878 
9879   expand %{
9880     rorL_rReg_imm1(dst, cr);
9881   %}
9882 %}
9883 
9884 // Rotate Right (long) by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift), expands to a rorq by rshift
9885 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9886 %{
9887   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // the two shift constants must sum to 0 mod 64
9888   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // both shifts must read the same dst
9889 
9890   expand %{
9891     rorL_rReg_imm8(dst, rshift, cr);
9892   %}
9893 %}
9894 
9895 // Rotate Right (long) by variable, "0 - shift" form: (dst >>> shift) | (dst << (0 - shift))
9896 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9897 %{
9898   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));  // expands to rorL_rReg_CL
9899 
9900   expand %{
9901     rorL_rReg_CL(dst, shift, cr);
9902   %}
9903 %}
9904 
9905 // Rotate Right (long) by variable, "64 - shift" form: (dst >>> shift) | (dst << (64 - shift))
9906 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9907 %{
9908   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));  // expands to rorL_rReg_CL
9909 
9910   expand %{
9911     rorL_rReg_CL(dst, shift, cr);
9912   %}
9913 %}
9914 
9915 // Logical Instructions
9916 
9917 // Integer Logical Instructions
9918 
9919 // And Instructions
9920 // And Register with Register (int): dst &= src
9921 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9922 %{
9923   match(Set dst (AndI dst src));  // two-address form: dst is both input and result
9924   effect(KILL cr);  // flags register is declared clobbered
9925 
9926   format %{ "andl    $dst, $src\t# int" %}
9927   opcode(0x23);
9928   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9929   ins_pipe(ialu_reg_reg);
9930 %}
9931 
9932 // And Register with Immediate 255, emitted as a zero-extending byte move (movzbl)
9933 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9934 %{
9935   match(Set dst (AndI dst src));  // dst = dst & 0xFF; src only selects this rule and is not encoded
9936 
9937   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9938   opcode(0x0F, 0xB6);
9939   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9940   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
9941 %}
9942 
9943 // And Register with Immediate 255 and promote to long, emitted as a single movzbl
9944 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9945 %{
9946   match(Set dst (ConvI2L (AndI src mask)));  // dst = (long)(src & 0xFF); mask only selects this rule
9947 
9948   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9949   opcode(0x0F, 0xB6);
9950   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9951   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
9952 %}
9953 
9954 // And Register with Immediate 65535, emitted as a zero-extending word move (movzwl)
9955 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9956 %{
9957   match(Set dst (AndI dst src));  // dst = dst & 0xFFFF; src only selects this rule and is not encoded
9958 
9959   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9960   opcode(0x0F, 0xB7);
9961   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9962   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
9963 %}
9964 
9965 // And Register with Immediate 65535 and promote to long, emitted as a single movzwl
9966 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9967 %{
9968   match(Set dst (ConvI2L (AndI src mask)));  // dst = (long)(src & 0xFFFF); mask only selects this rule
9969 
9970   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9971   opcode(0x0F, 0xB7);
9972   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9973   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
9974 %}
9975 
9976 // And Register with Immediate (int, general case): dst &= src
9977 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9978 %{
9979   match(Set dst (AndI dst src));  // two-address form: dst is both input and result
9980   effect(KILL cr);  // flags register is declared clobbered
9981 
9982   format %{ "andl    $dst, $src\t# int" %}
9983   opcode(0x81, 0x04); /* Opcode 81 /4 */
9984   ins_encode(OpcSErm(dst, src), Con8or32(src));
9985   ins_pipe(ialu_reg);
9986 %}
9987 
9988 // And Register with Memory (int): dst &= [src], folding the load into the instruction
9989 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9990 %{
9991   match(Set dst (AndI dst (LoadI src)));  // the LoadI is absorbed into the and
9992   effect(KILL cr);  // flags register is declared clobbered
9993 
9994   ins_cost(125);
9995   format %{ "andl    $dst, $src\t# int" %}
9996   opcode(0x23);
9997   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9998   ins_pipe(ialu_reg_mem);
9999 %}
10000 
10001 // And Memory with Register (int): [dst] &= src as a single read-modify-write instruction
10002 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10003 %{
10004   match(Set dst (StoreI dst (AndI (LoadI dst) src)));  // load and store must use the same address operand
10005   effect(KILL cr);  // flags register is declared clobbered
10006 
10007   ins_cost(150);
10008   format %{ "andl    $dst, $src\t# int" %}
10009   opcode(0x21); /* Opcode 21 /r */
10010   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10011   ins_pipe(ialu_mem_reg);
10012 %}
10013 
10014 // And Memory with Immediate (int): [dst] &= src as a single read-modify-write instruction
10015 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
10016 %{
10017   match(Set dst (StoreI dst (AndI (LoadI dst) src)));  // load and store must use the same address operand
10018   effect(KILL cr);  // flags register is declared clobbered
10019 
10020   ins_cost(125);
10021   format %{ "andl    $dst, $src\t# int" %}
10022   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10023   ins_encode(REX_mem(dst), OpcSE(src),
10024              RM_opc_mem(secondary, dst), Con8or32(src));
10025   ins_pipe(ialu_mem_imm);
10026 %}
10027 
10028 // Or Instructions
10029 // Or Register with Register (int): dst |= src
10030 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10031 %{
10032   match(Set dst (OrI dst src));  // two-address form: dst is both input and result
10033   effect(KILL cr);  // flags register is declared clobbered
10034 
10035   format %{ "orl     $dst, $src\t# int" %}
10036   opcode(0x0B);
10037   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10038   ins_pipe(ialu_reg_reg);
10039 %}
10040 
10041 // Or Register with Immediate (int): dst |= src
10042 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10043 %{
10044   match(Set dst (OrI dst src));  // two-address form: dst is both input and result
10045   effect(KILL cr);  // flags register is declared clobbered
10046 
10047   format %{ "orl     $dst, $src\t# int" %}
10048   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10049   ins_encode(OpcSErm(dst, src), Con8or32(src));
10050   ins_pipe(ialu_reg);
10051 %}
10052 
10053 // Or Register with Memory (int): dst |= [src], folding the load into the instruction
10054 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10055 %{
10056   match(Set dst (OrI dst (LoadI src)));  // the LoadI is absorbed into the or
10057   effect(KILL cr);  // flags register is declared clobbered
10058 
10059   ins_cost(125);
10060   format %{ "orl     $dst, $src\t# int" %}
10061   opcode(0x0B);
10062   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10063   ins_pipe(ialu_reg_mem);
10064 %}
10065 
10066 // Or Memory with Register (int): [dst] |= src as a single read-modify-write instruction
10067 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10068 %{
10069   match(Set dst (StoreI dst (OrI (LoadI dst) src)));  // load and store must use the same address operand
10070   effect(KILL cr);  // flags register is declared clobbered
10071 
10072   ins_cost(150);
10073   format %{ "orl     $dst, $src\t# int" %}
10074   opcode(0x09); /* Opcode 09 /r */
10075   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10076   ins_pipe(ialu_mem_reg);
10077 %}
10078 
10079 // Or Memory with Immediate (int): [dst] |= src as a single read-modify-write instruction
10080 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10081 %{
10082   match(Set dst (StoreI dst (OrI (LoadI dst) src)));  // load and store must use the same address operand
10083   effect(KILL cr);  // flags register is declared clobbered
10084 
10085   ins_cost(125);
10086   format %{ "orl     $dst, $src\t# int" %}
10087   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10088   ins_encode(REX_mem(dst), OpcSE(src),
10089              RM_opc_mem(secondary, dst), Con8or32(src));
10090   ins_pipe(ialu_mem_imm);
10091 %}
10092 
10093 // Xor Instructions
10094 // Xor Register with Register (int): dst ^= src
10095 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10096 %{
10097   match(Set dst (XorI dst src));  // two-address form: dst is both input and result
10098   effect(KILL cr);  // flags register is declared clobbered
10099 
10100   format %{ "xorl    $dst, $src\t# int" %}
10101   opcode(0x33);
10102   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10103   ins_pipe(ialu_reg_reg);
10104 %}
10105 
10106 // Xor Register with Immediate -1 (int), emitted as a bitwise NOT
10107 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10108   match(Set dst (XorI dst imm));  // dst = dst ^ -1; imm only selects this rule and is not encoded
10109 
10110   format %{ "notl   $dst" %}
10111   ins_encode %{
10112      __ notl($dst$$Register);
10113   %}
10114   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
10115 %}
10116 
10117 // Xor Register with Immediate (int, general case): dst ^= src
10118 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10119 %{
10120   match(Set dst (XorI dst src));  // two-address form: dst is both input and result
10121   effect(KILL cr);  // flags register is declared clobbered
10122 
10123   format %{ "xorl    $dst, $src\t# int" %}
10124   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10125   ins_encode(OpcSErm(dst, src), Con8or32(src));
10126   ins_pipe(ialu_reg);
10127 %}
10128 
10129 // Xor Register with Memory (int): dst ^= [src], folding the load into the instruction
10130 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10131 %{
10132   match(Set dst (XorI dst (LoadI src)));  // the LoadI is absorbed into the xor
10133   effect(KILL cr);  // flags register is declared clobbered
10134 
10135   ins_cost(125);
10136   format %{ "xorl    $dst, $src\t# int" %}
10137   opcode(0x33);
10138   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10139   ins_pipe(ialu_reg_mem);
10140 %}
10141 
10142 // Xor Memory with Register (int): [dst] ^= src as a single read-modify-write instruction
10143 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10144 %{
10145   match(Set dst (StoreI dst (XorI (LoadI dst) src)));  // load and store must use the same address operand
10146   effect(KILL cr);  // flags register is declared clobbered
10147 
10148   ins_cost(150);
10149   format %{ "xorl    $dst, $src\t# int" %}
10150   opcode(0x31); /* Opcode 31 /r */
10151   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10152   ins_pipe(ialu_mem_reg);
10153 %}
10154 
10155 // Xor Memory with Immediate (int): [dst] ^= src as a single read-modify-write instruction
10156 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10157 %{
10158   match(Set dst (StoreI dst (XorI (LoadI dst) src)));  // load and store must use the same address operand
10159   effect(KILL cr);  // flags register is declared clobbered
10160 
10161   ins_cost(125);
10162   format %{ "xorl    $dst, $src\t# int" %}
10163   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10164   ins_encode(REX_mem(dst), OpcSE(src),
10165              RM_opc_mem(secondary, dst), Con8or32(src));
10166   ins_pipe(ialu_mem_imm);
10167 %}
10168 
10169 
10170 // Long Logical Instructions
10171 
10172 // And Instructions
10173 // And Register with Register (long): dst &= src
10174 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10175 %{
10176   match(Set dst (AndL dst src));  // two-address form: dst is both input and result
10177   effect(KILL cr);  // flags register is declared clobbered
10178 
10179   format %{ "andq    $dst, $src\t# long" %}
10180   opcode(0x23);
10181   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10182   ins_pipe(ialu_reg_reg);
10183 %}
10184 
10185 // And Register with Immediate 255 (long), emitted as a zero-extending byte move (movzbq)
10186 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10187 %{
10188   match(Set dst (AndL dst src));  // dst = dst & 0xFF; src only selects this rule and is not encoded
10189 
10190   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10191   opcode(0x0F, 0xB6);
10192   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10193   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
10194 %}
10195 
10196 // And Register with Immediate 65535 (long), emitted as a zero-extending word move (movzwq)
10197 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10198 %{
10199   match(Set dst (AndL dst src));  // dst = dst & 0xFFFF; src only selects this rule and is not encoded
10200 
10201   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10202   opcode(0x0F, 0xB7);
10203   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10204   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
10205 %}
10206 
10207 // And Register with Immediate (long): dst &= src, where src is restricted to the immL32 operand class
10208 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10209 %{
10210   match(Set dst (AndL dst src));  // two-address form: dst is both input and result
10211   effect(KILL cr);  // flags register is declared clobbered
10212 
10213   format %{ "andq    $dst, $src\t# long" %}
10214   opcode(0x81, 0x04); /* Opcode 81 /4 */
10215   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10216   ins_pipe(ialu_reg);
10217 %}
10218 
10219 // And Register with Memory (long): dst &= [src], folding the load into the instruction
10220 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10221 %{
10222   match(Set dst (AndL dst (LoadL src)));  // the LoadL is absorbed into the and
10223   effect(KILL cr);  // flags register is declared clobbered
10224 
10225   ins_cost(125);
10226   format %{ "andq    $dst, $src\t# long" %}
10227   opcode(0x23);
10228   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10229   ins_pipe(ialu_reg_mem);
10230 %}
10231 
10232 // And Memory with Register (long): [dst] &= src as a single read-modify-write instruction
10233 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10234 %{
10235   match(Set dst (StoreL dst (AndL (LoadL dst) src)));  // load and store must use the same address operand
10236   effect(KILL cr);  // flags register is declared clobbered
10237 
10238   ins_cost(150);
10239   format %{ "andq    $dst, $src\t# long" %}
10240   opcode(0x21); /* Opcode 21 /r */
10241   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10242   ins_pipe(ialu_mem_reg);
10243 %}
10244 
10245 // And Memory with Immediate (long): [dst] &= src as a single read-modify-write instruction
10246 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10247 %{
10248   match(Set dst (StoreL dst (AndL (LoadL dst) src)));  // load and store must use the same address operand
10249   effect(KILL cr);  // flags register is declared clobbered
10250 
10251   ins_cost(125);
10252   format %{ "andq    $dst, $src\t# long" %}
10253   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10254   ins_encode(REX_mem_wide(dst), OpcSE(src),
10255              RM_opc_mem(secondary, dst), Con8or32(src));
10256   ins_pipe(ialu_mem_imm);
10257 %}
10258 
10259 // Or Instructions
10260 // Or Register with Register (long): dst |= src
10261 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10262 %{
10263   match(Set dst (OrL dst src));  // two-address form: dst is both input and result
10264   effect(KILL cr);  // flags register is declared clobbered
10265 
10266   format %{ "orq     $dst, $src\t# long" %}
10267   opcode(0x0B);
10268   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10269   ins_pipe(ialu_reg_reg);
10270 %}
10271 
10272 // Or a long with a pointer reinterpreted as an integer. Use any_RegP to match R15 (TLS register) without spilling.
10273 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10274   match(Set dst (OrL dst (CastP2X src)));  // the CastP2X is folded away; same orq encoding as orL_rReg
10275   effect(KILL cr);  // flags register is declared clobbered
10276 
10277   format %{ "orq     $dst, $src\t# long" %}
10278   opcode(0x0B);
10279   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10280   ins_pipe(ialu_reg_reg);
10281 %}
10282 
10283 
10284 // Or Register with Immediate (long): dst |= src, where src is restricted to the immL32 operand class
10285 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10286 %{
10287   match(Set dst (OrL dst src));  // two-address form: dst is both input and result
10288   effect(KILL cr);  // flags register is declared clobbered
10289 
10290   format %{ "orq     $dst, $src\t# long" %}
10291   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10292   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10293   ins_pipe(ialu_reg);
10294 %}
10295 
10296 // Or Register with Memory (long): dst |= [src], folding the load into the instruction
10297 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10298 %{
10299   match(Set dst (OrL dst (LoadL src)));  // the LoadL is absorbed into the or
10300   effect(KILL cr);  // flags register is declared clobbered
10301 
10302   ins_cost(125);
10303   format %{ "orq     $dst, $src\t# long" %}
10304   opcode(0x0B);
10305   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10306   ins_pipe(ialu_reg_mem);
10307 %}
10308 
10309 // Or Memory with Register (long): [dst] |= src as a single read-modify-write instruction
10310 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10311 %{
10312   match(Set dst (StoreL dst (OrL (LoadL dst) src)));  // load and store must use the same address operand
10313   effect(KILL cr);  // flags register is declared clobbered
10314 
10315   ins_cost(150);
10316   format %{ "orq     $dst, $src\t# long" %}
10317   opcode(0x09); /* Opcode 09 /r */
10318   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10319   ins_pipe(ialu_mem_reg);
10320 %}
10321 
10322 // Or Memory with Immediate (long): [dst] |= src as a single read-modify-write instruction
10323 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10324 %{
10325   match(Set dst (StoreL dst (OrL (LoadL dst) src)));  // load and store must use the same address operand
10326   effect(KILL cr);  // flags register is declared clobbered
10327 
10328   ins_cost(125);
10329   format %{ "orq     $dst, $src\t# long" %}
10330   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10331   ins_encode(REX_mem_wide(dst), OpcSE(src),
10332              RM_opc_mem(secondary, dst), Con8or32(src));
10333   ins_pipe(ialu_mem_imm);
10334 %}
10335 
10336 // Xor Instructions
10337 // Xor Register with Register (long): dst ^= src
10338 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10339 %{
10340   match(Set dst (XorL dst src));  // two-address form: dst is both input and result
10341   effect(KILL cr);  // flags register is declared clobbered
10342 
10343   format %{ "xorq    $dst, $src\t# long" %}
10344   opcode(0x33);
10345   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10346   ins_pipe(ialu_reg_reg);
10347 %}
10348 
10349 // Xor Register with Immediate -1 (long), emitted as a bitwise NOT (notq)
10350 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10351   match(Set dst (XorL dst imm));  // dst = dst ^ -1; imm only selects this rule and is not encoded
10352 
10353   format %{ "notq   $dst" %}  
10354   ins_encode %{
10355      __ notq($dst$$Register);
10356   %}
10357   ins_pipe(ialu_reg);  // no cr operand: this rule declares no flags effect
10358 %}
10359 
10360 // Xor Register with Immediate (long): dst ^= src, where src is restricted to the immL32 operand class
10361 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10362 %{
10363   match(Set dst (XorL dst src));  // two-address form: dst is both input and result
10364   effect(KILL cr);  // flags register is declared clobbered
10365 
10366   format %{ "xorq    $dst, $src\t# long" %}
10367   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10368   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10369   ins_pipe(ialu_reg);
10370 %}
10371 
10372 // Xor Register with Memory (long): dst ^= [src], folding the load into the instruction
10373 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10374 %{
10375   match(Set dst (XorL dst (LoadL src)));  // the LoadL is absorbed into the xor
10376   effect(KILL cr);  // flags register is declared clobbered
10377 
10378   ins_cost(125);
10379   format %{ "xorq    $dst, $src\t# long" %}
10380   opcode(0x33);
10381   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10382   ins_pipe(ialu_reg_mem);
10383 %}
10384 
10385 // Xor Memory with Register (long): [dst] ^= src as a single read-modify-write instruction
10386 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10387 %{
10388   match(Set dst (StoreL dst (XorL (LoadL dst) src)));  // load and store must use the same address operand
10389   effect(KILL cr);  // flags register is declared clobbered
10390 
10391   ins_cost(150);
10392   format %{ "xorq    $dst, $src\t# long" %}
10393   opcode(0x31); /* Opcode 31 /r */
10394   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note operand order: register first, memory second
10395   ins_pipe(ialu_mem_reg);
10396 %}
10397 
10398 // Xor Memory with Immediate (long): [dst] ^= src as a single read-modify-write instruction
10399 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10400 %{
10401   match(Set dst (StoreL dst (XorL (LoadL dst) src)));  // load and store must use the same address operand
10402   effect(KILL cr);  // flags register is declared clobbered
10403 
10404   ins_cost(125);
10405   format %{ "xorq    $dst, $src\t# long" %}
10406   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10407   ins_encode(REX_mem_wide(dst), OpcSE(src),
10408              RM_opc_mem(secondary, dst), Con8or32(src));
10409   ins_pipe(ialu_mem_imm);
10410 %}
10411 
10412 // Convert Int to Boolean: dst = (src != 0) ? 1 : 0, as a test / setnz / movzbl sequence
10413 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10414 %{
10415   match(Set dst (Conv2B src));  // the rRegI src operand selects the int flavor of Conv2B
10416   effect(KILL cr);  // the testl below writes the flags
10417 
10418   format %{ "testl   $src, $src\t# ci2b\n\t"
10419             "setnz   $dst\n\t"
10420             "movzbl  $dst, $dst" %}
10421   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10422              setNZ_reg(dst),
10423              REX_reg_breg(dst, dst), // movzbl
10424              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10425   ins_pipe(pipe_slow); // XXX
10426 %}
10427 
10428 // Convert Pointer to Boolean: dst = (src != NULL) ? 1 : 0, as a 64-bit test / setnz / movzbl sequence
10429 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10430 %{
10431   match(Set dst (Conv2B src));  // the rRegP src operand selects the pointer flavor of Conv2B
10432   effect(KILL cr);  // the testq below writes the flags
10433 
10434   format %{ "testq   $src, $src\t# cp2b\n\t"
10435             "setnz   $dst\n\t"
10436             "movzbl  $dst, $dst" %}
10437   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10438              setNZ_reg(dst),
10439              REX_reg_breg(dst, dst), // movzbl
10440              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10441   ins_pipe(pipe_slow); // XXX
10442 %}
10443 
10444 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10445 %{
10446   match(Set dst (CmpLTMask p q));  // dst = (p < q) ? -1 : 0, built as cmpl / setlt / movzbl / negl
10447   effect(KILL cr);  // the cmpl below writes the flags
10448 
10449   ins_cost(400); // XXX
10450   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10451             "setlt   $dst\n\t"
10452             "movzbl  $dst, $dst\n\t"
10453             "negl    $dst" %}
10454   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10455              setLT_reg(dst),
10456              REX_reg_breg(dst, dst), // movzbl
10457              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10458              neg_reg(dst));
10459   ins_pipe(pipe_slow);
10460 %}
10461 
10462 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10463 %{
10464   match(Set dst (CmpLTMask dst zero));  // compare-against-zero case: one arithmetic shift replaces the compare sequence
10465   effect(KILL cr);  // flags register is declared clobbered
10466 
10467   ins_cost(100); // XXX
10468   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10469   opcode(0xC1, 0x7);  /* C1 /7 ib */
10470   ins_encode(reg_opc_imm(dst, 0x1F));  // 0x1F == 31, matching the "#31" in the format string
10471   ins_pipe(ialu_reg);
10472 %}
10473 
10474 
10475 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10476                          rRegI tmp,
10477                          rFlagsReg cr)
10478 %{
10479   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));  // p = (p - q) + ((p < q) ? y : 0), fused into one rule
10480   effect(TEMP tmp, KILL cr);  // tmp is a scratch register for the mask; flags are declared clobbered
10481 
10482   ins_cost(400); // XXX
10483   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10484             "sbbl    $tmp, $tmp\n\t"
10485             "andl    $tmp, $y\n\t"
10486             "addl    $p, $tmp" %}
10487   ins_encode(enc_cmpLTP(p, q, y, tmp));
10488   ins_pipe(pipe_cmplt);
10489 %}
10490 
10491 /* If I enable this, I encourage spilling in the inner loop of compress.
10492 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10493 %{
10494   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10495   effect( TEMP tmp, KILL cr );
10496   ins_cost(400);
10497 
10498   format %{ "SUB    $p,$q\n\t"
10499             "SBB    RCX,RCX\n\t"
10500             "AND    RCX,$y\n\t"
10501             "ADD    $p,RCX" %}
10502   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10503 %}
10504 */
10505 
10506 //---------- FP Instructions------------------------------------------------
10507 
10508 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10509 %{
10510   match(Set cr (CmpF src1 src2));  // float compare of two registers; the result is the flags register itself
10511 
10512   ins_cost(145);
10513   format %{ "ucomiss $src1, $src2\n\t"
10514             "jnp,s   exit\n\t"
10515             "pushfq\t# saw NaN, set CF\n\t"
10516             "andq    [rsp], #0xffffff2b\n\t"
10517             "popfq\n"
10518     "exit:   nop\t# avoid branch to branch" %}
10519   opcode(0x0F, 0x2E);
10520   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10521              cmpfp_fixup);
10522   ins_pipe(pipe_slow);  // ucomiss followed by the NaN flag fixup shown in the format string
10523 %}
10524 
10525 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10526   match(Set cr (CmpF src1 src2));  // same compare as cmpF_cc_reg, but producing the rFlagsRegUCF flags class
10527 
10528   ins_cost(145);
10529   format %{ "ucomiss $src1, $src2" %}
10530   ins_encode %{
10531     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10532   %}
10533   ins_pipe(pipe_slow);  // bare ucomiss: no NaN fixup sequence is emitted for this flags class
10534 %}
10535 
10536 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10537 %{
10538   match(Set cr (CmpF src1 (LoadF src2)));  // float compare of a register with a memory operand; the LoadF is folded in
10539 
10540   ins_cost(145);
10541   format %{ "ucomiss $src1, $src2\n\t"
10542             "jnp,s   exit\n\t"
10543             "pushfq\t# saw NaN, set CF\n\t"
10544             "andq    [rsp], #0xffffff2b\n\t"
10545             "popfq\n"
10546     "exit:   nop\t# avoid branch to branch" %}
10547   opcode(0x0F, 0x2E);
10548   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10549              cmpfp_fixup);
10550   ins_pipe(pipe_slow);  // ucomiss followed by the NaN flag fixup shown in the format string
10551 %}
10552 
// Float compare of a register against a float loaded from memory, producing
// rFlagsRegUCF.  Only the bare ucomiss (0F 2E) is encoded; no fix-up follows.
10553 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10554   match(Set cr (CmpF src1 (LoadF src2)));
10555 
10556   ins_cost(100);
10557   format %{ "ucomiss $src1, $src2" %}
10558   opcode(0x0F, 0x2E);
10559   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10560   ins_pipe(pipe_slow);
10561 %}
10562 
// Float compare of a register against a float constant (immF), result in the
// unsigned flags register.  The operand bytes for the constant come from the
// load_immF encoding class, then cmpfp_fixup is appended.
// NOTE(review): load_immF is defined outside this chunk; the REX_reg_mem
// prefix suggests the constant is addressed as a memory operand -- confirm.
10563 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10564 %{
10565   match(Set cr (CmpF src1 src2));
10566 
10567   ins_cost(145);
10568   format %{ "ucomiss $src1, $src2\n\t"
10569             "jnp,s   exit\n\t"
10570             "pushfq\t# saw NaN, set CF\n\t"
10571             "andq    [rsp], #0xffffff2b\n\t"
10572             "popfq\n"
10573     "exit:   nop\t# avoid branch to branch" %}
10574   opcode(0x0F, 0x2E);
10575   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10576              cmpfp_fixup);
10577   ins_pipe(pipe_slow);
10578 %}
10579 
// Float compare of a register against a float constant (immF), producing
// rFlagsRegUCF.  Bare ucomiss (0F 2E) with the operand supplied by load_immF;
// no fix-up sequence is appended.
10580 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10581   match(Set cr (CmpF src1 src2));
10582 
10583   ins_cost(100);
10584   format %{ "ucomiss $src1, $src2" %}
10585   opcode(0x0F, 0x2E);
10586   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10587   ins_pipe(pipe_slow);
10588 %}
10589 
// Double compare of two double registers into the unsigned flags register.
// Emits ucomisd (66 0F 2E; the 0x66 prefix is written before the REX byte)
// followed by the cmpfp_fixup encoding.  Per the format text the fix-up runs
// only when parity is set ("saw NaN, set CF").
10590 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10591 %{
10592   match(Set cr (CmpD src1 src2));
10593 
10594   ins_cost(145);
10595   format %{ "ucomisd $src1, $src2\n\t"
10596             "jnp,s   exit\n\t"
10597             "pushfq\t# saw NaN, set CF\n\t"
10598             "andq    [rsp], #0xffffff2b\n\t"
10599             "popfq\n"
10600     "exit:   nop\t# avoid branch to branch" %}
10601   opcode(0x66, 0x0F, 0x2E);
10602   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10603              cmpfp_fixup);
10604   ins_pipe(pipe_slow);
10605 %}
10606 
// Double compare of two XMM registers into rFlagsRegUCF: a bare ucomisd with
// no NaN fix-up sequence appended.
// The format string is the text printed for this instruction in assembly
// listings; it names only the mnemonic and its two operands.
10607 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10608   match(Set cr (CmpD src1 src2));
10609 
10610   ins_cost(100);
10611   format %{ "ucomisd $src1, $src2" %}
10612   ins_encode %{
10613     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10614   %}
10615   ins_pipe(pipe_slow);
10616 %}
10617 
// Double compare of a register against a double loaded from memory (LoadD
// folded into the compare), result in the unsigned flags register.
// Emits ucomisd (66 0F 2E) with a memory operand, then cmpfp_fixup.
10618 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10619 %{
10620   match(Set cr (CmpD src1 (LoadD src2)));
10621 
10622   ins_cost(145);
10623   format %{ "ucomisd $src1, $src2\n\t"
10624             "jnp,s   exit\n\t"
10625             "pushfq\t# saw NaN, set CF\n\t"
10626             "andq    [rsp], #0xffffff2b\n\t"
10627             "popfq\n"
10628     "exit:   nop\t# avoid branch to branch" %}
10629   opcode(0x66, 0x0F, 0x2E);
10630   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10631              cmpfp_fixup);
10632   ins_pipe(pipe_slow);
10633 %}
10634 
// Double compare of a register against a double loaded from memory, producing
// rFlagsRegUCF.  Only the bare ucomisd (66 0F 2E) is encoded; no fix-up.
10635 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10636   match(Set cr (CmpD src1 (LoadD src2)));
10637 
10638   ins_cost(100);
10639   format %{ "ucomisd $src1, $src2" %}
10640   opcode(0x66, 0x0F, 0x2E);
10641   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10642   ins_pipe(pipe_slow);
10643 %}
10644 
// Double compare of a register against a double constant (immD), result in
// the unsigned flags register.  The format shows the constant as a memory
// operand ("[$src2]"); its operand bytes come from load_immD, after which
// cmpfp_fixup is appended.
// NOTE(review): load_immD is defined outside this chunk.
10645 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10646 %{
10647   match(Set cr (CmpD src1 src2));
10648 
10649   ins_cost(145);
10650   format %{ "ucomisd $src1, [$src2]\n\t"
10651             "jnp,s   exit\n\t"
10652             "pushfq\t# saw NaN, set CF\n\t"
10653             "andq    [rsp], #0xffffff2b\n\t"
10654             "popfq\n"
10655     "exit:   nop\t# avoid branch to branch" %}
10656   opcode(0x66, 0x0F, 0x2E);
10657   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10658              cmpfp_fixup);
10659   ins_pipe(pipe_slow);
10660 %}
10661 
// Double compare of a register against a double constant (immD), producing
// rFlagsRegUCF.  Bare ucomisd (66 0F 2E) with the operand supplied by
// load_immD; no fix-up sequence is appended.
10662 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10663   match(Set cr (CmpD src1 src2));
10664 
10665   ins_cost(100);
10666   format %{ "ucomisd $src1, [$src2]" %}
10667   opcode(0x66, 0x0F, 0x2E);
10668   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10669   ins_pipe(pipe_slow);
10670 %}
10671 
10672 // Compare into -1,0,1
// Three-way float compare (CmpF3) of two registers into an int register.
// Per the format text: dst is preset to -1 and kept when the compare is
// unordered (jp) or below (jb); otherwise setne/movzbl leave 0 or 1 in dst.
// The flags register is clobbered (KILL cr).  The bytes after the ucomiss
// come from the cmpfp3 encoding class, defined outside this chunk.
10673 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10674 %{
10675   match(Set dst (CmpF3 src1 src2));
10676   effect(KILL cr);
10677 
10678   ins_cost(275);
10679   format %{ "ucomiss $src1, $src2\n\t"
10680             "movl    $dst, #-1\n\t"
10681             "jp,s    done\n\t"
10682             "jb,s    done\n\t"
10683             "setne   $dst\n\t"
10684             "movzbl  $dst, $dst\n"
10685     "done:" %}
10686 
10687   opcode(0x0F, 0x2E);
10688   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10689              cmpfp3(dst));
10690   ins_pipe(pipe_slow);
10691 %}
10692 
10693 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory, result (-1, 0 or 1 per the format text) in an int register.
// Flags are clobbered (KILL cr); the tail sequence is emitted by cmpfp3.
10694 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10695 %{
10696   match(Set dst (CmpF3 src1 (LoadF src2)));
10697   effect(KILL cr);
10698 
10699   ins_cost(275);
10700   format %{ "ucomiss $src1, $src2\n\t"
10701             "movl    $dst, #-1\n\t"
10702             "jp,s    done\n\t"
10703             "jb,s    done\n\t"
10704             "setne   $dst\n\t"
10705             "movzbl  $dst, $dst\n"
10706     "done:" %}
10707 
10708   opcode(0x0F, 0x2E);
10709   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10710              cmpfp3(dst));
10711   ins_pipe(pipe_slow);
10712 %}
10713 
10714 // Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against a float constant
// (immF), result (-1, 0 or 1 per the format text) in an int register.
// The constant's operand bytes come from load_immF; the tail sequence is
// emitted by cmpfp3.  Flags are clobbered (KILL cr).
10715 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10716 %{
10717   match(Set dst (CmpF3 src1 src2));
10718   effect(KILL cr);
10719 
10720   ins_cost(275);
10721   format %{ "ucomiss $src1, [$src2]\n\t"
10722             "movl    $dst, #-1\n\t"
10723             "jp,s    done\n\t"
10724             "jb,s    done\n\t"
10725             "setne   $dst\n\t"
10726             "movzbl  $dst, $dst\n"
10727     "done:" %}
10728 
10729   opcode(0x0F, 0x2E);
10730   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10731              cmpfp3(dst));
10732   ins_pipe(pipe_slow);
10733 %}
10734 
10735 // Compare into -1,0,1
// Three-way double compare (CmpD3) of two registers into an int register.
// Per the format text: dst is preset to -1 and kept when the compare is
// unordered (jp) or below (jb); otherwise setne/movzbl leave 0 or 1 in dst.
// Flags are clobbered (KILL cr); the tail sequence is emitted by cmpfp3.
10736 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10737 %{
10738   match(Set dst (CmpD3 src1 src2));
10739   effect(KILL cr);
10740 
10741   ins_cost(275);
10742   format %{ "ucomisd $src1, $src2\n\t"
10743             "movl    $dst, #-1\n\t"
10744             "jp,s    done\n\t"
10745             "jb,s    done\n\t"
10746             "setne   $dst\n\t"
10747             "movzbl  $dst, $dst\n"
10748     "done:" %}
10749 
10750   opcode(0x66, 0x0F, 0x2E);
10751   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10752              cmpfp3(dst));
10753   ins_pipe(pipe_slow);
10754 %}
10755 
10756 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a double loaded
// from memory, result (-1, 0 or 1 per the format text) in an int register.
// Flags are clobbered (KILL cr); the tail sequence is emitted by cmpfp3.
10757 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10758 %{
10759   match(Set dst (CmpD3 src1 (LoadD src2)));
10760   effect(KILL cr);
10761 
10762   ins_cost(275);
10763   format %{ "ucomisd $src1, $src2\n\t"
10764             "movl    $dst, #-1\n\t"
10765             "jp,s    done\n\t"
10766             "jb,s    done\n\t"
10767             "setne   $dst\n\t"
10768             "movzbl  $dst, $dst\n"
10769     "done:" %}
10770 
10771   opcode(0x66, 0x0F, 0x2E);
10772   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10773              cmpfp3(dst));
10774   ins_pipe(pipe_slow);
10775 %}
10776 
10777 // Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against a double constant
// (immD), result (-1, 0 or 1 per the format text) in an int register.
// The constant's operand bytes come from load_immD; the tail sequence is
// emitted by cmpfp3.  Flags are clobbered (KILL cr).
10778 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10779 %{
10780   match(Set dst (CmpD3 src1 src2));
10781   effect(KILL cr);
10782 
10783   ins_cost(275);
10784   format %{ "ucomisd $src1, [$src2]\n\t"
10785             "movl    $dst, #-1\n\t"
10786             "jp,s    done\n\t"
10787             "jb,s    done\n\t"
10788             "setne   $dst\n\t"
10789             "movzbl  $dst, $dst\n"
10790     "done:" %}
10791 
10792   opcode(0x66, 0x0F, 0x2E);
10793   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10794              cmpfp3(dst));
10795   ins_pipe(pipe_slow);
10796 %}
10797 
// Scalar float add, register-register: addss (F3 0F 58).
// Two-address rule: dst is both the left input and the result.
10798 instruct addF_reg(regF dst, regF src)
10799 %{
10800   match(Set dst (AddF dst src));
10801 
10802   format %{ "addss   $dst, $src" %}
10803   ins_cost(150); // XXX
10804   opcode(0xF3, 0x0F, 0x58);
10805   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10806   ins_pipe(pipe_slow);
10807 %}
10808 
// Scalar float add with the right operand loaded from memory: addss
// (F3 0F 58) with the LoadF folded into the instruction's memory operand.
10809 instruct addF_mem(regF dst, memory src)
10810 %{
10811   match(Set dst (AddF dst (LoadF src)));
10812 
10813   format %{ "addss   $dst, $src" %}
10814   ins_cost(150); // XXX
10815   opcode(0xF3, 0x0F, 0x58);
10816   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10817   ins_pipe(pipe_slow);
10818 %}
10819 
// Scalar float add of a float constant (immF): addss (F3 0F 58).  The format
// shows the constant as a memory operand; its bytes come from load_immF,
// an encoding class defined outside this chunk.
10820 instruct addF_imm(regF dst, immF src)
10821 %{
10822   match(Set dst (AddF dst src));
10823 
10824   format %{ "addss   $dst, [$src]" %}
10825   ins_cost(150); // XXX
10826   opcode(0xF3, 0x0F, 0x58);
10827   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10828   ins_pipe(pipe_slow);
10829 %}
10830 
// Scalar double add, register-register: addsd (F2 0F 58).
// Two-address rule: dst is both the left input and the result.
10831 instruct addD_reg(regD dst, regD src)
10832 %{
10833   match(Set dst (AddD dst src));
10834 
10835   format %{ "addsd   $dst, $src" %}
10836   ins_cost(150); // XXX
10837   opcode(0xF2, 0x0F, 0x58);
10838   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10839   ins_pipe(pipe_slow);
10840 %}
10841 
// Scalar double add with the right operand loaded from memory: addsd
// (F2 0F 58) with the LoadD folded into the instruction's memory operand.
10842 instruct addD_mem(regD dst, memory src)
10843 %{
10844   match(Set dst (AddD dst (LoadD src)));
10845 
10846   format %{ "addsd   $dst, $src" %}
10847   ins_cost(150); // XXX
10848   opcode(0xF2, 0x0F, 0x58);
10849   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10850   ins_pipe(pipe_slow);
10851 %}
10852 
// Scalar double add of a double constant (immD): addsd (F2 0F 58).  The
// format shows the constant as a memory operand; its bytes come from
// load_immD, an encoding class defined outside this chunk.
10853 instruct addD_imm(regD dst, immD src)
10854 %{
10855   match(Set dst (AddD dst src));
10856 
10857   format %{ "addsd   $dst, [$src]" %}
10858   ins_cost(150); // XXX
10859   opcode(0xF2, 0x0F, 0x58);
10860   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10861   ins_pipe(pipe_slow);
10862 %}
10863 
// Scalar float subtract, register-register: subss (F3 0F 5C).
// Two-address rule: dst = dst - src.
10864 instruct subF_reg(regF dst, regF src)
10865 %{
10866   match(Set dst (SubF dst src));
10867 
10868   format %{ "subss   $dst, $src" %}
10869   ins_cost(150); // XXX
10870   opcode(0xF3, 0x0F, 0x5C);
10871   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10872   ins_pipe(pipe_slow);
10873 %}
10874 
// Scalar float subtract with the subtrahend loaded from memory: subss
// (F3 0F 5C) with the LoadF folded into the instruction's memory operand.
10875 instruct subF_mem(regF dst, memory src)
10876 %{
10877   match(Set dst (SubF dst (LoadF src)));
10878 
10879   format %{ "subss   $dst, $src" %}
10880   ins_cost(150); // XXX
10881   opcode(0xF3, 0x0F, 0x5C);
10882   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10883   ins_pipe(pipe_slow);
10884 %}
10885 
// Scalar float subtract of a float constant (immF): subss (F3 0F 5C).  The
// constant appears as a memory operand in the format; load_immF (defined
// outside this chunk) supplies its operand bytes.
10886 instruct subF_imm(regF dst, immF src)
10887 %{
10888   match(Set dst (SubF dst src));
10889 
10890   format %{ "subss   $dst, [$src]" %}
10891   ins_cost(150); // XXX
10892   opcode(0xF3, 0x0F, 0x5C);
10893   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10894   ins_pipe(pipe_slow);
10895 %}
10896 
// Scalar double subtract, register-register: subsd (F2 0F 5C).
// Two-address rule: dst = dst - src.
10897 instruct subD_reg(regD dst, regD src)
10898 %{
10899   match(Set dst (SubD dst src));
10900 
10901   format %{ "subsd   $dst, $src" %}
10902   ins_cost(150); // XXX
10903   opcode(0xF2, 0x0F, 0x5C);
10904   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10905   ins_pipe(pipe_slow);
10906 %}
10907 
// Scalar double subtract with the subtrahend loaded from memory: subsd
// (F2 0F 5C) with the LoadD folded into the instruction's memory operand.
10908 instruct subD_mem(regD dst, memory src)
10909 %{
10910   match(Set dst (SubD dst (LoadD src)));
10911 
10912   format %{ "subsd   $dst, $src" %}
10913   ins_cost(150); // XXX
10914   opcode(0xF2, 0x0F, 0x5C);
10915   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10916   ins_pipe(pipe_slow);
10917 %}
10918 
// Scalar double subtract of a double constant (immD): subsd (F2 0F 5C).  The
// constant appears as a memory operand in the format; load_immD (defined
// outside this chunk) supplies its operand bytes.
10919 instruct subD_imm(regD dst, immD src)
10920 %{
10921   match(Set dst (SubD dst src));
10922 
10923   format %{ "subsd   $dst, [$src]" %}
10924   ins_cost(150); // XXX
10925   opcode(0xF2, 0x0F, 0x5C);
10926   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10927   ins_pipe(pipe_slow);
10928 %}
10929 
// Scalar float multiply, register-register: mulss (F3 0F 59).
// Two-address rule: dst = dst * src.
10930 instruct mulF_reg(regF dst, regF src)
10931 %{
10932   match(Set dst (MulF dst src));
10933 
10934   format %{ "mulss   $dst, $src" %}
10935   ins_cost(150); // XXX
10936   opcode(0xF3, 0x0F, 0x59);
10937   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10938   ins_pipe(pipe_slow);
10939 %}
10940 
// Scalar float multiply with the right operand loaded from memory: mulss
// (F3 0F 59) with the LoadF folded into the instruction's memory operand.
10941 instruct mulF_mem(regF dst, memory src)
10942 %{
10943   match(Set dst (MulF dst (LoadF src)));
10944 
10945   format %{ "mulss   $dst, $src" %}
10946   ins_cost(150); // XXX
10947   opcode(0xF3, 0x0F, 0x59);
10948   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10949   ins_pipe(pipe_slow);
10950 %}
10951 
// Scalar float multiply by a float constant (immF): mulss (F3 0F 59).  The
// constant appears as a memory operand in the format; load_immF (defined
// outside this chunk) supplies its operand bytes.
10952 instruct mulF_imm(regF dst, immF src)
10953 %{
10954   match(Set dst (MulF dst src));
10955 
10956   format %{ "mulss   $dst, [$src]" %}
10957   ins_cost(150); // XXX
10958   opcode(0xF3, 0x0F, 0x59);
10959   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10960   ins_pipe(pipe_slow);
10961 %}
10962 
// Scalar double multiply, register-register: mulsd (F2 0F 59).
// Two-address rule: dst = dst * src.
10963 instruct mulD_reg(regD dst, regD src)
10964 %{
10965   match(Set dst (MulD dst src));
10966 
10967   format %{ "mulsd   $dst, $src" %}
10968   ins_cost(150); // XXX
10969   opcode(0xF2, 0x0F, 0x59);
10970   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10971   ins_pipe(pipe_slow);
10972 %}
10973 
// Scalar double multiply with the right operand loaded from memory: mulsd
// (F2 0F 59) with the LoadD folded into the instruction's memory operand.
10974 instruct mulD_mem(regD dst, memory src)
10975 %{
10976   match(Set dst (MulD dst (LoadD src)));
10977 
10978   format %{ "mulsd   $dst, $src" %}
10979   ins_cost(150); // XXX
10980   opcode(0xF2, 0x0F, 0x59);
10981   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10982   ins_pipe(pipe_slow);
10983 %}
10984 
// Scalar double multiply by a double constant (immD): mulsd (F2 0F 59).  The
// constant appears as a memory operand in the format; load_immD (defined
// outside this chunk) supplies its operand bytes.
10985 instruct mulD_imm(regD dst, immD src)
10986 %{
10987   match(Set dst (MulD dst src));
10988 
10989   format %{ "mulsd   $dst, [$src]" %}
10990   ins_cost(150); // XXX
10991   opcode(0xF2, 0x0F, 0x59);
10992   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10993   ins_pipe(pipe_slow);
10994 %}
10995 
// Scalar float divide, register-register: divss (F3 0F 5E).
// Two-address rule: dst = dst / src.
10996 instruct divF_reg(regF dst, regF src)
10997 %{
10998   match(Set dst (DivF dst src));
10999 
11000   format %{ "divss   $dst, $src" %}
11001   ins_cost(150); // XXX
11002   opcode(0xF3, 0x0F, 0x5E);
11003   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11004   ins_pipe(pipe_slow);
11005 %}
11006 
// Scalar float divide with the divisor loaded from memory: divss
// (F3 0F 5E) with the LoadF folded into the instruction's memory operand.
11007 instruct divF_mem(regF dst, memory src)
11008 %{
11009   match(Set dst (DivF dst (LoadF src)));
11010 
11011   format %{ "divss   $dst, $src" %}
11012   ins_cost(150); // XXX
11013   opcode(0xF3, 0x0F, 0x5E);
11014   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11015   ins_pipe(pipe_slow);
11016 %}
11017 
// Scalar float divide by a float constant (immF): divss (F3 0F 5E).  The
// constant appears as a memory operand in the format; load_immF (defined
// outside this chunk) supplies its operand bytes.
11018 instruct divF_imm(regF dst, immF src)
11019 %{
11020   match(Set dst (DivF dst src));
11021 
11022   format %{ "divss   $dst, [$src]" %}
11023   ins_cost(150); // XXX
11024   opcode(0xF3, 0x0F, 0x5E);
11025   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11026   ins_pipe(pipe_slow);
11027 %}
11028 
// Scalar double divide, register-register: divsd (F2 0F 5E).
// Two-address rule: dst = dst / src.
11029 instruct divD_reg(regD dst, regD src)
11030 %{
11031   match(Set dst (DivD dst src));
11032 
11033   format %{ "divsd   $dst, $src" %}
11034   ins_cost(150); // XXX
11035   opcode(0xF2, 0x0F, 0x5E);
11036   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11037   ins_pipe(pipe_slow);
11038 %}
11039 
// Scalar double divide with the divisor loaded from memory: divsd
// (F2 0F 5E) with the LoadD folded into the instruction's memory operand.
11040 instruct divD_mem(regD dst, memory src)
11041 %{
11042   match(Set dst (DivD dst (LoadD src)));
11043 
11044   format %{ "divsd   $dst, $src" %}
11045   ins_cost(150); // XXX
11046   opcode(0xF2, 0x0F, 0x5E);
11047   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11048   ins_pipe(pipe_slow);
11049 %}
11050 
// Scalar double divide by a double constant (immD): divsd (F2 0F 5E).  The
// constant appears as a memory operand in the format; load_immD (defined
// outside this chunk) supplies its operand bytes.
11051 instruct divD_imm(regD dst, immD src)
11052 %{
11053   match(Set dst (DivD dst src));
11054 
11055   format %{ "divsd   $dst, [$src]" %}
11056   ins_cost(150); // XXX
11057   opcode(0xF2, 0x0F, 0x5E);
11058   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11059   ins_pipe(pipe_slow);
11060 %}
11061 
// Float square root, register-register: sqrtss (F3 0F 51).
// Matches the whole ConvD2F(SqrtD(ConvF2D src)) tree, so a double sqrt that
// is widened from and narrowed back to float becomes one instruction.
// Unlike the arithmetic rules, dst is not an input here.
11062 instruct sqrtF_reg(regF dst, regF src)
11063 %{
11064   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11065 
11066   format %{ "sqrtss  $dst, $src" %}
11067   ins_cost(150); // XXX
11068   opcode(0xF3, 0x0F, 0x51);
11069   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11070   ins_pipe(pipe_slow);
11071 %}
11072 
// Float square root of a value loaded from memory: sqrtss (F3 0F 51) with
// the LoadF folded in.  Matches ConvD2F(SqrtD(ConvF2D(LoadF src))).
11073 instruct sqrtF_mem(regF dst, memory src)
11074 %{
11075   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
11076 
11077   format %{ "sqrtss  $dst, $src" %}
11078   ins_cost(150); // XXX
11079   opcode(0xF3, 0x0F, 0x51);
11080   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11081   ins_pipe(pipe_slow);
11082 %}
11083 
// Float square root of a float constant (immF): sqrtss (F3 0F 51).  The
// constant appears as a memory operand in the format; load_immF (defined
// outside this chunk) supplies its operand bytes.
11084 instruct sqrtF_imm(regF dst, immF src)
11085 %{
11086   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11087 
11088   format %{ "sqrtss  $dst, [$src]" %}
11089   ins_cost(150); // XXX
11090   opcode(0xF3, 0x0F, 0x51);
11091   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11092   ins_pipe(pipe_slow);
11093 %}
11094 
// Double square root, register-register: sqrtsd (F2 0F 51).
// dst is a pure output; src is the only input.
11095 instruct sqrtD_reg(regD dst, regD src)
11096 %{
11097   match(Set dst (SqrtD src));
11098 
11099   format %{ "sqrtsd  $dst, $src" %}
11100   ins_cost(150); // XXX
11101   opcode(0xF2, 0x0F, 0x51);
11102   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11103   ins_pipe(pipe_slow);
11104 %}
11105 
// Double square root of a value loaded from memory: sqrtsd (F2 0F 51) with
// the LoadD folded into the instruction's memory operand.
11106 instruct sqrtD_mem(regD dst, memory src)
11107 %{
11108   match(Set dst (SqrtD (LoadD src)));
11109 
11110   format %{ "sqrtsd  $dst, $src" %}
11111   ins_cost(150); // XXX
11112   opcode(0xF2, 0x0F, 0x51);
11113   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11114   ins_pipe(pipe_slow);
11115 %}
11116 
// Double square root of a double constant (immD): sqrtsd (F2 0F 51).  The
// constant appears as a memory operand in the format; load_immD (defined
// outside this chunk) supplies its operand bytes.
11117 instruct sqrtD_imm(regD dst, immD src)
11118 %{
11119   match(Set dst (SqrtD src));
11120 
11121   format %{ "sqrtsd  $dst, [$src]" %}
11122   ins_cost(150); // XXX
11123   opcode(0xF2, 0x0F, 0x51);
11124   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11125   ins_pipe(pipe_slow);
11126 %}
11127 
// Float absolute value, in place.  Per the format text this is an andps of
// dst with a 0x7fffffff mask held in memory (clears the sign bit).
// The bytes come from absF_encoding, defined outside this chunk.
11128 instruct absF_reg(regF dst)
11129 %{
11130   match(Set dst (AbsF dst));
11131 
11132   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
11133   ins_encode(absF_encoding(dst));
11134   ins_pipe(pipe_slow);
11135 %}
11136 
// Double absolute value, in place.  Per the format text this is an andpd of
// dst with a 0x7fffffffffffffff mask held in memory (clears the sign bit).
// The bytes come from absD_encoding, defined outside this chunk.
11137 instruct absD_reg(regD dst)
11138 %{
11139   match(Set dst (AbsD dst));
11140 
11141   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11142             "# abs double by sign masking" %}
11143   ins_encode(absD_encoding(dst));
11144   ins_pipe(pipe_slow);
11145 %}
11146 
// Float negation, in place.  Per the format text this is an xorps of dst
// with a 0x80000000 mask held in memory (flips the sign bit).
// The bytes come from negF_encoding, defined outside this chunk.
11147 instruct negF_reg(regF dst)
11148 %{
11149   match(Set dst (NegF dst));
11150 
11151   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11152   ins_encode(negF_encoding(dst));
11153   ins_pipe(pipe_slow);
11154 %}
11155 
// Double negation, in place.  Per the format text this is an xorpd of dst
// with a 0x8000000000000000 mask held in memory (flips the sign bit).
// The bytes come from negD_encoding, defined outside this chunk.
11156 instruct negD_reg(regD dst)
11157 %{
11158   match(Set dst (NegD dst));
11159 
11160   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11161             "# neg double by sign flipping" %}
11162   ins_encode(negD_encoding(dst));
11163   ins_pipe(pipe_slow);
11164 %}
11165 
11166 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place.  The two opcode bytes D9 FF (x87 fcos) are
// emitted between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): both Push_* encoding classes are defined outside this chunk;
// presumably they move dst onto the x87 stack and back -- confirm.
11167 instruct cosD_reg(regD dst) %{
11168   match(Set dst (CosD dst));
11169 
11170   format %{ "dcos   $dst\n\t" %}
11171   opcode(0xD9, 0xFF);
11172   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11173   ins_pipe( pipe_slow );
11174 %}
11175 
// Double sine, in place.  The two opcode bytes D9 FE (x87 fsin) are
// emitted between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): both Push_* encoding classes are defined outside this chunk;
// presumably they move dst onto the x87 stack and back -- confirm.
11176 instruct sinD_reg(regD dst) %{
11177   match(Set dst (SinD dst));
11178 
11179   format %{ "dsin   $dst\n\t" %}
11180   opcode(0xD9, 0xFE);
11181   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11182   ins_pipe( pipe_slow );
11183 %}
11184 
// Double tangent, in place.  Between Push_SrcXD(dst) and Push_ResultXD(dst)
// the rule emits fptan followed by "fstp st", as annotated on the opcodes.
// NOTE(review): the fstp presumably discards the extra value fptan leaves on
// the x87 stack so that the tangent is on top -- confirm against the ISA
// reference.
11185 instruct tanD_reg(regD dst) %{
11186   match(Set dst (TanD dst));
11187 
11188   format %{ "dtan   $dst\n\t" %}
11189   ins_encode( Push_SrcXD(dst),
11190               Opcode(0xD9), Opcode(0xF2),   //fptan
11191               Opcode(0xDD), Opcode(0xD8),   //fstp st
11192               Push_ResultXD(dst) );
11193   ins_pipe( pipe_slow );
11194 %}
11195 
// Base-10 logarithm of a double, in place.  The constant log_10(2) is pushed
// first (fldlg2), then the source value (Push_SrcXD), so that fyl2x can
// multiply the constant by log_2(x); Push_ResultXD writes the result to dst.
11196 instruct log10D_reg(regD dst) %{
11197   // The source and result Double operands in XMM registers
11198   match(Set dst (Log10D dst));
11199   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11200   // fyl2x        ; compute log_10(2) * log_2(x)
11201   format %{ "fldlg2\t\t\t#Log10\n\t"
11202             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11203          %}
11204    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11205               Push_SrcXD(dst),
11206               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11207               Push_ResultXD(dst));
11208 
11209   ins_pipe( pipe_slow );
11210 %}
11211 
// Natural logarithm of a double, in place.  The constant log_e(2) is pushed
// first (fldln2), then the source value (Push_SrcXD), so that fyl2x can
// multiply the constant by log_2(x); Push_ResultXD writes the result to dst.
11212 instruct logD_reg(regD dst) %{
11213   // The source and result Double operands in XMM registers
11214   match(Set dst (LogD dst));
11215   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11216   // fyl2x        ; compute log_e(2) * log_2(x)
11217   format %{ "fldln2\t\t\t#Log_e\n\t"
11218             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11219          %}
11220   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11221               Push_SrcXD(dst),
11222               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11223               Push_ResultXD(dst));
11224   ins_pipe( pipe_slow );
11225 %}
11226 
11227 
11228 
11229 //----------Arithmetic Conversion Instructions---------------------------------
11230 
// RoundFloat is a no-op here: the rule matches the node in place with an
// empty encoding, zero cost and the empty pipeline class, so no bytes are
// emitted for it.
11231 instruct roundFloat_nop(regF dst)
11232 %{
11233   match(Set dst (RoundFloat dst));
11234 
11235   ins_cost(0);
11236   ins_encode();
11237   ins_pipe(empty);
11238 %}
11239 
// RoundDouble is a no-op here: the rule matches the node in place with an
// empty encoding, zero cost and the empty pipeline class, so no bytes are
// emitted for it.
11240 instruct roundDouble_nop(regD dst)
11241 %{
11242   match(Set dst (RoundDouble dst));
11243 
11244   ins_cost(0);
11245   ins_encode();
11246   ins_pipe(empty);
11247 %}
11248 
// Float to double conversion, register-register: cvtss2sd (F3 0F 5A).
11249 instruct convF2D_reg_reg(regD dst, regF src)
11250 %{
11251   match(Set dst (ConvF2D src));
11252 
11253   format %{ "cvtss2sd $dst, $src" %}
11254   opcode(0xF3, 0x0F, 0x5A);
11255   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11256   ins_pipe(pipe_slow); // XXX
11257 %}
11258 
// Float to double conversion of a value loaded from memory: cvtss2sd
// (F3 0F 5A) with the LoadF folded into the instruction's memory operand.
11259 instruct convF2D_reg_mem(regD dst, memory src)
11260 %{
11261   match(Set dst (ConvF2D (LoadF src)));
11262 
11263   format %{ "cvtss2sd $dst, $src" %}
11264   opcode(0xF3, 0x0F, 0x5A);
11265   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11266   ins_pipe(pipe_slow); // XXX
11267 %}
11268 
// Double to float conversion, register-register: cvtsd2ss (F2 0F 5A).
11269 instruct convD2F_reg_reg(regF dst, regD src)
11270 %{
11271   match(Set dst (ConvD2F src));
11272 
11273   format %{ "cvtsd2ss $dst, $src" %}
11274   opcode(0xF2, 0x0F, 0x5A);
11275   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11276   ins_pipe(pipe_slow); // XXX
11277 %}
11278 
// Double to float conversion of a value loaded from memory: cvtsd2ss
// (F2 0F 5A) with the LoadD folded into the instruction's memory operand.
11279 instruct convD2F_reg_mem(regF dst, memory src)
11280 %{
11281   match(Set dst (ConvD2F (LoadD src)));
11282 
11283   format %{ "cvtsd2ss $dst, $src" %}
11284   opcode(0xF2, 0x0F, 0x5A);
11285   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11286   ins_pipe(pipe_slow); // XXX
11287 %}
11288 
11289 // XXX do mem variants
// Float to int conversion: truncating cvttss2si (F3 0F 2C), then a fix-up.
// Per the format text, a result equal to 0x80000000 sends control through a
// slow path that spills src to the stack, calls f2i_fixup and pops the
// corrected value into dst; any other result skips it.  Flags are clobbered
// by the compare (KILL cr).
// NOTE(review): the slow-path bytes come from the f2i_fixup encoding class,
// defined outside this chunk.
11290 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11291 %{
11292   match(Set dst (ConvF2I src));
11293   effect(KILL cr);
11294 
11295   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11296             "cmpl    $dst, #0x80000000\n\t"
11297             "jne,s   done\n\t"
11298             "subq    rsp, #8\n\t"
11299             "movss   [rsp], $src\n\t"
11300             "call    f2i_fixup\n\t"
11301             "popq    $dst\n"
11302     "done:   "%}
11303   opcode(0xF3, 0x0F, 0x2C);
11304   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11305              f2i_fixup(dst, src));
11306   ins_pipe(pipe_slow);
11307 %}
11308 
// Float to long conversion: truncating cvttss2si with a REX.W prefix
// (REX_reg_reg_wide) for a 64-bit destination, then a fix-up.
// Per the format text, a result equal to 0x8000000000000000 sends control
// through a slow path that spills src, calls f2l_fixup and pops the corrected
// value into dst.  Flags are clobbered by the compare (KILL cr).
11309 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11310 %{
11311   match(Set dst (ConvF2L src));
11312   effect(KILL cr);
11313 
11314   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11315             "cmpq    $dst, [0x8000000000000000]\n\t"
11316             "jne,s   done\n\t"
11317             "subq    rsp, #8\n\t"
11318             "movss   [rsp], $src\n\t"
11319             "call    f2l_fixup\n\t"
11320             "popq    $dst\n"
11321     "done:   "%}
11322   opcode(0xF3, 0x0F, 0x2C);
11323   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11324              f2l_fixup(dst, src));
11325   ins_pipe(pipe_slow);
11326 %}
11327 
// Double to int conversion: truncating cvttsd2si (F2 0F 2C), then a fix-up.
// Per the format text, a result equal to 0x80000000 sends control through a
// slow path that spills src to the stack, calls d2i_fixup and pops the
// corrected value into dst.  Flags are clobbered by the compare (KILL cr).
11328 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11329 %{
11330   match(Set dst (ConvD2I src));
11331   effect(KILL cr);
11332 
11333   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11334             "cmpl    $dst, #0x80000000\n\t"
11335             "jne,s   done\n\t"
11336             "subq    rsp, #8\n\t"
11337             "movsd   [rsp], $src\n\t"
11338             "call    d2i_fixup\n\t"
11339             "popq    $dst\n"
11340     "done:   "%}
11341   opcode(0xF2, 0x0F, 0x2C);
11342   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11343              d2i_fixup(dst, src));
11344   ins_pipe(pipe_slow);
11345 %}
11346 
// Double to long conversion: truncating cvttsd2si with a REX.W prefix
// (REX_reg_reg_wide) for a 64-bit destination, then a fix-up.
// Per the format text, a result equal to 0x8000000000000000 sends control
// through a slow path that spills src, calls d2l_fixup and pops the corrected
// value into dst.  Flags are clobbered by the compare (KILL cr).
11347 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11348 %{
11349   match(Set dst (ConvD2L src));
11350   effect(KILL cr);
11351 
11352   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11353             "cmpq    $dst, [0x8000000000000000]\n\t"
11354             "jne,s   done\n\t"
11355             "subq    rsp, #8\n\t"
11356             "movsd   [rsp], $src\n\t"
11357             "call    d2l_fixup\n\t"
11358             "popq    $dst\n"
11359     "done:   "%}
11360   opcode(0xF2, 0x0F, 0x2C);
11361   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11362              d2l_fixup(dst, src));
11363   ins_pipe(pipe_slow);
11364 %}
11365 
// Int to float conversion from a general register: cvtsi2ss (F3 0F 2A).
// Selected only when UseXmmI2F is false; convXI2F_reg matches the same node
// when the flag is true.
11366 instruct convI2F_reg_reg(regF dst, rRegI src)
11367 %{
11368   predicate(!UseXmmI2F);
11369   match(Set dst (ConvI2F src));
11370 
11371   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11372   opcode(0xF3, 0x0F, 0x2A);
11373   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11374   ins_pipe(pipe_slow); // XXX
11375 %}
11376 
// Int to float conversion of an int loaded from memory: cvtsi2ss
// (F3 0F 2A) with the LoadI folded into the instruction's memory operand.
// This rule has no predicate, so it does not depend on UseXmmI2F.
11377 instruct convI2F_reg_mem(regF dst, memory src)
11378 %{
11379   match(Set dst (ConvI2F (LoadI src)));
11380 
11381   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11382   opcode(0xF3, 0x0F, 0x2A);
11383   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11384   ins_pipe(pipe_slow); // XXX
11385 %}
11386 
// Int to double conversion from a general register: cvtsi2sd (F2 0F 2A).
// Selected only when UseXmmI2D is false; convXI2D_reg matches the same node
// when the flag is true.
11387 instruct convI2D_reg_reg(regD dst, rRegI src)
11388 %{
11389   predicate(!UseXmmI2D);
11390   match(Set dst (ConvI2D src));
11391 
11392   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11393   opcode(0xF2, 0x0F, 0x2A);
11394   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11395   ins_pipe(pipe_slow); // XXX
11396 %}
11397 
// Int to double conversion of an int loaded from memory: cvtsi2sd
// (F2 0F 2A) with the LoadI folded into the instruction's memory operand.
// This rule has no predicate, so it does not depend on UseXmmI2D.
11398 instruct convI2D_reg_mem(regD dst, memory src)
11399 %{
11400   match(Set dst (ConvI2D (LoadI src)));
11401 
11402   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11403   opcode(0xF2, 0x0F, 0x2A);
11404   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11405   ins_pipe(pipe_slow); // XXX
11406 %}
11407 
// Int to float conversion done entirely in the XMM register: the int is
// first copied into dst with movdl, then converted in place with cvtdq2ps.
// Selected only when UseXmmI2F is true (convI2F_reg_reg covers the false
// case).
11408 instruct convXI2F_reg(regF dst, rRegI src)
11409 %{
11410   predicate(UseXmmI2F);
11411   match(Set dst (ConvI2F src));
11412 
11413   format %{ "movdl $dst, $src\n\t"
11414             "cvtdq2psl $dst, $dst\t# i2f" %}
11415   ins_encode %{
11416     __ movdl($dst$$XMMRegister, $src$$Register);
11417     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11418   %}
11419   ins_pipe(pipe_slow); // XXX
11420 %}
11421 
// Int to double conversion done entirely in the XMM register: the int is
// first copied into dst with movdl, then converted in place with cvtdq2pd.
// Selected only when UseXmmI2D is true (convI2D_reg_reg covers the false
// case).
11422 instruct convXI2D_reg(regD dst, rRegI src)
11423 %{
11424   predicate(UseXmmI2D);
11425   match(Set dst (ConvI2D src));
11426 
11427   format %{ "movdl $dst, $src\n\t"
11428             "cvtdq2pdl $dst, $dst\t# i2d" %}
11429   ins_encode %{
11430     __ movdl($dst$$XMMRegister, $src$$Register);
11431     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11432   %}
11433   ins_pipe(pipe_slow); // XXX
11434 %}
11435 
// Long to float conversion from a general register: cvtsi2ss (F3 0F 2A)
// with a REX.W prefix (REX_reg_reg_wide) to select the 64-bit source.
11436 instruct convL2F_reg_reg(regF dst, rRegL src)
11437 %{
11438   match(Set dst (ConvL2F src));
11439 
11440   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11441   opcode(0xF3, 0x0F, 0x2A);
11442   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11443   ins_pipe(pipe_slow); // XXX
11444 %}
11445 
// Long to float conversion of a long loaded from memory: cvtsi2ss
// (F3 0F 2A) with a REX.W prefix (REX_reg_mem_wide) and the LoadL folded
// into the instruction's memory operand.
11446 instruct convL2F_reg_mem(regF dst, memory src)
11447 %{
11448   match(Set dst (ConvL2F (LoadL src)));
11449 
11450   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11451   opcode(0xF3, 0x0F, 0x2A);
11452   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11453   ins_pipe(pipe_slow); // XXX
11454 %}
11455 
// Long to double conversion from a general register: cvtsi2sd (F2 0F 2A)
// with a REX.W prefix (REX_reg_reg_wide) to select the 64-bit source.
11456 instruct convL2D_reg_reg(regD dst, rRegL src)
11457 %{
11458   match(Set dst (ConvL2D src));
11459 
11460   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11461   opcode(0xF2, 0x0F, 0x2A);
11462   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11463   ins_pipe(pipe_slow); // XXX
11464 %}
11465 
// Long to double conversion of a long loaded from memory: cvtsi2sd
// (F2 0F 2A) with a REX.W prefix (REX_reg_mem_wide) and the LoadL folded
// into the instruction's memory operand.
11466 instruct convL2D_reg_mem(regD dst, memory src)
11467 %{
11468   match(Set dst (ConvL2D (LoadL src)));
11469 
11470   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11471   opcode(0xF2, 0x0F, 0x2A);
11472   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11473   ins_pipe(pipe_slow); // XXX
11474 %}
11475 
// Sign-extending int to long conversion: movslq, i.e. opcode 0x63 with a
// REX.W prefix supplied by REX_reg_reg_wide.
11476 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11477 %{
11478   match(Set dst (ConvI2L src));
11479 
11480   ins_cost(125);
11481   format %{ "movslq  $dst, $src\t# i2l" %}
11482   opcode(0x63); // needs REX.W
11483   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11484   ins_pipe(ialu_reg_reg);
11485 %}
11486 
11487 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11488 // %{
11489 //   match(Set dst (ConvI2L src));
11490 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11491 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11492 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11493 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11494 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11495 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11496 
11497 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11498 //   ins_encode(enc_copy(dst, src));
11499 // //   opcode(0x63); // needs REX.W
11500 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11501 //   ins_pipe(ialu_reg_reg);
11502 // %}
11503 
11504 // Zero-extend convert int to long
// Matches a ConvI2L whose result is immediately ANDed with an immL_32bits
// mask and replaces the pair with a single 32-bit movl (per the format text).
// NOTE(review): this rule uses enc_copy, while zerox_long_reg_reg and
// convL2I_reg_reg use enc_copy_always.  Both encoding classes are defined
// outside this chunk; if enc_copy omits the move when dst and src are the
// same register, the zero-extension then relies on the upper half of src
// already being clear -- confirm that this is intended.
11505 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11506 %{
11507   match(Set dst (AndL (ConvI2L src) mask));
11508 
11509   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11510   ins_encode(enc_copy(dst, src));
11511   ins_pipe(ialu_reg_reg);
11512 %}
11513 
11514 // Zero-extend convert int to long
// Matches ConvI2L of an int loaded from memory whose result is ANDed with an
// immL_32bits mask, and replaces the whole tree with one 32-bit load:
// movl (0x8B) without a REX.W prefix.
11515 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11516 %{
11517   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11518 
11519   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11520   opcode(0x8B);
11521   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11522   ins_pipe(ialu_reg_mem);
11523 %}
11524 
// Matches a long ANDed with an immL_32bits mask and implements it as a
// 32-bit movl (per the format text) instead of an and instruction.
// NOTE(review): enc_copy_always is defined outside this chunk; the name
// suggests the move is emitted even when dst and src are the same register,
// which this rule would need in order to clear the upper half -- confirm.
11525 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11526 %{
11527   match(Set dst (AndL src mask));
11528 
11529   format %{ "movl    $dst, $src\t# zero-extend long" %}
11530   ins_encode(enc_copy_always(dst, src));
11531   ins_pipe(ialu_reg_reg);
11532 %}
11533 
11534 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11535 %{
11536   match(Set dst (ConvL2I src));
11537   // Long-to-int narrowing, printed as a 32-bit register move.
11538   format %{ "movl    $dst, $src\t# l2i" %}
11539   ins_encode(enc_copy_always(dst, src)); // NOTE(review): presumably emits the move even when dst == src -- confirm in enc_copy_always
11540   ins_pipe(ialu_reg_reg);
11541 %}
11542 
11543 
11544 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11545   match(Set dst (MoveF2I src));
11546   effect(DEF dst, USE src);
11547   // MoveF2I from a float stack slot into an int register, done as a 32-bit integer load.
11548   ins_cost(125);
11549   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11550   opcode(0x8B); // mov r32, r/m32
11551   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11552   ins_pipe(ialu_reg_mem);
11553 %}
11554 
11555 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11556   match(Set dst (MoveI2F src));
11557   effect(DEF dst, USE src);
11558   // MoveI2F from an int stack slot into a float register, printed as a movss load.
11559   ins_cost(125);
11560   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11561   opcode(0xF3, 0x0F, 0x10); // movss xmm, m32
11562   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // 0xF3 is emitted ahead of the REX prefix
11563   ins_pipe(pipe_slow);
11564 %}
11565 
11566 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11567   match(Set dst (MoveD2L src));
11568   effect(DEF dst, USE src);
11569   // MoveD2L from a double stack slot into a long register, done as a 64-bit integer load.
11570   ins_cost(125);
11571   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11572   opcode(0x8B); // mov r64, r/m64 once the wide REX prefix is applied
11573   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11574   ins_pipe(ialu_reg_mem);
11575 %}
11576 
11577 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11578   predicate(!UseXmmLoadAndClearUpper); // sibling MoveL2D_stack_reg covers the flag-on case
11579   match(Set dst (MoveL2D src));
11580   effect(DEF dst, USE src);
11581   // MoveL2D from a long stack slot into a double register, printed as movlpd.
11582   ins_cost(125);
11583   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11584   opcode(0x66, 0x0F, 0x12); // movlpd xmm, m64
11585   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // 0x66 is emitted ahead of the REX prefix
11586   ins_pipe(pipe_slow);
11587 %}
11588 
11589 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11590   predicate(UseXmmLoadAndClearUpper); // sibling MoveL2D_stack_reg_partial covers the flag-off case
11591   match(Set dst (MoveL2D src));
11592   effect(DEF dst, USE src);
11593   // MoveL2D from a long stack slot into a double register, printed as a movsd load.
11594   ins_cost(125);
11595   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11596   opcode(0xF2, 0x0F, 0x10); // movsd xmm, m64
11597   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // 0xF2 is emitted ahead of the REX prefix
11598   ins_pipe(pipe_slow);
11599 %}
11600 
11601 
11602 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11603   match(Set dst (MoveF2I src));
11604   effect(DEF dst, USE src);
11605   // MoveF2I from a float register into an int stack slot, printed as a movss store.
11606   ins_cost(95); // XXX
11607   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11608   opcode(0xF3, 0x0F, 0x11); // movss m32, xmm
11609   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // store form: register operand is src, memory operand is dst
11610   ins_pipe(pipe_slow);
11611 %}
11612 
11613 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11614   match(Set dst (MoveI2F src));
11615   effect(DEF dst, USE src);
11616   // MoveI2F from an int register into a float stack slot, done as a 32-bit integer store.
11617   ins_cost(100);
11618   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11619   opcode(0x89); // mov r/m32, r32
11620   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // store form: register operand is src, memory operand is dst
11621   ins_pipe( ialu_mem_reg );
11622 %}
11623 
11624 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11625   match(Set dst (MoveD2L src));
11626   effect(DEF dst, USE src);
11627   // MoveD2L from a double register into a long stack slot, printed as a movsd store.
11628   ins_cost(95); // XXX
11629   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11630   opcode(0xF2, 0x0F, 0x11); // movsd m64, xmm
11631   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // store form: register operand is src, memory operand is dst
11632   ins_pipe(pipe_slow);
11633 %}
11634 
11635 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11636   match(Set dst (MoveL2D src));
11637   effect(DEF dst, USE src);
11638   // MoveL2D from a long register into a double stack slot, done as a 64-bit integer store.
11639   ins_cost(100);
11640   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11641   opcode(0x89); // mov r/m64, r64 once the wide REX prefix is applied
11642   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // store form: register operand is src, memory operand is dst
11643   ins_pipe(ialu_mem_reg);
11644 %}
11645 
11646 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11647   match(Set dst (MoveF2I src)); // register-to-register form: XMM source, general-purpose destination
11648   effect(DEF dst, USE src);
11649   ins_cost(85); // cheaper than the stack-slot variants of MoveF2I (95 / 125)
11650   format %{ "movd    $dst,$src\t# MoveF2I" %}
11651   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11652   ins_pipe( pipe_slow );
11653 %}
11654 
11655 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11656   match(Set dst (MoveD2L src)); // register-to-register form: XMM source, general-purpose destination
11657   effect(DEF dst, USE src);
11658   ins_cost(85); // cheaper than the stack-slot variants of MoveD2L (95 / 125)
11659   format %{ "movd    $dst,$src\t# MoveD2L" %}
11660   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %} // emitted via the assembler's movdq, although the format prints "movd"
11661   ins_pipe( pipe_slow );
11662 %}
11663 
11664 // The next instructions (general-purpose register -> XMM moves) have long latency and use the Int unit. Set high cost.
11665 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11666   match(Set dst (MoveI2F src)); // register-to-register form: general-purpose source, XMM destination
11667   effect(DEF dst, USE src);
11668   ins_cost(300); // deliberately above the stack-slot variants of MoveI2F (100 / 125)
11669   format %{ "movd    $dst,$src\t# MoveI2F" %}
11670   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11671   ins_pipe( pipe_slow );
11672 %}
11673 
11674 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11675   match(Set dst (MoveL2D src)); // register-to-register form: general-purpose source, XMM destination
11676   effect(DEF dst, USE src);
11677   ins_cost(300); // deliberately above the stack-slot variants of MoveL2D (100 / 125)
11678   format %{ "movd    $dst,$src\t# MoveL2D" %}
11679   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %} // emitted via the assembler's movdq, although the format prints "movd"
11680   ins_pipe( pipe_slow );
11681 %}
11682 
11683 // Replicate scalar to packed byte (1 byte) values in xmm; the scalar is already in an xmm register
11684 instruct Repl8B_reg(regD dst, regD src) %{
11685   match(Set dst (Replicate8B src));
11686   format %{ "MOVDQA  $dst,$src\n\t"
11687             "PUNPCKLBW $dst,$dst\n\t"
11688             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11689   ins_encode( pshufd_8x8(dst, src)); // NOTE(review): one enc_class presumably emits the whole three-instruction sequence printed above -- confirm
11690   ins_pipe( pipe_slow );
11691 %}
11692 
11693 // Replicate scalar to packed byte (1 byte) values in xmm; the scalar comes from a general-purpose register
11694 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11695   match(Set dst (Replicate8B src));
11696   format %{ "MOVD    $dst,$src\n\t"
11697             "PUNPCKLBW $dst,$dst\n\t"
11698             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11699   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); // move into $dst first, then shuffle $dst in place
11700   ins_pipe( pipe_slow );
11701 %}
11702 
11703 // Replicate scalar zero to packed byte (1 byte) values in xmm
11704 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11705   match(Set dst (Replicate8B zero));
11706   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11707   ins_encode( pxor(dst, dst)); // XOR of the register with itself; the immI0 operand is never encoded
11708   ins_pipe( fpu_reg_reg );
11709 %}
11710 
11711 // Replicate scalar to packed short (2 byte) values in xmm; the scalar is already in an xmm register
11712 instruct Repl4S_reg(regD dst, regD src) %{
11713   match(Set dst (Replicate4S src));
11714   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11715   ins_encode( pshufd_4x16(dst, src)); // printed as PSHUFLW although the enc_class is named pshufd_4x16
11716   ins_pipe( fpu_reg_reg );
11717 %}
11718 
11719 // Replicate scalar to packed short (2 byte) values in xmm; the scalar comes from a general-purpose register
11720 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11721   match(Set dst (Replicate4S src));
11722   format %{ "MOVD    $dst,$src\n\t"
11723             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11724   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // move into $dst first, then shuffle $dst in place
11725   ins_pipe( fpu_reg_reg );
11726 %}
11727 
11728 // Replicate scalar zero to packed short (2 byte) values in xmm
11729 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11730   match(Set dst (Replicate4S zero));
11731   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11732   ins_encode( pxor(dst, dst)); // XOR of the register with itself; the immI0 operand is never encoded
11733   ins_pipe( fpu_reg_reg );
11734 %}
11735 
11736 // Replicate scalar to packed char (2 byte) values in xmm; the scalar is already in an xmm register
11737 instruct Repl4C_reg(regD dst, regD src) %{
11738   match(Set dst (Replicate4C src));
11739   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11740   ins_encode( pshufd_4x16(dst, src)); // same enc_class as the Replicate4S register form
11741   ins_pipe( fpu_reg_reg );
11742 %}
11743 
11744 // Replicate scalar to packed char (2 byte) values in xmm; the scalar comes from a general-purpose register
11745 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11746   match(Set dst (Replicate4C src));
11747   format %{ "MOVD    $dst,$src\n\t"
11748             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11749   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // move into $dst first, then shuffle $dst in place
11750   ins_pipe( fpu_reg_reg );
11751 %}
11752 
11753 // Replicate scalar zero to packed char (2 byte) values in xmm
11754 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11755   match(Set dst (Replicate4C zero));
11756   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11757   ins_encode( pxor(dst, dst)); // XOR of the register with itself; the immI0 operand is never encoded
11758   ins_pipe( fpu_reg_reg );
11759 %}
11760 
11761 // Replicate scalar to packed integer (4 byte) values in xmm; the scalar is already in an xmm register
11762 instruct Repl2I_reg(regD dst, regD src) %{
11763   match(Set dst (Replicate2I src));
11764   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11765   ins_encode( pshufd(dst, src, 0x00)); // shuffle control 0x00, as printed in the format
11766   ins_pipe( fpu_reg_reg );
11767 %}
11768 
11769 // Replicate scalar to packed integer (4 byte) values in xmm; the scalar comes from a general-purpose register
11770 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11771   match(Set dst (Replicate2I src));
11772   format %{ "MOVD   $dst,$src\n\t"
11773             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11774   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); // move into $dst first, then shuffle $dst in place
11775   ins_pipe( fpu_reg_reg );
11776 %}
11777 
11778 // Replicate scalar zero to packed integer (4 byte) values in xmm
11779 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11780   match(Set dst (Replicate2I zero));
11781   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11782   ins_encode( pxor(dst, dst)); // XOR of the register with itself; the immI0 operand is never encoded
11783   ins_pipe( fpu_reg_reg );
11784 %}
11785 
11786 // Replicate scalar to packed single precision floating point values in xmm; source operand is a regD
11787 instruct Repl2F_reg(regD dst, regD src) %{
11788   match(Set dst (Replicate2F src));
11789   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11790   ins_encode( pshufd(dst, src, 0xe0)); // shuffle control 0xe0, as printed in the format
11791   ins_pipe( fpu_reg_reg );
11792 %}
11793 
11794 // Replicate scalar to packed single precision floating point values in xmm; source operand is a regF
11795 instruct Repl2F_regF(regD dst, regF src) %{
11796   match(Set dst (Replicate2F src));
11797   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11798   ins_encode( pshufd(dst, src, 0xe0)); // same encoding as Repl2F_reg; only the source operand class differs
11799   ins_pipe( fpu_reg_reg );
11800 %}
11801 
11802 // Replicate scalar zero to packed single precision floating point values in xmm
11803 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11804   match(Set dst (Replicate2F zero));
11805   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11806   ins_encode( pxor(dst, dst)); // XOR of the register with itself; the immF0 operand is never encoded
11807   ins_pipe( fpu_reg_reg );
11808 %}
11809 
11810 
11811 // =======================================================================
11812 // Fast clearing of an array: zero RAX, then "rep stosq" stores it through RDI while RCX counts down
11813 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11814                   rFlagsReg cr)
11815 %{
11816   match(Set dummy (ClearArray cnt base));
11817   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11818   // cnt, base and zero are pinned to RCX, RDI and RAX by their operand classes; all three plus the flags are clobbered.
11819   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11820             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11821   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11822              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11823   ins_pipe(pipe_slow);
11824 %}
11825 
11826 instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
11827                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11828 %{
11829   match(Set result (StrComp str1 str2));
11830   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11831   //ins_cost(300);
11832   // StrComp intrinsic: inputs pinned to RDI/RSI (clobbered), result in RCX, RAX/RBX and two xmm temps scratched.
11833   format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
11834   ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11835   ins_pipe( pipe_slow );
11836 %}
11837 
11838 instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
11839                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
11840 %{
11841   predicate(UseSSE42Intrinsics); // rule is only available when this flag is set
11842   match(Set result (StrIndexOf str1 str2));
11843   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
11844   // StrIndexOf intrinsic: inputs pinned to RSI/RDI (clobbered), result in RBX, RAX/RCX/RDX and one xmm temp scratched.
11845   format %{ "String IndexOf $str1,$str2 -> $result   // KILL RAX, RCX, RDX" %}
11846   ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11847   ins_pipe( pipe_slow );
11848 %}
11849 
11850 // Fast string equals (StrEquals intrinsic)
11851 instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
11852                        rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
11853 %{
11854   match(Set result (StrEquals str1 str2));
11855   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11856   // Inputs pinned to RDI/RSI (clobbered), result in RAX, RBX/RCX and two xmm temps scratched.
11857   format %{ "String Equals $str1,$str2 -> $result    // KILL RBX, RCX" %}
11858   ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 // Fast array equals (AryEq intrinsic)
11863 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
11864                       rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11865 %{
11866   match(Set result (AryEq ary1 ary2));
11867   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11868   //ins_cost(300);
11869   // Inputs pinned to RDI/RSI (clobbered), result in RCX, RAX/RBX and two xmm temps scratched.
11870   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
11871   ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
11872   ins_pipe( pipe_slow );
11873 %}
11874 
11875 //----------Control Flow Instructions------------------------------------------
11876 // Signed compare Instructions
11877 
11878 // XXX more variants!!
11879 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11880 %{
11881   match(Set cr (CmpI op1 op2));
11882   effect(DEF cr, USE op1, USE op2); // explicit effects; this rule is also instantiated from the minI_rReg/maxI_rReg expands
11883   // Signed 32-bit register/register compare producing flags.
11884   format %{ "cmpl    $op1, $op2" %}
11885   opcode(0x3B);  /* Opcode 3B /r */
11886   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11887   ins_pipe(ialu_cr_reg_reg);
11888 %}
11889 
11890 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11891 %{
11892   match(Set cr (CmpI op1 op2));
11893   // Signed 32-bit compare of a register against an int constant.
11894   format %{ "cmpl    $op1, $op2" %}
11895   opcode(0x81, 0x07); /* Opcode 81 /7 */
11896   ins_encode(OpcSErm(op1, op2), Con8or32(op2)); // NOTE(review): presumably picks the 8-bit sign-extended immediate form when $op2 fits -- confirm in OpcSErm/Con8or32
11897   ins_pipe(ialu_cr_reg_imm);
11898 %}
11899 
11900 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11901 %{
11902   match(Set cr (CmpI op1 (LoadI op2)));
11903   // Signed 32-bit compare of a register against an int loaded directly from memory.
11904   ins_cost(500); // XXX
11905   format %{ "cmpl    $op1, $op2" %}
11906   opcode(0x3B); /* Opcode 3B /r */
11907   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11908   ins_pipe(ialu_cr_reg_mem);
11909 %}
11910 
11911 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11912 %{
11913   match(Set cr (CmpI src zero));
11914   // Int compare against zero, done by testing the register against itself; $zero is never encoded.
11915   format %{ "testl   $src, $src" %}
11916   opcode(0x85); // test r/m32, r32
11917   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11918   ins_pipe(ialu_cr_reg_imm);
11919 %}
11920 
11921 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11922 %{
11923   match(Set cr (CmpI (AndI src con) zero));
11924   // (src & con) compared with zero folds into one test; no AND result register is produced.
11925   format %{ "testl   $src, $con" %}
11926   opcode(0xF7, 0x00); // test r/m32, imm32 (F7 /0)
11927   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11928   ins_pipe(ialu_cr_reg_imm);
11929 %}
11930 
11931 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11932 %{
11933   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11934   // (src & *mem) compared with zero folds into one test against the memory operand.
11935   format %{ "testl   $src, $mem" %}
11936   opcode(0x85); // test r/m32, r32
11937   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11938   ins_pipe(ialu_cr_reg_mem);
11939 %}
11940 
11941 // Unsigned compare Instructions; really, same as signed except they
11942 // produce an rFlagsRegU instead of rFlagsReg (identical opcode and encoding to compI_rReg).
11943 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11944 %{
11945   match(Set cr (CmpU op1 op2));
11946   // Unsigned 32-bit register/register compare.
11947   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11948   opcode(0x3B); /* Opcode 3B /r */
11949   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11950   ins_pipe(ialu_cr_reg_reg);
11951 %}
11952 
11953 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11954 %{
11955   match(Set cr (CmpU op1 op2));
11956   // Unsigned 32-bit compare of a register against an int constant; same encoding as compI_rReg_imm.
11957   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11958   opcode(0x81,0x07); /* Opcode 81 /7 */
11959   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11960   ins_pipe(ialu_cr_reg_imm);
11961 %}
11962 
11963 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11964 %{
11965   match(Set cr (CmpU op1 (LoadI op2)));
11966   // Unsigned 32-bit compare of a register against an int loaded directly from memory.
11967   ins_cost(500); // XXX
11968   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11969   opcode(0x3B); /* Opcode 3B /r */
11970   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11971   ins_pipe(ialu_cr_reg_mem);
11972 %}
11973 
11974 // // // Cisc-spilled version of cmpU_rReg
11975 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11976 // //%{
11977 // //  match(Set cr (CmpU (LoadI op1) op2));
11978 // //
11979 // //  format %{ "CMPu   $op1,$op2" %}
11980 // //  ins_cost(500);
11981 // //  opcode(0x39);  /* Opcode 39 /r */
11982 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11983 // //%}
11984 
11985 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11986 %{
11987   match(Set cr (CmpU src zero));
11988   // Unsigned compare against zero, done by testing the register against itself; $zero is never encoded.
11989   format %{ "testl  $src, $src\t# unsigned" %}
11990   opcode(0x85); // test r/m32, r32
11991   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11992   ins_pipe(ialu_cr_reg_imm);
11993 %}
11994 
11995 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11996 %{
11997   match(Set cr (CmpP op1 op2));
11998   // 64-bit pointer compare between two registers; produces unsigned flags (rFlagsRegU).
11999   format %{ "cmpq    $op1, $op2\t# ptr" %}
12000   opcode(0x3B); /* Opcode 3B /r */
12001   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12002   ins_pipe(ialu_cr_reg_reg);
12003 %}
12004 
12005 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12006 %{
12007   match(Set cr (CmpP op1 (LoadP op2)));
12008   // 64-bit pointer compare of a register against a pointer loaded directly from memory.
12009   ins_cost(500); // XXX
12010   format %{ "cmpq    $op1, $op2\t# ptr" %}
12011   opcode(0x3B); /* Opcode 3B /r */
12012   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12013   ins_pipe(ialu_cr_reg_mem);
12014 %}
12015 
12016 // // // Cisc-spilled version of cmpP_rReg
12017 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12018 // //%{
12019 // //  match(Set cr (CmpP (LoadP op1) op2));
12020 // //
12021 // //  format %{ "CMPu   $op1,$op2" %}
12022 // //  ins_cost(500);
12023 // //  opcode(0x39);  /* Opcode 39 /r */
12024 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12025 // //%}
12026 
12027 // XXX this is generalized by compP_rReg_mem??? (same match pattern and encoding; this rule adds a predicate and has no ins_cost)
12028 // Compare raw pointer (used in out-of-heap check).
12029 // Only works because non-oop pointers must be raw pointers
12030 // and raw pointers have no anti-dependencies.
12031 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12032 %{
12033   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr()); // n->in(2) is the matched LoadP; NOTE(review): its in(2) is presumably the load address -- confirm
12034   match(Set cr (CmpP op1 (LoadP op2)));
12035   // Despite the "mem_rReg" name, the register is the first operand and memory the second.
12036   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12037   opcode(0x3B); /* Opcode 3B /r */
12038   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12039   ins_pipe(ialu_cr_reg_mem);
12040 %}
12041 
12042 // This will generate a signed flags result. This should be OK since
12043 // any compare to a zero should be eq/neq.
12044 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12045 %{
12046   match(Set cr (CmpP src zero));
12047   // Pointer null check done by testing the 64-bit register against itself; $zero is never encoded.
12048   format %{ "testq   $src, $src\t# ptr" %}
12049   opcode(0x85); // test r/m64, r64 once the wide REX prefix is applied
12050   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12051   ins_pipe(ialu_cr_reg_imm);
12052 %}
12053 
12054 // This will generate a signed flags result. This should be OK since
12055 // any compare to a zero should be eq/neq.
12056 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12057 %{
12058   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); // complement of testP_mem_reg0's predicate
12059   match(Set cr (CmpP (LoadP op) zero));
12060   // Null check of a pointer in memory without loading it into a register first.
12061   ins_cost(500); // XXX
12062   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12063   opcode(0xF7); /* Opcode F7 /0 */
12064   ins_encode(REX_mem_wide(op),
12065              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF)); // 32-bit immediate; with REX.W it is sign-extended to the all-ones mask shown in the format
12066   ins_pipe(ialu_cr_reg_imm);
12067 %}
12068 
12069 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12070 %{
12071   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // the format string states R12 (heap base) is zero in this mode
12072   match(Set cr (CmpP (LoadP mem) zero));
12073   // Null check of a pointer in memory by comparing it with R12 instead of encoding an immediate.
12074   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12075   ins_encode %{
12076     __ cmpq(r12, $mem$$Address);
12077   %}
12078   ins_pipe(ialu_cr_reg_mem);
12079 %}
12080 
12081 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12082 %{
12083   match(Set cr (CmpN op1 op2));
12084   // Compare of two compressed (32-bit) pointers held in registers.
12085   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12086   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12087   ins_pipe(ialu_cr_reg_reg);
12088 %}
12089 
12090 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12091 %{
12092   match(Set cr (CmpN src (LoadN mem)));
12093   // Compare of a compressed pointer register against a compressed pointer loaded directly from memory.
12094   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12095   ins_encode %{
12096     __ cmpl($src$$Register, $mem$$Address);
12097   %}
12098   ins_pipe(ialu_cr_reg_mem);
12099 %}
12100 
12101 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
12102   match(Set cr (CmpN op1 op2));
12103   // Compare of a compressed pointer register against a compressed oop constant.
12104   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12105   ins_encode %{
12106     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); // the constant is handed to the assembler as a jobject
12107   %}
12108   ins_pipe(ialu_cr_reg_imm);
12109 %}
12110 
12111 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12112 %{
12113   match(Set cr (CmpN src (LoadN mem)));
12114   // The match has the constant first, but the emitted compare has memory first. NOTE(review): fine for eq/ne users only -- confirm no ordered test consumes this.
12115   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12116   ins_encode %{
12117     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12118   %}
12119   ins_pipe(ialu_cr_reg_mem);
12120 %}
12121 
12122 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12123   match(Set cr (CmpN src zero));
12124   // Compressed-pointer null check done by testing the 32-bit register against itself; $zero is never encoded.
12125   format %{ "testl   $src, $src\t# compressed ptr" %}
12126   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12127   ins_pipe(ialu_cr_reg_imm);
12128 %}
12129 
12130 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12131 %{
12132   predicate(Universe::narrow_oop_base() != NULL); // complement of testN_mem_reg0's predicate
12133   match(Set cr (CmpN (LoadN mem) zero));
12134   // Null check of a compressed pointer in memory: ZF must be set exactly when the 32-bit field is zero.
12135   ins_cost(500); // XXX
12136   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
12137   ins_encode %{
12138     __ cmpl($mem$$Address, 0); // compare with 0, not with 0xFFFFFFFF: a compare against -1 sets ZF only when the field is all ones
12139   %}
12140   ins_pipe(ialu_cr_reg_mem);
12141 %}
12142 
12143 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12144 %{
12145   predicate(Universe::narrow_oop_base() == NULL); // the format string states R12 (heap base) is zero in this mode
12146   match(Set cr (CmpN (LoadN mem) zero));
12147   // Null check of a compressed pointer in memory by comparing it with R12 instead of encoding an immediate.
12148   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12149   ins_encode %{
12150     __ cmpl(r12, $mem$$Address);
12151   %}
12152   ins_pipe(ialu_cr_reg_mem);
12153 %}
12154 
12155 // Yanked all unsigned pointer compare operations.
12156 // Pointer compares are done with CmpP which is already unsigned.
12157 
12158 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12159 %{
12160   match(Set cr (CmpL op1 op2));
12161   // Signed 64-bit register/register compare.
12162   format %{ "cmpq    $op1, $op2" %}
12163   opcode(0x3B);  /* Opcode 3B /r */
12164   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12165   ins_pipe(ialu_cr_reg_reg);
12166 %}
12167 
12168 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12169 %{
12170   match(Set cr (CmpL op1 op2));
12171   // Signed 64-bit compare of a register against a long constant of operand class immL32.
12172   format %{ "cmpq    $op1, $op2" %}
12173   opcode(0x81, 0x07); /* Opcode 81 /7 */
12174   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2)); // NOTE(review): presumably picks the 8-bit sign-extended immediate form when $op2 fits -- confirm
12175   ins_pipe(ialu_cr_reg_imm);
12176 %}
12177 
12178 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12179 %{
12180   match(Set cr (CmpL op1 (LoadL op2)));
12181   // Signed 64-bit compare of a register against a long loaded directly from memory (no explicit ins_cost, unlike the int/ptr forms).
12182   format %{ "cmpq    $op1, $op2" %}
12183   opcode(0x3B); /* Opcode 3B /r */
12184   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12185   ins_pipe(ialu_cr_reg_mem);
12186 %}
12187 
12188 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12189 %{
12190   match(Set cr (CmpL src zero));
12191   // Long compare against zero, done by testing the 64-bit register against itself; $zero is never encoded.
12192   format %{ "testq   $src, $src" %}
12193   opcode(0x85); // test r/m64, r64 once the wide REX prefix is applied
12194   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12195   ins_pipe(ialu_cr_reg_imm);
12196 %}
12197 
12198 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12199 %{
12200   match(Set cr (CmpL (AndL src con) zero));
12201   // (src & con) compared with zero folds into one 64-bit test; no AND result register is produced.
12202   format %{ "testq   $src, $con\t# long" %}
12203   opcode(0xF7, 0x00); // test r/m64, imm32 (F7 /0 with REX.W)
12204   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12205   ins_pipe(ialu_cr_reg_imm);
12206 %}
12207 
12208 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12209 %{
12210   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12211   // (src & *mem) compared with zero folds into one 64-bit test against the memory operand.
12212   format %{ "testq   $src, $mem" %}
12213   opcode(0x85); // test r/m64, r64 once the wide REX prefix is applied
12214   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12215   ins_pipe(ialu_cr_reg_mem);
12216 %}
12217 
12218 // Manifest a CmpL result in an integer register.  Very painful.
12219 // This is the test to avoid.
12220 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12221 %{
12222   match(Set dst (CmpL3 src1 src2));
12223   effect(KILL flags);
12224   // Per the format below: $dst = -1 if src1 < src2, otherwise the zero-extended "not equal" bit (0 or 1).
12225   ins_cost(275); // XXX
12226   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12227             "movl    $dst, -1\n\t"
12228             "jl,s    done\n\t"
12229             "setne   $dst\n\t"
12230             "movzbl  $dst, $dst\n\t"
12231     "done:" %}
12232   ins_encode(cmpl3_flag(src1, src2, dst)); // whole sequence comes from one enc_class; note dst is passed last
12233   ins_pipe(pipe_slow);
12234 %}
12235 
12236 //----------Max and Min--------------------------------------------------------
12237 // Min Instructions
12238 
12239 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12240 %{
12241   effect(USE_DEF dst, USE src, USE cr);
12242   // No match rule: this helper is only instantiated from the minI_rReg expand.
12243   format %{ "cmovlgt $dst, $src\t# min" %}
12244   opcode(0x0F, 0x4F); // cmovg r32, r/m32
12245   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12246   ins_pipe(pipe_cmov_reg);
12247 %}
12248 
12249 
12250 instruct minI_rReg(rRegI dst, rRegI src)
12251 %{
12252   match(Set dst (MinI dst src));
12253   // Two-address min: compare dst with src, then overwrite dst with src when dst is greater.
12254   ins_cost(200);
12255   expand %{
12256     rFlagsReg cr; // flags value that links the compare to the conditional move
12257     compI_rReg(cr, dst, src);
12258     cmovI_reg_g(dst, src, cr);
12259   %}
12260 %}
12261 
12262 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12263 %{
12264   effect(USE_DEF dst, USE src, USE cr);
12265   // No match rule: this helper is only instantiated from the maxI_rReg expand.
12266   format %{ "cmovllt $dst, $src\t# max" %}
12267   opcode(0x0F, 0x4C); // cmovl r32, r/m32
12268   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12269   ins_pipe(pipe_cmov_reg);
12270 %}
12271 
12272 
12273 instruct maxI_rReg(rRegI dst, rRegI src)
12274 %{
12275   match(Set dst (MaxI dst src));
12276   // Two-address max: compare dst with src, then overwrite dst with src when dst is less.
12277   ins_cost(200);
12278   expand %{
12279     rFlagsReg cr; // flags value that links the compare to the conditional move
12280     compI_rReg(cr, dst, src);
12281     cmovI_reg_l(dst, src, cr);
12282   %}
12283 %}
12284 
12285 // ============================================================================
12286 // Branch Instructions
12287 
12288 // Jump Direct - Label defines a relative address from JMP+1
12289 instruct jmpDir(label labl)
12290 %{
12291   match(Goto);
12292   effect(USE labl);
12293   // Unconditional jump with a 32-bit displacement.
12294   ins_cost(300);
12295   format %{ "jmp     $labl" %}
12296   size(5); // one opcode byte plus a 4-byte displacement
12297   opcode(0xE9); // jmp rel32
12298   ins_encode(OpcP, Lbl(labl));
12299   ins_pipe(pipe_jmp);
12300   ins_pc_relative(1);
12301 %}
12302 
12303 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12304 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12305 %{
12306   match(If cop cr);
12307   effect(USE labl);
12308   // Conditional branch on signed flags (rFlagsReg) with a 32-bit displacement.
12309   ins_cost(300);
12310   format %{ "j$cop     $labl" %}
12311   size(6); // two opcode bytes plus a 4-byte displacement
12312   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12313   ins_encode(Jcc(cop, labl));
12314   ins_pipe(pipe_jcc);
12315   ins_pc_relative(1);
12316 %}
12317 
12318 // Jump Direct Conditional (counted-loop back branch) - Label defines a relative address from Jcc+1
12319 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12320 %{
12321   match(CountedLoopEnd cop cr);
12322   effect(USE labl);
12323   // Same encoding as jmpCon; only the matched ideal node (CountedLoopEnd) differs.
12324   ins_cost(300);
12325   format %{ "j$cop     $labl\t# loop end" %}
12326   size(6); // two opcode bytes plus a 4-byte displacement
12327   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12328   ins_encode(Jcc(cop, labl));
12329   ins_pipe(pipe_jcc);
12330   ins_pc_relative(1);
12331 %}
12332 
12333 // Jump Direct Conditional (counted-loop back branch, unsigned flags) - Label defines a relative address from Jcc+1
12334 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12335   match(CountedLoopEnd cop cmp);
12336   effect(USE labl);
12337   // Unsigned-condition variant of jmpLoopEnd (cmpOpU / rFlagsRegU operands).
12338   ins_cost(300);
12339   format %{ "j$cop,u   $labl\t# loop end" %}
12340   size(6); // two opcode bytes plus a 4-byte displacement
12341   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12342   ins_encode(Jcc(cop, labl));
12343   ins_pipe(pipe_jcc);
12344   ins_pc_relative(1);
12345 %}
12346 
12347 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12348   match(CountedLoopEnd cop cmp);
12349   effect(USE labl);
12350   // Counted-loop back branch for the cmpOpUCF / rFlagsRegUCF operand classes; same encoding as jmpLoopEndU.
12351   ins_cost(200); // lower than the 300 used by jmpLoopEndU
12352   format %{ "j$cop,u   $labl\t# loop end" %}
12353   size(6); // two opcode bytes plus a 4-byte displacement
12354   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12355   ins_encode(Jcc(cop, labl));
12356   ins_pipe(pipe_jcc);
12357   ins_pc_relative(1);
12358 %}
12359 
12360 // Jump Direct Conditional - using unsigned comparison
12361 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12362   match(If cop cmp);
12363   effect(USE labl);
12364   // Unsigned-condition variant of jmpCon (cmpOpU / rFlagsRegU operands).
12365   ins_cost(300);
12366   format %{ "j$cop,u  $labl" %}
12367   size(6); // two opcode bytes plus a 4-byte displacement
12368   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12369   ins_encode(Jcc(cop, labl));
12370   ins_pipe(pipe_jcc);
12371   ins_pc_relative(1);
12372 %}
12373 
12374 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12375   match(If cop cmp);
12376   effect(USE labl);
12377   // Conditional branch for the cmpOpUCF / rFlagsRegUCF operand classes; same encoding as jmpConU.
12378   ins_cost(200); // lower than the 300 used by jmpConU
12379   format %{ "j$cop,u  $labl" %}
12380   size(6); // two opcode bytes plus a 4-byte displacement
12381   opcode(0x0F, 0x80); // jcc rel32 base opcode; the condition comes from $cop
12382   ins_encode(Jcc(cop, labl));
12383   ins_pipe(pipe_jcc);
12384   ins_pc_relative(1);
12385 %}
12386 
12387 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12388   match(If cop cmp);
12389   effect(USE labl);
12390   // Two-branch sequence: a parity jump followed by the real conditional jump. Only eq and ne are supported.
12391   ins_cost(200);
12392   format %{ $$template
12393     if ($cop$$cmpcode == Assembler::notEqual) {
12394       $$emit$$"jp,u   $labl\n\t"
12395       $$emit$$"j$cop,u   $labl"
12396     } else {
12397       $$emit$$"jp,u   done\n\t"
12398       $$emit$$"j$cop,u   $labl\n\t"
12399       $$emit$$"done:"
12400     }
12401   %}
12402   size(12); // two 6-byte conditional jumps
12403   opcode(0x0F, 0x80); // jcc rel32 base opcode, reused for both jumps
12404   ins_encode %{
12405     Label* l = $labl$$label; // may be NULL; both displacements then fall back to 0
12406     $$$emit8$primary;
12407     emit_cc(cbuf, $secondary, Assembler::parity); // first jump: taken when the parity flag is set
12408     int parity_disp = -1;
12409     if ($cop$$cmpcode == Assembler::notEqual) {
12410        // the two jumps 6 bytes apart so the jump distances are too
12411        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0; // "ne": parity jump also goes to the label
12412     } else if ($cop$$cmpcode == Assembler::equal) {
12413        parity_disp = 6; // "eq": parity jump skips over the 6-byte jump that follows
12414     } else {
12415        ShouldNotReachHere();
12416     }
12417     emit_d32(cbuf, parity_disp);
12418     $$$emit8$primary;
12419     emit_cc(cbuf, $secondary, $cop$$cmpcode); // second jump: the requested condition
12420     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0; // relative to the end of the 4-byte displacement about to be emitted
12421     emit_d32(cbuf, disp);
12422   %}
12423   ins_pipe(pipe_jcc);
12424   ins_pc_relative(1);
12425 %}
12426 
12427 // ============================================================================
12428 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12429 // superklass array for an instance of the superklass.  Set a hidden
12430 // internal cache on a hit (cache is checked with exposed code in
12431 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12432 // encoding ALSO sets flags.
12433 
12434 instruct partialSubtypeCheck(rdi_RegP result,
12435                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12436                              rFlagsReg cr)
12437 %{
12438   match(Set result (PartialSubtypeCheck sub super));
12439   effect(KILL rcx, KILL cr);
12440   // result, sub and super are pinned to RDI, RSI and RAX by their operand classes; RCX and the flags are clobbered.
12441   ins_cost(1100);  // slightly larger than the next version
12442   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12443             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12444             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12445             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12446             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12447             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12448             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12449     "miss:\t" %}
12450   // The enc_class takes no operands; the fixed registers named in the signature are its implicit interface.
12451   opcode(0x1); // Force a XOR of RDI
12452   ins_encode(enc_PartialSubtypeCheck());
12453   ins_pipe(pipe_slow);
12454 %}
12455 
12456 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12457                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12458                                      immP0 zero,
12459                                      rdi_RegP result)
12460 %{
12461   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); // flags-only form: the check result is compared against zero
12462   effect(KILL rcx, KILL result); // rcx is the scan counter; rdi (result) is used as the scan pointer and not preserved
12463 
12464   ins_cost(1000);
12465   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12466             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12467             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12468             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12469             "jne,s   miss\t\t# Missed: flags nz\n\t"
12470             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12471     "miss:\t" %}
12472 
12473   opcode(0x0); // No need to XOR RDI
12474   ins_encode(enc_PartialSubtypeCheck()); // shared with partialSubtypeCheck; the opcode value suppresses the XOR
12475   ins_pipe(pipe_slow);
12476 %}
12477 
12478 // ============================================================================
12479 // Branch Instructions -- short offset versions
12480 //
12481 // These instructions are used to replace jumps of a long offset (the default
12482 // match) with jumps of a shorter offset.  These instructions are all tagged
12483 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12484 // match rules in general matching.  Instead, the ADLC generates a conversion
12485 // method in the MachNode which can be used to do in-place replacement of the
12486 // long variant with the shorter variant.  The compiler will determine if a
12487 // branch can be taken by the is_short_branch_offset() predicate in the machine
12488 // specific code section of the file.
12489 
12490 // Jump Direct - Label defines a relative address from JMP+1
12491 instruct jmpDir_short(label labl) %{
12492   match(Goto);
12493   effect(USE labl);
12494 
12495   ins_cost(300);
12496   format %{ "jmp,s   $labl" %}
12497   size(2); // one opcode byte + 8-bit displacement
12498   opcode(0xEB); // x86 JMP rel8
12499   ins_encode(OpcP, LblShort(labl));
12500   ins_pipe(pipe_jmp);
12501   ins_pc_relative(1);
12502   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12503 %}
12504 
12505 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12506 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12507   match(If cop cr);
12508   effect(USE labl);
12509 
12510   ins_cost(300);
12511   format %{ "j$cop,s   $labl" %}
12512   size(2); // one opcode byte + 8-bit displacement
12513   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12514   ins_encode(JccShort(cop, labl));
12515   ins_pipe(pipe_jcc);
12516   ins_pc_relative(1);
12517   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12518 %}
12519 
12520 // Jump Direct Conditional (counted loop end) - Label defines a relative address from Jcc+1
12521 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12522   match(CountedLoopEnd cop cr); // same encoding as jmpCon_short, but matches the counted-loop back branch
12523   effect(USE labl);
12524 
12525   ins_cost(300);
12526   format %{ "j$cop,s   $labl\t# loop end" %}
12527   size(2); // one opcode byte + 8-bit displacement
12528   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12529   ins_encode(JccShort(cop, labl));
12530   ins_pipe(pipe_jcc);
12531   ins_pc_relative(1);
12532   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12533 %}
12534 
12535 // Jump Direct Conditional (counted loop end, unsigned comparison) - Label defines a relative address from Jcc+1
12536 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12537   match(CountedLoopEnd cop cmp); // counted-loop back branch using the unsigned operand classes
12538   effect(USE labl);
12539 
12540   ins_cost(300);
12541   format %{ "j$cop,us  $labl\t# loop end" %}
12542   size(2); // one opcode byte + 8-bit displacement
12543   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12544   ins_encode(JccShort(cop, labl));
12545   ins_pipe(pipe_jcc);
12546   ins_pc_relative(1);
12547   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12548 %}
12549 
12550 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12551   match(CountedLoopEnd cop cmp); // counted-loop back branch using the UCF operand classes
12552   effect(USE labl);
12553 
12554   ins_cost(300);
12555   format %{ "j$cop,us  $labl\t# loop end" %}
12556   size(2); // one opcode byte + 8-bit displacement
12557   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12558   ins_encode(JccShort(cop, labl));
12559   ins_pipe(pipe_jcc);
12560   ins_pc_relative(1);
12561   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12562 %}
12563 
12564 // Jump Direct Conditional - using unsigned comparison
12565 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12566   match(If cop cmp); // conditional branch using the unsigned operand classes
12567   effect(USE labl);
12568 
12569   ins_cost(300);
12570   format %{ "j$cop,us  $labl" %}
12571   size(2); // one opcode byte + 8-bit displacement
12572   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12573   ins_encode(JccShort(cop, labl));
12574   ins_pipe(pipe_jcc);
12575   ins_pc_relative(1);
12576   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12577 %}
12578 
12579 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12580   match(If cop cmp); // conditional branch using the UCF operand classes
12581   effect(USE labl);
12582 
12583   ins_cost(300);
12584   format %{ "j$cop,us  $labl" %}
12585   size(2); // one opcode byte + 8-bit displacement
12586   opcode(0x70); // base opcode of the x86 Jcc rel8 family; presumably JccShort merges in the condition from cop
12587   ins_encode(JccShort(cop, labl));
12588   ins_pipe(pipe_jcc);
12589   ins_pc_relative(1);
12590   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12591 %}
12592 
12593 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12594   match(If cop cmp);
12595   effect(USE labl);
12596 
12597   ins_cost(300);
12598   format %{ $$template
12599     if ($cop$$cmpcode == Assembler::notEqual) {
12600       $$emit$$"jp,u,s   $labl\n\t"
12601       $$emit$$"j$cop,u,s   $labl"
12602     } else {
12603       $$emit$$"jp,u,s   done\n\t"
12604       $$emit$$"j$cop,u,s  $labl\n\t"
12605       $$emit$$"done:"
12606     }
12607   %}
12608   size(4); // two short conditional jumps, each 1 opcode byte + 1-byte displacement
12609   opcode(0x70); // base opcode of the x86 Jcc rel8 family
12610   ins_encode %{
12611     Label* l = $labl$$label;
12612     emit_cc(cbuf, $primary, Assembler::parity); // first instruction: jump on the parity condition
12613     int parity_disp = -1;
12614     if ($cop$$cmpcode == Assembler::notEqual) {
12615       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // notEqual: parity jump goes to the label as well
12616     } else if ($cop$$cmpcode == Assembler::equal) {
12617       parity_disp = 2; // equal: parity jump skips the following 2-byte jcc ("done" in the format)
12618     } else {
12619       ShouldNotReachHere(); // only equal and notEqual are handled by this instruct
12620     }
12621     emit_d8(cbuf, parity_disp);
12622     emit_cc(cbuf, $primary, $cop$$cmpcode); // second instruction: jump on the requested condition
12623     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // relative to the end of the 1-byte displacement; 0 when no label
12624     emit_d8(cbuf, disp);
12625     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); // checked after the bytes were already emitted
12626     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12627   %}
12628   ins_pipe(pipe_jcc);
12629   ins_pc_relative(1);
12630   ins_short_branch(1); // short-branch variant: suppressed in general matching, used only as an in-place replacement
12631 %}
12632 
12633 // ============================================================================
12634 // inlined locking and unlocking
12635 
12636 instruct cmpFastLock(rFlagsReg cr,
12637                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12638 %{
12639   match(Set cr (FastLock object box)); // the result of the inlined lock attempt is delivered in the flags register
12640   effect(TEMP tmp, TEMP scr); // tmp and scr are temporaries passed to the Fast_Lock encoding
12641 
12642   ins_cost(300);
12643   format %{ "fastlock $object,$box,$tmp,$scr" %}
12644   ins_encode(Fast_Lock(object, box, tmp, scr));
12645   ins_pipe(pipe_slow);
12646   ins_pc_relative(1);
12647 %}
12648 
12649 instruct cmpFastUnlock(rFlagsReg cr,
12650                        rRegP object, rax_RegP box, rRegP tmp)
12651 %{
12652   match(Set cr (FastUnlock object box)); // the result of the inlined unlock attempt is delivered in the flags register
12653   effect(TEMP tmp); // tmp is a temporary passed to the Fast_Unlock encoding
12654 
12655   ins_cost(300);
12656   format %{ "fastunlock $object, $box, $tmp" %}
12657   ins_encode(Fast_Unlock(object, box, tmp));
12658   ins_pipe(pipe_slow);
12659   ins_pc_relative(1);
12660 %}
12661 
12662 
12663 // ============================================================================
12664 // Safepoint Instructions
12665 instruct safePoint_poll(rFlagsReg cr)
12666 %{
12667   match(SafePoint);
12668   effect(KILL cr); // the testl shown in the format overwrites the flags
12669 
12670   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12671             "# Safepoint: poll for GC" %}
12672   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12673   ins_cost(125);
12674   ins_encode(enc_safepoint_poll); // encoding class defined elsewhere in this file; must stay within the declared 6 bytes
12675   ins_pipe(ialu_reg_mem);
12676 %}
12677 
12678 // ============================================================================
12679 // Procedure Call/Return Instructions
12680 // Call Java Static Instruction
12681 // Note: If this code changes, the corresponding ret_addr_offset() and
12682 //       compute_padding() functions will have to be adjusted.
12683 instruct CallStaticJavaDirect(method meth)
12684 %{
12685   match(CallStaticJava);
12686   effect(USE meth);
12687 
12688   ins_cost(300);
12689   format %{ "call,static " %}
12690   opcode(0xE8); /* E8 cd */
12691   ins_encode(Java_Static_Call(meth), call_epilog); // call encoding followed by the call_epilog encoding
12692   ins_pipe(pipe_slow);
12693   ins_pc_relative(1);
12694   ins_alignment(4); // NOTE(review): presumably the alignment that compute_padding() accounts for - confirm
12695 %}
12696 
12697 // Call Java Dynamic Instruction
12698 // Note: If this code changes, the corresponding ret_addr_offset() and
12699 //       compute_padding() functions will have to be adjusted.
12700 instruct CallDynamicJavaDirect(method meth)
12701 %{
12702   match(CallDynamicJava);
12703   effect(USE meth);
12704 
12705   ins_cost(300);
12706   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12707             "call,dynamic " %}
12708   opcode(0xE8); /* E8 cd */
12709   ins_encode(Java_Dynamic_Call(meth), call_epilog); // per the format, a movq into rax precedes the call; call_epilog follows
12710   ins_pipe(pipe_slow);
12711   ins_pc_relative(1);
12712   ins_alignment(4); // NOTE(review): presumably the alignment that compute_padding() accounts for - confirm
12713 %}
12714 
12715 // Call Runtime Instruction
12716 instruct CallRuntimeDirect(method meth)
12717 %{
12718   match(CallRuntime);
12719   effect(USE meth);
12720 
12721   ins_cost(300);
12722   format %{ "call,runtime " %}
12723   opcode(0xE8); /* E8 cd */
12724   ins_encode(Java_To_Runtime(meth)); // unlike the Java call instructs, no call_epilog encoding is appended
12725   ins_pipe(pipe_slow);
12726   ins_pc_relative(1);
12727 %}
12728 
12729 // Call runtime without safepoint
12730 instruct CallLeafDirect(method meth)
12731 %{
12732   match(CallLeaf);
12733   effect(USE meth);
12734 
12735   ins_cost(300);
12736   format %{ "call_leaf,runtime " %}
12737   opcode(0xE8); /* E8 cd */
12738   ins_encode(Java_To_Runtime(meth)); // same encoding as CallRuntimeDirect; only the match rule and format differ
12739   ins_pipe(pipe_slow);
12740   ins_pc_relative(1);
12741 %}
12742 
12743 // Call runtime without safepoint (CallLeafNoFP variant)
12744 instruct CallLeafNoFPDirect(method meth)
12745 %{
12746   match(CallLeafNoFP);
12747   effect(USE meth);
12748 
12749   ins_cost(300);
12750   format %{ "call_leaf_nofp,runtime " %}
12751   opcode(0xE8); /* E8 cd */
12752   ins_encode(Java_To_Runtime(meth)); // same encoding as CallLeafDirect; only the match rule and format differ
12753   ins_pipe(pipe_slow);
12754   ins_pc_relative(1);
12755 %}
12756 
12757 // Return Instruction
12758 // Remove the return address & jump to it.
12759 // Notice: We always emit a nop after a ret to make sure there is room
12760 // for safepoint patching
12761 instruct Ret()
12762 %{
12763   match(Return);
12764 
12765   format %{ "ret" %}
12766   opcode(0xC3); // x86 near RET
12767   ins_encode(OpcP); // NOTE(review): only OpcP is listed; the nop-after-ret promised in the comment above is not visible here - confirm
12768   ins_pipe(pipe_jmp);
12769 %}
12770 
12771 // Tail Call; Jump from runtime stub to Java code.
12772 // Also known as an 'interprocedural jump'.
12773 // Target of jump will eventually return to caller.
12774 // TailJump below removes the return address.
12775 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12776 %{
12777   match(TailCall jump_target method_oop); // method_oop is an input only; per the format it is held in rbx
12778 
12779   ins_cost(300);
12780   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12781   opcode(0xFF, 0x4); /* Opcode FF /4 */
12782   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target)); // presumably: REX prefix if needed, 0xFF, then ModRM carrying /4 and the target register
12783   ins_pipe(pipe_jmp);
12784 %}
12785 
12786 // Tail Jump; remove the return address; jump to target.
12787 // TailCall above leaves the return address around.
12788 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12789 %{
12790   match(TailJump jump_target ex_oop); // ex_oop is an input only; no encoding below references it
12791 
12792   ins_cost(300);
12793   format %{ "popq    rdx\t# pop return address\n\t"
12794             "jmp     $jump_target" %}
12795   opcode(0xFF, 0x4); /* Opcode FF /4 */
12796   ins_encode(Opcode(0x5a), // popq rdx
12797              REX_reg(jump_target), OpcP, reg_opc(jump_target)); // then the same indirect jmp sequence as TailCalljmpInd
12798   ins_pipe(pipe_jmp);
12799 %}
12800 
12801 // Create exception oop: created by stack-crawling runtime code.
12802 // Created exception is now available to this handler, and is setup
12803 // just prior to jumping to this handler.  No code emitted.
12804 instruct CreateException(rax_RegP ex_oop)
12805 %{
12806   match(Set ex_oop (CreateEx));
12807 
12808   size(0); // no bytes are emitted for this node
12809   // use the following format syntax
12810   format %{ "# exception oop is in rax; no code emitted" %}
12811   ins_encode(); // intentionally empty encoding
12812   ins_pipe(empty);
12813 %}
12814 
12815 // Rethrow exception:
12816 // The exception oop will come in the first argument position.
12817 // Then JUMP (not call) to the rethrow stub code.
12818 instruct RethrowException()
12819 %{
12820   match(Rethrow);
12821 
12822   // use the following format syntax
12823   format %{ "jmp     rethrow_stub" %}
12824   ins_encode(enc_rethrow); // encoding class defined elsewhere in this file; per the format it jumps to the rethrow stub
12825   ins_pipe(pipe_jmp);
12826 %}
12827 
12828 
12829 //----------PEEPHOLE RULES-----------------------------------------------------
12830 // These must follow all instruction definitions as they use the names
12831 // defined in the instructions definitions.
12832 //
12833 // peepmatch ( root_instr_name [preceding_instruction]* );
12834 //
12835 // peepconstraint %{
12836 // (instruction_number.operand_name relational_op instruction_number.operand_name
12837 //  [, ...] );
12838 // // instruction numbers are zero-based using left to right order in peepmatch
12839 //
12840 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12841 // // provide an instruction_number.operand_name for each operand that appears
12842 // // in the replacement instruction's match rule
12843 //
12844 // ---------VM FLAGS---------------------------------------------------------
12845 //
12846 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12847 //
12848 // Each peephole rule is given an identifying number starting with zero and
12849 // increasing by one in the order seen by the parser.  An individual peephole
12850 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12851 // on the command-line.
12852 //
12853 // ---------CURRENT LIMITATIONS----------------------------------------------
12854 //
12855 // Only match adjacent instructions in same basic block
12856 // Only equality constraints
12857 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12858 // Only one replacement instruction
12859 //
12860 // ---------EXAMPLE----------------------------------------------------------
12861 //
12862 // // pertinent parts of existing instructions in architecture description
12863 // instruct movI(rRegI dst, rRegI src)
12864 // %{
12865 //   match(Set dst (CopyI src));
12866 // %}
12867 //
12868 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12869 // %{
12870 //   match(Set dst (AddI dst src));
12871 //   effect(KILL cr);
12872 // %}
12873 //
12874 // // Change (inc mov) to lea
12875 // peephole %{
12876 //   // increment preceded by register-register move
12877 //   peepmatch ( incI_rReg movI );
12878 //   // require that the destination register of the increment
12879 //   // match the destination register of the move
12880 //   peepconstraint ( 0.dst == 1.dst );
12881 //   // construct a replacement instruction that sets
12882 //   // the destination to ( move's source register + one )
12883 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12884 // %}
12885 //
12886 
12887 // Implementation no longer uses movX instructions since
12888 // machine-independent system no longer uses CopyX nodes.
12889 //
12890 // peephole
12891 // %{
12892 //   peepmatch (incI_rReg movI);
12893 //   peepconstraint (0.dst == 1.dst);
12894 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12895 // %}
12896 
12897 // peephole
12898 // %{
12899 //   peepmatch (decI_rReg movI);
12900 //   peepconstraint (0.dst == 1.dst);
12901 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12902 // %}
12903 
12904 // peephole
12905 // %{
12906 //   peepmatch (addI_rReg_imm movI);
12907 //   peepconstraint (0.dst == 1.dst);
12908 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12909 // %}
12910 
12911 // peephole
12912 // %{
12913 //   peepmatch (incL_rReg movL);
12914 //   peepconstraint (0.dst == 1.dst);
12915 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12916 // %}
12917 
12918 // peephole
12919 // %{
12920 //   peepmatch (decL_rReg movL);
12921 //   peepconstraint (0.dst == 1.dst);
12922 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12923 // %}
12924 
12925 // peephole
12926 // %{
12927 //   peepmatch (addL_rReg_imm movL);
12928 //   peepconstraint (0.dst == 1.dst);
12929 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12930 // %}
12931 
12932 // peephole
12933 // %{
12934 //   peepmatch (addP_rReg_imm movP);
12935 //   peepconstraint (0.dst == 1.dst);
12936 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12937 // %}
12938 
12939 // // Change load of spilled value to only a spill
12940 // instruct storeI(memory mem, rRegI src)
12941 // %{
12942 //   match(Set mem (StoreI mem src));
12943 // %}
12944 //
12945 // instruct loadI(rRegI dst, memory mem)
12946 // %{
12947 //   match(Set dst (LoadI mem));
12948 // %}
12949 //
12950 
12951 peephole
12952 %{
12953   peepmatch (loadI storeI); // instruction 0 (root) is loadI; instruction 1 is the preceding storeI
12954   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads the just-stored location back into the register that was stored
12955   peepreplace (storeI(1.mem 1.mem 1.src)); // replace the pair with the store alone
12956 %}
12957 
12958 peephole
12959 %{
12960   peepmatch (loadL storeL); // instruction 0 (root) is loadL; instruction 1 is the preceding storeL
12961   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads the just-stored location back into the register that was stored
12962   peepreplace (storeL(1.mem 1.mem 1.src)); // replace the pair with the store alone
12963 %}
12964 
12965 //----------SMARTSPILL RULES---------------------------------------------------
12966 // These must follow all instruction definitions as they use the names
12967 // defined in the instructions definitions.