1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP, R12 and R15)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP, R12 and R15)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 // !!!!! Special hack to get all types of calls to specify the byte offset
 555 //       from the start of the call to the point where the return address
 556 //       will point.
 557 int MachCallStaticJavaNode::ret_addr_offset()
 558 {
 559   return 5; // 5 bytes from start of call to where return address points
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset()
 563 {
 564   return 15; // 15 bytes from start of call to where return address points
 565 }
 566 
 567 // In os_cpu .ad file
 568 // int MachCallRuntimeNode::ret_addr_offset()
 569 
 570 // Indicate if the safepoint node needs the polling page as an input.
 571 // Since amd64 does not have absolute addressing but RIP-relative
 572 // addressing and the polling page is within 2G, it doesn't.
 573 bool SafePointNode::needs_polling_address_input()
 574 {
 575   return false;
 576 }
 577 
 578 //
 579 // Compute padding required for nodes which need alignment
 580 //
 581 
 582 // The address of the call instruction needs to be 4-byte aligned to
 583 // ensure that it does not span a cache line so that it can be patched.
 584 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 585 {
 586   current_offset += 1; // skip call opcode byte
 587   return round_to(current_offset, alignment_required()) - current_offset;
 588 }
 589 
 590 // The address of the call instruction needs to be 4-byte aligned to
 591 // ensure that it does not span a cache line so that it can be patched.
 592 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 593 {
 594   current_offset += 11; // skip movq instruction + call opcode byte
 595   return round_to(current_offset, alignment_required()) - current_offset;
 596 }
 597 
 598 #ifndef PRODUCT
 599 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 600 {
 601   st->print("INT3");
 602 }
 603 #endif
 604 
 605 // EMIT_RM()
 606 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 607 {
 608   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 609   *(cbuf.code_end()) = c;
 610   cbuf.set_code_end(cbuf.code_end() + 1);
 611 }
 612 
 613 // EMIT_CC()
 614 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 615 {
 616   unsigned char c = (unsigned char) (f1 | f2);
 617   *(cbuf.code_end()) = c;
 618   cbuf.set_code_end(cbuf.code_end() + 1);
 619 }
 620 
 621 // EMIT_OPCODE()
 622 void emit_opcode(CodeBuffer &cbuf, int code)
 623 {
 624   *(cbuf.code_end()) = (unsigned char) code;
 625   cbuf.set_code_end(cbuf.code_end() + 1);
 626 }
 627 
 628 // EMIT_OPCODE() w/ relocation information
 629 void emit_opcode(CodeBuffer &cbuf,
 630                  int code, relocInfo::relocType reloc, int offset, int format)
 631 {
 632   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8()
 637 void emit_d8(CodeBuffer &cbuf, int d8)
 638 {
 639   *(cbuf.code_end()) = (unsigned char) d8;
 640   cbuf.set_code_end(cbuf.code_end() + 1);
 641 }
 642 
 643 // EMIT_D16()
 644 void emit_d16(CodeBuffer &cbuf, int d16)
 645 {
 646   *((short *)(cbuf.code_end())) = d16;
 647   cbuf.set_code_end(cbuf.code_end() + 2);
 648 }
 649 
 650 // EMIT_D32()
 651 void emit_d32(CodeBuffer &cbuf, int d32)
 652 {
 653   *((int *)(cbuf.code_end())) = d32;
 654   cbuf.set_code_end(cbuf.code_end() + 4);
 655 }
 656 
 657 // EMIT_D64()
 658 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 659 {
 660   *((int64_t*) (cbuf.code_end())) = d64;
 661   cbuf.set_code_end(cbuf.code_end() + 8);
 662 }
 663 
 664 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 665 void emit_d32_reloc(CodeBuffer& cbuf,
 666                     int d32,
 667                     relocInfo::relocType reloc,
 668                     int format)
 669 {
 670   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 671   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 672 
 673   *((int*) (cbuf.code_end())) = d32;
 674   cbuf.set_code_end(cbuf.code_end() + 4);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf,
 679                     int d32,
 680                     RelocationHolder const& rspec,
 681                     int format)
 682 {
 683 #ifdef ASSERT
 684   if (rspec.reloc()->type() == relocInfo::oop_type &&
 685       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 686     assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
 687   }
 688 #endif
 689   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 690 
 691   *((int* )(cbuf.code_end())) = d32;
 692   cbuf.set_code_end(cbuf.code_end() + 4);
 693 }
 694 
 695 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 696   address next_ip = cbuf.code_end() + 4;
 697   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 698                  external_word_Relocation::spec(addr),
 699                  RELOC_DISP32);
 700 }
 701 
 702 
 703 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 704 void emit_d64_reloc(CodeBuffer& cbuf,
 705                     int64_t d64,
 706                     relocInfo::relocType reloc,
 707                     int format)
 708 {
 709   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 710 
 711   *((int64_t*) (cbuf.code_end())) = d64;
 712   cbuf.set_code_end(cbuf.code_end() + 8);
 713 }
 714 
 715 // emit 64 bit value and construct relocation entry from RelocationHolder
 716 void emit_d64_reloc(CodeBuffer& cbuf,
 717                     int64_t d64,
 718                     RelocationHolder const& rspec,
 719                     int format)
 720 {
 721 #ifdef ASSERT
 722   if (rspec.reloc()->type() == relocInfo::oop_type &&
 723       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 724     assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
 725            "cannot embed non-perm oops in code");
 726   }
 727 #endif
 728   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 729 
 730   *((int64_t*) (cbuf.code_end())) = d64;
 731   cbuf.set_code_end(cbuf.code_end() + 8);
 732 }
 733 
 734 // Access stack slot for load or store
 735 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 736 {
 737   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 738   if (-0x80 <= disp && disp < 0x80) {
 739     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 740     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 741     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 742   } else {
 743     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 744     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 745     emit_d32(cbuf, disp);     // Displacement // R/M byte
 746   }
 747 }
 748 
 749    // rRegI ereg, memory mem) %{    // emit_reg_mem
 750 void encode_RegMem(CodeBuffer &cbuf,
 751                    int reg,
 752                    int base, int index, int scale, int disp, bool disp_is_oop)
 753 {
 754   assert(!disp_is_oop, "cannot have disp");
 755   int regenc = reg & 7;
 756   int baseenc = base & 7;
 757   int indexenc = index & 7;
 758 
 759   // There is no index & no scale, use form without SIB byte
 760   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 761     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 762     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 763       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 764     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 765       // If 8-bit displacement, mode 0x1
 766       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 767       emit_d8(cbuf, disp);
 768     } else {
 769       // If 32-bit displacement
 770       if (base == -1) { // Special flag for absolute address
 771         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 772         if (disp_is_oop) {
 773           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 774         } else {
 775           emit_d32(cbuf, disp);
 776         }
 777       } else {
 778         // Normal base + offset
 779         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 780         if (disp_is_oop) {
 781           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 782         } else {
 783           emit_d32(cbuf, disp);
 784         }
 785       }
 786     }
 787   } else {
 788     // Else, encode with the SIB byte
 789     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 790     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 791       // If no displacement
 792       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 793       emit_rm(cbuf, scale, indexenc, baseenc);
 794     } else {
 795       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 796         // If 8-bit displacement, mode 0x1
 797         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 798         emit_rm(cbuf, scale, indexenc, baseenc);
 799         emit_d8(cbuf, disp);
 800       } else {
 801         // If 32-bit displacement
 802         if (base == 0x04 ) {
 803           emit_rm(cbuf, 0x2, regenc, 0x4);
 804           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 805         } else {
 806           emit_rm(cbuf, 0x2, regenc, 0x4);
 807           emit_rm(cbuf, scale, indexenc, baseenc); // *
 808         }
 809         if (disp_is_oop) {
 810           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 811         } else {
 812           emit_d32(cbuf, disp);
 813         }
 814       }
 815     }
 816   }
 817 }
 818 
 819 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 820 {
 821   if (dstenc != srcenc) {
 822     if (dstenc < 8) {
 823       if (srcenc >= 8) {
 824         emit_opcode(cbuf, Assembler::REX_B);
 825         srcenc -= 8;
 826       }
 827     } else {
 828       if (srcenc < 8) {
 829         emit_opcode(cbuf, Assembler::REX_R);
 830       } else {
 831         emit_opcode(cbuf, Assembler::REX_RB);
 832         srcenc -= 8;
 833       }
 834       dstenc -= 8;
 835     }
 836 
 837     emit_opcode(cbuf, 0x8B);
 838     emit_rm(cbuf, 0x3, dstenc, srcenc);
 839   }
 840 }
 841 
 842 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 843   if( dst_encoding == src_encoding ) {
 844     // reg-reg copy, use an empty encoding
 845   } else {
 846     MacroAssembler _masm(&cbuf);
 847 
 848     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 849   }
 850 }
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq\trbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                   "# Safepoint: poll for GC");
1002     st->print("\t");
1003   }
1004 }
1005 #endif
1006 
1007 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008 {
1009   Compile* C = ra_->C;
1010   int framesize = C->frame_slots() << LogBytesPerInt;
1011   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012   // Remove word for return adr already pushed
1013   // and RBP
1014   framesize -= 2*wordSize;
1015 
1016   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017 
1018   if (framesize) {
1019     emit_opcode(cbuf, Assembler::REX_W);
1020     if (framesize < 0x80) {
1021       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023       emit_d8(cbuf, framesize);
1024     } else {
1025       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027       emit_d32(cbuf, framesize);
1028     }
1029   }
1030 
1031   // popq rbp
1032   emit_opcode(cbuf, 0x58 | RBP_enc);
1033 
1034   if (do_polling() && C->is_method_compilation()) {
1035     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036     // XXX reg_mem doesn't support RIP-relative addressing yet
1037     cbuf.set_inst_mark();
1038     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039     emit_opcode(cbuf, 0x85); // testl
1040     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041     // cbuf.inst_mark() is beginning of instruction
1042     emit_d32_reloc(cbuf, os::get_polling_page());
1043 //                    relocInfo::poll_return_type,
1044   }
1045 }
1046 
1047 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048 {
1049   Compile* C = ra_->C;
1050   int framesize = C->frame_slots() << LogBytesPerInt;
1051   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052   // Remove word for return adr already pushed
1053   // and RBP
1054   framesize -= 2*wordSize;
1055 
1056   uint size = 0;
1057 
1058   if (do_polling() && C->is_method_compilation()) {
1059     size += 6;
1060   }
1061 
1062   // count popq rbp
1063   size++;
1064 
1065   if (framesize) {
1066     if (framesize < 0x80) {
1067       size += 4;
1068     } else if (framesize) {
1069       size += 7;
1070     }
1071   }
1072 
1073   return size;
1074 }
1075 
1076 int MachEpilogNode::reloc() const
1077 {
1078   return 2; // a large enough number
1079 }
1080 
1081 const Pipeline* MachEpilogNode::pipeline() const
1082 {
1083   return MachNode::pipeline_class();
1084 }
1085 
1086 int MachEpilogNode::safepoint_offset() const
1087 {
1088   return 0;
1089 }
1090 
1091 //=============================================================================
1092 
// Register classes, as reported by rc_class(), used to choose the
// encoding of a spill copy.
enum RC {
  rc_bad   = 0, // not a valid OptoReg name
  rc_int   = 1, // general purpose register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack slot
};
1099 
1100 static enum RC rc_class(OptoReg::Name reg)
1101 {
1102   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103 
1104   if (OptoReg::is_stack(reg)) return rc_stack;
1105 
1106   VMReg r = OptoReg::as_VMReg(reg);
1107 
1108   if (r->is_Register()) return rc_int;
1109 
1110   assert(r->is_XMMRegister(), "must be");
1111   return rc_float;
1112 }
1113 
1114 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                        PhaseRegAlloc* ra_,
1116                                        bool do_size,
1117                                        outputStream* st) const
1118 {
1119 
1120   // Get registers to move
1121   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123   OptoReg::Name dst_second = ra_->get_reg_second(this);
1124   OptoReg::Name dst_first = ra_->get_reg_first(this);
1125 
1126   enum RC src_second_rc = rc_class(src_second);
1127   enum RC src_first_rc = rc_class(src_first);
1128   enum RC dst_second_rc = rc_class(dst_second);
1129   enum RC dst_first_rc = rc_class(dst_first);
1130 
1131   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132          "must move at least 1 register" );
1133 
1134   if (src_first == dst_first && src_second == dst_second) {
1135     // Self copy, no move
1136     return 0;
1137   } else if (src_first_rc == rc_stack) {
1138     // mem ->
1139     if (dst_first_rc == rc_stack) {
1140       // mem -> mem
1141       assert(src_second != dst_first, "overlap");
1142       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144         // 64-bit
1145         int src_offset = ra_->reg2offset(src_first);
1146         int dst_offset = ra_->reg2offset(dst_first);
1147         if (cbuf) {
1148           emit_opcode(*cbuf, 0xFF);
1149           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150 
1151           emit_opcode(*cbuf, 0x8F);
1152           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153 
1154 #ifndef PRODUCT
1155         } else if (!do_size) {
1156           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                      "popq    [rsp + #%d]",
1158                      src_offset,
1159                      dst_offset);
1160 #endif
1161         }
1162         return
1163           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165       } else {
1166         // 32-bit
1167         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169         // No pushl/popl, so:
1170         int src_offset = ra_->reg2offset(src_first);
1171         int dst_offset = ra_->reg2offset(dst_first);
1172         if (cbuf) {
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x89);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179           emit_opcode(*cbuf, 0x8B);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, src_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, 0x89);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, dst_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, Assembler::REX_W);
1192           emit_opcode(*cbuf, 0x8B);
1193           emit_opcode(*cbuf, 0x44);
1194           emit_opcode(*cbuf, 0x24);
1195           emit_opcode(*cbuf, 0xF8);
1196 
1197 #ifndef PRODUCT
1198         } else if (!do_size) {
1199           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                      "movl    rax, [rsp + #%d]\n\t"
1201                      "movl    [rsp + #%d], rax\n\t"
1202                      "movq    rax, [rsp - #8]",
1203                      src_offset,
1204                      dst_offset);
1205 #endif
1206         }
1207         return
1208           5 + // movq
1209           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211           5; // movq
1212       }
1213     } else if (dst_first_rc == rc_int) {
1214       // mem -> gpr
1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217         // 64-bit
1218         int offset = ra_->reg2offset(src_first);
1219         if (cbuf) {
1220           if (Matcher::_regEncode[dst_first] < 8) {
1221             emit_opcode(*cbuf, Assembler::REX_W);
1222           } else {
1223             emit_opcode(*cbuf, Assembler::REX_WR);
1224           }
1225           emit_opcode(*cbuf, 0x8B);
1226           encode_RegMem(*cbuf,
1227                         Matcher::_regEncode[dst_first],
1228                         RSP_enc, 0x4, 0, offset,
1229                         false);
1230 #ifndef PRODUCT
1231         } else if (!do_size) {
1232           st->print("movq    %s, [rsp + #%d]\t# spill",
1233                      Matcher::regName[dst_first],
1234                      offset);
1235 #endif
1236         }
1237         return
1238           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239       } else {
1240         // 32-bit
1241         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243         int offset = ra_->reg2offset(src_first);
1244         if (cbuf) {
1245           if (Matcher::_regEncode[dst_first] >= 8) {
1246             emit_opcode(*cbuf, Assembler::REX_R);
1247           }
1248           emit_opcode(*cbuf, 0x8B);
1249           encode_RegMem(*cbuf,
1250                         Matcher::_regEncode[dst_first],
1251                         RSP_enc, 0x4, 0, offset,
1252                         false);
1253 #ifndef PRODUCT
1254         } else if (!do_size) {
1255           st->print("movl    %s, [rsp + #%d]\t# spill",
1256                      Matcher::regName[dst_first],
1257                      offset);
1258 #endif
1259         }
1260         return
1261           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262           ((Matcher::_regEncode[dst_first] < 8)
1263            ? 3
1264            : 4); // REX
1265       }
1266     } else if (dst_first_rc == rc_float) {
1267       // mem-> xmm
1268       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270         // 64-bit
1271         int offset = ra_->reg2offset(src_first);
1272         if (cbuf) {
1273           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274           if (Matcher::_regEncode[dst_first] >= 8) {
1275             emit_opcode(*cbuf, Assembler::REX_R);
1276           }
1277           emit_opcode(*cbuf, 0x0F);
1278           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279           encode_RegMem(*cbuf,
1280                         Matcher::_regEncode[dst_first],
1281                         RSP_enc, 0x4, 0, offset,
1282                         false);
1283 #ifndef PRODUCT
1284         } else if (!do_size) {
1285           st->print("%s  %s, [rsp + #%d]\t# spill",
1286                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                      Matcher::regName[dst_first],
1288                      offset);
1289 #endif
1290         }
1291         return
1292           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293           ((Matcher::_regEncode[dst_first] < 8)
1294            ? 5
1295            : 6); // REX
1296       } else {
1297         // 32-bit
1298         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300         int offset = ra_->reg2offset(src_first);
1301         if (cbuf) {
1302           emit_opcode(*cbuf, 0xF3);
1303           if (Matcher::_regEncode[dst_first] >= 8) {
1304             emit_opcode(*cbuf, Assembler::REX_R);
1305           }
1306           emit_opcode(*cbuf, 0x0F);
1307           emit_opcode(*cbuf, 0x10);
1308           encode_RegMem(*cbuf,
1309                         Matcher::_regEncode[dst_first],
1310                         RSP_enc, 0x4, 0, offset,
1311                         false);
1312 #ifndef PRODUCT
1313         } else if (!do_size) {
1314           st->print("movss   %s, [rsp + #%d]\t# spill",
1315                      Matcher::regName[dst_first],
1316                      offset);
1317 #endif
1318         }
1319         return
1320           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321           ((Matcher::_regEncode[dst_first] < 8)
1322            ? 5
1323            : 6); // REX
1324       }
1325     }
1326   } else if (src_first_rc == rc_int) {
1327     // gpr ->
1328     if (dst_first_rc == rc_stack) {
1329       // gpr -> mem
1330       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332         // 64-bit
1333         int offset = ra_->reg2offset(dst_first);
1334         if (cbuf) {
1335           if (Matcher::_regEncode[src_first] < 8) {
1336             emit_opcode(*cbuf, Assembler::REX_W);
1337           } else {
1338             emit_opcode(*cbuf, Assembler::REX_WR);
1339           }
1340           emit_opcode(*cbuf, 0x89);
1341           encode_RegMem(*cbuf,
1342                         Matcher::_regEncode[src_first],
1343                         RSP_enc, 0x4, 0, offset,
1344                         false);
1345 #ifndef PRODUCT
1346         } else if (!do_size) {
1347           st->print("movq    [rsp + #%d], %s\t# spill",
1348                      offset,
1349                      Matcher::regName[src_first]);
1350 #endif
1351         }
1352         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353       } else {
1354         // 32-bit
1355         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357         int offset = ra_->reg2offset(dst_first);
1358         if (cbuf) {
1359           if (Matcher::_regEncode[src_first] >= 8) {
1360             emit_opcode(*cbuf, Assembler::REX_R);
1361           }
1362           emit_opcode(*cbuf, 0x89);
1363           encode_RegMem(*cbuf,
1364                         Matcher::_regEncode[src_first],
1365                         RSP_enc, 0x4, 0, offset,
1366                         false);
1367 #ifndef PRODUCT
1368         } else if (!do_size) {
1369           st->print("movl    [rsp + #%d], %s\t# spill",
1370                      offset,
1371                      Matcher::regName[src_first]);
1372 #endif
1373         }
1374         return
1375           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376           ((Matcher::_regEncode[src_first] < 8)
1377            ? 3
1378            : 4); // REX
1379       }
1380     } else if (dst_first_rc == rc_int) {
1381       // gpr -> gpr
1382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384         // 64-bit
1385         if (cbuf) {
1386           if (Matcher::_regEncode[dst_first] < 8) {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_W);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WB);
1391             }
1392           } else {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_WR);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WRB);
1397             }
1398           }
1399           emit_opcode(*cbuf, 0x8B);
1400           emit_rm(*cbuf, 0x3,
1401                   Matcher::_regEncode[dst_first] & 7,
1402                   Matcher::_regEncode[src_first] & 7);
1403 #ifndef PRODUCT
1404         } else if (!do_size) {
1405           st->print("movq    %s, %s\t# spill",
1406                      Matcher::regName[dst_first],
1407                      Matcher::regName[src_first]);
1408 #endif
1409         }
1410         return 3; // REX
1411       } else {
1412         // 32-bit
1413         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415         if (cbuf) {
1416           if (Matcher::_regEncode[dst_first] < 8) {
1417             if (Matcher::_regEncode[src_first] >= 8) {
1418               emit_opcode(*cbuf, Assembler::REX_B);
1419             }
1420           } else {
1421             if (Matcher::_regEncode[src_first] < 8) {
1422               emit_opcode(*cbuf, Assembler::REX_R);
1423             } else {
1424               emit_opcode(*cbuf, Assembler::REX_RB);
1425             }
1426           }
1427           emit_opcode(*cbuf, 0x8B);
1428           emit_rm(*cbuf, 0x3,
1429                   Matcher::_regEncode[dst_first] & 7,
1430                   Matcher::_regEncode[src_first] & 7);
1431 #ifndef PRODUCT
1432         } else if (!do_size) {
1433           st->print("movl    %s, %s\t# spill",
1434                      Matcher::regName[dst_first],
1435                      Matcher::regName[src_first]);
1436 #endif
1437         }
1438         return
1439           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440           ? 2
1441           : 3; // REX
1442       }
1443     } else if (dst_first_rc == rc_float) {
1444       // gpr -> xmm
1445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447         // 64-bit
1448         if (cbuf) {
1449           emit_opcode(*cbuf, 0x66);
1450           if (Matcher::_regEncode[dst_first] < 8) {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_W);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WB);
1455             }
1456           } else {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_WR);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WRB);
1461             }
1462           }
1463           emit_opcode(*cbuf, 0x0F);
1464           emit_opcode(*cbuf, 0x6E);
1465           emit_rm(*cbuf, 0x3,
1466                   Matcher::_regEncode[dst_first] & 7,
1467                   Matcher::_regEncode[src_first] & 7);
1468 #ifndef PRODUCT
1469         } else if (!do_size) {
1470           st->print("movdq   %s, %s\t# spill",
1471                      Matcher::regName[dst_first],
1472                      Matcher::regName[src_first]);
1473 #endif
1474         }
1475         return 5; // REX
1476       } else {
1477         // 32-bit
1478         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480         if (cbuf) {
1481           emit_opcode(*cbuf, 0x66);
1482           if (Matcher::_regEncode[dst_first] < 8) {
1483             if (Matcher::_regEncode[src_first] >= 8) {
1484               emit_opcode(*cbuf, Assembler::REX_B);
1485             }
1486           } else {
1487             if (Matcher::_regEncode[src_first] < 8) {
1488               emit_opcode(*cbuf, Assembler::REX_R);
1489             } else {
1490               emit_opcode(*cbuf, Assembler::REX_RB);
1491             }
1492           }
1493           emit_opcode(*cbuf, 0x0F);
1494           emit_opcode(*cbuf, 0x6E);
1495           emit_rm(*cbuf, 0x3,
1496                   Matcher::_regEncode[dst_first] & 7,
1497                   Matcher::_regEncode[src_first] & 7);
1498 #ifndef PRODUCT
1499         } else if (!do_size) {
1500           st->print("movdl   %s, %s\t# spill",
1501                      Matcher::regName[dst_first],
1502                      Matcher::regName[src_first]);
1503 #endif
1504         }
1505         return
1506           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507           ? 4
1508           : 5; // REX
1509       }
1510     }
1511   } else if (src_first_rc == rc_float) {
1512     // xmm ->
1513     if (dst_first_rc == rc_stack) {
1514       // xmm -> mem
1515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517         // 64-bit
1518         int offset = ra_->reg2offset(dst_first);
1519         if (cbuf) {
1520           emit_opcode(*cbuf, 0xF2);
1521           if (Matcher::_regEncode[src_first] >= 8) {
1522               emit_opcode(*cbuf, Assembler::REX_R);
1523           }
1524           emit_opcode(*cbuf, 0x0F);
1525           emit_opcode(*cbuf, 0x11);
1526           encode_RegMem(*cbuf,
1527                         Matcher::_regEncode[src_first],
1528                         RSP_enc, 0x4, 0, offset,
1529                         false);
1530 #ifndef PRODUCT
1531         } else if (!do_size) {
1532           st->print("movsd   [rsp + #%d], %s\t# spill",
1533                      offset,
1534                      Matcher::regName[src_first]);
1535 #endif
1536         }
1537         return
1538           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539           ((Matcher::_regEncode[src_first] < 8)
1540            ? 5
1541            : 6); // REX
1542       } else {
1543         // 32-bit
1544         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546         int offset = ra_->reg2offset(dst_first);
1547         if (cbuf) {
1548           emit_opcode(*cbuf, 0xF3);
1549           if (Matcher::_regEncode[src_first] >= 8) {
1550               emit_opcode(*cbuf, Assembler::REX_R);
1551           }
1552           emit_opcode(*cbuf, 0x0F);
1553           emit_opcode(*cbuf, 0x11);
1554           encode_RegMem(*cbuf,
1555                         Matcher::_regEncode[src_first],
1556                         RSP_enc, 0x4, 0, offset,
1557                         false);
1558 #ifndef PRODUCT
1559         } else if (!do_size) {
1560           st->print("movss   [rsp + #%d], %s\t# spill",
1561                      offset,
1562                      Matcher::regName[src_first]);
1563 #endif
1564         }
1565         return
1566           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567           ((Matcher::_regEncode[src_first] < 8)
1568            ? 5
1569            : 6); // REX
1570       }
1571     } else if (dst_first_rc == rc_int) {
1572       // xmm -> gpr
1573       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575         // 64-bit
1576         if (cbuf) {
1577           emit_opcode(*cbuf, 0x66);
1578           if (Matcher::_regEncode[dst_first] < 8) {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_W);
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583             }
1584           } else {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WRB);
1589             }
1590           }
1591           emit_opcode(*cbuf, 0x0F);
1592           emit_opcode(*cbuf, 0x7E);
1593           emit_rm(*cbuf, 0x3,
1594                   Matcher::_regEncode[dst_first] & 7,
1595                   Matcher::_regEncode[src_first] & 7);
1596 #ifndef PRODUCT
1597         } else if (!do_size) {
1598           st->print("movdq   %s, %s\t# spill",
1599                      Matcher::regName[dst_first],
1600                      Matcher::regName[src_first]);
1601 #endif
1602         }
1603         return 5; // REX
1604       } else {
1605         // 32-bit
1606         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608         if (cbuf) {
1609           emit_opcode(*cbuf, 0x66);
1610           if (Matcher::_regEncode[dst_first] < 8) {
1611             if (Matcher::_regEncode[src_first] >= 8) {
1612               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613             }
1614           } else {
1615             if (Matcher::_regEncode[src_first] < 8) {
1616               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617             } else {
1618               emit_opcode(*cbuf, Assembler::REX_RB);
1619             }
1620           }
1621           emit_opcode(*cbuf, 0x0F);
1622           emit_opcode(*cbuf, 0x7E);
1623           emit_rm(*cbuf, 0x3,
1624                   Matcher::_regEncode[dst_first] & 7,
1625                   Matcher::_regEncode[src_first] & 7);
1626 #ifndef PRODUCT
1627         } else if (!do_size) {
1628           st->print("movdl   %s, %s\t# spill",
1629                      Matcher::regName[dst_first],
1630                      Matcher::regName[src_first]);
1631 #endif
1632         }
1633         return
1634           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635           ? 4
1636           : 5; // REX
1637       }
1638     } else if (dst_first_rc == rc_float) {
1639       // xmm -> xmm
1640       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642         // 64-bit
1643         if (cbuf) {
1644           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645           if (Matcher::_regEncode[dst_first] < 8) {
1646             if (Matcher::_regEncode[src_first] >= 8) {
1647               emit_opcode(*cbuf, Assembler::REX_B);
1648             }
1649           } else {
1650             if (Matcher::_regEncode[src_first] < 8) {
1651               emit_opcode(*cbuf, Assembler::REX_R);
1652             } else {
1653               emit_opcode(*cbuf, Assembler::REX_RB);
1654             }
1655           }
1656           emit_opcode(*cbuf, 0x0F);
1657           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658           emit_rm(*cbuf, 0x3,
1659                   Matcher::_regEncode[dst_first] & 7,
1660                   Matcher::_regEncode[src_first] & 7);
1661 #ifndef PRODUCT
1662         } else if (!do_size) {
1663           st->print("%s  %s, %s\t# spill",
1664                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                      Matcher::regName[dst_first],
1666                      Matcher::regName[src_first]);
1667 #endif
1668         }
1669         return
1670           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671           ? 4
1672           : 5; // REX
1673       } else {
1674         // 32-bit
1675         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677         if (cbuf) {
1678           if (!UseXmmRegToRegMoveAll)
1679             emit_opcode(*cbuf, 0xF3);
1680           if (Matcher::_regEncode[dst_first] < 8) {
1681             if (Matcher::_regEncode[src_first] >= 8) {
1682               emit_opcode(*cbuf, Assembler::REX_B);
1683             }
1684           } else {
1685             if (Matcher::_regEncode[src_first] < 8) {
1686               emit_opcode(*cbuf, Assembler::REX_R);
1687             } else {
1688               emit_opcode(*cbuf, Assembler::REX_RB);
1689             }
1690           }
1691           emit_opcode(*cbuf, 0x0F);
1692           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693           emit_rm(*cbuf, 0x3,
1694                   Matcher::_regEncode[dst_first] & 7,
1695                   Matcher::_regEncode[src_first] & 7);
1696 #ifndef PRODUCT
1697         } else if (!do_size) {
1698           st->print("%s  %s, %s\t# spill",
1699                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                      Matcher::regName[dst_first],
1701                      Matcher::regName[src_first]);
1702 #endif
1703         }
1704         return
1705           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708       }
1709     }
1710   }
1711 
1712   assert(0," foo ");
1713   Unimplemented();
1714 
1715   return 0;
1716 }
1717 
1718 #ifndef PRODUCT
1719 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720 { // Non-product listing: no code buffer, do_size false, text is printed to st.
1721   implementation(NULL, ra_, false, st);
1722 }
1723 #endif
1724 
1725 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726 { // Code generation: the shared routine gets a real buffer and no stream.
1727   implementation(&cbuf, ra_, false, NULL);
1728 }
1729 
1730 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731 { // Size query: no buffer, no stream, do_size true; the byte count is returned.
1732   return implementation(NULL, ra_, true, NULL);
1733 }
1734 
1735 //=============================================================================
1736 #ifndef PRODUCT
1737 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738 { // Non-product listing: prints the padding nop with its byte count (_count).
1739   st->print("nop \t# %d bytes pad for loops and calls", _count);
1740 }
1741 #endif
1742 
1743 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744 { // Emits the padding through the macro assembler; size() reports _count.
1745   MacroAssembler _masm(&cbuf);
1746   __ nop(_count); // presumably "__" expands to "_masm." -- TODO confirm macro
1747 }
1748 
1749 uint MachNopNode::size(PhaseRegAlloc*) const
1750 { // The node's size is exactly the pad count passed to nop() in emit().
1751   return _count;
1752 }
1753 
1754 
1755 //=============================================================================
1756 #ifndef PRODUCT
1757 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758 { // Non-product listing of the leaq that BoxLockNode::emit encodes.
1759   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); // displacement printed after rsp
1760   int reg = ra_->get_reg_first(this); // used below as an index into Matcher::regName
1761   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1762             Matcher::regName[reg], offset);
1763 }
1764 #endif
1765 
1766 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767 {
1768   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1769   const int enc  = ra_->get_encode(this);
1770   const bool wide = (disp >= 0x80);  // disp32 form instead of disp8 form
1771 
1772   // LEA enc,[SP+disp]: REX.W (REX.WR when enc >= 8), opcode 0x8D, then a
1773   // ModRM whose r/m is 0x04 followed by a byte built with base RSP_enc.
1774   emit_opcode(cbuf, enc < 8 ? Assembler::REX_W : Assembler::REX_WR);
1775   emit_opcode(cbuf, 0x8D);
1776   emit_rm(cbuf, wide ? 0x2 : 0x1, enc & 7, 0x04);
1777   emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1778   if (wide) {
1779     emit_d32(cbuf, disp);
1780   } else {
1781     emit_d8(cbuf, disp);
1782   }
1783 }
1784 
1785 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786 {
1787   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788   return (disp >= 0x80) ? 8 : 5; // disp32 form : disp8 form, REX included
1789 }
1790 
1791 //=============================================================================
1792 
1793 // Emit the call stub used to go from compiled Java code to the interpreter.
1794 void emit_java_to_interp(CodeBuffer& cbuf)
1795 {
1796   // Stub is fixed up when the corresponding call is converted from
1797   // calling compiled code to calling interpreted code.
1798   // movq rbx, 0
1799   // jmp -5 # to self
1800 
1801   address mark = cbuf.inst_mark();  // get mark within main instrs section
1802 
1803   // Note that the code buffer's inst_mark is always relative to insts.
1804   // That's why we must use the macroassembler to generate a stub.
1805   MacroAssembler _masm(&cbuf);
1806 
1807   address base =
1808   __ start_a_stub(Compile::MAX_stubs_size);
1809   if (base == NULL)  return;  // CodeBuffer::expand failed; nothing is emitted
1810   // static stub relocation stores the instruction address of the call
1811   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812   // static stub relocation also tags the methodOop in the code-stream.
1813   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814   // This is recognized as unresolved by relocs/nativeinst/ic code
1815   __ jump(RuntimeAddress(__ pc())); // target is the jump's own address ("to self")
1816 
1817   // Update current stubs pointer and restore code_end.
1818   __ end_a_stub();
1819 }
1820 
1821 // Size in bytes of the call stub, compiled java to interpreter
1822 uint size_java_to_interp()
1823 {
1824   return 15;  // movq (1+1+8); jmp (1+4)
1825 }
1826 
1827 // Number of relocation entries for the call stub, compiled java to interpreter
1828 uint reloc_java_to_interp()
1829 {
1830   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831 }
1832 
1833 //=============================================================================
1834 #ifndef PRODUCT
1835 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836 { // Non-product listing of the unverified entry point; mirrors MachUEPNode::emit.
1837   if (UseCompressedOops) {
1838     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839     if (Universe::narrow_oop_shift() != 0) {
1840       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1841     }
1842     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1843   } else {
1844     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846   }
1847   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848   st->print_cr("\tnop");                        // pad that emit() always adds
1849   if (!OptoBreakpoint) st->print_cr("\tnop");   // room left for int3
1850   // emit() pads with one more nop when compressed oops are in use; list it
1851   // too so the printed sequence matches the emitted bytes and size().
1852   if (UseCompressedOops) st->print_cr("\tnop"); }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 { // Inline cache check: compare the klass reached through j_rarg0 with rax.
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   uint code_size = cbuf.code_size(); // start position for the size assert at the end
1860 #endif
1861   if (UseCompressedOops) {
1862     masm.load_klass(rscratch1, j_rarg0); // klass goes through a scratch register first
1863     masm.cmpptr(rax, rscratch1);
1864   } else {
1865     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   }
1867 
1868   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1869 
1870   /* WARNING these NOPs are critical so that verified entry point is properly
1871      aligned for patching by NativeJump::patch_verified_entry() */
1872   int nops_cnt = 1;
1873   if (!OptoBreakpoint) {
1874     // Leave space for int3
1875      nops_cnt += 1;
1876   }
1877   if (UseCompressedOops) {
1878     // ??? divisible by 4 is aligned?
1879     nops_cnt += 1;
1880   }
1881   masm.nop(nops_cnt); // 1 to 3 pad bytes depending on the two flags above
1882 
1883   assert(cbuf.code_size() - code_size == size(ra_),
1884          "checking code size of inline cache node"); // emitted bytes must equal size()
1885 }
1886 
1887 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888 {
1889   // Byte count when OptoBreakpoint is set; one more byte otherwise.
1890   uint bytes;
1891   if (!UseCompressedOops) {
1892     bytes = 11;
1893   } else {
1894     bytes = (Universe::narrow_oop_shift() == 0) ? 15 : 19;
1895   }
1896   // Without OptoBreakpoint, emit() adds one extra nop (space for int3).
1897   return OptoBreakpoint ? bytes : bytes + 1;
1898 }
1899 
1900 
1901 //=============================================================================
1902 uint size_exception_handler()
1903 { // Stub space requested by emit_exception_handler for its single jump.
1904   // NativeCall instruction size is the same as NativeJump.
1905   // Note that this value is also credited (in output.cpp) to
1906   // the size of the code section.
1907   return NativeJump::instruction_size;
1908 }
1909 
1910 // Emit exception handler code: a stub holding one jump to the exception blob.
1911 int emit_exception_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's inst_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_exception_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset(); // handler start; this is the value returned
1921   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); // must fit the reserved size
1923   __ end_a_stub();
1924   return offset;
1925 }
1926 
1927 uint size_deopt_handler()
1928 { // Stub space requested by emit_deopt_handler.
1929   // three 5 byte instructions (the call, subptr and jump in emit_deopt_handler)
1930   return 15;
1931 }
1932 
1933 // Emit deopt handler code; returns the handler's offset, or 0 on failure.
1934 int emit_deopt_handler(CodeBuffer& cbuf)
1935 {
1936 
1937   // Note that the code buffer's inst_mark is always relative to insts.
1938   // That's why we must use the macroassembler to generate a handler.
1939   MacroAssembler _masm(&cbuf);
1940   address base =
1941   __ start_a_stub(size_deopt_handler());
1942   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943   int offset = __ offset(); // handler start; this is the value returned
1944   address the_pc = (address) __ pc(); // not referenced again in this function
1945   Label next;
1946   // push a "the_pc" on the stack without destroying any registers
1947   // as they all may be live.
1948 
1949   // push address of "next"
1950   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951   __ bind(next);
1952   // adjust it so it matches "the_pc"
1953   __ subptr(Address(rsp, 0), __ offset() - offset); // subtract the bytes emitted since the handler start
1954   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); // must fit the reserved size
1956   __ end_a_stub();
1957   return offset;
1958 }
1959 
1960 static void emit_double_constant(CodeBuffer& cbuf, double x) { // emits a 32-bit displacement to a stored copy of x
1961   int mark = cbuf.insts()->mark_off(); // saved so it can be restored after the masm call
1962   MacroAssembler _masm(&cbuf);
1963   address double_address = __ double_constant(x);
1964   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1965   emit_d32_reloc(cbuf,
1966                  (int) (double_address - cbuf.code_end() - 4), // distance from the end of the 4-byte field
1967                  internal_word_Relocation::spec(double_address),
1968                  RELOC_DISP32);
1969 }
1970 
1971 static void emit_float_constant(CodeBuffer& cbuf, float x) { // emits a 32-bit displacement to a stored copy of x
1972   int mark = cbuf.insts()->mark_off(); // saved so it can be restored after the masm call
1973   MacroAssembler _masm(&cbuf);
1974   address float_address = __ float_constant(x);
1975   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1976   emit_d32_reloc(cbuf,
1977                  (int) (float_address - cbuf.code_end() - 4), // distance from the end of the 4-byte field
1978                  internal_word_Relocation::spec(float_address),
1979                  RELOC_DISP32);
1980 }
1981 
1982 
1983 const bool Matcher::match_rule_supported(int opcode) {
1984   // An opcode is supported exactly when a match rule exists for it;
1985   // per default every existing match rule is supported.
1986   const bool rule_exists = has_match_rule(opcode);
1987   return rule_exists;
1988 }
1989 
1990 int Matcher::regnum_to_fpu_offset(int regnum) {
1991   const int fp_chunk_start = 32;  // The FP registers are in the second chunk
1992   return regnum - fp_chunk_start;
1993 }
1994 
1995 // The query is UltraSparc specific; true here just means we have fast l2f conversion
1996 const bool Matcher::convL2FSupported(void) {
1997   return true;
1998 }
1999 
2000 // Vector width in bytes (a constant 8 on this platform)
2001 const uint Matcher::vector_width_in_bytes(void) {
2002   return 8;
2003 }
2004 
2005 // Vector ideal reg: vectors use the double register type (Op_RegD)
2006 const uint Matcher::vector_ideal_reg(void) {
2007   return Op_RegD;
2008 }
2009 
2010 // Is this branch offset short enough that a short branch can be used?
2011 //
2012 // NOTE: If the platform does not provide any short branch variants, then
2013 //       this method should return false for offset 0.
2014 bool Matcher::is_short_branch_offset(int rule, int offset) {
2015   // jmpConUCF2's short form contains multiple branches, so its reach is less.
2016   const bool multi_branch = (rule == jmpConUCF2_rule);
2017   const int lowest  = multi_branch ? -126 : -128;
2018   const int highest = multi_branch ?  125 :  127;
2019   return lowest <= offset && offset <= highest;
2020 }
2021 
2022 const bool Matcher::isSimpleConstant64(jlong value) { // value is ignored: the answer is always true
2023   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2024   //return value == (int) value;  // Cf. storeImmL and immL32.
2025 
2026   // Probably always true, even if a temp register is required.
2027   return true;
2028 }
2029 
2030 // The ecx parameter to rep stosq for the ClearArray node is in words, not bytes.
2031 const bool Matcher::init_array_count_is_in_bytes = false;
2032 
2033 // Threshold size for cleararray (the byte size of 8 longs).
2034 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2035 
2036 // Should the Matcher clone shifts on addressing modes, expecting them
2037 // to be subsumed into complex addressing expressions or compute them
2038 // into registers?  True for Intel but false for most RISCs
2039 const bool Matcher::clone_shift_expressions = true;
2040 
2041 // Is it better to copy float constants, or load them directly from
2042 // memory?  Intel can load a float constant from a direct address,
2043 // requiring no extra registers.  Most RISCs will have to materialize
2044 // an address into a register first, so they would do better to copy
2045 // the constant from stack.
2046 const bool Matcher::rematerialize_float_constants = true; // XXX
2047 
2048 // If CPU can load and store mis-aligned doubles directly then no
2049 // fixup is needed.  Else we split the double into 2 integer pieces
2050 // and move it piece-by-piece.  Only happens when passing doubles into
2051 // C code as the Java calling convention forces doubles to be aligned.
2052 const bool Matcher::misaligned_doubles_ok = true;
2053 
2054 // No-op on amd64 (the body is intentionally empty)
2055 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2056 
2057 // Advertise here if the CPU requires explicit rounding operations to
2058 // implement the UseStrictFP mode.
2059 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2060 
2061 // Do floats take an entire double register or just half?  (Entire, here.)
2062 const bool Matcher::float_in_double = true;
2063 // Do ints take an entire long register or just half?  (Entire, here.)
2064 const bool Matcher::int_in_long = true;
2065 
2066 // Return whether or not this register is ever used as an argument.
2067 // This function is used on startup to build the trampoline stubs in
2068 // generateOptoStub.  Registers not mentioned will be killed by the VM
2069 // call in the trampoline, and arguments in those registers will not be
2070 // available to the callee.
2071 bool Matcher::can_be_java_arg(int reg)
2072 { // True for both halves (X_num and X_H_num) of every register listed below.
2073   return
2074     reg ==  RDI_num || reg ==  RDI_H_num ||
2075     reg ==  RSI_num || reg ==  RSI_H_num ||
2076     reg ==  RDX_num || reg ==  RDX_H_num ||
2077     reg ==  RCX_num || reg ==  RCX_H_num ||
2078     reg ==   R8_num || reg ==   R8_H_num ||
2079     reg ==   R9_num || reg ==   R9_H_num ||
2080     reg ==  R12_num || reg ==  R12_H_num ||
2081     reg == XMM0_num || reg == XMM0_H_num ||
2082     reg == XMM1_num || reg == XMM1_H_num ||
2083     reg == XMM2_num || reg == XMM2_H_num ||
2084     reg == XMM3_num || reg == XMM3_H_num ||
2085     reg == XMM4_num || reg == XMM4_H_num ||
2086     reg == XMM5_num || reg == XMM5_H_num ||
2087     reg == XMM6_num || reg == XMM6_H_num ||
2088     reg == XMM7_num || reg == XMM7_H_num;
2089 }
2090 
2091 bool Matcher::is_spillable_arg(int reg)
2092 { // Same register set as can_be_java_arg.
2093   return can_be_java_arg(reg);
2094 }
2095 
2096 // Register mask for the DIVI projection of divmodI (INT_RAX_REG_mask)
2097 RegMask Matcher::divI_proj_mask() {
2098   return INT_RAX_REG_mask;
2099 }
2100 
2101 // Register mask for the MODI projection of divmodI (INT_RDX_REG_mask)
2102 RegMask Matcher::modI_proj_mask() {
2103   return INT_RDX_REG_mask;
2104 }
2105 
2106 // Register mask for the DIVL projection of divmodL (LONG_RAX_REG_mask)
2107 RegMask Matcher::divL_proj_mask() {
2108   return LONG_RAX_REG_mask;
2109 }
2110 
2111 // Register mask for the MODL projection of divmodL (LONG_RDX_REG_mask)
2112 RegMask Matcher::modL_proj_mask() {
2113   return LONG_RDX_REG_mask;
2114 }
2115 
2116 static Address build_address(int b, int i, int s, int d) {
2117   // Build an Address from raw base/index/scale/displacement values.  An
2118   // index that decodes to rsp is replaced by noreg with no scaling.
2119   Register base_reg  = as_Register(b);
2120   Register index_reg = as_Register(i);
2121   if (index_reg != rsp) {
2122     return Address(base_reg, index_reg, (Address::ScaleFactor)s, d);
2123   }
2124   return Address(base_reg, noreg, Address::no_scale, d);
2125 }
2126 
2127 %}
2128 
2129 //----------ENCODING BLOCK-----------------------------------------------------
2130 // This block specifies the encoding classes used by the compiler to
2131 // output byte streams.  Encoding classes are parameterized macros
2132 // used by Machine Instruction Nodes in order to generate the bit
2133 // encoding of the instruction.  Operands specify their base encoding
2134 // interface with the interface keyword.  Four interfaces are currently
2135 // supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2136 // COND_INTER.  REG_INTER causes an operand to generate a function
2137 // which returns its register number when queried.  CONST_INTER causes
2138 // an operand to generate a function which returns the value of the
2139 // constant when queried.  MEMORY_INTER causes an operand to generate
2140 // four functions which return the Base Register, the Index Register,
2141 // the Scale Value, and the Offset Value of the operand when queried.
2142 // COND_INTER causes an operand to generate six functions which return
2143 // the encoding code (ie - encoding bits for the instruction)
2144 // associated with each basic boolean condition for a conditional
2145 // instruction.
2146 //
2147 // Instructions specify two basic values for encoding.  Again, a
2148 // function is available to check if the constant displacement is an
2149 // oop. They use the ins_encode keyword to specify their encoding
2150 // classes (which must be a sequence of enc_class names, and their
2151 // parameters, specified in the encoding block), and they use the
2152 // opcode keyword to specify, in order, their primary, secondary, and
2153 // tertiary opcode.  Only the opcode sections which a particular
2154 // instruction needs for encoding need to be specified.
2155 encode %{
2156   // Build emit functions for each basic byte or larger field in the
2157   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2158   // from C++ code in the enc_class source block.  Emit functions will
2159   // live in the main source block for now.  In future, we can
2160   // generalize this by adding a syntax that specifies the sizes of
2161   // fields in an order, so that the adlc can build the emit functions
2162   // automagically
2163 
2164   // Emit primary opcode ($primary of the instruction that uses this class)
2165   enc_class OpcP
2166   %{
2167     emit_opcode(cbuf, $primary);
2168   %}
2169 
2170   // Emit secondary opcode ($secondary of the instruction that uses this class)
2171   enc_class OpcS
2172   %{
2173     emit_opcode(cbuf, $secondary);
2174   %}
2175 
2176   // Emit tertiary opcode ($tertiary of the instruction that uses this class)
2177   enc_class OpcT
2178   %{
2179     emit_opcode(cbuf, $tertiary);
2180   %}
2181 
2182   // Emit opcode directly: the opcode is the constant passed as operand d8
2183   enc_class Opcode(immI d8)
2184   %{
2185     emit_opcode(cbuf, $d8$$constant);
2186   %}
2187 
2188   // Emit size prefix (the single byte 0x66)
2189   enc_class SizePrefix
2190   %{
2191     emit_opcode(cbuf, 0x66);
2192   %}
2193 
2194   enc_class reg(rRegI reg)
2195   %{
2196     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7); // fields 0x3, 0, low three bits of reg; no REX is emitted here
2197   %}
2198 
2199   enc_class reg_reg(rRegI dst, rRegI src)
2200   %{
2201     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // low three bits of dst, then src; no REX is emitted here
2202   %}
2203 
2204   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2205   %{
2206     emit_opcode(cbuf, $opcode$$constant); // opcode byte supplied as an operand
2207     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); // same byte as reg_reg emits
2208   %}
2209 
2210   enc_class cmpfp_fixup()
2211   %{
2212     // jnp,s exit  (0x0A = 1 + 8 + 1 bytes for pushfq, andq, popfq below)
2213     emit_opcode(cbuf, 0x7B);
2214     emit_d8(cbuf, 0x0A);
2215 
2216     // pushfq
2217     emit_opcode(cbuf, 0x9C);
2218 
2219     // andq $0xffffff2b, (%rsp)  -- masks the flags image just pushed
2220     emit_opcode(cbuf, Assembler::REX_W);
2221     emit_opcode(cbuf, 0x81);
2222     emit_opcode(cbuf, 0x24);
2223     emit_opcode(cbuf, 0x24);
2224     emit_d32(cbuf, 0xffffff2b);
2225 
2226     // popfq
2227     emit_opcode(cbuf, 0x9D);
2228 
2229     // nop (target for branch to avoid branch to branch)
2230     emit_opcode(cbuf, 0x90);
2231   %}
2232 
2233   enc_class cmpfp3(rRegI dst)
2234   %{
2235     int dstenc = $dst$$reg;
2236 
2237     // movl $dst, -1
2238     if (dstenc >= 8) {
2239       emit_opcode(cbuf, Assembler::REX_B);
2240     }
2241     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2242     emit_d32(cbuf, -1);
2243 
2244     // jp,s done  (skips jb + setne + movzbl; two bytes longer when both take a prefix)
2245     emit_opcode(cbuf, 0x7A);
2246     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2247 
2248     // jb,s done  (skips setne + movzbl)
2249     emit_opcode(cbuf, 0x72);
2250     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2251 
2252     // setne $dst  (a prefix byte is emitted whenever dstenc >= 4)
2253     if (dstenc >= 4) {
2254       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2255     }
2256     emit_opcode(cbuf, 0x0F);
2257     emit_opcode(cbuf, 0x95);
2258     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2259 
2260     // movzbl $dst, $dst  (dst appears in both fields, hence REX_RB for dstenc >= 8)
2261     if (dstenc >= 4) {
2262       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2263     }
2264     emit_opcode(cbuf, 0x0F);
2265     emit_opcode(cbuf, 0xB6);
2266     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2267   %}
2268 
2269   enc_class cdql_enc(no_rax_rdx_RegI div)
2270   %{
2271     // Full implementation of Java idiv and irem; checks for
2272     // special case as described in JVM spec., p.243 & p.271.
2273     //
2274     //         normal case                           special case
2275     //
2276     // input : rax: dividend                         min_int
2277     //         reg: divisor                          -1
2278     //
2279     // output: rax: quotient  (= rax idiv reg)       min_int
2280     //         rdx: remainder (= rax irem reg)       0
2281     //
2282     //  Code sequence:
2283     //
2284     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2285     //    5:   75 07/08                jne    e <normal>
2286     //    7:   33 d2                   xor    %edx,%edx
2287     //  [div >= 8 -> offset + 1]
2288     //  [REX_B]
2289     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2290     //    c:   74 03/04                je     11 <done>
2291     // 000000000000000e <normal>:
2292     //    e:   99                      cltd
2293     //  [div >= 8 -> offset + 1]
2294     //  [REX_B]
2295     //    f:   f7 f9                   idiv   $div
2296     // 0000000000000011 <done>:
2297 
2298     // cmp    $0x80000000,%eax  (immediate written one byte at a time, low byte first)
2299     emit_opcode(cbuf, 0x3d);
2300     emit_d8(cbuf, 0x00);
2301     emit_d8(cbuf, 0x00);
2302     emit_d8(cbuf, 0x00);
2303     emit_d8(cbuf, 0x80);
2304 
2305     // jne    e <normal>  (one byte further when the cmp below needs REX_B)
2306     emit_opcode(cbuf, 0x75);
2307     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2308 
2309     // xor    %edx,%edx
2310     emit_opcode(cbuf, 0x33);
2311     emit_d8(cbuf, 0xD2);
2312 
2313     // cmp    $0xffffffffffffffff,$div
2314     if ($div$$reg >= 8) {
2315       emit_opcode(cbuf, Assembler::REX_B);
2316     }
2317     emit_opcode(cbuf, 0x83);
2318     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2319     emit_d8(cbuf, 0xFF);
2320 
2321     // je     11 <done>  (skips cltd plus the idiv that the user of this rule emits)
2322     emit_opcode(cbuf, 0x74);
2323     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2324 
2325     // <normal>
2326     // cltd
2327     emit_opcode(cbuf, 0x99);
2328 
2329     // idivl (note: must be emitted by the user of this rule)
2330     // <done>
2331   %}
2332 
2333   enc_class cdqq_enc(no_rax_rdx_RegL div)
2334   %{
2335     // Full implementation of Java ldiv and lrem; checks for
2336     // special case as described in JVM spec., p.243 & p.271.
2337     //
2338     //         normal case                           special case
2339     //
2340     // input : rax: dividend                         min_long
2341     //         reg: divisor                          -1
2342     //
2343     // output: rax: quotient  (= rax idiv reg)       min_long
2344     //         rdx: remainder (= rax irem reg)       0
2345     //
2346     //  Code sequence:
2347     //
2348     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2349     //    7:   00 00 80
2350     //    a:   48 39 d0                cmp    %rdx,%rax
2351     //    d:   75 08                   jne    17 <normal>
2352     //    f:   33 d2                   xor    %edx,%edx
2353     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2354     //   15:   74 05                   je     1c <done>
2355     // 0000000000000017 <normal>:
2356     //   17:   48 99                   cqto
2357     //   19:   48 f7 f9                idiv   $div
2358     // 000000000000001c <done>:
2359 
2360     // mov    $0x8000000000000000,%rdx  (immediate written one byte at a time, low byte first)
2361     emit_opcode(cbuf, Assembler::REX_W);
2362     emit_opcode(cbuf, 0xBA);
2363     emit_d8(cbuf, 0x00);
2364     emit_d8(cbuf, 0x00);
2365     emit_d8(cbuf, 0x00);
2366     emit_d8(cbuf, 0x00);
2367     emit_d8(cbuf, 0x00);
2368     emit_d8(cbuf, 0x00);
2369     emit_d8(cbuf, 0x00);
2370     emit_d8(cbuf, 0x80);
2371 
2372     // cmp    %rdx,%rax
2373     emit_opcode(cbuf, Assembler::REX_W);
2374     emit_opcode(cbuf, 0x39);
2375     emit_d8(cbuf, 0xD0);
2376 
2377     // jne    17 <normal>  (fixed distance: a prefix byte is always emitted for the cmp below)
2378     emit_opcode(cbuf, 0x75);
2379     emit_d8(cbuf, 0x08);
2380 
2381     // xor    %edx,%edx
2382     emit_opcode(cbuf, 0x33);
2383     emit_d8(cbuf, 0xD2);
2384 
2385     // cmp    $0xffffffffffffffff,$div
2386     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2387     emit_opcode(cbuf, 0x83);
2388     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2389     emit_d8(cbuf, 0xFF);
2390 
2391     // je     1c <done>  (skips cqto plus the idivq that the user of this rule emits)
2392     emit_opcode(cbuf, 0x74);
2393     emit_d8(cbuf, 0x05);
2394 
2395     // <normal>
2396     // cqto
2397     emit_opcode(cbuf, Assembler::REX_W);
2398     emit_opcode(cbuf, 0x99);
2399 
2400     // idivq (note: must be emitted by the user of this rule)
2401     // <done>
2402   %}
2403 
2404   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2405   enc_class OpcSE(immI imm)
2406   %{
2407     // Emit primary opcode and set sign-extend bit
2408     // Check for 8-bit immediate, and set sign extend bit in opcode
2409     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2410       emit_opcode(cbuf, $primary | 0x02); // immediate fits in a signed byte
2411     } else {
2412       // 32-bit immediate
2413       emit_opcode(cbuf, $primary);
2414     }
2415   %}
2416 
2417   enc_class OpcSErm(rRegI dst, immI imm)
2418   %{
2419     // OpcSEr/m: optional REX_B, opcode as in OpcSE, then the r/m byte for dst
2420     int dstenc = $dst$$reg;
2421     if (dstenc >= 8) {
2422       emit_opcode(cbuf, Assembler::REX_B);
2423       dstenc -= 8; // keep only the low three bits for the r/m byte below
2424     }
2425     // Emit primary opcode and set sign-extend bit
2426     // Check for 8-bit immediate, and set sign extend bit in opcode
2427     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2428       emit_opcode(cbuf, $primary | 0x02);
2429     } else {
2430       // 32-bit immediate
2431       emit_opcode(cbuf, $primary);
2432     }
2433     // Emit r/m byte with secondary opcode, after primary opcode.
2434     emit_rm(cbuf, 0x3, $secondary, dstenc);
2435   %}
2436 
2437   enc_class OpcSErm_wide(rRegL dst, immI imm)
2438   %{
2439     // OpcSEr/m, wide form: a REX_W or REX_WB prefix is always emitted
2440     int dstenc = $dst$$reg;
2441     if (dstenc < 8) {
2442       emit_opcode(cbuf, Assembler::REX_W);
2443     } else {
2444       emit_opcode(cbuf, Assembler::REX_WB);
2445       dstenc -= 8; // keep only the low three bits for the r/m byte below
2446     }
2447     // Emit primary opcode and set sign-extend bit
2448     // Check for 8-bit immediate, and set sign extend bit in opcode
2449     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2450       emit_opcode(cbuf, $primary | 0x02);
2451     } else {
2452       // 32-bit immediate
2453       emit_opcode(cbuf, $primary);
2454     }
2455     // Emit r/m byte with secondary opcode, after primary opcode.
2456     emit_rm(cbuf, 0x3, $secondary, dstenc);
2457   %}
2458 
2459   enc_class Con8or32(immI imm)
2460   %{
2461     // Emit the immediate as 8 bits when it is in [-0x80, 0x80), same test as OpcSE
2462     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2463       $$$emit8$imm$$constant;
2464     } else {
2465       // 32-bit immediate
2466       $$$emit32$imm$$constant;
2467     }
2468   %}
2469 
2470   enc_class Lbl(label labl)
2471   %{
2472     // JMP, CALL: 32-bit displacement, counted from the end of the 4-byte field
2473     Label* l = $labl$$label;
2474     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0); // 0 when there is no label
2475   %}
2476 
2477   enc_class LblShort(label labl)
2478   %{
2479     // JMP, CALL: 8-bit displacement, counted from the end of the 1-byte field
2480     Label* l = $labl$$label;
2481     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // 0 when there is no label
2482     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2483     emit_d8(cbuf, disp);
2484   %}
2485 
2486   enc_class opc2_reg(rRegI dst)
2487   %{
2488     // BSWAP: secondary opcode and dst passed to emit_cc (presumably combined into one byte -- TODO confirm)
2489     emit_cc(cbuf, $secondary, $dst$$reg);
2490   %}
2491 
2492   enc_class opc3_reg(rRegI dst)
2493   %{
2494     // BSWAP: as opc2_reg, but using the tertiary opcode
2495     emit_cc(cbuf, $tertiary, $dst$$reg);
2496   %}
2497 
2498   enc_class reg_opc(rRegI div)
2499   %{
2500     // INC, DEC, IDIV, IMOD, JMP indirect, ...: secondary opcode goes in the middle field
2501     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2502   %}
2503 
2504   enc_class Jcc(cmpOp cop, label labl)
2505   %{
2506     // JCC, long form: primary byte, secondary opcode with condition, 32-bit displacement
2507     Label* l = $labl$$label;
2508     $$$emit8$primary;
2509     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2510     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0); // 0 when there is no label
2511   %}
2512 
2513   enc_class JccShort (cmpOp cop, label labl)
2514   %{
2515   // JCC, short form: primary opcode with condition, then an 8-bit displacement
2516     Label *l = $labl$$label;
2517     emit_cc(cbuf, $primary, $cop$$cmpcode);
2518     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0; // 0 when there is no label
2519     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2520     emit_d8(cbuf, disp);
2521   %}
2522 
2523   enc_class enc_cmov(cmpOp cop)
2524   %{
2525     // CMOV: primary byte, then secondary opcode with the condition code
2526     $$$emit8$primary;
2527     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2528   %}
2529 
2530   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2531   %{
2532     // Invert sense of branch from sense of cmov
2533     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2534     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2535                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2536                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2537     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2538     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3); // prefix byte for the movss form only
2539     if ($dst$$reg < 8) {
2540       if ($src$$reg >= 8) {
2541         emit_opcode(cbuf, Assembler::REX_B); // only src is 8 or above
2542       }
2543     } else {
2544       if ($src$$reg < 8) {
2545         emit_opcode(cbuf, Assembler::REX_R); // only dst is 8 or above
2546       } else {
2547         emit_opcode(cbuf, Assembler::REX_RB); // both are 8 or above
2548       }
2549     }
2550     emit_opcode(cbuf, 0x0F);
2551     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2552     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2553   %}
2554 
2555   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2556   %{
2557     // Double cmov done as branch-over-move: invert sense of branch from sense of cmov (cmpcode ^ 1)
2558     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2559     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // jump distance: the move is 4 bytes, 5 with REX
2560 
2561     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2562     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); // a prefix byte is emitted in both modes
2563     if ($dst$$reg < 8) {
2564       if ($src$$reg >= 8) {
2565         emit_opcode(cbuf, Assembler::REX_B); // high src only (src sits in the r/m field below)
2566       }
2567     } else {
2568       if ($src$$reg < 8) {
2569         emit_opcode(cbuf, Assembler::REX_R); // high dst only (dst sits in the reg field below)
2570       } else {
2571         emit_opcode(cbuf, Assembler::REX_RB); // both high
2572       }
2573     }
2574     emit_opcode(cbuf, 0x0F);
2575     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2576     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2577   %}
2578 
2579   enc_class enc_PartialSubtypeCheck()
2580   %{ // Emits check_klass_subtype_slow_path on fixed registers, then binds the local label "miss"
2581     Register Rrdi = as_Register(RDI_enc); // result register
2582     Register Rrax = as_Register(RAX_enc); // super class
2583     Register Rrcx = as_Register(RCX_enc); // killed
2584     Register Rrsi = as_Register(RSI_enc); // sub class
2585     Label miss;
2586     const bool set_cond_codes = true;
2587 
2588     MacroAssembler _masm(&cbuf);
2589     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2590                                      NULL, &miss, // presumably the success / failure labels -- confirm
2591                                      set_cond_codes);
2592     if ($primary) { // non-zero primary opcode: zero the result register on the fall-through path
2593       __ xorptr(Rrdi, Rrdi);
2594     }
2595     __ bind(miss);
2596   %}
2597 
2598   enc_class Java_To_Interpreter(method meth)
2599   %{
2600     // CALL Java_To_Interpreter: call directly to the runtime.
2601     // Mark the instruction start first; it is the address used for relocation info.
2602     cbuf.set_inst_mark();
2603     $$$emit8$primary;
2604     // 32-bit displacement to the target, counted from the end of the 4-byte field.
2605     const intptr_t field_pos = (intptr_t) cbuf.code_end();
2606     const int call_disp = (int) ($meth$$method - field_pos - 4);
2607     emit_d32_reloc(cbuf, call_disp,
2608                    runtime_call_Relocation::spec(), RELOC_DISP32);
2609   %}
2610 
2611   enc_class Java_Static_Call(method meth)
2612   %{
2613     // JAVA STATIC CALL
2614     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2615     // determine who we intended to call.
2616     cbuf.set_inst_mark();
2617     $$$emit8$primary;
2618 
2619     // Displacement is counted from the end of the 4-byte field emitted next.
2620     const int call_disp =
2621       (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4);
2622 
2623     if (!_method) {
2624       // No method recorded: runtime-call relocation, and no stub.
2625       emit_d32_reloc(cbuf, call_disp,
2626                      runtime_call_Relocation::spec(), RELOC_DISP32);
2627     } else {
2628       // Only the relocation spec differs between the two remaining cases.
2629       if (_optimized_virtual) {
2630         emit_d32_reloc(cbuf, call_disp,
2631                        opt_virtual_call_Relocation::spec(), RELOC_DISP32);
2632       } else {
2633         emit_d32_reloc(cbuf, call_disp,
2634                        static_call_Relocation::spec(), RELOC_DISP32);
2635       }
2636       // Emit stub for static call
2637       emit_java_to_interp(cbuf);
2638     }
2639   %}
2640 
2641   enc_class Java_Dynamic_Call(method meth)
2642   %{
2643     // JAVA DYNAMIC CALL
2644     // Two instructions are emitted: a 64-bit immediate load into rax, then the call itself.
2645     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2646     // emit_call_dynamic_prologue( cbuf );
2647     cbuf.set_inst_mark(); // marks the start of the movq
2648 
2649     // movq rax, -1
2650     emit_opcode(cbuf, Assembler::REX_W);
2651     emit_opcode(cbuf, 0xB8 | RAX_enc);
2652     emit_d64_reloc(cbuf,
2653                    (int64_t) Universe::non_oop_word(), // the placeholder value actually emitted
2654                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2655     address virtual_call_oop_addr = cbuf.inst_mark(); // address of the movq, from the mark set above
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2657     // who we intended to call.
2658     cbuf.set_inst_mark(); // second mark: the call is a separate instruction
2659     $$$emit8$primary;
2660     emit_d32_reloc(cbuf,
2661                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4), // counted from the end of the 4-byte field
2662                    virtual_call_Relocation::spec(virtual_call_oop_addr), // relocation is given the movq's address
2663                    RELOC_DISP32);
2664   %}
2665 
2666   enc_class Java_Compiled_Call(method meth)
2667   %{
2668     // JAVA COMPILED CALL: indirect call through [rax + from_compiled_offset()]
2669     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2670 
2671     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2672     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2673 
2674     // callq *disp(%rax)
2675     cbuf.set_inst_mark();
2676     $$$emit8$primary;
2677     if (disp < 0x80) { // 8-bit form; NOTE(review): no lower-bound test, assumes disp >= 0 -- confirm
2678       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte, mod=01: 8-bit displacement follows
2679       emit_d8(cbuf, disp); // Displacement
2680     } else {
2681       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte, mod=10: 32-bit displacement follows
2682       emit_d32(cbuf, disp); // Displacement
2683     }
2684   %}
2685 
2686   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2687   %{
2688     // SAL, SAR, SHR (32-bit): [REX.B] primary opcode, ModRM, 8-bit shift count
2689     const int regenc = $dst$$reg;
2690     if (regenc > 7) {
2691       // r8-r15: the fourth encoding bit travels in the prefix
2692       emit_opcode(cbuf, Assembler::REX_B);
2693     }
2694     $$$emit8$primary;
2695     emit_rm(cbuf, 0x3, $secondary, regenc & 7);
2696     $$$emit8$shift$$constant;
2697   %}
2698 
2699   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2700   %{
2701     // SAL, SAR, SHR on a 64-bit register.
2702     // Layout: REX prefix, primary opcode, ModRM (secondary opcode in the
2703     // reg field), then the 8-bit shift count.
2704     const int regenc = $dst$$reg;
2705 
2706     // A prefix is always emitted: REX.W, or REX.WB when dst is r8-r15.
2707     emit_opcode(cbuf, regenc < 8 ? Assembler::REX_W : Assembler::REX_WB);
2708 
2709     $$$emit8$primary;
2710     emit_rm(cbuf, 0x3, $secondary, regenc & 7);
2711     $$$emit8$shift$$constant;
2712   %}
2713 
2714   enc_class load_immI(rRegI dst, immI src)
2715   %{
2716     // 32-bit immediate load: [REX.B] (0xB8 | low three register bits) imm32
2717     const int regenc = $dst$$reg;
2718     if (regenc > 7) {
2719       emit_opcode(cbuf, Assembler::REX_B);
2720     }
2721     emit_opcode(cbuf, 0xB8 | (regenc & 7));
2722     $$$emit32$src$$constant;
2723   %}
2724 
2725   enc_class load_immL(rRegL dst, immL src)
2726   %{
2727     // 64-bit immediate load: REX prefix, (0xB8 | low three register bits), imm64
2728     const int regenc = $dst$$reg;
2729     if (regenc > 7) {
2730       emit_opcode(cbuf, Assembler::REX_WB);  // dst is r8-r15
2731     } else {
2732       emit_opcode(cbuf, Assembler::REX_W);   // dst is one of the low eight
2733     }
2734     emit_opcode(cbuf, 0xB8 | (regenc & 7));
2735     emit_d64(cbuf, $src$$constant);
2736   %}
2737 
2738   enc_class load_immUL32(rRegL dst, immUL32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     const int regenc = $dst$$reg;
2742     if (regenc > 7) {
2743       emit_opcode(cbuf, Assembler::REX_B);  // fourth register bit for r8-r15
2744     }
2745     emit_opcode(cbuf, 0xB8 | (regenc & 7));
2746     // Only a 32-bit immediate follows; no REX.W form is used here.
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immL32(rRegL dst, immL32 src)
2751   %{
2752     // 64-bit register loaded from a 32-bit immediate: REX prefix, 0xC7, ModRM (reg field 0), imm32
2753     const int regenc = $dst$$reg;
2754     if (regenc > 7) {
2755       emit_opcode(cbuf, Assembler::REX_WB);
2756     } else {
2757       emit_opcode(cbuf, Assembler::REX_W);
2758     }
2759     emit_opcode(cbuf, 0xC7);
2760     emit_rm(cbuf, 0x03, 0x00, regenc & 7);
2761     $$$emit32$src$$constant;
2762   %}
2763 
2764   enc_class load_immP31(rRegP dst, immP32 src)
2765   %{
2766     // same as load_immI, but this time we care about zeroes in the high word
2767     const int regenc = $dst$$reg;
2768     if (regenc > 7) {
2769       emit_opcode(cbuf, Assembler::REX_B);  // fourth register bit for r8-r15
2770     }
2771     emit_opcode(cbuf, 0xB8 | (regenc & 7));
2772     // Only a 32-bit immediate follows; no REX.W form is used here.
2773     $$$emit32$src$$constant;
2774   %}
2775 
2776   enc_class load_immP(rRegP dst, immP src)
2777   %{
2778     // 64-bit pointer immediate: REX prefix, (0xB8 | low register bits), imm64 with oop relocation if needed
2779     const int regenc = $dst$$reg;
2780     if (regenc > 7) {
2781       emit_opcode(cbuf, Assembler::REX_WB);
2782     } else {
2783       emit_opcode(cbuf, Assembler::REX_W);
2784     }
2785     emit_opcode(cbuf, 0xB8 | (regenc & 7));
2786     // This next line should be generated from ADLC
2787     if (!$src->constant_is_oop()) {
2788       emit_d64(cbuf, $src$$constant);
2789     } else {
2790       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2791     }
2792   %}
2793 
2794   enc_class load_immF(regF dst, immF con)
2795   %{ // Emits only the ModRM byte and the float constant reference; no prefix or opcode bytes
2796     // XXX reg_mem doesn't support RIP-relative addressing yet
2797     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798     emit_float_constant(cbuf, $con$$constant);
2799   %}
2800 
2801   enc_class load_immD(regD dst, immD con)
2802   %{ // Emits only the ModRM byte and the double constant reference; no prefix or opcode bytes
2803     // XXX reg_mem doesn't support RIP-relative addressing yet
2804     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2805     emit_double_constant(cbuf, $con$$constant);
2806   %}
2807 
2808   enc_class load_conF (regF dst, immF con) %{    // Load float constant: complete instruction, prefix through operand
2809     emit_opcode(cbuf, 0xF3);
2810     if ($dst$$reg >= 8) {
2811       emit_opcode(cbuf, Assembler::REX_R); // high dst; the prefix goes after 0xF3 and before 0x0F
2812     }
2813     emit_opcode(cbuf, 0x0F);
2814     emit_opcode(cbuf, 0x10); // F3 0F 10: the movss encoding also used by enc_cmovf_branch
2815     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2816     emit_float_constant(cbuf, $con$$constant);
2817   %}
2818 
2819   enc_class load_conD (regD dst, immD con) %{    // Load double constant: complete instruction, prefix through operand
2820     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2821     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2822     if ($dst$$reg >= 8) {
2823       emit_opcode(cbuf, Assembler::REX_R); // high dst; the prefix goes before 0x0F
2824     }
2825     emit_opcode(cbuf, 0x0F);
2826     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); // opcode paired with the prefix chosen above
2827     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2828     emit_double_constant(cbuf, $con$$constant);
2829   %}
2830 
2831   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2832   enc_class enc_copy(rRegI dst, rRegI src)
2833   %{ // Thin wrapper: all encoding work is delegated to encode_copy
2834     encode_copy(cbuf, $dst$$reg, $src$$reg);
2835   %}
2836 
2837   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2838   enc_class enc_CopyXD( RegD dst, RegD src ) %{ // thin wrapper: delegates to encode_CopyXD
2839     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2840   %}
2841 
2842   enc_class enc_copy_always(rRegI dst, rRegI src)
2843   %{
2844     // 32-bit register move: [REX] 0x8B ModRM (dst in the reg field, src in r/m).
2845     // There is no equality test here: the move is emitted even if dst == src.
2846     const int srcenc = $src$$reg;
2847     const int dstenc = $dst$$reg;
2848     const bool src_hi = srcenc >= 8;
2849     const bool dst_hi = dstenc >= 8;
2850 
2851     // R is set for a high dst, B for a high src;
2852     // no prefix at all when both registers are in the low eight.
2853     if (dst_hi && src_hi) {
2854       emit_opcode(cbuf, Assembler::REX_RB);
2855     } else if (dst_hi) {
2856       emit_opcode(cbuf, Assembler::REX_R);
2857     } else if (src_hi) {
2858       emit_opcode(cbuf, Assembler::REX_B);
2859     }
2860 
2861     // Opcode, then the register-direct ModRM byte with the low three bits of each encoding.
2862     emit_opcode(cbuf, 0x8B);
2863     emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2864   %}
2865 
2866   enc_class enc_copy_wide(rRegL dst, rRegL src)
2867   %{
2868     // 64-bit register move; nothing at all is emitted when dst and src coincide.
2869     const int srcenc = $src$$reg;
2870     const int dstenc = $dst$$reg;
2871 
2872     if (dstenc != srcenc) {
2873       const bool src_hi = srcenc >= 8;
2874       const bool dst_hi = dstenc >= 8;
2875 
2876       // W is always present; R is added for a high dst, B for a high src.
2877       if (dst_hi && src_hi) {
2878         emit_opcode(cbuf, Assembler::REX_WRB);
2879       } else if (dst_hi) {
2880         emit_opcode(cbuf, Assembler::REX_WR);
2881       } else if (src_hi) {
2882         emit_opcode(cbuf, Assembler::REX_WB);
2883       } else {
2884         emit_opcode(cbuf, Assembler::REX_W);
2885       }
2886 
2887       // Opcode, then register-direct ModRM with the low three bits of each encoding.
2888       emit_opcode(cbuf, 0x8B);
2889       emit_rm(cbuf, 0x3, dstenc & 7, srcenc & 7);
2890     }
2891   %}
2892 
2893   enc_class Con32(immI src)
2894   %{
2895     // Output immediate: the constant is written through the 32-bit emit form
2896     $$$emit32$src$$constant;
2897   %}
2898 
2899   enc_class Con64(immL src)
2900   %{
2901     // Output immediate: write the 64-bit constant into the code buffer
2902     emit_d64(cbuf, $src$$constant);
2903   %}
2904 
2905   enc_class Con32F_as_bits(immF src)
2906   %{
2907     // Output Float immediate bits: the constant goes through jint_cast and the
2908     // resulting 32-bit value is emitted.
2909     const jfloat value = $src$$constant;
2910     emit_d32(cbuf, jint_cast(value));
2911   %}
2912 
2913   enc_class Con16(immI src)
2914   %{
2915     // Output immediate: the constant is written through the 16-bit emit form
2916     $$$emit16$src$$constant;
2917   %}
2918 
2919   // How is this different from Con32??? XXX  (it calls emit_d32 directly instead of the emit32 form)
2920   enc_class Con_d32(immI src)
2921   %{
2922     emit_d32(cbuf,$src$$constant);
2923   %}
2924 
2925   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2926     // Output immediate memory reference: ModRM with mod=00, r/m=101, then a zero 32-bit displacement
2927     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); // t1's encoding is passed unmasked as the reg field
2928     emit_d32(cbuf, 0x00);
2929   %}
2930 
2931   enc_class jump_enc(rRegL switch_val, rRegI dest) %{ // Table jump: load the table address into dest, jump through [dest + switch_val]
2932     MacroAssembler masm(&cbuf);
2933 
2934     Register switch_reg = as_Register($switch_val$$reg);
2935     Register dest_reg   = as_Register($dest$$reg);
2936     address table_base  = masm.address_table_constant(_index2label); // table derived from _index2label
2937 
2938     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2939     // to do that and the compiler is using that register as one it can allocate.
2940     // So we build it all by hand.
2941     // Address index(noreg, switch_reg, Address::times_1);
2942     // ArrayAddress dispatch(table, index);
2943 
2944     Address dispatch(dest_reg, switch_reg, Address::times_1); // base dest_reg, index switch_reg, scale 1
2945 
2946     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg is overwritten with the table address
2947     masm.jmp(dispatch);
2948   %}
2949 
2950   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ // Table jump with scaled index and constant offset
2951     MacroAssembler masm(&cbuf);
2952 
2953     Register switch_reg = as_Register($switch_val$$reg);
2954     Register dest_reg   = as_Register($dest$$reg);
2955     address table_base  = masm.address_table_constant(_index2label); // table derived from _index2label
2956 
2957     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2958     // to do that and the compiler is using that register as one it can allocate.
2959     // So we build it all by hand.
2960     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2961     // ArrayAddress dispatch(table, index);
2962 
2963     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant); // base, index, scale, displacement
2964 
2965     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg is overwritten with the table address
2966     masm.jmp(dispatch);
2967   %}
2968 
2969   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ // Table jump with scaled index, no constant offset
2970     MacroAssembler masm(&cbuf);
2971 
2972     Register switch_reg = as_Register($switch_val$$reg);
2973     Register dest_reg   = as_Register($dest$$reg);
2974     address table_base  = masm.address_table_constant(_index2label); // table derived from _index2label
2975 
2976     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2977     // to do that and the compiler is using that register as one it can allocate.
2978     // So we build it all by hand.
2979     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2980     // ArrayAddress dispatch(table, index);
2981 
2982     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant); // base, index, scale
2983     masm.lea(dest_reg, InternalAddress(table_base)); // dest_reg is overwritten with the table address
2984     masm.jmp(dispatch);
2985 
2986   %}
2987 
2988   enc_class lock_prefix()
2989   %{ // Emits the 0xF0 prefix byte only when os::is_MP() is true; otherwise emits nothing
2990     if (os::is_MP()) {
2991       emit_opcode(cbuf, 0xF0); // lock
2992     }
2993   %}
2994 
2995   enc_class REX_mem(memory mem)
2996   %{
2997     // Optional REX prefix for a memory operand: B for a high base register,
2998     // X for a high index register; nothing when both are in the low eight.
2999     const bool base_hi  = $mem$$base >= 8;
3000     const bool index_hi = $mem$$index >= 8;
3001     if (base_hi && index_hi) {
3002       emit_opcode(cbuf, Assembler::REX_XB);
3003     } else if (base_hi) {
3004       emit_opcode(cbuf, Assembler::REX_B);
3005     } else if (index_hi) {
3006       emit_opcode(cbuf, Assembler::REX_X);
3007     }
3008   %}
3009 
3010   enc_class REX_mem_wide(memory mem)
3011   %{
3012     // Wide REX prefix for a memory operand: always emitted, with W set;
3013     // B is added for a high base register and X for a high index register.
3014     const bool base_hi  = $mem$$base >= 8;
3015     const bool index_hi = $mem$$index >= 8;
3016     if (base_hi && index_hi) {
3017       emit_opcode(cbuf, Assembler::REX_WXB);
3018     } else if (base_hi) {
3019       emit_opcode(cbuf, Assembler::REX_WB);
3020     } else if (index_hi) {
3021       emit_opcode(cbuf, Assembler::REX_WX);
3022     } else {
3023       emit_opcode(cbuf, Assembler::REX_W);
3024     }
3025   %}
3026 
3027   // for byte regs: encodings 0-3 get no prefix, 4-7 a plain REX, 8-15 REX.B
3028   enc_class REX_breg(rRegI reg)
3029   %{
3030     const int regenc = $reg$$reg;
3031     if (regenc >= 8) { emit_opcode(cbuf, Assembler::REX_B); }
3032     else if (regenc >= 4) { emit_opcode(cbuf, Assembler::REX); }
3033   %}
3034 
3035   // for byte regs (src is the byte operand)
3036   enc_class REX_reg_breg(rRegI dst, rRegI src)
3037   %{
3038     const int  srcenc = $src$$reg;
3039     const bool dst_hi = $dst$$reg >= 8;
3040     if (dst_hi) {
3041       // High dst always needs R; B is added when src is high as well.
3042       emit_opcode(cbuf, srcenc < 8 ? Assembler::REX_R : Assembler::REX_RB);
3043     } else if (srcenc >= 8) {
3044       emit_opcode(cbuf, Assembler::REX_B);
3045     } else if (srcenc >= 4) {
3046       // No extension bit needed, but src encodings 4-7 still get a plain REX.
3047       emit_opcode(cbuf, Assembler::REX);
3048     }
3049   %}
3050 
3051   // for byte regs (reg is the byte operand)
3052   enc_class REX_breg_mem(rRegI reg, memory mem)
3053   %{
3054     const int  regenc   = $reg$$reg;
3055     const bool reg_hi   = regenc >= 8;
3056     const bool base_hi  = $mem$$base >= 8;
3057     const bool index_hi = $mem$$index >= 8;
3058 
3059     // R is set for a high register operand, X for a high index, B for a high base.
3060     if (reg_hi) {
3061       // High register operand: a prefix is always emitted.
3062       if (base_hi && index_hi) {
3063         emit_opcode(cbuf, Assembler::REX_RXB);
3064       } else if (base_hi) {
3065         emit_opcode(cbuf, Assembler::REX_RB);
3066       } else if (index_hi) {
3067         emit_opcode(cbuf, Assembler::REX_RX);
3068       } else {
3069         emit_opcode(cbuf, Assembler::REX_R);
3070       }
3071     } else {
3072       if (base_hi && index_hi) {
3073         emit_opcode(cbuf, Assembler::REX_XB);
3074       } else if (base_hi) {
3075         emit_opcode(cbuf, Assembler::REX_B);
3076       } else if (index_hi) {
3077         emit_opcode(cbuf, Assembler::REX_X);
3078       } else if (regenc >= 4) {
3079         // No extension bit needed, but byte encodings 4-7 still get a plain REX.
3080         emit_opcode(cbuf, Assembler::REX);
3081       }
3082     }
3083   %}
3084 
3085   enc_class REX_reg(rRegI reg)
3086   %{
3087     // Only a high register (encoding 8-15) gets a prefix here: REX.B.
3088     const bool reg_hi = $reg$$reg >= 8;
3089     if (reg_hi) { emit_opcode(cbuf, Assembler::REX_B); }
3090   %}
3091 
3092   enc_class REX_reg_wide(rRegI reg)
3093   %{
3094     // A prefix is always emitted: REX.W for a low register,
3095     // REX.WB for a high one (encoding 8-15).
3096     const bool reg_hi = $reg$$reg >= 8;
3097 
3098     emit_opcode(cbuf, reg_hi ? Assembler::REX_WB : Assembler::REX_W);
3099   %}
3100 
3101   enc_class REX_reg_reg(rRegI dst, rRegI src)
3102   %{
3103     // Optional REX prefix for a register-register form:
3104     // R for a high dst, B for a high src, nothing when both are low.
3105     const bool dst_hi = $dst$$reg >= 8;
3106     const bool src_hi = $src$$reg >= 8;
3107     if (dst_hi && src_hi) {
3108       emit_opcode(cbuf, Assembler::REX_RB);
3109     } else if (dst_hi) {
3110       emit_opcode(cbuf, Assembler::REX_R);
3111     } else if (src_hi) {
3112       emit_opcode(cbuf, Assembler::REX_B);
3113     }
3114   %}
3115 
3116   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3117   %{
3118     // Wide REX prefix for a register-register form: always emitted, with W set;
3119     // R is added for a high dst and B for a high src.
3120     const bool dst_hi = $dst$$reg >= 8;
3121     const bool src_hi = $src$$reg >= 8;
3122     if (dst_hi && src_hi) {
3123       emit_opcode(cbuf, Assembler::REX_WRB);
3124     } else if (dst_hi) {
3125       emit_opcode(cbuf, Assembler::REX_WR);
3126     } else if (src_hi) {
3127       emit_opcode(cbuf, Assembler::REX_WB);
3128     } else {
3129       emit_opcode(cbuf, Assembler::REX_W);
3130     }
3131   %}
3132 
3133   enc_class REX_reg_mem(rRegI reg, memory mem)
3134   %{
3135     // Optional REX prefix for a register + memory operand form:
3136     // R for a high register operand, X for a high index, B for a high base.
3137     const bool reg_hi   = $reg$$reg >= 8;
3138     const bool base_hi  = $mem$$base >= 8;
3139     const bool index_hi = $mem$$index >= 8;
3140 
3141     if (reg_hi) {
3142       // High register operand: a prefix is always emitted.
3143       if (base_hi && index_hi) {
3144         emit_opcode(cbuf, Assembler::REX_RXB);
3145       } else if (base_hi) {
3146         emit_opcode(cbuf, Assembler::REX_RB);
3147       } else if (index_hi) {
3148         emit_opcode(cbuf, Assembler::REX_RX);
3149       } else {
3150         emit_opcode(cbuf, Assembler::REX_R);
3151       }
3152     } else {
3153       // Low register operand: nothing is emitted if base and index are low too.
3154       if (base_hi && index_hi) {
3155         emit_opcode(cbuf, Assembler::REX_XB);
3156       } else if (base_hi) {
3157         emit_opcode(cbuf, Assembler::REX_B);
3158       } else if (index_hi) {
3159         emit_opcode(cbuf, Assembler::REX_X);
3160       }
3161     }
3162   %}
3163 
3164   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3165   %{
3166     // Wide REX prefix for a register + memory operand form: always emitted, with W set;
3167     // R for a high register operand, X for a high index, B for a high base.
3168     const bool reg_hi   = $reg$$reg >= 8;
3169     const bool base_hi  = $mem$$base >= 8;
3170     const bool index_hi = $mem$$index >= 8;
3171 
3172     if (reg_hi) {
3173       // High register operand: every variant carries R.
3174       if (base_hi && index_hi) {
3175         emit_opcode(cbuf, Assembler::REX_WRXB);
3176       } else if (base_hi) {
3177         emit_opcode(cbuf, Assembler::REX_WRB);
3178       } else if (index_hi) {
3179         emit_opcode(cbuf, Assembler::REX_WRX);
3180       } else {
3181         emit_opcode(cbuf, Assembler::REX_WR);
3182       }
3183     } else {
3184       // Low register operand: plain W unless base or index is high.
3185       if (base_hi && index_hi) {
3186         emit_opcode(cbuf, Assembler::REX_WXB);
3187       } else if (base_hi) {
3188         emit_opcode(cbuf, Assembler::REX_WB);
3189       } else if (index_hi) {
3190         emit_opcode(cbuf, Assembler::REX_WX);
3191       } else {
3192         emit_opcode(cbuf, Assembler::REX_W);
3193       }
3194     }
3195   %}
3196 
3197   enc_class reg_mem(rRegI ereg, memory mem)
3198   %{
3199     // Operand bytes for the "ereg, mem" form. Everything is delegated to
3200     // encode_RegMem; high registers are handled there as well.
3201     encode_RegMem(cbuf,
3202                   $ereg$$reg,
3203                   $mem$$base,
3204                   $mem$$index,
3205                   $mem$$scale,
3206                   $mem$$disp,
3207                   $mem->disp_is_oop());
3208   %}
3209 
3210   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3211   %{
3212     // Memory-operand form in which the constant rm_opcode is passed to
3213     // encode_RegMem in the position a register encoding would otherwise take.
3214     // High registers handle in encode_RegMem
3215     const int opc_ext = $rm_opcode$$constant;
3216 
3217     // disp-as-oop when working with static globals
3218     const bool oop_disp = $mem->disp_is_oop();
3219 
3220     encode_RegMem(cbuf, opc_ext,
3221                   $mem$$base,
3222                   $mem$$index,
3223                   $mem$$scale,
3224                   $mem$$disp, oop_disp);
3225   %}
3226 
3227   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3228   %{ // Operand bytes for "dst, [src0 + src1]": register base plus constant displacement, no index
3229     int reg_encoding = $dst$$reg;
3230     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3231     int index        = 0x04;            // 0x04 indicates no index
3232     int scale        = 0x00;            // 0x00 indicates no scale
3233     int displace     = $src1$$constant; // 0x00 indicates no displacement
3234     bool disp_is_oop = false;           // the displacement is a plain constant here
3235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3236                   disp_is_oop);
3237   %}
3238 
3239   enc_class neg_reg(rRegI dst)
3240   %{
3241     // NEG dst (32-bit): [REX.B] 0xF7, ModRM with 3 in the reg field
3242     const int regenc = $dst$$reg;
3243     if (regenc > 7) {
3244       emit_opcode(cbuf, Assembler::REX_B);
3245     }
3246 
3247     emit_opcode(cbuf, 0xF7);
3248     emit_rm(cbuf, 0x3, 0x03, regenc & 7);
3249   %}
3250 
3251   enc_class neg_reg_wide(rRegI dst)
3252   %{
3253     // NEG dst (64-bit): REX.W or REX.WB, then 0xF7 and ModRM with 3 in the reg field
3254     const int regenc = $dst$$reg;
3255     if (regenc > 7) {
3256       emit_opcode(cbuf, Assembler::REX_WB);
3257     } else {
3258       emit_opcode(cbuf, Assembler::REX_W);
3259     }
3260 
3261     emit_opcode(cbuf, 0xF7);
3262     emit_rm(cbuf, 0x3, 0x03, regenc & 7);
3263   %}
3264 
3265   enc_class setLT_reg(rRegI dst)
3266   %{
3267     // SETLT dst: [REX] 0x0F 0x9C ModRM
3268     const int regenc = $dst$$reg;
3269     if (regenc >= 4) {
3270       // Byte-register form: encodings 4-7 get a plain REX, 8-15 get REX.B.
3271       emit_opcode(cbuf, regenc < 8 ? Assembler::REX : Assembler::REX_B);
3272     }
3273 
3274     // Two-byte opcode, then register-direct ModRM with a zero reg field.
3275     emit_opcode(cbuf, 0x0F);
3276     emit_opcode(cbuf, 0x9C);
3277     emit_rm(cbuf, 0x3, 0x0, regenc & 7);
3278   %}
3279 
3280   enc_class setNZ_reg(rRegI dst)
3281   %{
3282     // SETNZ dst: [REX] 0x0F 0x95 ModRM
3283     const int regenc = $dst$$reg;
3284     if (regenc >= 4) {
3285       // Byte-register form: encodings 4-7 get a plain REX, 8-15 get REX.B.
3286       emit_opcode(cbuf, regenc < 8 ? Assembler::REX : Assembler::REX_B);
3287     }
3288 
3289     // Two-byte opcode, then register-direct ModRM with a zero reg field.
3290     emit_opcode(cbuf, 0x0F);
3291     emit_opcode(cbuf, 0x95);
3292     emit_rm(cbuf, 0x3, 0x0, regenc & 7);
3293   %}
3294 
3295   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3296                        rcx_RegI tmp)
3297   %{
3298     // cadd_cmpLT: four instructions -- subl, sbbl, andl, addl -- encoded by hand below
3299 
3300     int tmpReg = $tmp$$reg;
3301 
3302     int penc = $p$$reg;
3303     int qenc = $q$$reg;
3304     int yenc = $y$$reg;
3305 
3306     // subl $p,$q
3307     if (penc < 8) {
3308       if (qenc >= 8) {
3309         emit_opcode(cbuf, Assembler::REX_B); // high q only (q sits in the r/m field)
3310       }
3311     } else {
3312       if (qenc < 8) {
3313         emit_opcode(cbuf, Assembler::REX_R); // high p only (p sits in the reg field)
3314       } else {
3315         emit_opcode(cbuf, Assembler::REX_RB); // both high
3316       }
3317     }
3318     emit_opcode(cbuf, 0x2B);
3319     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3320 
3321     // sbbl $tmp, $tmp
3322     emit_opcode(cbuf, 0x1B); // no prefix and no masking: tmp is declared rcx_RegI
3323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3324 
3325     // andl $tmp, $y
3326     if (yenc >= 8) {
3327       emit_opcode(cbuf, Assembler::REX_B); // y sits in the r/m field
3328     }
3329     emit_opcode(cbuf, 0x23);
3330     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3331 
3332     // addl $p,$tmp
3333     if (penc >= 8) {
3334         emit_opcode(cbuf, Assembler::REX_R); // p sits in the reg field
3335     }
3336     emit_opcode(cbuf, 0x03);
3337     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3338   %}
3339 
3340   // Compare the longs and set -1, 0, or 1 into dst
3341   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3342   %{
3343     int src1enc = $src1$$reg;
3344     int src2enc = $src2$$reg;
3345     int dstenc = $dst$$reg;
3346 
3347     // cmpq $src1, $src2
3348     if (src1enc < 8) {
3349       if (src2enc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_W);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_WB);
3353       }
3354     } else {
3355       if (src2enc < 8) {
3356         emit_opcode(cbuf, Assembler::REX_WR);
3357       } else {
3358         emit_opcode(cbuf, Assembler::REX_WRB);
3359       }
3360     }
3361     emit_opcode(cbuf, 0x3B);
3362     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3363 
3364     // movl $dst, -1
3365     if (dstenc >= 8) {
3366       emit_opcode(cbuf, Assembler::REX_B);
3367     }
3368     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3369     emit_d32(cbuf, -1);
3370 
3371     // jl,s done
3372     emit_opcode(cbuf, 0x7C);
3373     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne + movzbl: 3 bytes each, plus one prefix byte each when dstenc >= 4
3374 
3375     // setne $dst
3376     if (dstenc >= 4) {
3377       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B); // byte-register form
3378     }
3379     emit_opcode(cbuf, 0x0F);
3380     emit_opcode(cbuf, 0x95);
3381     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3382 
3383     // movzbl $dst, $dst
3384     if (dstenc >= 4) {
3385       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); // dst fills both ModRM fields, hence RB when high
3386     }
3387     emit_opcode(cbuf, 0x0F);
3388     emit_opcode(cbuf, 0xB6);
3389     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3390   %}
3391 
3392   enc_class Push_ResultXD(regD dst) %{ // Stores the x87 result to [rsp], loads it into XMM dst, then adds 8 to rsp
3393     int dstenc = $dst$$reg;
3394 
3395     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3396 
3397     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3398     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3399     if (dstenc >= 8) {
3400       emit_opcode(cbuf, Assembler::REX_R); // high dst; the prefix goes before 0x0F
3401     }
3402     emit_opcode  (cbuf, 0x0F );
3403     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3404     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false); // base rsp, index 0x4 (no index), zero displacement
3405 
3406     // add rsp,8  (presumably releases the slot reserved by a matching Push_SrcXD -- confirm)
3407     emit_opcode(cbuf, Assembler::REX_W);
3408     emit_opcode(cbuf,0x83);
3409     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3410     emit_d8(cbuf,0x08);
3411   %}
3412 
3413   enc_class Push_SrcXD(regD src) %{ // Reserves 8 bytes of stack, stores XMM src there, then loads that slot onto the x87 stack
3414     int srcenc = $src$$reg;
3415 
3416     // subq rsp,#8
3417     emit_opcode(cbuf, Assembler::REX_W);
3418     emit_opcode(cbuf, 0x83);
3419     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3420     emit_d8(cbuf, 0x8);
3421 
3422     // movsd [rsp],src
3423     emit_opcode(cbuf, 0xF2);
3424     if (srcenc >= 8) {
3425       emit_opcode(cbuf, Assembler::REX_R); // high src; the prefix goes before 0x0F
3426     }
3427     emit_opcode(cbuf, 0x0F);
3428     emit_opcode(cbuf, 0x11);
3429     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // base rsp, index 0x4 (no index), zero displacement
3430 
3431     // fldd [rsp]
3432     emit_opcode(cbuf, 0x66); // NOTE(review): a 0x66 prefix ahead of the x87 opcode looks redundant -- confirm before touching
3433     emit_opcode(cbuf, 0xDD);
3434     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3435   %}
3436 
3437 
3438   enc_class movq_ld(regD dst, memory mem) %{ // movq load: memory operand into XMM dst, via the MacroAssembler
3439     MacroAssembler _masm(&cbuf);
3440     __ movq($dst$$XMMRegister, $mem$$Address);
3441   %}
3442 
3443   enc_class movq_st(memory mem, regD src) %{ // movq store: XMM src into the memory operand, via the MacroAssembler
3444     MacroAssembler _masm(&cbuf);
3445     __ movq($mem$$Address, $src$$XMMRegister);
3446   %}
3447 
3448   enc_class pshufd_8x8(regF dst, regF src) %{ // copy src to dst, then punpcklbw and pshuflw (selector 0x00) on dst in place
3449     MacroAssembler _masm(&cbuf);
3450 
3451     encode_CopyXD(cbuf, $dst$$reg, $src$$reg); // emitted straight into cbuf, ahead of the two _masm instructions
3452     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3453     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3454   %}
3455 
3456   enc_class pshufd_4x16(regF dst, regF src) %{ // single pshuflw from src into dst with selector 0x00
3457     MacroAssembler _masm(&cbuf);
3458 
3459     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3460   %}
3461 
3462   enc_class pshufd(regD dst, regD src, int mode) %{ // single pshufd from src into dst; mode is passed through as the selector
3463     MacroAssembler _masm(&cbuf);
3464 
3465     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3466   %}
3467 
3468   enc_class pxor(regD dst, regD src) %{ // single pxor with dst as first operand and src as second
3469     MacroAssembler _masm(&cbuf);
3470 
3471     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3472   %}
3473 
3474   enc_class mov_i2x(regD dst, rRegI src) %{ // movdl from general register src into XMM register dst
3475     MacroAssembler _masm(&cbuf);
3476 
3477     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3478   %}
3479 
3480   // obj: object to lock
3481   // box: box address (header location) -- killed
3482   // tmp: rax -- killed
3483   // scr: rbx -- killed
3484   //
3485   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3486   // from i486.ad.  See that file for comments.
3487   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3488   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3489 
3490 
3491   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3492   %{
         // Emits the inline fast path for monitor enter on $obj, using the lock
         // record at $box.  One of three code shapes is chosen at code-generation
         // time from EmitSync:
         //   EmitSync & 1 : store unused_mark into the box and compare rsp with
         //                  NULL_WORD; no lock acquisition is attempted inline.
         //   EmitSync & 2 : stack-lock CAS plus recursive-lock check.
         //   otherwise    : triage on markword bit 0x02 into a stack-lock path
         //                  and an inflated-monitor path.
         // The outcome is left in the condition flags at DONE_LABEL.
         // NOTE(review): presumably ZF=1 means success and ZF=0 sends the caller
         // to the slow path -- confirm against the instruct using this encoding.
         //
         // tmp is RAX (rax_RegI) because cmpxchg uses RAX as its implicit
         // comparand and receives the observed memory value there on failure.
3493     Register objReg = as_Register((int)$obj$$reg);
3494     Register boxReg = as_Register((int)$box$$reg);
3495     Register tmpReg = as_Register($tmp$$reg);
3496     Register scrReg = as_Register($scr$$reg);
3497     MacroAssembler masm(&cbuf);
3498 
3499     // Verify uniqueness of register assignments -- necessary but not sufficient
3500     assert (objReg != boxReg && objReg != tmpReg &&
3501             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3502 
         // Optional statistics: count every entry into this sequence.
3503     if (_counters != NULL) {
3504       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3505     }
3506     if (EmitSync & 1) {
3507         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3508         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3509         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3510     } else
3511     if (EmitSync & 2) {
3512         Label DONE_LABEL;
3513         if (UseBiasedLocking) {
3514            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3515           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3516         }
3517         // QQQ was movl...
             // tmpReg = markword | 1; store it in the box as the displaced header,
             // then try to CAS the box address into the object's markword.
3518         masm.movptr(tmpReg, 0x1);
3519         masm.orptr(tmpReg, Address(objReg, 0));
3520         masm.movptr(Address(boxReg, 0), tmpReg);
3521         if (os::is_MP()) {
3522           masm.lock();
3523         }
3524         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3525         masm.jcc(Assembler::equal, DONE_LABEL);
3526 
3527         // Recursive locking
             // (observed mark - rsp) & (7 - page size): for a power-of-two page
             // size this is zero only when the difference is non-negative, below
             // the page size, and has its low three bits clear.  The result is
             // stored into the box and also sets ZF.
3528         masm.subptr(tmpReg, rsp);
3529         masm.andptr(tmpReg, 7 - os::vm_page_size());
3530         masm.movptr(Address(boxReg, 0), tmpReg);
3531 
3532         masm.bind(DONE_LABEL);
3533         masm.nop(); // avoid branch to branch
3534     } else {
3535         Label DONE_LABEL, IsInflated, Egress;
3536 
3537         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3538         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3539         masm.jcc   (Assembler::notZero, IsInflated) ; 
3540          
3541         // it's stack-locked, biased or neutral
3542         // TODO: optimize markword triage order to reduce the number of
3543         // conditional branches in the most common cases.
3544         // Beware -- there's a subtle invariant that fetch of the markword
3545         // at [FETCH], below, will never observe a biased encoding (*101b).
3546         // If this invariant is not held we'll suffer exclusion (safety) failure.
3547 
3548         if (UseBiasedLocking && !UseOptoBiasInlining) {
3549           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3550           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3551         }
3552 
3553         // Pointer-width OR: a 32-bit orl would zero bits 63..32 of tmpReg and corrupt the displaced header / CAS comparand.
3554         masm.orptr (tmpReg, 1) ; 
3555         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3556         if (os::is_MP()) { masm.lock(); } 
3557         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
             // cond_inc32 is emitted between the CAS and the jcc that consumes
             // the CAS result; the increment is conditional on 'equal'.
3558         if (_counters != NULL) {
3559            masm.cond_inc32(Assembler::equal,
3560                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3561         }
3562         masm.jcc   (Assembler::equal, DONE_LABEL);
3563 
3564         // Recursive locking
             // Same (mark - rsp) & (7 - page size) test as in the EmitSync & 2
             // shape; the masked value is stored in the box and ZF carries the
             // result to DONE_LABEL.
3565         masm.subptr(tmpReg, rsp);
3566         masm.andptr(tmpReg, 7 - os::vm_page_size());
3567         masm.movptr(Address(boxReg, 0), tmpReg);
3568         if (_counters != NULL) {
3569            masm.cond_inc32(Assembler::equal,
3570                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3571         }
3572         masm.jmp   (DONE_LABEL) ;
3573 
3574         masm.bind  (IsInflated) ;
3575         // It's inflated
3576 
3577         // TODO: someday avoid the ST-before-CAS penalty by
3578         // relocating (deferring) the following ST.
3579         // We should also think about trying a CAS without having
3580         // fetched _owner.  If the CAS is successful we may
3581         // avoid an RTO->RTS upgrade on the $line.
3582         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3583         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3584 
             // boxReg is reused to hold the markword value (bit 0x02 set); the
             // "-2" on the ObjectMonitor offsets presumably compensates for that
             // tag bit -- TODO confirm against the markword encoding.
3585         masm.mov    (boxReg, tmpReg) ; 
3586         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3587         masm.testptr(tmpReg, tmpReg) ;   
3588         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3589 
3590         // It's inflated and appears unlocked
             // tmpReg (RAX) is zero here, so the CAS installs r15_thread as owner
             // only if the owner field is still zero.
3591         if (os::is_MP()) { masm.lock(); } 
3592         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3593         // Intentional fall-through into DONE_LABEL ...
3594 
3595         masm.bind  (DONE_LABEL) ;
3596         masm.nop   () ;                 // avoid jmp to jmp
3597     }
3598   %}
3599 
3600   // obj: object to unlock
3601   // box: box address (displaced header location), killed
3602   // RBX: killed tmp; cannot be obj nor box
3603   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3604   %{
         // Emits the inline fast path for monitor exit on $obj.  The outcome is
         // reported in ZF at DONE_LABEL: the LSuccess tail forces ZF=1 (success)
         // and the LGoSlowPath tail forces ZF=0 (failure).  Code shape is chosen
         // at code-generation time from EmitSync:
         //   EmitSync & 4 : only compare rsp with 0; no unlock is attempted inline.
         //   EmitSync & 8 : stack-lock release via CAS, with recursive check.
         //   otherwise    : handles recursive, stack-locked and inflated cases.
         // box is RAX (rax_RegP) because cmpxchg uses RAX as its implicit
         // comparand.
3605 
3606     Register objReg = as_Register($obj$$reg);
3607     Register boxReg = as_Register($box$$reg);
3608     Register tmpReg = as_Register($tmp$$reg);
3609     MacroAssembler masm(&cbuf);
3610 
3611     if (EmitSync & 4) { 
3612        masm.cmpptr(rsp, 0) ; 
3613     } else
3614     if (EmitSync & 8) {
3615        Label DONE_LABEL;
3616        if (UseBiasedLocking) {
3617          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3618        }
3619 
3620        // Check whether the displaced header is 0
3621        //(=> recursive unlock)
3622        masm.movptr(tmpReg, Address(boxReg, 0));
3623        masm.testptr(tmpReg, tmpReg);
3624        masm.jcc(Assembler::zero, DONE_LABEL);
3625 
3626        // If not recursive lock, reset the header to displaced header
3627        if (os::is_MP()) {
3628          masm.lock();
3629        }
3630        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3631        masm.bind(DONE_LABEL);
3632        masm.nop(); // avoid branch to branch
3633     } else {
3634        Label DONE_LABEL, Stacked, CheckSucc ;
3635 
3636        if (UseBiasedLocking && !UseOptoBiasInlining) {
3637          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3638        }
3639         
            // A zero displaced header in the box means a recursive unlock: done,
            // with ZF=1 from the compare.  Otherwise triage on markword bit 0x02.
3640        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3641        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3642        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3643        masm.testl (tmpReg, 0x02) ; 
3644        masm.jcc   (Assembler::zero, Stacked) ; 
3645         
3646        // It's inflated
            // tmpReg still has bit 0x02 set; the "-2" on the ObjectMonitor
            // offsets presumably compensates for that tag -- TODO confirm.
            // (owner ^ current thread) | recursions is non-zero unless this
            // thread is the owner with no recursions; non-zero exits with ZF=0.
3647        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3648        masm.xorptr(boxReg, r15_thread) ; 
3649        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3650        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // cxq | EntryList non-zero: go to CheckSucc; otherwise clear the
            // owner field and finish (ZF=1 from the orptr above).
3651        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3652        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3653        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3654        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3655        masm.jmp   (DONE_LABEL) ; 
3656         
3657        if ((EmitSync & 65536) == 0) { 
3658          Label LSuccess, LGoSlowPath ;
3659          masm.bind  (CheckSucc) ;
              // A zero succ field sends us to the slow path.
3660          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3662 
3663          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3664          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3665          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3666          // are all faster when the write buffer is populated.
3667          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3668          if (os::is_MP()) {
3669             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3670          }
              // Re-check succ after the store and the locked add; non-zero
              // means success.
3671          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3672          masm.jcc   (Assembler::notZero, LSuccess) ;
3673 
              // succ is now zero: try to CAS the owner field from zero back to
              // this thread.  A failed CAS is treated as success; a successful
              // CAS falls through to the slow path.
3674          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is RAX, the implicit cmpxchg comparand
3675          if (os::is_MP()) { masm.lock(); }
3676          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3677          masm.jcc   (Assembler::notEqual, LSuccess) ;
3678          // Intentional fall-through into slow-path
3679 
3680          masm.bind  (LGoSlowPath) ;
3681          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3682          masm.jmp   (DONE_LABEL) ;
3683 
3684          masm.bind  (LSuccess) ;
3685          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3686          masm.jmp   (DONE_LABEL) ;
3687        }
3688 
            // Stack-locked: CAS the displaced header from the box back into the
            // object's markword; RAX (box) is the comparand.
3689        masm.bind  (Stacked) ; 
3690        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3691        if (os::is_MP()) { masm.lock(); } 
3692        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3693 
            // With EmitSync & 65536 the CheckSucc block above is not emitted, so
            // the label is bound here to keep the earlier jcc target defined.
3694        if (EmitSync & 65536) {
3695           masm.bind (CheckSucc) ;
3696        }
3697        masm.bind(DONE_LABEL);
3698        if (EmitSync & 32768) {
3699           masm.nop();                      // avoid branch to branch
3700        }
3701     }
3702   %}
3703 
3704   enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3705                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline comparison of the String objects in rdi (str1) and
         // rsi (str2).  The result is left in rcx: the difference of the first
         // mismatching characters (str1 char minus str2 char), or, when the
         // strings agree up to the shorter length, the difference of the
         // counts (str1 count minus str2 count).
         // Apart from tmp1/tmp2, the operands are accessed through their fixed
         // registers (rdi, rsi, rax, rbx, rcx) rather than via $name$$reg.
         // rdi, rsi, rax and rbx are all overwritten.  The length difference is
         // kept on the stack for the duration and removed on every exit path.
3706     Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3707           POP_LABEL, DONE_LABEL, CONT_LABEL,
3708           WHILE_HEAD_LABEL;
3709     MacroAssembler masm(&cbuf);
3710 
3711     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3712     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3713 
3714     // Get the first character position in both strings
3715     //         [8] char array, [12] offset, [16] count
3716     int value_offset  = java_lang_String::value_offset_in_bytes();
3717     int offset_offset = java_lang_String::offset_offset_in_bytes();
3718     int count_offset  = java_lang_String::count_offset_in_bytes();
3719     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3720 
         // rax = address of str2's first char, rbx = address of str1's first
         // char (value array base + offset * 2); rcx is a scratch for offset.
3721     masm.load_heap_oop(rax, Address(rsi, value_offset));
3722     masm.movl(rcx, Address(rsi, offset_offset));
3723     masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
3724     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3725     masm.movl(rcx, Address(rdi, offset_offset));
3726     masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3727 
3728     // Compute the minimum of the string lengths(rsi) and the
3729     // difference of the string lengths (stack)
3730 
3731     // do the conditional move stuff
         // rdi = count1, rsi = count2; push (count1 - count2); the flags from
         // the subl drive the cmov so that rsi ends up holding min(count1, count2).
3732     masm.movl(rdi, Address(rdi, count_offset));
3733     masm.movl(rsi, Address(rsi, count_offset));
3734     masm.movl(rcx, rdi);
3735     masm.subl(rdi, rsi);
3736     masm.push(rdi);
3737     masm.cmov(Assembler::lessEqual, rsi, rcx);
3738 
3739     // Is the minimum length zero?
3740     masm.bind(RCX_GOOD_LABEL);
3741     masm.testl(rsi, rsi);
3742     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3743 
3744     // Load first characters
3745     masm.load_unsigned_short(rcx, Address(rbx, 0));
3746     masm.load_unsigned_short(rdi, Address(rax, 0));
3747 
3748     // Compare first characters
3749     masm.subl(rcx, rdi);
3750     masm.jcc(Assembler::notZero,  POP_LABEL);
3751     masm.decrementl(rsi);
3752     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3753 
3754     {
3755       // Check after comparing first character to see if strings are equivalent
3756       Label LSkip2;
3757       // Check if the strings start at same location
3758       masm.cmpptr(rbx, rax);
3759       masm.jccb(Assembler::notEqual, LSkip2);
3760 
3761       // Check if the length difference is zero (from stack)
3762       masm.cmpl(Address(rsp, 0), 0x0);
3763       masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3764 
3765       // Strings might not be equivalent
3766       masm.bind(LSkip2);
3767     }
3768 
3769     // Advance to next character
3770     masm.addptr(rax, 2);
3771     masm.addptr(rbx, 2);
3772 
3773     if (UseSSE42Intrinsics) {
3774       // With SSE4.2, use double quad vector compare
3775       Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3776       // Setup to compare 16-byte vectors
3777       masm.movl(rdi, rsi);
3778       masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3779       masm.andl(rdi, 0x00000007); // rdi holds the tail count
3780       masm.testl(rsi, rsi);
3781       masm.jccb(Assembler::zero, COMPARE_TAIL);
3782 
           // Point rax/rbx just past the vectorizable region and index it with
           // a negative char count in rsi that counts up towards zero.
3783       masm.lea(rax, Address(rax, rsi, Address::times_2));
3784       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3785       masm.negptr(rsi);
3786 
3787       masm.bind(COMPARE_VECTORS);
           // XOR the two 16-byte chunks; ptest sets ZF only if they are equal.
3788       masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
3789       masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
3790       masm.pxor(tmp1Reg, tmp2Reg);
3791       masm.ptest(tmp1Reg, tmp1Reg);
3792       masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3793       masm.addptr(rsi, 8);
3794       masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3795       masm.jmpb(COMPARE_TAIL);
3796 
3797       // Mismatched characters in the vectors
           // Rewind rax/rbx to the start of the differing chunk and rescan its
           // 8 chars with the scalar loop below.
3798       masm.bind(VECTOR_NOT_EQUAL);
3799       masm.lea(rax, Address(rax, rsi, Address::times_2));
3800       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3801       masm.movl(rdi, 8);
3802 
3803       // Compare tail (< 8 chars), or rescan last vectors to
3804       // find 1st mismatched characters
3805       masm.bind(COMPARE_TAIL);
3806       masm.testl(rdi, rdi);
3807       masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3808       masm.movl(rsi, rdi);
3809       // Fallthru to tail compare
3810     }
3811 
3812     // Shift RAX and RBX to the end of the arrays, negate min
3813     masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3814     masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3815     masm.negptr(rsi);
3816 
3817     // Compare the rest of the characters
3818     masm.bind(WHILE_HEAD_LABEL);
3819     masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3820     masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3821     masm.subl(rcx, rdi);
3822     masm.jccb(Assembler::notZero, POP_LABEL);
3823     masm.increment(rsi);
3824     masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3825 
3826     // Strings are equal up to min length.  Return the length difference.
3827     masm.bind(LENGTH_DIFF_LABEL);
3828     masm.pop(rcx);
3829     masm.jmpb(DONE_LABEL);
3830 
3831     // Discard the stored length difference
         // rcx already holds the character difference from the subl.
3832     masm.bind(POP_LABEL);
3833     masm.addptr(rsp, 8);
3834 
3835     // That's it
3836     masm.bind(DONE_LABEL);
3837   %}
3838 
3839  enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
3840                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
         // Emits an inline search for the String in rdi (the substring) inside
         // the String in rsi.  The result is left in rbx: a character index
         // computed from the match address, or -1.
         // Apart from tmp1, the operands are accessed through their fixed
         // registers.  pcmpestri takes its explicit lengths from rax (register
         // operand) and rdx (memory operand) and returns an index in rcx, which
         // is why those registers are pinned by the operand list.
         // Three words are pushed on the stack (string address, substring
         // address, substring count) and removed at CLEANUP.
3841     // SSE4.2 version
3842     Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3843           SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3844     MacroAssembler masm(&cbuf);
3845 
3846     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3847 
3848     // Get the first character position in both strings
3849     //         [8] char array, [12] offset, [16] count
3850     int value_offset  = java_lang_String::value_offset_in_bytes();
3851     int offset_offset = java_lang_String::offset_offset_in_bytes();
3852     int count_offset  = java_lang_String::count_offset_in_bytes();
3853     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3854 
3855     // Get counts for string and substr
3856     masm.movl(rdx, Address(rsi, count_offset));
3857     masm.movl(rax, Address(rdi, count_offset));
3858     // Check for substr count > string count
         // This exit happens before anything is pushed, so it skips CLEANUP.
3859     masm.cmpl(rax, rdx);
3860     masm.jcc(Assembler::greater, RET_NEG_ONE);
3861 
3862     // Start the indexOf operation
3863     // Get start addr of string
3864     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3865     masm.movl(rcx, Address(rsi, offset_offset));
3866     masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3867     masm.push(rsi);
3868 
3869     // Get start addr of substr
3870     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3871     masm.movl(rcx, Address(rdi, offset_offset));
3872     masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3873     masm.push(rdi);
3874     masm.push(rax);
3875     masm.jmpb(PREP_FOR_SCAN);
3876 
3877     // Substr count saved at sp
3878     // Substr saved at sp+8
3879     // String saved at sp+16
3880 
3881     // Prep to load substr for scan
         // Re-entry point after a failed candidate: restore the substring
         // address and count from the stack.
3882     masm.bind(LOAD_SUBSTR);
3883     masm.movptr(rdi, Address(rsp, 8));
3884     masm.movl(rax, Address(rsp, 0));
3885 
3886     // Load substr
         // The add/sub pair pre-biases rdx and rsi so that the loop head can
         // unconditionally step by 8 chars / 16 bytes on every iteration.
3887     masm.bind(PREP_FOR_SCAN);
3888     masm.movdqu(tmp1Reg, Address(rdi, 0));
3889     masm.addq(rdx, 8);    // prime the loop
3890     masm.subptr(rsi, 16);
3891 
3892     // Scan string for substr in 16-byte vectors
         // NOTE(review): imm8 0x0d selects unsigned-word, equal-ordered
         // comparison per the PCMPESTRI definition.  'above' (CF=0 and ZF=0)
         // then reads as "no match and the string chunk was full", and
         // 'aboveEqual' (CF=0) as "no match" -- confirm against the manual.
3893     masm.bind(SCAN_TO_SUBSTR);
3894     masm.subq(rdx, 8);
3895     masm.addptr(rsi, 16);
3896     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3897     masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
3898     masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
3899 
3900     // Fallthru: found a potential substr
3901 
3902     //Make sure string is still long enough
         // rcx is the index produced by pcmpestri; rdx becomes the number of
         // string chars remaining from the candidate position.
3903     masm.subl(rdx, rcx);
3904     masm.cmpl(rdx, rax);
3905     masm.jccb(Assembler::negative, RET_NOT_FOUND);
3906     // Compute start addr of substr
         // rbx remembers the candidate address for the final index computation.
3907     masm.lea(rsi, Address(rsi, rcx, Address::times_2));
3908     masm.movptr(rbx, rsi);
3909 
3910     // Compare potential substr
3911     masm.addq(rdx, 8);        // prime the loop
3912     masm.addq(rax, 8);
3913     masm.subptr(rsi, 16);
3914     masm.subptr(rdi, 16);
3915 
3916     // Scan 16-byte vectors of string and substr
3917     masm.bind(SCAN_SUBSTR);
3918     masm.subq(rax, 8);
3919     masm.subq(rdx, 8);
3920     masm.addptr(rsi, 16);
3921     masm.addptr(rdi, 16);
3922     masm.movdqu(tmp1Reg, Address(rdi, 0));
3923     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3924     masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
3925     masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
3926 
3927     // Compute substr offset
         // (candidate address - string start address) / 2 = char index.
3928     masm.movptr(rsi, Address(rsp, 16));
3929     masm.subptr(rbx, rsi);
3930     masm.shrl(rbx, 1);
3931     masm.jmpb(CLEANUP);
3932 
3933     masm.bind(RET_NEG_ONE);
3934     masm.movl(rbx, -1);
3935     masm.jmpb(DONE);
3936 
3937     masm.bind(RET_NOT_FOUND);
3938     masm.movl(rbx, -1);
3939 
         // Drop the three saved words (3 * 8 bytes).
3940     masm.bind(CLEANUP);
3941     masm.addptr(rsp, 24);
3942 
3943     masm.bind(DONE);
3944   %}
3945 
3946   enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3947                               rbx_RegI tmp3, rcx_RegI tmp4, rax_RegI result) %{
         // Emits an inline equality test of the String objects in rdi (str1)
         // and rsi (str2).  The result is left in rax: 1 if equal, 0 otherwise.
         // Apart from tmp1/tmp2 (XMM), the operands are accessed through their
         // fixed registers: rbx (tmp3) and rcx (tmp4) are scratch, and rdi/rsi
         // are overwritten with character addresses.
         // The rcx operand is named tmp4 so that $tmp2$$reg below refers
         // unambiguously to the second XMM temporary.
3948     Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3949     MacroAssembler masm(&cbuf);
3950 
3951     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3952     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3953 
3954     int value_offset  = java_lang_String::value_offset_in_bytes();
3955     int offset_offset = java_lang_String::offset_offset_in_bytes();
3956     int count_offset  = java_lang_String::count_offset_in_bytes();
3957     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3958 
3959     // does source == target string?
3960     masm.cmpptr(rdi, rsi);
3961     masm.jcc(Assembler::equal, RET_TRUE);
3962 
3963     // get and compare counts
3964     masm.movl(rcx, Address(rdi, count_offset));
3965     masm.movl(rax, Address(rsi, count_offset));
3966     masm.cmpl(rcx, rax);
3967     masm.jcc(Assembler::notEqual, RET_FALSE);
3968     masm.testl(rax, rax);
3969     masm.jcc(Assembler::zero, RET_TRUE);
3970 
3971     // get source string offset and value
3972     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3973     masm.movl(rax, Address(rsi, offset_offset));
3974     masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset));
3975 
3976     // get compare string offset and value
3977     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3978     masm.movl(rax, Address(rdi, offset_offset));
3979     masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset));
3980 
3981     // Set byte count
         // rcx = count * 2 (bytes); rax keeps a copy from which the tail size
         // is derived.
3982     masm.shll(rcx, 1);
3983     masm.movl(rax, rcx);
3984 
3985     if (UseSSE42Intrinsics) {
3986       // With SSE4.2, use double quad vector compare
3987       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3988       // Compare 16-byte vectors
3989       masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
3990       masm.andl(rax, 0x0000000e);  // tail count (in bytes)
3991       masm.testl(rcx, rcx);
3992       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point rdi/rsi just past the vectorizable region and index it with
           // a negative byte count in rcx that counts up towards zero.
3993       masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3994       masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3995       masm.negptr(rcx);
3996 
3997       masm.bind(COMPARE_WIDE_VECTORS);
           // XOR the two 16-byte chunks; ptest sets ZF only if they are equal.
3998       masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3999       masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
4000       masm.pxor(tmp1Reg, tmp2Reg);
4001       masm.ptest(tmp1Reg, tmp1Reg);
4002       masm.jccb(Assembler::notZero, RET_FALSE);
4003       masm.addptr(rcx, 16);
4004       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4005       masm.bind(COMPARE_TAIL);
4006       masm.movl(rcx, rax);
4007       // Fallthru to tail compare
4008     }
4009 
4010     // Compare 4-byte vectors
4011     masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
4012     masm.andl(rax, 0x00000002);  // tail char (in bytes)
4013     masm.testl(rcx, rcx);
4014     masm.jccb(Assembler::zero, COMPARE_CHAR);
4015     masm.lea(rdi, Address(rdi, rcx, Address::times_1));
4016     masm.lea(rsi, Address(rsi, rcx, Address::times_1));
4017     masm.negptr(rcx);
4018 
4019     masm.bind(COMPARE_VECTORS);
4020     masm.movl(rbx, Address(rdi, rcx, Address::times_1));
4021     masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
4022     masm.jccb(Assembler::notEqual, RET_FALSE);
4023     masm.addptr(rcx, 4);
4024     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4025 
4026     // Compare trailing char (final 2 bytes), if any
4027     masm.bind(COMPARE_CHAR);
4028     masm.testl(rax, rax);
4029     masm.jccb(Assembler::zero, RET_TRUE);
4030     masm.load_unsigned_short(rbx, Address(rdi, 0));
4031     masm.load_unsigned_short(rcx, Address(rsi, 0));
4032     masm.cmpl(rbx, rcx);
4033     masm.jccb(Assembler::notEqual, RET_FALSE);
4034 
4035     masm.bind(RET_TRUE);
4036     masm.movl(rax, 1);   // return true
4037     masm.jmpb(DONE);
4038 
4039     masm.bind(RET_FALSE);
4040     masm.xorl(rax, rax); // return false
4041 
4042     masm.bind(DONE);
4043   %}
4044 
4045   enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
4046                              rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline equality test of the two arrays $ary1 and $ary2,
         // whose elements are addressed using the T_CHAR base offset.
         // $result receives 1 if the arrays are the same reference or have
         // equal length and contents, and 0 otherwise (including when exactly
         // one of them is null).
         // $ary1/$ary2 are overwritten with element addresses; $tmp3/$tmp4 and
         // the two XMM temporaries are scratch.
4047     Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4048     MacroAssembler masm(&cbuf);
4049 
4050     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
4051     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
4052     Register ary1Reg      = as_Register($ary1$$reg);
4053     Register ary2Reg      = as_Register($ary2$$reg);
4054     Register tmp3Reg      = as_Register($tmp3$$reg);
4055     Register tmp4Reg      = as_Register($tmp4$$reg);
4056     Register resultReg    = as_Register($result$$reg);
4057 
4058     int length_offset  = arrayOopDesc::length_offset_in_bytes();
4059     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4060 
4061     // Check the input args
         // Same reference (including both null) is equal; exactly one null is
         // not equal.
4062     masm.cmpq(ary1Reg, ary2Reg);
4063     masm.jcc(Assembler::equal, TRUE_LABEL);
4064     masm.testq(ary1Reg, ary1Reg);
4065     masm.jcc(Assembler::zero, FALSE_LABEL);
4066     masm.testq(ary2Reg, ary2Reg);
4067     masm.jcc(Assembler::zero, FALSE_LABEL);
4068 
4069     // Check the lengths
4070     masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
4071     masm.movl(resultReg, Address(ary2Reg, length_offset));
4072     masm.cmpl(tmp4Reg, resultReg);
4073     masm.jcc(Assembler::notEqual, FALSE_LABEL);
4074     masm.testl(resultReg, resultReg);
4075     masm.jcc(Assembler::zero, TRUE_LABEL);
4076 
4077     //load array address
4078     masm.lea(ary1Reg, Address(ary1Reg, base_offset));
4079     masm.lea(ary2Reg, Address(ary2Reg, base_offset));
4080 
4081     //set byte count
         // tmp4Reg = length * 2 (bytes); resultReg keeps a copy from which the
         // tail size is derived.
4082     masm.shll(tmp4Reg, 1);
4083     masm.movl(resultReg,tmp4Reg);
4084 
4085     if (UseSSE42Intrinsics){
4086       // With SSE4.2, use double quad vector compare
4087       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4088       // Compare 16-byte vectors
4089       masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
4090       masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
4091       masm.testl(tmp4Reg, tmp4Reg);
4092       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point both array registers just past the vectorizable region and
           // index it with a negative byte count that counts up towards zero.
4093       masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4094       masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4095       masm.negptr(tmp4Reg);
4096 
4097       masm.bind(COMPARE_WIDE_VECTORS);
           // XOR the two 16-byte chunks; ptest sets ZF only if they are equal.
4098       masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4099       masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4100       masm.pxor(tmp1Reg, tmp2Reg);
4101       masm.ptest(tmp1Reg, tmp1Reg);
4102 
4103       masm.jccb(Assembler::notZero, FALSE_LABEL);
4104       masm.addptr(tmp4Reg, 16);
4105       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4106       masm.bind(COMPARE_TAIL);
4107       masm.movl(tmp4Reg, resultReg);
4108       // Fallthru to tail compare
4109     }
4110 
4111    // Compare 4-byte vectors
4112     masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
4113     masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
4114     masm.testl(tmp4Reg, tmp4Reg); // if tmp4Reg == 0, only compare the trailing char
4115     masm.jccb(Assembler::zero, COMPARE_CHAR);
4116     masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4117     masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4118     masm.negptr(tmp4Reg);
4119 
4120     masm.bind(COMPARE_VECTORS);
4121     masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4122     masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4123     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4124     masm.addptr(tmp4Reg, 4);
4125     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4126 
4127     // Compare trailing char (final 2 bytes), if any
4128     masm.bind(COMPARE_CHAR);
4129     masm.testl(resultReg, resultReg);
4130     masm.jccb(Assembler::zero, TRUE_LABEL);
4131     masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
4132     masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
4133     masm.cmpl(tmp3Reg, tmp4Reg);
4134     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4135 
4136     masm.bind(TRUE_LABEL);
4137     masm.movl(resultReg, 1);   // return true
4138     masm.jmpb(DONE);
4139 
4140     masm.bind(FALSE_LABEL);
4141     masm.xorl(resultReg, resultReg); // return false
4142 
4143     // That's it
4144     masm.bind(DONE);
4145   %}
4146 
4147   enc_class enc_rethrow()
4148   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub().
         // The displacement is relative to the end of the instruction, i.e. the
         // current code end plus the 4 displacement bytes still to be emitted.
         // It is recorded with a runtime-call relocation.
4149     cbuf.set_inst_mark();
4150     emit_opcode(cbuf, 0xE9); // jmp entry
4151     emit_d32_reloc(cbuf,
4152                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
4153                    runtime_call_Relocation::spec(),
4154                    RELOC_DISP32);
4155   %}
4156 
4157   enc_class absF_encoding(regF dst)
4158   %{
         // Hand-encodes opcode 0F 54 (ANDPS) with $dst as the register operand
         // and a disp32 memory operand referring to
         // StubRoutines::x86::float_sign_mask().
4159     int dstenc = $dst$$reg;
4160     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
4161 
4162     cbuf.set_inst_mark();
         // Registers 8-15 need a REX.R prefix; only the low three bits of the
         // register number go into the ModRM reg field.
4163     if (dstenc >= 8) {
4164       emit_opcode(cbuf, Assembler::REX_R);
4165       dstenc -= 8;
4166     }
4167     // XXX reg_mem doesn't support RIP-relative addressing yet
4168     emit_opcode(cbuf, 0x0F);
4169     emit_opcode(cbuf, 0x54);
4170     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4171     emit_d32_reloc(cbuf, signmask_address);
4172   %}
4173 
4174   enc_class absD_encoding(regD dst)
4175   %{
         // Hand-encodes opcode 66 0F 54 (ANDPD) with $dst as the register
         // operand and a disp32 memory operand referring to
         // StubRoutines::x86::double_sign_mask().
4176     int dstenc = $dst$$reg;
4177     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
4178 
4179     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
4180     emit_opcode(cbuf, 0x66);
         // Registers 8-15 need a REX.R prefix; only the low three bits of the
         // register number go into the ModRM reg field.
4181     if (dstenc >= 8) {
4182       emit_opcode(cbuf, Assembler::REX_R);
4183       dstenc -= 8;
4184     }
4185     // XXX reg_mem doesn't support RIP-relative addressing yet
4186     emit_opcode(cbuf, 0x0F);
4187     emit_opcode(cbuf, 0x54);
4188     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4189     emit_d32_reloc(cbuf, signmask_address);
4190   %}
4191 
4192   enc_class negF_encoding(regF dst)
4193   %{
         // Hand-encodes opcode 0F 57 (XORPS) with $dst as the register operand
         // and a disp32 memory operand referring to
         // StubRoutines::x86::float_sign_flip().
4194     int dstenc = $dst$$reg;
4195     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
4196 
4197     cbuf.set_inst_mark();
         // Registers 8-15 need a REX.R prefix; only the low three bits of the
         // register number go into the ModRM reg field.
4198     if (dstenc >= 8) {
4199       emit_opcode(cbuf, Assembler::REX_R);
4200       dstenc -= 8;
4201     }
4202     // XXX reg_mem doesn't support RIP-relative addressing yet
4203     emit_opcode(cbuf, 0x0F);
4204     emit_opcode(cbuf, 0x57);
4205     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4206     emit_d32_reloc(cbuf, signflip_address);
4207   %}
4208 
4209   enc_class negD_encoding(regD dst)
4210   %{
         // Hand-encodes opcode 66 0F 57 (XORPD) with $dst as the register
         // operand and a disp32 memory operand referring to
         // StubRoutines::x86::double_sign_flip().
4211     int dstenc = $dst$$reg;
4212     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
4213 
4214     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
4215     emit_opcode(cbuf, 0x66);
         // Registers 8-15 need a REX.R prefix; only the low three bits of the
         // register number go into the ModRM reg field.
4216     if (dstenc >= 8) {
4217       emit_opcode(cbuf, Assembler::REX_R);
4218       dstenc -= 8;
4219     }
4220     // XXX reg_mem doesn't support RIP-relative addressing yet
4221     emit_opcode(cbuf, 0x0F);
4222     emit_opcode(cbuf, 0x57);
4223     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4224     emit_d32_reloc(cbuf, signflip_address);
4225   %}
4226 
4227   enc_class f2i_fixup(rRegI dst, regF src)
4228   %{
         // Emits a fix-up sequence: if $dst equals 0x80000000, spill the float
         // $src to a fresh 8-byte stack slot, call
         // StubRoutines::x86::f2i_fixup(), and pop the slot into $dst;
         // otherwise skip the whole sequence.
         // NOTE(review): presumably this follows a float-to-int conversion whose
         // "indefinite" result is 0x80000000 -- confirm against the instruct.
         //
         // All instructions are hand-encoded, so the short-branch displacement
         // below must match the exact byte length of what follows it.
4229     int dstenc = $dst$$reg;
4230     int srcenc = $src$$reg;
4231 
4232     // cmpl $dst, #0x80000000
4233     if (dstenc >= 8) {
4234       emit_opcode(cbuf, Assembler::REX_B);
4235     }
4236     emit_opcode(cbuf, 0x81);
4237     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4238     emit_d32(cbuf, 0x80000000);
4239 
4240     // jne,s done
         // Skipped bytes: subq (4) + movss (5, +1 with REX for src >= 8)
         // + call (5) + popq (1, +1 with REX for dst >= 8) = 15, 16 or 17.
4241     emit_opcode(cbuf, 0x75);
4242     if (srcenc < 8 && dstenc < 8) {
4243       emit_d8(cbuf, 0xF);
4244     } else if (srcenc >= 8 && dstenc >= 8) {
4245       emit_d8(cbuf, 0x11);
4246     } else {
4247       emit_d8(cbuf, 0x10);
4248     }
4249 
4250     // subq rsp, #8
4251     emit_opcode(cbuf, Assembler::REX_W);
4252     emit_opcode(cbuf, 0x83);
4253     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4254     emit_d8(cbuf, 8);
4255 
4256     // movss [rsp], $src
4257     emit_opcode(cbuf, 0xF3);
4258     if (srcenc >= 8) {
4259       emit_opcode(cbuf, Assembler::REX_R);
4260     }
4261     emit_opcode(cbuf, 0x0F);
4262     emit_opcode(cbuf, 0x11);
4263     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4264 
4265     // call f2i_fixup
         // rel32 is relative to the end of the call instruction (current code
         // end plus the 4 displacement bytes).
4266     cbuf.set_inst_mark();
4267     emit_opcode(cbuf, 0xE8);
4268     emit_d32_reloc(cbuf,
4269                    (int)
4270                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
4271                    runtime_call_Relocation::spec(),
4272                    RELOC_DISP32);
4273 
4274     // popq $dst
4275     if (dstenc >= 8) {
4276       emit_opcode(cbuf, Assembler::REX_B);
4277     }
4278     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4279 
4280     // done:
4281   %}
4282 
4283   enc_class f2l_fixup(rRegL dst, regF src)
4284   %{
         // Emits a fix-up sequence: if $dst equals the 64-bit constant stored
         // at StubRoutines::x86::double_sign_flip(), spill the float $src to a
         // fresh 8-byte stack slot, call StubRoutines::x86::f2l_fixup(), and
         // pop the slot into $dst; otherwise skip the whole sequence.
         // NOTE(review): presumably this follows a float-to-long conversion
         // whose "indefinite" result is 0x8000000000000000 -- confirm against
         // the instruct.
         //
         // All instructions are hand-encoded, so the short-branch displacement
         // below must match the exact byte length of what follows it.
4285     int dstenc = $dst$$reg;
4286     int srcenc = $src$$reg;
4287     address const_address = (address) StubRoutines::x86::double_sign_flip();
4288 
4289     // cmpq $dst, [0x8000000000000000]
         // Opcode 0x39 with a disp32 memory operand; $dst sits in the ModRM reg
         // field, so registers 8-15 need REX.R in addition to REX.W.
4290     cbuf.set_inst_mark();
4291     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4292     emit_opcode(cbuf, 0x39);
4293     // XXX reg_mem doesn't support RIP-relative addressing yet
4294     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4295     emit_d32_reloc(cbuf, const_address);
4296 
4297 
4298     // jne,s done
         // Skipped bytes: subq (4) + movss (5, +1 with REX for src >= 8)
         // + call (5) + popq (1, +1 with REX for dst >= 8) = 15, 16 or 17.
4299     emit_opcode(cbuf, 0x75);
4300     if (srcenc < 8 && dstenc < 8) {
4301       emit_d8(cbuf, 0xF);
4302     } else if (srcenc >= 8 && dstenc >= 8) {
4303       emit_d8(cbuf, 0x11);
4304     } else {
4305       emit_d8(cbuf, 0x10);
4306     }
4307 
4308     // subq rsp, #8
4309     emit_opcode(cbuf, Assembler::REX_W);
4310     emit_opcode(cbuf, 0x83);
4311     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4312     emit_d8(cbuf, 8);
4313 
4314     // movss [rsp], $src
4315     emit_opcode(cbuf, 0xF3);
4316     if (srcenc >= 8) {
4317       emit_opcode(cbuf, Assembler::REX_R);
4318     }
4319     emit_opcode(cbuf, 0x0F);
4320     emit_opcode(cbuf, 0x11);
4321     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4322 
4323     // call f2l_fixup
         // rel32 is relative to the end of the call instruction (current code
         // end plus the 4 displacement bytes).
4324     cbuf.set_inst_mark();
4325     emit_opcode(cbuf, 0xE8);
4326     emit_d32_reloc(cbuf,
4327                    (int)
4328                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
4329                    runtime_call_Relocation::spec(),
4330                    RELOC_DISP32);
4331 
4332     // popq $dst
4333     if (dstenc >= 8) {
4334       emit_opcode(cbuf, Assembler::REX_B);
4335     }
4336     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4337 
4338     // done:
4339   %}
4340 
4341   enc_class d2i_fixup(rRegI dst, regD src)
4342   %{
4343     int dstenc = $dst$$reg;
4344     int srcenc = $src$$reg;
4345 
4346     // cmpl $dst, #0x80000000
4347     if (dstenc >= 8) {
4348       emit_opcode(cbuf, Assembler::REX_B);
4349     }
4350     emit_opcode(cbuf, 0x81);
4351     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4352     emit_d32(cbuf, 0x80000000);
4353 
4354     // jne,s done
4355     emit_opcode(cbuf, 0x75);
4356     if (srcenc < 8 && dstenc < 8) {
4357       emit_d8(cbuf, 0xF);
4358     } else if (srcenc >= 8 && dstenc >= 8) {
4359       emit_d8(cbuf, 0x11);
4360     } else {
4361       emit_d8(cbuf, 0x10);
4362     }
4363 
4364     // subq rsp, #8
4365     emit_opcode(cbuf, Assembler::REX_W);
4366     emit_opcode(cbuf, 0x83);
4367     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4368     emit_d8(cbuf, 8);
4369 
4370     // movsd [rsp], $src
4371     emit_opcode(cbuf, 0xF2);
4372     if (srcenc >= 8) {
4373       emit_opcode(cbuf, Assembler::REX_R);
4374     }
4375     emit_opcode(cbuf, 0x0F);
4376     emit_opcode(cbuf, 0x11);
4377     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4378 
4379     // call d2i_fixup
4380     cbuf.set_inst_mark();
4381     emit_opcode(cbuf, 0xE8);
4382     emit_d32_reloc(cbuf,
4383                    (int)
4384                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
4385                    runtime_call_Relocation::spec(),
4386                    RELOC_DISP32);
4387 
4388     // popq $dst
4389     if (dstenc >= 8) {
4390       emit_opcode(cbuf, Assembler::REX_B);
4391     }
4392     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4393 
4394     // done:
4395   %}
4396 
4397   enc_class d2l_fixup(rRegL dst, regD src)
4398   %{
4399     int dstenc = $dst$$reg;
4400     int srcenc = $src$$reg;
4401     address const_address = (address) StubRoutines::x86::double_sign_flip();
4402 
4403     // cmpq $dst, [0x8000000000000000]
4404     cbuf.set_inst_mark();
4405     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4406     emit_opcode(cbuf, 0x39);
4407     // XXX reg_mem doesn't support RIP-relative addressing yet
4408     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4409     emit_d32_reloc(cbuf, const_address);
4410 
4411 
4412     // jne,s done
4413     emit_opcode(cbuf, 0x75);
4414     if (srcenc < 8 && dstenc < 8) {
4415       emit_d8(cbuf, 0xF);
4416     } else if (srcenc >= 8 && dstenc >= 8) {
4417       emit_d8(cbuf, 0x11);
4418     } else {
4419       emit_d8(cbuf, 0x10);
4420     }
4421 
4422     // subq rsp, #8
4423     emit_opcode(cbuf, Assembler::REX_W);
4424     emit_opcode(cbuf, 0x83);
4425     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4426     emit_d8(cbuf, 8);
4427 
4428     // movsd [rsp], $src
4429     emit_opcode(cbuf, 0xF2);
4430     if (srcenc >= 8) {
4431       emit_opcode(cbuf, Assembler::REX_R);
4432     }
4433     emit_opcode(cbuf, 0x0F);
4434     emit_opcode(cbuf, 0x11);
4435     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4436 
4437     // call d2l_fixup
4438     cbuf.set_inst_mark();
4439     emit_opcode(cbuf, 0xE8);
4440     emit_d32_reloc(cbuf,
4441                    (int)
4442                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4443                    runtime_call_Relocation::spec(),
4444                    RELOC_DISP32);
4445 
4446     // popq $dst
4447     if (dstenc >= 8) {
4448       emit_opcode(cbuf, Assembler::REX_B);
4449     }
4450     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4451 
4452     // done:
4453   %}
4454 
4455   // Safepoint Poll.  This polls the safepoint page, and causes an
4456   // exception if it is not readable. Unfortunately, it kills
4457   // RFLAGS in the process.
  enc_class enc_safepoint_poll
  %{
    // Emits a RIP-relative "testl" against the address returned by
    // os::get_polling_page(), tagged with a poll_type relocation.
    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
    // XXX reg_mem doesn't support RIP-relative addressing yet
    cbuf.set_inst_mark();
    // The relocation is attached to the instruction start recorded just above.
    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
    emit_opcode(cbuf, 0x85); // testl
    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
    // cbuf.inst_mark() is beginning of instruction
    emit_d32_reloc(cbuf, os::get_polling_page());
  %}
4470 %}
4471 
4472 
4473 
4474 //----------FRAME--------------------------------------------------------------
4475 // Definition of frame structure and management information.
4476 //
4477 //  S T A C K   L A Y O U T    Allocators stack-slot number
4478 //                             |   (to get allocators register number
4479 //  G  Owned by    |        |  v    add OptoReg::stack0())
4480 //  r   CALLER     |        |
4481 //  o     |        +--------+      pad to even-align allocators stack-slot
4482 //  w     V        |  pad0  |        numbers; owned by CALLER
4483 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4484 //  h     ^        |   in   |  5
4485 //        |        |  args  |  4   Holes in incoming args owned by SELF
4486 //  |     |        |        |  3
4487 //  |     |        +--------+
4488 //  V     |        | old out|      Empty on Intel, window on Sparc
4489 //        |    old |preserve|      Must be even aligned.
4490 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4491 //        |        |   in   |  3   area for Intel ret address
4492 //     Owned by    |preserve|      Empty on Sparc.
4493 //       SELF      +--------+
4494 //        |        |  pad2  |  2   pad to align old SP
4495 //        |        +--------+  1
4496 //        |        | locks  |  0
4497 //        |        +--------+----> OptoReg::stack0(), even aligned
4498 //        |        |  pad1  | 11   pad to align new SP
4499 //        |        +--------+
4500 //        |        |        | 10
4501 //        |        | spills |  9   spills
4502 //        V        |        |  8   (pad0 slot for callee)
4503 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4504 //        ^        |  out   |  7
4505 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4506 //     Owned by    +--------+
4507 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4508 //        |    new |preserve|      Must be even-aligned.
4509 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4510 //        |        |        |
4511 //
4512 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4513 //         known from SELF's arguments and the Java calling convention.
4514 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
4522 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4523 //         even aligned with pad0 as needed.
4524 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4525 //         region 6-11 is even aligned; it may be padded out more so that
4526 //         the region from SP to FP meets the minimum stack alignment.
4527 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4528 //         alignment.  Region 11, pad1, may be dynamically extended so that
4529 //         SP meets the minimum alignment.
4530 
// Frame description: stack direction, the registers and stack slots that make
// up the calling convention, and where return values live.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // NOTE(review): the value below is 4 slots (plus 2 when VerifyStackAtCalls
  // is set), not 2 -- presumably the extra slots cover more than the return
  // address; confirm and update the sentence above.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              round_to(2 + 2 * VerifyStackAtCalls +
                       Compile::current()->fixed_slots(),
                       WordsPerLong * 2));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo[] and hi[] are indexed directly by ideal_reg and give the two
    // halves of the returned OptoRegPair.  The first two entries are
    // unlabeled placeholders.
    // NOTE(review): the tables carry an Op_RegN entry placed before Op_RegI,
    // while the assert above uses Op_RegI as its lower bound -- confirm that
    // Op_RegN is never requested here.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    // OptoReg::Bad marks register types that have no second half here.
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0_H_num,   // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Sanity check that the table size tracks the set of machine leaf types.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4630 
4631 //----------ATTRIBUTES---------------------------------------------------------
4632 //----------Operand Attributes-------------------------------------------------
// Default value for the operand cost attribute; individual operands override
// it with their own op_cost(...) clause.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// Default values for the per-instruction attributes declared below.
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4649 
4650 //----------OPERANDS-----------------------------------------------------------
4651 // Operand definitions must precede instruction definitions for correct parsing
4652 // in the ADLC because operands constitute user defined types which are used in
4653 // instruction definitions.
4654 
4655 //----------Simple Operands----------------------------------------------------
4656 // Immediate Operands
// Integer Immediate
// Matches any ConI node, with no value restriction.  Cost 10; exposed through
// the constant interface (CONST_INTER).
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4666 
// Constant for test vs zero
// Matches a ConI node only when its int value is 0.  Cost 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4677 
// Constant for increment
// Matches a ConI node only when its int value is 1.  Cost 0.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4688 
// Constant for decrement
// Matches a ConI node only when its int value is -1.  Cost 0.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4699 
// Valid scale values for addressing modes
// Matches a ConI node whose int value lies in the inclusive range 0..3.
// No op_cost clause, so the default operand cost applies.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4709 
// Integer immediate that fits in a signed 8-bit field:
// matches a ConI node whose value is in [-0x80, 0x80).  Cost 5.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4719 
// Integer immediate that fits in a signed 16-bit field:
// matches a ConI node whose value is in [-32768, 32767].  Cost 10.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4729 
// Constant for long shifts
// Matches a ConI node only when its int value is 32.  Cost 0.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4740 
// Constant for long shifts
// Matches a ConI node only when its int value is 64.  Cost 0.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4751 
// Pointer Immediate
// Matches any ConP node, with no value restriction.  Cost 10.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4761 
// NULL Pointer Immediate
// Matches a ConP node only when its pointer value is 0.  Cost 5.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4772 
// Narrow Pointer Immediate
// Matches any ConN node, with no value restriction.  Cost 10.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4781 
// NULL Narrow Pointer Immediate
// Matches a ConN node only when its narrow constant is 0.  Cost 5.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4791 
// Non-oop pointer immediate that fits in 31 bits:
// matches a ConP node whose type is not an oop pointer and whose value
// shifted right by 31 is zero.  Cost 5.
operand immP31()
%{
  predicate(!n->as_Type()->type()->isa_oopptr()
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4802 
4803 
// Long Immediate
// Matches any ConL node, with no value restriction.  Cost 20.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
4813 
// Long Immediate 8-bit
// Matches a ConL node whose value is in [-0x80, 0x80).  Cost 5.
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4824 
// Long Immediate 32-bit unsigned
// Matches a ConL node whose value is unchanged by a round trip through
// unsigned int, i.e. it is representable in 32 unsigned bits.  Cost 10.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4835 
// Long Immediate 32-bit signed
// Matches a ConL node whose value is unchanged by a round trip through
// int, i.e. it is representable in 32 signed bits.  Cost 15.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4846 
// Long Immediate zero
// Matches a ConL node only when its long value is 0.  Cost 10.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4857 
// Constant for increment
// Matches a ConL node only when its long value is 1.
// No op_cost clause, so the default operand cost applies.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4867 
// Constant for decrement
// Matches a ConL node only when its long value is -1.
// No op_cost clause, so the default operand cost applies.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4877 
// Long Immediate: the value 10
// Matches a ConL node only when its long value is 10.
// No op_cost clause, so the default operand cost applies.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4887 
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
// Matches a ConL node whose value is in [0, 0x80).  Cost 10.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4899 
// Long Immediate: low 32-bit mask
// Matches a ConL node only when its long value is exactly 0xFFFFFFFF.
// Cost 20.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4910 
// Float Immediate zero
// Matches a ConF node when jint_cast() of its float value is 0.  Cost 5.
// NOTE(review): jint_cast presumably reinterprets the float's bits, in which
// case only +0.0f (not -0.0f) matches -- confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4921 
// Float Immediate
// Matches any ConF node, with no value restriction.  Cost 15.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4931 
// Double Immediate zero
// Matches a ConD node when jlong_cast() of its double value is 0.  Cost 5.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, in
// which case only +0.0 (not -0.0) matches -- confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4942 
// Double Immediate
// Matches any ConD node, with no value restriction.  Cost 15.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4952 
4953 // Immediates for special shifts (sign extend)
4954 
// Shift-count constant 16
// Matches a ConI node only when its int value is 16.
// No op_cost clause, so the default operand cost applies.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4964 
// Shift-count constant 24
// Matches a ConI node only when its int value is 24.
// No op_cost clause, so the default operand cost applies.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4973 
// Constant for byte-wide masking
// Matches a ConI node only when its int value is 255.
// No op_cost clause, so the default operand cost applies.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4983 
// Constant for short-wide masking
// Matches a ConI node only when its int value is 65535.
// No op_cost clause, so the default operand cost applies.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4993 
// Constant for byte-wide masking
// Matches a ConL node only when its long value is 255.
// No op_cost clause, so the default operand cost applies.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5003 
// Constant for short-wide masking
// Matches a ConL node only when its long value is 65535.
// No op_cost clause, so the default operand cost applies.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5013 
5014 // Register Operands
// Integer Register
// General int register operand, allocated from register class int_reg.
// Besides RegI it also accepts the single-register int operands listed
// below as inputs.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5030 
5031 // Special Registers
// Int operand allocated from register class int_rax_reg; matches RegI and
// rRegI.  Printed as "RAX".
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5041 
5042 // Special Registers
// Int operand allocated from register class int_rbx_reg; matches RegI and
// rRegI.  Printed as "RBX".
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
5052 
// Int operand allocated from register class int_rcx_reg; matches RegI and
// rRegI.  Printed as "RCX".
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
5062 
// Int operand allocated from register class int_rdx_reg; matches RegI and
// rRegI.  Printed as "RDX".
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
5072 
// Int operand allocated from register class int_rdi_reg; matches RegI and
// rRegI.  Printed as "RDI".
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
5082 
// Int operand allocated from register class int_no_rcx_reg.  Accepts RegI
// and the rax/rbx/rdx/rdi single-register operands; rcx_RegI is
// deliberately absent from the match list.
operand no_rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  match(RegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5095 
// Int operand allocated from register class int_no_rax_rdx_reg.  Accepts
// RegI and the rbx/rcx/rdi single-register operands; rax_RegI and rdx_RegI
// are absent from the match list.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5107 
// Pointer Register
// Pointer operand allocated from register class any_reg.  Accepts RegP,
// every single-register pointer operand listed below, and rRegP.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5124 
// General pointer register operand, allocated from register class ptr_reg.
// Accepts RegP and the single-register pointer operands listed below.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);  // See Q&A below about r15_RegP.

  format %{ %}
  interface(REG_INTER);
%}
5139 
// Narrow (compressed) pointer register operand: matches RegN and is
// allocated from the same register class as ints (int_reg).
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5147 
5148 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5149 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5150 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
5151 // The output of an instruction is controlled by the allocator, which respects
5152 // register class masks, not match rules.  Unless an instruction mentions
5153 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5154 // by the allocator as an input.
5155 
// Pointer operand allocated from register class ptr_no_rax_reg.  Accepts
// RegP and the rbx/rsi/rdi single-register pointer operands.
operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5167 
// Pointer operand allocated from register class ptr_no_rbp_reg.  Accepts
// RegP and the rbx/rsi/rdi single-register pointer operands.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5179 
// Pointer operand allocated from register class ptr_no_rax_rbx_reg.
// Accepts RegP and the rsi/rdi single-register pointer operands.
operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5190 
5191 // Special Registers
// Return a pointer value
// Pointer operand allocated from register class ptr_rax_reg; matches RegP
// and rRegP.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5202 
5203 // Special Registers
// Return a compressed pointer value
// Narrow pointer operand allocated from register class int_rax_reg; matches
// RegN and rRegN.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
5214 
// Used in AtomicAdd
// Pointer operand allocated from register class ptr_rbx_reg; matches RegP
// and rRegP.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5225 
// Pointer operand allocated from register class ptr_rsi_reg; matches RegP
// and rRegP.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5235 
// Used in rep stosq
// Pointer operand allocated from register class ptr_rdi_reg; matches RegP
// and rRegP.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5246 
// Pointer operand allocated from register class ptr_rbp_reg; matches RegP
// and rRegP.
operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5256 
// Pointer operand allocated from register class ptr_r15_reg; matches RegP
// and rRegP.  The Q&A comment earlier in this file describes r15 as the
// read-only TLS register.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5266 
// General long register operand, allocated from register class long_reg.
// Accepts RegL plus the rax_RegL and rdx_RegL single-register operands.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5277 
5278 // Special Registers
// Long operand allocated from register class long_no_rax_rdx_reg; matches
// RegL and rRegL.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5288 
// Long operand that matches RegL, rRegL and rdx_RegL.
// NOTE(review): the allocation constraint is long_no_rax_rdx_reg (the same
// class no_rax_rdx_RegL uses), even though this operand's name and its
// rdx_RegL match rule suggest RDX should be allowed -- confirm whether a
// dedicated "no rax" long register class was intended.
operand no_rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5299 
// Long operand allocated from register class long_no_rcx_reg; matches RegL
// and rRegL.
operand no_rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5309 
// Long operand allocated from register class long_rax_reg; matches RegL and
// rRegL.  Printed as "RAX".
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5319 
// Long operand allocated from register class long_rcx_reg; matches RegL and
// rRegL.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5329 
// Long operand allocated from register class long_rdx_reg; matches RegL and
// rRegL.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5339 
// Flags register, used as output of compare instructions
// Matches RegFlags, allocated from register class int_flags.  Printed as
// "RFLAGS".
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
5349 
// Flags register, used as output of FLOATING POINT compare instructions
// Same register class and match rule as rFlagsReg; it is a distinct operand
// type printed as "RFLAGS_U".
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
5359 
// Flags operand in register class int_flags, printed as "RFLAGS_U_CF".
// Its predicate is the constant false, so the match(RegFlags) rule can never
// succeed on its own; presumably it is only named explicitly by
// instructions -- confirm against its users.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
5368 
// Float register operands
// Matches RegF, allocated from register class float_reg.
operand regF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5378 
// Double register operands
// Matches RegD, allocated from register class double_reg.
operand regD() 
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5388 
5389 
5390 //----------Memory Operands----------------------------------------------------
5391 // Direct Memory Operand
5392 // operand direct(immP addr)
5393 // %{
5394 //   match(addr);
5395 
5396 //   format %{ "[$addr]" %}
5397 //   interface(MEMORY_INTER) %{
5398 //     base(0xFFFFFFFF);
5399 //     index(0x4);
5400 //     scale(0x0);
5401 //     disp($addr);
5402 //   %}
5403 // %}
5404 
// Indirect Memory Operand
// Address is just the pointer register: base = $reg, scale 0, disp 0.
// NOTE(review): index(0x4) presumably is the encoding for "no index
// register" -- confirm against the encoder.
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}
5419 
// Indirect Memory Plus Short Offset Operand
// Matches (AddP reg off) where off is an immL8 constant.
// Address: base = $reg, disp = $off, scale 0.
// NOTE(review): index(0x4) presumably is the encoding for "no index
// register" -- confirm against the encoder.
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
5434 
// Indirect Memory Plus Long Offset Operand
// Matches (AddP reg off) where off is an immL32 constant.
// Address: base = $reg, disp = $off, scale 0.
// This is the operand the frame block names for cisc-spilling.
// NOTE(review): index(0x4) presumably is the encoding for "no index
// register" -- confirm against the encoder.
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
5449 
// Indirect Memory Plus Index Register Plus Offset Operand
// Matches (AddP (AddP reg lreg) off) where off is an immL32 constant.
// Address: base = $reg, index = $lreg (scale 0), disp = $off.  Cost 10.
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}
5465 
// Indirect Memory Plus Index Register Operand
// Matches (AddP reg lreg).
// Address: base = $reg, index = $lreg (scale 0), disp 0.  Cost 10.
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5481 
// Indirect Memory Times Scale Plus Index Register
// Matches (AddP reg (LShiftL lreg scale)) where scale is an immI2 (0..3).
// Address: base = $reg, index = $lreg shifted by $scale, disp 0.  Cost 10.
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5497 
5498 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: address = $reg + ($lreg << $scale) + $off
5499 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5500 %{
5501   constraint(ALLOC_IN_RC(ptr_reg));
5502   match(AddP (AddP reg (LShiftL lreg scale)) off);
5503 
5504   op_cost(10);
5505   format %{"[$reg + $off + $lreg << $scale]" %}
5506   interface(MEMORY_INTER) %{
5507     base($reg);
5508     index($lreg);
5509     scale($scale);
5510     disp($off);
5511   %}
5512 %}
5513 
5514 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: int index widened by ConvI2L; the predicate requires the inspected long type's _lo to be >= 0
5515 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5516 %{
5517   constraint(ALLOC_IN_RC(ptr_reg));
5518   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5519   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5520 
5521   op_cost(10);
5522   format %{"[$reg + $off + $idx << $scale]" %}
5523   interface(MEMORY_INTER) %{
5524     base($reg);
5525     index($idx);   // the int register is used directly as the index (ConvI2L is absorbed by the match)
5526     scale($scale);
5527     disp($off);
5528   %}
5529 %}
5530 
5531 // Indirect Narrow Oop Plus Offset Operand: address = R12 + ($reg << 3) + $off, matched only when narrow_oop_shift() != 0
5532 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
5533 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
5534 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5535   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5536   constraint(ALLOC_IN_RC(ptr_reg));
5537   match(AddP (DecodeN reg) off);
5538 
5539   op_cost(10);
5540   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5541   interface(MEMORY_INTER) %{
5542     base(0xc); // R12
5543     index($reg);
5544     scale(0x3); // index << 3, as shown in the format string
5545     disp($off);
5546   %}
5547 %}
5548 
5549 // Indirect Narrow Oop Operand: matches DecodeN only when narrow_oop_shift() == 0 and uses the narrow register directly as the base
5550 operand indirectNarrow(rRegN reg)
5551 %{
5552   predicate(Universe::narrow_oop_shift() == 0);
5553   constraint(ALLOC_IN_RC(ptr_reg));
5554   match(DecodeN reg);
5555 
5556   format %{ "[$reg]" %}
5557   interface(MEMORY_INTER) %{
5558     base($reg);
5559     index(0x4);  // No Index
5560     scale(0x0);  // No Scale
5561     disp(0x0);   // No Displacement
5562   %}
5563 %}
5564 
5565 // Indirect Narrow Oop Plus Short Offset Operand: address = $reg + $off (immL8), only when narrow_oop_shift() == 0
5566 operand indOffset8Narrow(rRegN reg, immL8 off)
5567 %{
5568   predicate(Universe::narrow_oop_shift() == 0);
5569   constraint(ALLOC_IN_RC(ptr_reg));
5570   match(AddP (DecodeN reg) off);
5571 
5572   format %{ "[$reg + $off (8-bit)]" %}
5573   interface(MEMORY_INTER) %{
5574     base($reg);
5575     index(0x4);  // No Index
5576     scale(0x0);  // No Scale
5577     disp($off);
5578   %}
5579 %}
5580 
5581 // Indirect Narrow Oop Plus Long Offset Operand: address = $reg + $off (immL32), only when narrow_oop_shift() == 0
5582 operand indOffset32Narrow(rRegN reg, immL32 off)
5583 %{
5584   predicate(Universe::narrow_oop_shift() == 0);
5585   constraint(ALLOC_IN_RC(ptr_reg));
5586   match(AddP (DecodeN reg) off);
5587 
5588   format %{ "[$reg + $off (32-bit)]" %}
5589   interface(MEMORY_INTER) %{
5590     base($reg);
5591     index(0x4);  // No Index
5592     scale(0x0);  // No Scale
5593     disp($off);
5594   %}
5595 %}
5596 
5597 // Indirect Narrow Oop Plus Index Register Plus Offset Operand: address = $reg + $lreg + $off, only when narrow_oop_shift() == 0
5598 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5599 %{
5600   predicate(Universe::narrow_oop_shift() == 0);
5601   constraint(ALLOC_IN_RC(ptr_reg));
5602   match(AddP (AddP (DecodeN reg) lreg) off);
5603 
5604   op_cost(10);
5605   format %{"[$reg + $off + $lreg]" %}
5606   interface(MEMORY_INTER) %{
5607     base($reg);
5608     index($lreg);
5609     scale(0x0);  // No Scale
5610     disp($off);
5611   %}
5612 %}
5613 
5614 // Indirect Narrow Oop Plus Index Register Operand: address = $reg + $lreg (no displacement), only when narrow_oop_shift() == 0
5615 operand indIndexNarrow(rRegN reg, rRegL lreg)
5616 %{
5617   predicate(Universe::narrow_oop_shift() == 0);
5618   constraint(ALLOC_IN_RC(ptr_reg));
5619   match(AddP (DecodeN reg) lreg);
5620 
5621   op_cost(10);
5622   format %{"[$reg + $lreg]" %}
5623   interface(MEMORY_INTER) %{
5624     base($reg);
5625     index($lreg);
5626     scale(0x0);  // No Scale
5627     disp(0x0);   // No Displacement
5628   %}
5629 %}
5630 
5631 // Indirect Narrow Oop Plus Scaled Index Register Operand: address = $reg + ($lreg << $scale), only when narrow_oop_shift() == 0
5632 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5633 %{
5634   predicate(Universe::narrow_oop_shift() == 0);
5635   constraint(ALLOC_IN_RC(ptr_reg));
5636   match(AddP (DecodeN reg) (LShiftL lreg scale));
5637 
5638   op_cost(10);
5639   format %{"[$reg + $lreg << $scale]" %}
5640   interface(MEMORY_INTER) %{
5641     base($reg);
5642     index($lreg);
5643     scale($scale);
5644     disp(0x0);   // No Displacement
5645   %}
5646 %}
5647 
5648 // Indirect Narrow Oop Plus Scaled Index Register Plus Offset Operand: address = $reg + ($lreg << $scale) + $off, only when narrow_oop_shift() == 0
5649 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5650 %{
5651   predicate(Universe::narrow_oop_shift() == 0);
5652   constraint(ALLOC_IN_RC(ptr_reg));
5653   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5654 
5655   op_cost(10);
5656   format %{"[$reg + $off + $lreg << $scale]" %}
5657   interface(MEMORY_INTER) %{
5658     base($reg);
5659     index($lreg);
5660     scale($scale);
5661     disp($off);
5662   %}
5663 %}
5664 
5665 // Indirect Narrow Oop Times Scale Plus Positive Index Register Plus Offset Operand: requires narrow_oop_shift() == 0 and the inspected long type's _lo to be >= 0
5666 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5667 %{
5668   constraint(ALLOC_IN_RC(ptr_reg));
5669   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5670   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5671 
5672   op_cost(10);
5673   format %{"[$reg + $off + $idx << $scale]" %}
5674   interface(MEMORY_INTER) %{
5675     base($reg);
5676     index($idx);   // the int register is used directly as the index (ConvI2L is absorbed by the match)
5677     scale($scale);
5678     disp($off);
5679   %}
5680 %}
5681 
5682 
5683 //----------Special Memory Operands--------------------------------------------
5684 // Stack Slot Operand - This operand is used for loading and storing temporary
5685 //                      values on the stack where a match requires a value to
5686 //                      flow through memory.
5687 operand stackSlotP(sRegP reg)
5688 %{
5689   constraint(ALLOC_IN_RC(stack_slots));
5690   // No match rule: this operand is only generated during matching; $reg (sRegP) supplies the RSP-relative stack offset
5691 
5692   format %{ "[$reg]" %}
5693   interface(MEMORY_INTER) %{
5694     base(0x4);   // RSP
5695     index(0x4);  // No Index
5696     scale(0x0);  // No Scale
5697     disp($reg);  // Stack Offset
5698   %}
5699 %}
5700 
5701 operand stackSlotI(sRegI reg)
5702 %{
5703   constraint(ALLOC_IN_RC(stack_slots));
5704   // No match rule: this operand is only generated during matching; $reg (sRegI) supplies the RSP-relative stack offset
5705 
5706   format %{ "[$reg]" %}
5707   interface(MEMORY_INTER) %{
5708     base(0x4);   // RSP
5709     index(0x4);  // No Index
5710     scale(0x0);  // No Scale
5711     disp($reg);  // Stack Offset
5712   %}
5713 %}
5714 
5715 operand stackSlotF(sRegF reg)
5716 %{
5717   constraint(ALLOC_IN_RC(stack_slots));
5718   // No match rule: this operand is only generated during matching; $reg (sRegF) supplies the RSP-relative stack offset
5719 
5720   format %{ "[$reg]" %}
5721   interface(MEMORY_INTER) %{
5722     base(0x4);   // RSP
5723     index(0x4);  // No Index
5724     scale(0x0);  // No Scale
5725     disp($reg);  // Stack Offset
5726   %}
5727 %}
5728 
5729 operand stackSlotD(sRegD reg)
5730 %{
5731   constraint(ALLOC_IN_RC(stack_slots));
5732   // No match rule: this operand is only generated during matching; $reg (sRegD) supplies the RSP-relative stack offset
5733 
5734   format %{ "[$reg]" %}
5735   interface(MEMORY_INTER) %{
5736     base(0x4);   // RSP
5737     index(0x4);  // No Index
5738     scale(0x0);  // No Scale
5739     disp($reg);  // Stack Offset
5740   %}
5741 %}
5742 operand stackSlotL(sRegL reg)
5743 %{
5744   constraint(ALLOC_IN_RC(stack_slots));
5745   // No match rule: this operand is only generated during matching; $reg (sRegL) supplies the RSP-relative stack offset
5746 
5747   format %{ "[$reg]" %}
5748   interface(MEMORY_INTER) %{
5749     base(0x4);   // RSP
5750     index(0x4);  // No Index
5751     scale(0x0);  // No Scale
5752     disp($reg);  // Stack Offset
5753   %}
5754 %}
5755 
5756 //----------Conditional Branch Operands----------------------------------------
5757 // Comparison Op  - This is the operation of the comparison, and is limited to
5758 //                  the following set of codes:
5759 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5760 //
5761 // Other attributes of the comparison, such as unsignedness, are specified
5762 // by the comparison instruction that sets a condition code flags register.
5763 // That result is represented by a flags operand whose subtype is appropriate
5764 // to the unsignedness (etc.) of the comparison.
5765 //
5766 // Later, the instruction which matches both the Comparison Op (a Bool) and
5767 // the flags (produced by the Cmp) specifies the coding of the comparison op
5768 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5769 
5770 // Comparison Code: maps each Bool test to a condition-code encoding and mnemonic suffix (e/ne/l/ge/le/g)
5771 operand cmpOp()
5772 %{
5773   match(Bool);
5774 
5775   format %{ "" %}
5776   interface(COND_INTER) %{
5777     equal(0x4, "e");
5778     not_equal(0x5, "ne");
5779     less(0xC, "l");
5780     greater_equal(0xD, "ge");
5781     less_equal(0xE, "le");
5782     greater(0xF, "g");
5783   %}
5784 %}
5785 
5786 // Comparison Code, unsigned compare (e/ne/b/nb/be/nbe).  Used by FP also, with
5787 // C2 (unordered) turned into GT or LT already.  The other bits
5788 // C0 and C3 are turned into Carry & Zero flags.
5789 operand cmpOpU()
5790 %{
5791   match(Bool);
5792 
5793   format %{ "" %}
5794   interface(COND_INTER) %{
5795     equal(0x4, "e");
5796     not_equal(0x5, "ne");
5797     less(0x2, "b");
5798     greater_equal(0x3, "nb");
5799     less_equal(0x6, "be");
5800     greater(0x7, "nbe");
5801   %}
5802 %}
5803 
5804 
5805 // Floating comparisons that don't require any fixup for the unordered case; the predicate limits this to lt/ge/le/gt tests
5806 operand cmpOpUCF() %{
5807   match(Bool);
5808   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5809             n->as_Bool()->_test._test == BoolTest::ge ||
5810             n->as_Bool()->_test._test == BoolTest::le ||
5811             n->as_Bool()->_test._test == BoolTest::gt);
5812   format %{ "" %}
5813   interface(COND_INTER) %{
5814     equal(0x4, "e");
5815     not_equal(0x5, "ne");
5816     less(0x2, "b");
5817     greater_equal(0x3, "nb");
5818     less_equal(0x6, "be");
5819     greater(0x7, "nbe");
5820   %}
5821 %}
5822 
5823 
5824 // Floating comparisons that can be fixed up with extra conditional jumps; the predicate limits this to ne/eq tests
5825 operand cmpOpUCF2() %{
5826   match(Bool);
5827   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5828             n->as_Bool()->_test._test == BoolTest::eq);
5829   format %{ "" %}
5830   interface(COND_INTER) %{
5831     equal(0x4, "e");
5832     not_equal(0x5, "ne");
5833     less(0x2, "b");
5834     greater_equal(0x3, "nb");
5835     less_equal(0x6, "be");
5836     greater(0x7, "nbe");
5837   %}
5838 %}
5839 
5840 
5841 //----------OPERAND CLASSES----------------------------------------------------
5842 // Operand Classes are groups of operands that are used to simplify
5843 // instruction definitions by not requiring the AD writer to specify separate
5844 // instructions for every form of operand when the instruction accepts
5845 // multiple operand types with the same basic encoding and format.  The classic
5846 // case of this is memory operands.
5847 
5848 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5849                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5850                indCompressedOopOffset,
5851                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5852                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5853                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5854 
5855 //----------PIPELINE-----------------------------------------------------------
5856 // Rules which define the behavior of the target architectures pipeline.
5857 pipeline %{
5858 
5859 //----------ATTRIBUTES---------------------------------------------------------
5860 attributes %{
5861   variable_size_instructions;        // Variable size instructions
5862   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5863   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5864   instruction_fetch_unit_size = 16;  // The processor fetches one line
5865   instruction_fetch_units = 1;       // of 16 bytes
5866 
5867   // List of nop instructions
5868   nops( MachNop );
5869 %}
5870 
5871 //----------RESOURCES----------------------------------------------------------
5872 // Resources are the functional units available to the machine
5873 
5874 // Generic P2/P3 pipeline
5875 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5876 // 3 instructions decoded per cycle.
5877 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5878 // 3 ALU op, only ALU0 handles mul instructions.  NOTE(review): the text says 2 load/store ops per cycle but MEM below lists three units (MS0-MS2) -- confirm.
5879 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5880            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5881            BR, FPU,
5882            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5883 
5884 //----------PIPELINE DESCRIPTION-----------------------------------------------
5885 // Pipeline Description specifies the stages in the machine's pipeline
5886 
5887 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe_class entries below
5888 pipe_desc(S0, S1, S2, S3, S4, S5);
5889 
5890 //----------PIPELINE CLASSES---------------------------------------------------
5891 // Pipeline Classes describe the stages in which input and output are
5892 // referenced by the hardware pipeline.
5893 
5894 // Naming convention: ialu or fpu
5895 // Then: _reg
5896 // Then: _reg if there is a 2nd register
5897 // Then: _long if it's a pair of instructions implementing a long
5898 // Then: _fat if it requires the big decoder
5899 //   Or: _mem if it requires the big decoder and a memory unit.
5900 
5901 // Integer ALU reg operation: single instruction that both reads and writes dst
5902 pipe_class ialu_reg(rRegI dst)
5903 %{
5904     single_instruction;
5905     dst    : S4(write);
5906     dst    : S3(read);
5907     DECODE : S0;        // any decoder
5908     ALU    : S3;        // any alu
5909 %}
5910 
5911 // Long ALU reg operation: two-instruction sequence that reads and writes dst
5912 pipe_class ialu_reg_long(rRegL dst)
5913 %{
5914     instruction_count(2);
5915     dst    : S4(write);
5916     dst    : S3(read);
5917     DECODE : S0(2);     // any 2 decoders
5918     ALU    : S3(2);     // any 2 alus
5919 %}
5920 
5921 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
5922 pipe_class ialu_reg_fat(rRegI dst)
5923 %{
5924     single_instruction;
5925     dst    : S4(write);
5926     dst    : S3(read);
5927     D0     : S0;        // big decoder only
5928     ALU    : S3;        // any alu
5929 %}
5930 
5931 // Long ALU reg operation using big decoder: two instructions, both through D0
5932 pipe_class ialu_reg_long_fat(rRegL dst)
5933 %{
5934     instruction_count(2);
5935     dst    : S4(write);
5936     dst    : S3(read);
5937     D0     : S0(2);     // big decoder only; twice
5938     ALU    : S3(2);     // any 2 alus
5939 %}
5940 
5941 // Integer ALU reg-reg operation: reads src in S3, writes dst in S4
5942 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5943 %{
5944     single_instruction;
5945     dst    : S4(write);
5946     src    : S3(read);
5947     DECODE : S0;        // any decoder
5948     ALU    : S3;        // any alu
5949 %}
5950 
5951 // Long ALU reg-reg operation: two-instruction sequence, reads src in S3, writes dst in S4
5952 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5953 %{
5954     instruction_count(2);
5955     dst    : S4(write);
5956     src    : S3(read);
5957     DECODE : S0(2);     // any 2 decoders
5958     ALU    : S3(2);     // any 2 alus
5959 %}
5960 
5961 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here)
5962 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5963 %{
5964     single_instruction;
5965     dst    : S4(write);
5966     src    : S3(read);
5967     D0     : S0;        // big decoder only
5968     ALU    : S3;        // any alu
5969 %}
5970 
5971 // Long ALU reg-reg operation using big decoder: two instructions, both through D0
5972 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5973 %{
5974     instruction_count(2);
5975     dst    : S4(write);
5976     src    : S3(read);
5977     D0     : S0(2);     // big decoder only; twice
5978     ALU    : S3(2);     // any 2 alus
5979 %}
5980 
5981 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
5982 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5983 %{
5984     single_instruction;
5985     dst    : S5(write);
5986     mem    : S3(read);
5987     D0     : S0;        // big decoder only
5988     ALU    : S4;        // any alu
5989     MEM    : S3;        // any mem
5990 %}
5991 
5992 // Integer mem operation (prefetch): uses a memory unit only, no ALU and no register result
5993 pipe_class ialu_mem(memory mem)
5994 %{
5995     single_instruction;
5996     mem    : S3(read);
5997     D0     : S0;        // big decoder only
5998     MEM    : S3;        // any mem
5999 %}
6000 
6001 // Integer Store to Memory: address operand read in S3, source register read in S5
6002 pipe_class ialu_mem_reg(memory mem, rRegI src)
6003 %{
6004     single_instruction;
6005     mem    : S3(read);
6006     src    : S5(read);
6007     D0     : S0;        // big decoder only
6008     ALU    : S4;        // any alu
6009     MEM    : S3;        // any mem
6010 %}
6011 
6012 // // Long Store to Memory
6013 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6014 // %{
6015 //     instruction_count(2);
6016 //     mem    : S3(read);
6017 //     src    : S5(read);
6018 //     D0     : S0(2);          // big decoder only; twice
6019 //     ALU    : S4(2);     // any 2 alus
6020 //     MEM    : S3(2);  // Both mems
6021 // %}
6022 
6023 // Integer Store to Memory, immediate form: only the memory operand is tracked (no register source)
6024 pipe_class ialu_mem_imm(memory mem)
6025 %{
6026     single_instruction;
6027     mem    : S3(read);
6028     D0     : S0;        // big decoder only
6029     ALU    : S4;        // any alu
6030     MEM    : S3;        // any mem
6031 %}
6032 
6033 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
6034 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6035 %{
6036     single_instruction;
6037     dst    : S4(write);
6038     src    : S3(read);
6039     D0     : S0;        // Big decoder only
6040     ALU0   : S3;        // only alu0
6041 %}
6042 
6043 // Integer ALU0 reg-mem operation: memory read in S3, ALU0 in S4, dst written in S5
6044 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6045 %{
6046     single_instruction;
6047     dst    : S5(write);
6048     mem    : S3(read);
6049     D0     : S0;        // big decoder only
6050     ALU0   : S4;        // ALU0 only
6051     MEM    : S3;        // any mem
6052 %}
6053 
6054 // Integer ALU reg-reg operation that writes the flags register (cr) instead of a general register
6055 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6056 %{
6057     single_instruction;
6058     cr     : S4(write);
6059     src1   : S3(read);
6060     src2   : S3(read);
6061     DECODE : S0;        // any decoder
6062     ALU    : S3;        // any alu
6063 %}
6064 
6065 // Integer ALU reg-imm operation that writes the flags register (cr); only src1 is a tracked input
6066 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6067 %{
6068     single_instruction;
6069     cr     : S4(write);
6070     src1   : S3(read);
6071     DECODE : S0;        // any decoder
6072     ALU    : S3;        // any alu
6073 %}
6074 
6075 // Integer ALU reg-mem operation that writes the flags register (cr)
6076 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6077 %{
6078     single_instruction;
6079     cr     : S4(write);
6080     src1   : S3(read);
6081     src2   : S3(read);
6082     D0     : S0;        // big decoder only
6083     ALU    : S4;        // any alu
6084     MEM    : S3;        // any mem
6085 %}
6086 
6087 // Four-instruction idiom reading p, q and y (NOTE(review): presumably a compare-less-than sequence, going by the name -- confirm against users)
6088 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6089 %{
6090     instruction_count(4);
6091     y      : S4(read);
6092     q      : S3(read);
6093     p      : S3(read);
6094     DECODE : S0(4);     // any decoder
6095 %}
6096 
6097 // Conditional move reg-reg: reads src and the flags (cr) in S3, writes dst in S4
6098 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6099 %{
6100     single_instruction;
6101     dst    : S4(write);
6102     src    : S3(read);
6103     cr     : S3(read);
6104     DECODE : S0;        // any decoder
6105 %}
6106 
6107 // Conditional move reg-mem: like the reg-reg form but the source is a memory operand (note the cr-first parameter order)
6108 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6109 %{
6110     single_instruction;
6111     dst    : S4(write);
6112     src    : S3(read);
6113     cr     : S3(read);
6114     DECODE : S0;        // any decoder
6115     MEM    : S3;        // any mem
6116 %}
6117 
6118 // Conditional move reg-reg long (declared single_instruction, yet reserves 2 decoders)
6119 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6120 %{
6121     single_instruction;
6122     dst    : S4(write);
6123     src    : S3(read);
6124     cr     : S3(read);
6125     DECODE : S0(2);     // any 2 decoders
6126 %}
6127 
6128 // XXX
6129 // // Conditional move double reg-reg
6130 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
6131 // %{
6132 //     single_instruction;
6133 //     dst    : S4(write);
6134 //     src    : S3(read);
6135 //     cr     : S3(read);
6136 //     DECODE : S0;     // any decoder
6137 // %}
6138 
6139 // Float reg operation: two instructions, dst is only declared as read
6140 pipe_class fpu_reg(regD dst)
6141 %{
6142     instruction_count(2);
6143     dst    : S3(read);
6144     DECODE : S0(2);     // any 2 decoders
6145     FPU    : S3;
6146 %}
6147 
6148 // Float reg-reg operation: two instructions, reads src in S3, writes dst in S4
6149 pipe_class fpu_reg_reg(regD dst, regD src)
6150 %{
6151     instruction_count(2);
6152     dst    : S4(write);
6153     src    : S3(read);
6154     DECODE : S0(2);     // any 2 decoders
6155     FPU    : S3;
6156 %}
6157 
6158 // Float reg-reg-reg operation: three instructions, two sources read in S3, dst written in S4
6159 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6160 %{
6161     instruction_count(3);
6162     dst    : S4(write);
6163     src1   : S3(read);
6164     src2   : S3(read);
6165     DECODE : S0(3);     // any 3 decoders
6166     FPU    : S3(2);
6167 %}
6168 
6169 // Float reg-reg-reg-reg operation: four instructions, three sources read in S3, dst written in S4
6170 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6171 %{
6172     instruction_count(4);
6173     dst    : S4(write);
6174     src1   : S3(read);
6175     src2   : S3(read);
6176     src3   : S3(read);
6177     DECODE : S0(4);     // 4 decode slots, any decoder
6178     FPU    : S3(2);
6179 %}
6180 
6181 // Float reg-mem-reg-reg operation: four instructions, src1 is a memory operand
6182 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6183 %{
6184     instruction_count(4);
6185     dst    : S4(write);
6186     src1   : S3(read);
6187     src2   : S3(read);
6188     src3   : S3(read);
6189     DECODE : S1(3);     // any 3 decoders
6190     D0     : S0;        // Big decoder only
6191     FPU    : S3(2);
6192     MEM    : S3;        // any mem
6193 %}
6194 
6195 // Float reg-mem operation: two instructions, memory read in S3, FPU in S4, dst written in S5
6196 pipe_class fpu_reg_mem(regD dst, memory mem)
6197 %{
6198     instruction_count(2);
6199     dst    : S5(write);
6200     mem    : S3(read);
6201     D0     : S0;        // big decoder only
6202     DECODE : S1;        // any decoder for FPU POP
6203     FPU    : S4;
6204     MEM    : S3;        // any mem
6205 %}
6206 
6207 // Float reg-reg-mem operation: three instructions, one register source plus one memory source
6208 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6209 %{
6210     instruction_count(3);
6211     dst    : S5(write);
6212     src1   : S3(read);
6213     mem    : S3(read);
6214     D0     : S0;        // big decoder only
6215     DECODE : S1(2);     // any decoder for FPU POP
6216     FPU    : S4;
6217     MEM    : S3;        // any mem
6218 %}
6219 
6220 // Float mem-reg operation: two instructions, memory operand read in S3, register source read in S5
6221 pipe_class fpu_mem_reg(memory mem, regD src)
6222 %{
6223     instruction_count(2);
6224     src    : S5(read);
6225     mem    : S3(read);
6226     DECODE : S0;        // any decoder for FPU PUSH
6227     D0     : S1;        // big decoder only
6228     FPU    : S4;
6229     MEM    : S3;        // any mem
6230 %}
6231 
6232 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6233 %{
6234     instruction_count(3);   // Float mem-reg-reg operation: three instructions, two register sources
6235     src1   : S3(read);
6236     src2   : S3(read);
6237     mem    : S3(read);
6238     DECODE : S0(2);     // any decoder for FPU PUSH
6239     D0     : S1;        // big decoder only
6240     FPU    : S4;
6241     MEM    : S3;        // any mem
6242 %}
6243 
6244 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6245 %{
6246     instruction_count(3);   // Float mem-reg-mem operation: three instructions, two memory operands
6247     src1   : S3(read);
6248     src2   : S3(read);
6249     mem    : S4(read);
6250     DECODE : S0;        // any decoder for FPU PUSH
6251     D0     : S0(2);     // big decoder only
6252     FPU    : S4;
6253     MEM    : S3(2);     // any mem
6254 %}
6255 
6256 pipe_class fpu_mem_mem(memory dst, memory src1)
6257 %{
6258     instruction_count(2);   // Float mem-mem operation: two instructions, two memory units, no FPU resource listed
6259     src1   : S3(read);
6260     dst    : S4(read);
6261     D0     : S0(2);     // big decoder only
6262     MEM    : S3(2);     // any mem
6263 %}
6264 
6265 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6266 %{
6267     instruction_count(3);   // Float mem-mem-mem operation: three instructions, three memory units
6268     src1   : S3(read);
6269     src2   : S3(read);
6270     dst    : S4(read);
6271     D0     : S0(3);     // big decoder only
6272     FPU    : S4;
6273     MEM    : S3(3);     // any mem
6274 %}
6275 
6276 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6277 %{
6278     instruction_count(3);   // Float mem-reg-constant operation: three instructions; the constant is not a tracked operand
6279     src1   : S4(read);
6280     mem    : S4(read);
6281     DECODE : S0;        // any decoder for FPU PUSH
6282     D0     : S0(2);     // big decoder only
6283     FPU    : S4;
6284     MEM    : S3(2);     // any mem
6285 %}
6286 
6287 // Float load constant: two instructions, uses a memory unit for the load and writes dst in S5
6288 pipe_class fpu_reg_con(regD dst)
6289 %{
6290     instruction_count(2);
6291     dst    : S5(write);
6292     D0     : S0;        // big decoder only for the load
6293     DECODE : S1;        // any decoder for FPU POP
6294     FPU    : S4;
6295     MEM    : S3;        // any mem
6296 %}
6297 
6298 // Float load constant combined with a register source: three instructions, src read in S3, dst written in S5
6299 pipe_class fpu_reg_reg_con(regD dst, regD src)
6300 %{
6301     instruction_count(3);
6302     dst    : S5(write);
6303     src    : S3(read);
6304     D0     : S0;        // big decoder only for the load
6305     DECODE : S1(2);     // any decoder for FPU POP
6306     FPU    : S4;
6307     MEM    : S3;        // any mem
6308 %}
6309 
6310 // Unconditional branch: single instruction using the branch unit in S3
6311 pipe_class pipe_jmp(label labl)
6312 %{
6313     single_instruction;
6314     BR   : S3;
6315 %}
6316 
6317 // Conditional branch: reads the flags (cr) in S1, uses the branch unit in S3
6318 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6319 %{
6320     single_instruction;
6321     cr    : S1(read);
6322     BR    : S3;
6323 %}
6324 
6325 // Allocation idiom: one instruction marked force_serialization with a fixed latency of 6
6326 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6327 %{
6328     instruction_count(1); force_serialization;
6329     fixed_latency(6);
6330     heap_ptr : S3(read);
6331     DECODE   : S0(3);
6332     D0       : S2;
6333     MEM      : S3;
6334     ALU      : S3(2);
6335     dst      : S5(write);
6336     BR       : S5;
6337 %}
6338 
6339 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serialized, fixed latency of 100
6340 pipe_class pipe_slow()
6341 %{
6342     instruction_count(10); multiple_bundles; force_serialization;
6343     fixed_latency(100);
6344     D0  : S0(2);
6345     MEM : S3(2);
6346 %}
6347 
6348 // The real do-nothing guy: zero instructions and no resources
6349 pipe_class empty()
6350 %{
6351     instruction_count(0);
6352 %}
6353 
6354 // Define the pipeline class for the Nop node: MachNop (listed in nops() in the attributes) uses the empty class
6355 define
6356 %{
6357    MachNop = empty;
6358 %}
6359 
6360 %}
6361 
6362 //----------INSTRUCTIONS-------------------------------------------------------
6363 //
6364 // match      -- States which machine-independent subtree may be replaced
6365 //               by this instruction.
6366 // ins_cost   -- The estimated cost of this instruction is used by instruction
6367 //               selection to identify a minimum cost tree of machine
6368 //               instructions that matches a tree of machine-independent
6369 //               instructions.
6370 // format     -- A string providing the disassembly for this instruction.
6371 //               The value of an instruction's operand may be inserted
6372 //               by referring to it with a '$' prefix.
6373 // opcode     -- Three instruction opcodes may be provided.  These are referred
6374 //               to within an encode class as $primary, $secondary, and $tertiary
6375 //               respectively.  The primary opcode is commonly used to
6376 //               indicate the type of machine instruction, while secondary
6377 //               and tertiary are often used for prefix options or addressing
6378 //               modes.
6379 // ins_encode -- A list of encode classes with parameters. The encode class
6380 //               name must have been defined in an 'enc_class' specification
6381 //               in the encode section of the architecture description.
6382 
6383 
6384 //----------Load/Store/Move Instructions---------------------------------------
6385 //----------Load Instructions--------------------------------------------------
6386 
6387 // Load Byte (8 bit signed): loads the byte at $mem into the int register $dst with movsbl
6388 instruct loadB(rRegI dst, memory mem)
6389 %{
6390   match(Set dst (LoadB mem));
6391 
6392   ins_cost(125);
6393   format %{ "movsbl  $dst, $mem\t# byte" %}
6394 
6395   ins_encode %{
6396     __ movsbl($dst$$Register, $mem$$Address);
6397   %}
6398 
6399   ins_pipe(ialu_reg_mem);
6400 %}
6401 
6402 // Load Byte (8 bit signed) into Long Register: matches ConvI2L(LoadB) so the conversion is folded into one movsbq
6403 instruct loadB2L(rRegL dst, memory mem)
6404 %{
6405   match(Set dst (ConvI2L (LoadB mem)));
6406 
6407   ins_cost(125);
6408   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6409 
6410   ins_encode %{
6411     __ movsbq($dst$$Register, $mem$$Address);
6412   %}
6413 
6414   ins_pipe(ialu_reg_mem);
6415 %}
6416 
6417 // Load Unsigned Byte (8 bit unsigned): loads the byte at $mem into the int register $dst with movzbl
6418 instruct loadUB(rRegI dst, memory mem)
6419 %{
6420   match(Set dst (LoadUB mem));
6421 
6422   ins_cost(125);
6423   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6424 
6425   ins_encode %{
6426     __ movzbl($dst$$Register, $mem$$Address);
6427   %}
6428 
6429   ins_pipe(ialu_reg_mem);
6430 %}
6431 
6432 // Load Unsigned Byte (8 bit unsigned) into Long Register: matches ConvI2L(LoadUB) and emits a single movzbq
6433 instruct loadUB2L(rRegL dst, memory mem)
6434 %{
6435   match(Set dst (ConvI2L (LoadUB mem)));
6436 
6437   ins_cost(125);
6438   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6439 
6440   ins_encode %{
6441     __ movzbq($dst$$Register, $mem$$Address);
6442   %}
6443 
6444   ins_pipe(ialu_reg_mem);
6445 %}
6446 
6447 // Load Short (16 bit signed): loads the short at $mem into the int register $dst with movswl
6448 instruct loadS(rRegI dst, memory mem)
6449 %{
6450   match(Set dst (LoadS mem));
6451 
6452   ins_cost(125);
6453   format %{ "movswl $dst, $mem\t# short" %}
6454 
6455   ins_encode %{
6456     __ movswl($dst$$Register, $mem$$Address);
6457   %}
6458 
6459   ins_pipe(ialu_reg_mem);
6460 %}
6461 
6462 // Load Short (16 bit signed) into Long Register: matches ConvI2L(LoadS) and emits a single movswq
6463 instruct loadS2L(rRegL dst, memory mem)
6464 %{
6465   match(Set dst (ConvI2L (LoadS mem)));
6466 
6467   ins_cost(125);
6468   format %{ "movswq $dst, $mem\t# short -> long" %}
6469 
6470   ins_encode %{
6471     __ movswq($dst$$Register, $mem$$Address);
6472   %}
6473 
6474   ins_pipe(ialu_reg_mem);
6475 %}
6476 
6477 // Load Unsigned Short/Char (16 bit unsigned): loads the value at $mem into the int register $dst with movzwl
6478 instruct loadUS(rRegI dst, memory mem)
6479 %{
6480   match(Set dst (LoadUS mem));
6481 
6482   ins_cost(125);
6483   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6484 
6485   ins_encode %{
6486     __ movzwl($dst$$Register, $mem$$Address);
6487   %}
6488 
6489   ins_pipe(ialu_reg_mem);
6490 %}
6491 
6492 // Load Unsigned Short/Char (16 bit unsigned) into Long Register: matches ConvI2L(LoadUS) and emits a single movzwq
6493 instruct loadUS2L(rRegL dst, memory mem)
6494 %{
6495   match(Set dst (ConvI2L (LoadUS mem)));
6496 
6497   ins_cost(125);
6498   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6499 
6500   ins_encode %{
6501     __ movzwq($dst$$Register, $mem$$Address);
6502   %}
6503 
6504   ins_pipe(ialu_reg_mem);
6505 %}
6506 
6507 // Load Integer: plain 32-bit movl from $mem into the int register $dst
6508 instruct loadI(rRegI dst, memory mem)
6509 %{
6510   match(Set dst (LoadI mem));
6511 
6512   ins_cost(125);
6513   format %{ "movl    $dst, $mem\t# int" %}
6514 
6515   ins_encode %{
6516     __ movl($dst$$Register, $mem$$Address);
6517   %}
6518 
6519   ins_pipe(ialu_reg_mem);
6520 %}
6521 
6522 // Load Integer into Long Register: matches ConvI2L(LoadI) and emits a single movslq
6523 instruct loadI2L(rRegL dst, memory mem)
6524 %{
6525   match(Set dst (ConvI2L (LoadI mem)));
6526 
6527   ins_cost(125);
6528   format %{ "movslq  $dst, $mem\t# int -> long" %}
6529 
6530   ins_encode %{
6531     __ movslq($dst$$Register, $mem$$Address);
6532   %}
6533 
6534   ins_pipe(ialu_reg_mem);
6535 %}
6536 
// LoadUI2L: load an unsigned 32-bit int into a long register.  A plain
// movl is emitted; on x86-64 a 32-bit register write clears the upper half.
instruct loadUI2L(rRegL dst, memory mem) %{
  match(Set dst (LoadUI2L mem));

  format %{ "movl    $dst, $mem\t# uint -> long" %}
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
%}
6551 
// LoadL: fetch a 64-bit long from memory into a long register.
instruct loadL(rRegL dst, memory mem) %{
  match(Set dst (LoadL mem));

  format %{ "movq    $dst, $mem\t# long" %}
  ins_cost(125);
  ins_pipe(ialu_reg_mem); // XXX

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
%}
6566 
// LoadRange: 32-bit movl (opcode 0x8B) of the range value into an int
// register.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  format %{ "movl    $dst, $mem\t# range" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem);

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
%}
6578 
// LoadP: movq (opcode 0x8B with the wide REX encoding class) of a pointer
// from memory into a pointer register.
instruct loadP(rRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  format %{ "movq    $dst, $mem\t# ptr" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6590 
// LoadN: fetch a compressed (narrow) pointer with a 32-bit movl.
instruct loadN(rRegN dst, memory mem) %{
  match(Set dst (LoadN mem));

  format %{ "movl    $dst, $mem\t# compressed ptr" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
%}
6603 
6604 
// LoadKlass: movq (opcode 0x8B with the wide REX encoding class) of a klass
// pointer from memory into a pointer register.
instruct loadKlass(rRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  format %{ "movq    $dst, $mem\t# class" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6616 
// LoadNKlass: fetch a narrow (compressed) klass pointer with a 32-bit movl.
instruct loadNKlass(rRegN dst, memory mem) %{
  match(Set dst (LoadNKlass mem));

  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
%}
6629 
// LoadF: movss of a float from memory into a float register.  The F3 prefix
// byte (OpcP) is emitted ahead of the REX prefix, followed by 0F 10.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));

  format %{ "movss   $dst, $mem\t# float" %}
  ins_cost(145); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0xF3, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
%}
6641 
// LoadD, variant selected when UseXmmLoadAndClearUpper is off: the double
// is loaded with movlpd (66 0F 12).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  format %{ "movlpd  $dst, $mem\t# double" %}
  ins_cost(145); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0x66, 0x0F, 0x12);
  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
%}
6654 
// LoadD, variant selected when UseXmmLoadAndClearUpper is on: the double is
// loaded with movsd (F2 0F 10).
instruct loadD(regD dst, memory mem) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  format %{ "movsd   $dst, $mem\t# double" %}
  ins_cost(145); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0xF2, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
%}
6666 
// Load8B: aligned packed-byte vector load into an XMM register using the
// movq_ld encoding class.
instruct loadA8B(regD dst, memory mem)
%{
  match(Set dst (Load8B mem));

  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(movq_ld(dst, mem));
%}
6675 
// Load4S: aligned packed-short vector load into an XMM register using the
// movq_ld encoding class.
instruct loadA4S(regD dst, memory mem)
%{
  match(Set dst (Load4S mem));

  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(movq_ld(dst, mem));
%}
6684 
// Load4C: aligned packed-char vector load into an XMM register using the
// movq_ld encoding class.
instruct loadA4C(regD dst, memory mem)
%{
  match(Set dst (Load4C mem));

  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(movq_ld(dst, mem));
%}
6693 
// Load2I: aligned packed-int vector load into an XMM register using the
// movq_ld encoding class.
instruct load2IU(regD dst, memory mem)
%{
  match(Set dst (Load2I mem));

  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(movq_ld(dst, mem));
%}
6702 
// Load2F: aligned packed-single-float vector load into an XMM register
// using the movq_ld encoding class.
instruct loadA2F(regD dst, memory mem)
%{
  match(Set dst (Load2F mem));

  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
  ins_cost(145);
  ins_pipe(pipe_slow);
  ins_encode(movq_ld(dst, mem));
%}
6711 
6712 // Load Effective Address
// Materialize the address of an indOffset8 operand into a pointer register
// with leaq (opcode 0x8D, wide REX encoding class).
instruct leaP8(rRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr 8" %}
  ins_cost(110); // XXX
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6723 
// Materialize the address of an indOffset32 operand into a pointer register
// with leaq (opcode 0x8D, wide REX encoding class).
instruct leaP32(rRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr 32" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6734 
6735 // instruct leaPIdx(rRegP dst, indIndex mem)
6736 // %{
6737 //   match(Set dst mem);
6738 
6739 //   ins_cost(110);
6740 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6741 //   opcode(0x8D);
6742 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6743 //   ins_pipe(ialu_reg_reg_fat);
6744 // %}
6745 
// Materialize the address of an indIndexOffset operand into a pointer
// register with leaq (opcode 0x8D, wide REX encoding class).
instruct leaPIdxOff(rRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6756 
// Materialize the address of an indIndexScale operand into a pointer
// register with leaq (opcode 0x8D, wide REX encoding class).
instruct leaPIdxScale(rRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6767 
// Materialize the address of an indIndexScaleOffset operand into a pointer
// register with leaq (opcode 0x8D, wide REX encoding class).
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6778 
// Materialize the address of an indPosIndexScaleOffset operand into a
// pointer register with leaq (opcode 0x8D, wide REX encoding class).
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) %{
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6789 
6790 // Load Effective Address which uses Narrow (32-bits) oop
// leaq over an indCompressedOopOffset operand.  Only selectable when
// compressed oops are enabled and the narrow-oop shift is non-zero.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6802 
// leaq over an indOffset8Narrow operand.  Only selectable when the
// narrow-oop shift is zero.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  ins_cost(110); // XXX
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6814 
// leaq over an indOffset32Narrow operand.  Only selectable when the
// narrow-oop shift is zero.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6826 
// leaq over an indIndexOffsetNarrow operand.  Only selectable when the
// narrow-oop shift is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6838 
// leaq over an indIndexScaleNarrow operand.  Only selectable when the
// narrow-oop shift is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6850 
// leaq over an indIndexScaleOffsetNarrow operand.  Only selectable when the
// narrow-oop shift is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6862 
// leaq over an indPosIndexScaleOffsetNarrow operand.  Only selectable when
// the narrow-oop shift is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) %{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst mem);

  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_cost(110);
  ins_pipe(ialu_reg_reg_fat);

  opcode(0x8D); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
%}
6874 
// Load an int constant into an int register via the load_immI encoding
// class.  No explicit ins_cost is given.
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int" %}
  ins_pipe(ialu_reg_fat); // XXX
  ins_encode(load_immI(dst, src));
%}
6883 
// Load int constant zero by xor-ing the destination with itself
// (opcode 0x33).  The flags register is declared killed.
instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  format %{ "xorl    $dst, $dst\t# int" %}
  ins_cost(50);
  ins_pipe(ialu_reg);

  opcode(0x33); // + rd
  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
%}
6895 
// Load an arbitrary long constant into a long register via the load_immL
// encoding class.
instruct loadConL(rRegL dst, immL src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# long" %}
  ins_cost(150);
  ins_pipe(ialu_reg);
  ins_encode(load_immL(dst, src));
%}
6905 
// Load long constant zero by xor-ing the destination with itself
// (opcode 0x33, non-wide REX encoding class, formatted as xorl).  The flags
// register is declared killed.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  format %{ "xorl    $dst, $dst\t# long" %}
  ins_cost(50);
  ins_pipe(ialu_reg); // XXX

  opcode(0x33); // + rd
  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
%}
6917 
// Load a long constant matching immUL32 (unsigned 32-bit) via the
// load_immUL32 encoding class; formatted as movl.
instruct loadConUL32(rRegL dst, immUL32 src) %{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_cost(60);
  ins_pipe(ialu_reg);
  ins_encode(load_immUL32(dst, src));
%}
6927 
// Load a long constant matching immL32 (32-bit) via the load_immL32
// encoding class; formatted as movq.
instruct loadConL32(rRegL dst, immL32 src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_cost(70);
  ins_pipe(ialu_reg);
  ins_encode(load_immL32(dst, src));
%}
6937 
// Load a pointer constant into a pointer register via the load_immP
// encoding class.  No explicit ins_cost is given.
instruct loadConP(rRegP dst, immP src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# ptr" %}
  ins_pipe(ialu_reg_fat); // XXX
  ins_encode(load_immP(dst, src));
%}
6946 
// Load the NULL pointer constant by xor-ing the destination with itself
// (opcode 0x33).  The flags register is declared killed.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  format %{ "xorl    $dst, $dst\t# ptr" %}
  ins_cost(50);
  ins_pipe(ialu_reg);

  opcode(0x33); // + rd
  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
%}
6958 
// Load a pointer constant matching immP31 (positive 32-bit) via the
// load_immP31 encoding class.  The flags register is declared killed.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_cost(60);
  ins_pipe(ialu_reg);
  ins_encode(load_immP31(dst, src));
%}
6969 
// Load a float constant into a float register via the load_conF encoding
// class; formatted as a movss from the constant's location.
instruct loadConF(regF dst, immF src) %{
  match(Set dst src);

  format %{ "movss   $dst, [$src]" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(load_conF(dst, src));
%}
6979 
// Load the compressed NULL pointer constant.  The destination is zeroed by
// xor-ing it with itself, so the flags register is declared killed.
// The format string names $dst as both operands to match the emitted
// instruction; the immN0 operand is never encoded.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6989 
// Load a non-NULL compressed pointer constant with set_narrow_oop.
// Reaching this encoding with a NULL constant is treated as a fatal error.
instruct loadConN(rRegN dst, immN src)
%{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_cost(125);
  ins_pipe(ialu_reg_fat); // XXX

  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != NULL) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
%}
7005 
// Load float constant 0.0 by xor-ing the XMM destination with itself
// (xorps, 0F 57).
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  ins_cost(100);
  ins_pipe(pipe_slow);

  opcode(0x0F, 0x57);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
%}
7016 
// Load a double constant into a double register via the load_conD encoding
// class.
// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD src) %{
  match(Set dst src);

  format %{ "movsd   $dst, [$src]" %}
  ins_cost(125);
  ins_pipe(pipe_slow);
  ins_encode(load_conD(dst, src));
%}
7027 
// Load double constant 0.0 by xor-ing the XMM destination with itself
// (xorpd, 66 0F 57; the 66 prefix is emitted ahead of the REX prefix).
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  ins_cost(100);
  ins_pipe(pipe_slow);

  opcode(0x66, 0x0F, 0x57);
  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
%}
7038 
// Reload an int from a stack slot into an int register (movl, opcode 0x8B).
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int stk" %}
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
%}
7049 
// Reload a long from a stack slot into a long register (movq, opcode 0x8B
// with the wide REX encoding class).
instruct loadSSL(rRegL dst, stackSlotL src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# long stk" %}
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
%}
7060 
// Reload a pointer from a stack slot into a pointer register (movq, opcode
// 0x8B with the wide REX encoding class).
instruct loadSSP(rRegP dst, stackSlotP src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# ptr stk" %}
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  opcode(0x8B); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
%}
7071 
// Reload a float from a stack slot into a float register (movss, F3 0F 10).
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);

  format %{ "movss   $dst, $src\t# float stk" %}
  ins_cost(125);
  ins_pipe(pipe_slow); // XXX

  opcode(0xF3, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
%}
7082 
// Reload a double from a stack slot into a double register.  The encoding
// calls movdbl on an rsp-relative address built from the slot displacement.
// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);

  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_cost(125);
  ins_pipe(pipe_slow); // XXX

  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
%}
7095 
7096 // Prefetch instructions.
7097 // Must be safe to execute with invalid address (cannot fault).
7098 
// PrefetchRead, selected when ReadPrefetchInstr == 3: opcode 0F 0D with
// ModRM reg field /0.
instruct prefetchr(memory mem)
%{
  predicate(ReadPrefetchInstr==3);
  match(PrefetchRead mem);

  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x0D); // 0F 0D /0
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
%}
7109 
// PrefetchRead, selected when ReadPrefetchInstr == 0: opcode 0F 18 with
// ModRM reg field /0.
instruct prefetchrNTA(memory mem)
%{
  predicate(ReadPrefetchInstr==0);
  match(PrefetchRead mem);

  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /0
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
%}
7120 
// PrefetchRead, selected when ReadPrefetchInstr == 1: opcode 0F 18 with
// ModRM reg field /1.
instruct prefetchrT0(memory mem)
%{
  predicate(ReadPrefetchInstr==1);
  match(PrefetchRead mem);

  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /1
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
%}
7131 
// PrefetchRead, selected when ReadPrefetchInstr == 2: opcode 0F 18 with
// ModRM reg field /3.
instruct prefetchrT2(memory mem)
%{
  predicate(ReadPrefetchInstr==2);
  match(PrefetchRead mem);

  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /3
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
%}
7142 
// PrefetchWrite, selected when AllocatePrefetchInstr == 3: opcode 0F 0D
// with ModRM reg field /1.
instruct prefetchw(memory mem)
%{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchWrite mem);

  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x0D); // 0F 0D /1
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
%}
7153 
// PrefetchWrite, selected when AllocatePrefetchInstr == 0: opcode 0F 18
// with ModRM reg field /0.
instruct prefetchwNTA(memory mem)
%{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);

  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /0
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
%}
7164 
// PrefetchWrite, selected when AllocatePrefetchInstr == 1: opcode 0F 18
// with ModRM reg field /1.
instruct prefetchwT0(memory mem)
%{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);

  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /1
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
%}
7175 
// PrefetchWrite, selected when AllocatePrefetchInstr == 2: opcode 0F 18
// with ModRM reg field /3.
instruct prefetchwT2(memory mem)
%{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);

  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
  ins_cost(125);
  ins_pipe(ialu_mem);

  opcode(0x0F, 0x18); // 0F 18 /3
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
%}
7186 
7187 //----------Store Instructions-------------------------------------------------
7188 
// StoreB: write the low byte of an int register to memory (movb, opcode
// 0x88, using the byte-register REX encoding class).
instruct storeB(memory mem, rRegI src) %{
  match(Set mem (StoreB mem src));

  format %{ "movb    $mem, $src\t# byte" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  opcode(0x88); // primary opcode, emitted by OpcP
  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
%}
7200 
// StoreC: write a 16-bit char/short from an int register to memory (movw:
// size prefix followed by opcode 0x89).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  format %{ "movw    $mem, $src\t# char/short" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
%}
7212 
// StoreI: write an int register to memory (movl, opcode 0x89).
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  format %{ "movl    $mem, $src\t# int" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
%}
7224 
// StoreL: write a long register to memory (movq, opcode 0x89 with the wide
// REX encoding class).
instruct storeL(memory mem, rRegL src) %{
  match(Set mem (StoreL mem src));

  format %{ "movq    $mem, $src\t# long" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg); // XXX

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
%}
7236 
// StoreP: write a pointer register (any_RegP operand) to memory (movq,
// opcode 0x89 with the wide REX encoding class).
instruct storeP(memory mem, any_RegP src) %{
  match(Set mem (StoreP mem src));

  format %{ "movq    $mem, $src\t# ptr" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
%}
7248 
// StoreP of the NULL pointer constant.  Only selectable with compressed
// oops and a NULL narrow-oop base; as the format string notes,
// R12_heapbase is then zero, so r12 is stored instead of an immediate.
instruct storeImmP0(memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreP mem zero));

  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
%}
7261 
// StoreP of an immP31 constant (NULL pointer, mark word, or other simple
// pointer constant) as a 32-bit immediate: C7 /0 with the wide REX encoding
// class.
instruct storeImmP(memory mem, immP31 src) %{
  match(Set mem (StoreP mem src));

  format %{ "movq    $mem, $src\t# ptr" %}
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
%}
7273 
// StoreN: write a compressed pointer register to memory with a 32-bit movl.
instruct storeN(memory mem, rRegN src) %{
  match(Set mem (StoreN mem src));

  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
%}
7286 
// StoreN of the compressed NULL constant.  Only selectable when the
// narrow-oop base is NULL; as the format string notes, R12_heapbase is then
// zero, so r12 is stored instead of an immediate.
instruct storeImmN0(memory mem, immN0 zero) %{
  predicate(Universe::narrow_oop_base() == NULL);
  match(Set mem (StoreN mem zero));

  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
%}
7299 
// StoreN of a compressed pointer constant.  A non-NULL constant is stored
// via set_narrow_oop; a NULL constant is stored as a 32-bit zero immediate.
instruct storeImmN(memory mem, immN src) %{
  match(Set mem (StoreN mem src));

  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_imm);

  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != NULL) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, (int32_t)0);
    }
  %}
%}
7316 
7317 // Store Integer Immediate
// StoreI of int constant zero.  Only selectable with compressed oops and a
// NULL narrow-oop base; as the format string notes, R12_heapbase is then
// zero, so r12 is stored instead of an immediate.
instruct storeImmI0(memory mem, immI0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreI mem zero));

  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
%}
7330 
// StoreI of an int constant as a 32-bit immediate (C7 /0).
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  format %{ "movl    $mem, $src\t# int" %}
  ins_cost(150);
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
%}
7341 
7342 // Store Long Immediate
// StoreL of long constant zero.  Only selectable with compressed oops and a
// NULL narrow-oop base; as the format string notes, R12_heapbase is then
// zero, so r12 is stored instead of an immediate.
instruct storeImmL0(memory mem, immL0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreL mem zero));

  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
%}
7355 
// StoreL of an immL32 constant as a 32-bit immediate: C7 /0 with the wide
// REX encoding class.
instruct storeImmL(memory mem, immL32 src) %{
  match(Set mem (StoreL mem src));

  format %{ "movq    $mem, $src\t# long" %}
  ins_cost(150);
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
%}
7366 
7367 // Store Short/Char Immediate
// StoreC of constant zero.  Only selectable with compressed oops and a NULL
// narrow-oop base; as the format string notes, R12_heapbase is then zero,
// so r12 is stored (movw) instead of an immediate.
instruct storeImmC0(memory mem, immI0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreC mem zero));

  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
%}
7380 
// StoreC of a 16-bit immediate, selectable only when UseStoreImmI16 is set.
// Same C7 /0 opcode as the 32-bit immediate store, preceded by the size
// prefix and followed by a 16-bit constant.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  format %{ "movw    $mem, $src\t# short/char" %}
  ins_cost(150);
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con16(src));
%}
7392 
7393 // Store Byte Immediate
// StoreB of constant zero.  Only selectable with compressed oops and a NULL
// narrow-oop base; as the format string notes, R12_heapbase is then zero,
// so the low byte of r12 is stored (movb) instead of an immediate.
// The format comment says "byte" because this is a StoreB / movb.
instruct storeImmB0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7406 
// StoreB of an immI8 constant as an immediate (C6 /0).
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  format %{ "movb    $mem, $src\t# byte" %}
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_imm);

  opcode(0xC6); // C6 /0
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
%}
7417 
// Store8B: aligned packed-byte vector store from an XMM register to memory
// using the movq_st encoding class.
instruct storeA8B(memory mem, regD src)
%{
  match(Set mem (Store8B mem src));

  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_cost(145);
  ins_pipe(pipe_slow);
  ins_encode(movq_st(mem, src));
%}
7426 
// Store4C: aligned packed char/short vector store from an XMM register to
// memory using the movq_st encoding class.
instruct storeA4C(memory mem, regD src)
%{
  match(Set mem (Store4C mem src));

  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_cost(145);
  ins_pipe(pipe_slow);
  ins_encode(movq_st(mem, src));
%}
7435 
// Store2I: aligned packed-int vector store from an XMM register to memory
// using the movq_st encoding class.
instruct storeA2I(memory mem, regD src)
%{
  match(Set mem (Store2I mem src));

  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_cost(145);
  ins_pipe(pipe_slow);
  ins_encode(movq_st(mem, src));
%}
7444 
7445 // Store CMS card-mark Immediate
// StoreCM (CMS card mark) of zero.  Only selectable with compressed oops
// and a NULL narrow-oop base; as the format string notes, R12_heapbase is
// then zero, so the low byte of r12 is stored instead of an immediate.
instruct storeImmCM0_reg(memory mem, immI0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreCM mem zero));

  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
%}
7458 
// StoreCM (CMS card mark) of zero as a byte immediate (C6 /0).
instruct storeImmCM0(memory mem, immI0 src) %{
  match(Set mem (StoreCM mem src));

  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_imm);

  opcode(0xC6); // C6 /0
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
%}
7469 
// Store2F: aligned packed-single-float vector store from an XMM register to
// memory using the movq_st encoding class.
instruct storeA2F(memory mem, regD src)
%{
  match(Set mem (Store2F mem src));

  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_cost(145);
  ins_pipe(pipe_slow);
  ins_encode(movq_st(mem, src));
%}
7478 
// StoreF: write a float register to memory (movss, F3 0F 11).
instruct storeF(memory mem, regF src) %{
  match(Set mem (StoreF mem src));

  format %{ "movss   $mem, $src\t# float" %}
  ins_cost(95); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
%}
7490 
// Store immediate Float value (it is faster than store from XMM register).
// This variant handles float 0.0 and is only selectable with compressed
// oops and a NULL narrow-oop base; as the format string notes, R12_heapbase
// is then zero, so r12 is stored with a 32-bit movl.
instruct storeF0(memory mem, immF0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreF mem zero));

  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_cost(25); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
%}
7504 
// StoreF of a float constant as a 32-bit integer immediate: C7 /0 followed
// by the constant emitted through Con32F_as_bits.
instruct storeF_imm(memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  format %{ "movl    $mem, $src\t# float" %}
  ins_cost(50);
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
%}
7515 
// StoreD: write a double register to memory (movsd, F2 0F 11).
instruct storeD(memory mem, regD src) %{
  match(Set mem (StoreD mem src));

  format %{ "movsd   $mem, $src\t# double" %}
  ins_cost(95); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
%}
7527 
// Store immediate double 0.0 (it is faster than store from XMM register).
// Selected when compressed oops are off or the narrow-oop base is non-NULL,
// i.e. the complement of storeD0's predicate.  Encoded as C7 /0 with the
// wide REX encoding class and the constant emitted through Con32F_as_bits.
instruct storeD0_imm(memory mem, immD0 src) %{
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  match(Set mem (StoreD mem src));

  format %{ "movq    $mem, $src\t# double 0." %}
  ins_cost(50);
  ins_pipe(ialu_mem_imm);

  opcode(0xC7); // C7 /0
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
%}
7540 
// StoreD of double 0.0.  Only selectable with compressed oops and a NULL
// narrow-oop base; as the format string notes, R12_heapbase is then zero,
// so r12 is stored with a 64-bit movq.
instruct storeD0(memory mem, immD0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreD mem zero));

  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_cost(25); // XXX
  ins_pipe(ialu_mem_reg);

  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
%}
7553 
// Spill an int register to a stack slot (movl, opcode 0x89).
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int stk" %}
  ins_cost(100);
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
%}
7564 
// Spill a long register to a stack slot (movq, opcode 0x89 with the wide
// REX encoding class).
instruct storeSSL(stackSlotL dst, rRegL src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# long stk" %}
  ins_cost(100);
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
%}
7575 
// Spill a pointer register to a stack slot (movq, opcode 0x89 with the wide
// REX encoding class).
instruct storeSSP(stackSlotP dst, rRegP src) %{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# ptr stk" %}
  ins_cost(100);
  ins_pipe(ialu_mem_reg);

  opcode(0x89); // primary opcode, emitted by OpcP
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
%}
7586 
// Spill a float register to a stack slot (movss, F3 0F 11).
instruct storeSSF(stackSlotF dst, regF src) %{
  match(Set dst src);

  format %{ "movss   $dst, $src\t# float stk" %}
  ins_cost(95); // XXX
  ins_pipe(pipe_slow); // XXX

  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
%}
7597 
// Move a double (XMM) register into a double stack slot (movsd, F2 0F 11).
// The F2 prefix byte is emitted before the REX prefix.
7598 instruct storeSSD(stackSlotD dst, regD src)
7599 %{
7600   match(Set dst src);
7601 
7602   ins_cost(95); // XXX
7603   format %{ "movsd   $dst, $src\t# double stk" %}
7604   opcode(0xF2, 0x0F, 0x11);
7605   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7606   ins_pipe(pipe_slow); // XXX
7607 %}
7608 
7609 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int register in place (bswapl, 0F C8+r).
// dst is both the input and the result of ReverseBytesI.
7610 instruct bytes_reverse_int(rRegI dst) %{
7611   match(Set dst (ReverseBytesI dst));
7612 
7613   format %{ "bswapl  $dst" %}
7614   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7615   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7616   ins_pipe( ialu_reg );
7617 %}
7618 
// Reverse the byte order of a long register in place (bswapq, 0F C8+r with a
// wide REX prefix). dst is both the input and the result of ReverseBytesL.
7619 instruct bytes_reverse_long(rRegL dst) %{
7620   match(Set dst (ReverseBytesL dst));
7621 
7622   format %{ "bswapq  $dst" %}
7623 
7624   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7625   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7626   ins_pipe( ialu_reg);
7627 %}
7628 
// Fused load + byte swap for an int: a load into $dst (opcode 0x8B) followed
// by a bswap of $dst (0F C8+r). Only $dst is written.
7629 instruct loadI_reversed(rRegI dst, memory src) %{
7630   match(Set dst (ReverseBytesI (LoadI src)));
7631 
7632   format %{ "bswap_movl $dst, $src" %}
7633   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7634   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7635   ins_pipe( ialu_reg_mem );
7636 %}
7637 
// Fused load + byte swap for a long: a 64-bit load into $dst (opcode 0x8B,
// wide REX) followed by a 64-bit bswap of $dst (0F C8+r, wide REX). Only $dst
// is written.
7638 instruct loadL_reversed(rRegL dst, memory src) %{
7639   match(Set dst (ReverseBytesL (LoadL src)));
7640 
7641   format %{ "bswap_movq $dst, $src" %}
7642   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7643   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7644   ins_pipe( ialu_reg_mem );
7645 %}
7646 
// Fused byte swap + store for an int.
// $src is an input of the matched tree and may have other users, so it must
// not be modified: swapping it in place would leave the reversed value in a
// register the allocator still treats as holding the original. The value is
// therefore copied to a TEMP register, swapped there and stored from there.
// TEMP keeps $tmp distinct from $src and from the registers used to address
// $dst.
7647 instruct storeI_reversed(memory dst, rRegI src, rRegI tmp) %{
7648   match(Set dst (StoreI dst (ReverseBytesI  src)));
       effect(TEMP tmp);
7649 
7650   format %{ "movl    $tmp, $src\t# store reversed int\n\t"
                 "bswapl  $tmp\n\t"
                 "movl    $dst, $tmp" %}
7652   ins_encode %{
         __ movl($tmp$$Register, $src$$Register);  // work on a copy, $src stays intact
         __ bswapl($tmp$$Register);
         __ movl($dst$$Address, $tmp$$Register);
       %}
7653   ins_pipe( ialu_mem_reg );
7654 %}
7655 
// Fused byte swap + store for a long.
// $src is an input of the matched tree and may have other users, so it must
// not be modified: swapping it in place would leave the reversed value in a
// register the allocator still treats as holding the original. The value is
// therefore copied to a TEMP register, swapped there and stored from there.
// TEMP keeps $tmp distinct from $src and from the registers used to address
// $dst.
7656 instruct storeL_reversed(memory dst, rRegL src, rRegL tmp) %{
7657   match(Set dst (StoreL dst (ReverseBytesL  src)));
       effect(TEMP tmp);
7658 
7659   format %{ "movq    $tmp, $src\t# store reversed long\n\t"
                 "bswapq  $tmp\n\t"
                 "movq    $dst, $tmp" %}
7661   ins_encode %{
         __ movq($tmp$$Register, $src$$Register);  // work on a copy, $src stays intact
         __ bswapq($tmp$$Register);
         __ movq($dst$$Address, $tmp$$Register);
       %}
7662   ins_pipe( ialu_mem_reg );
7663 %}
7664 
7665 
7666 //---------- Zeros Count Instructions ------------------------------------------
7667 
// Count leading zeros of an int with a single lzcntl. Selected only when
// UseCountLeadingZerosInstruction is set; flags are declared killed.
7668 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7669   predicate(UseCountLeadingZerosInstruction);
7670   match(Set dst (CountLeadingZerosI src));
7671   effect(KILL cr);
7672 
7673   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7674   ins_encode %{
7675     __ lzcntl($dst$$Register, $src$$Register);
7676   %}
7677   ins_pipe(ialu_reg);
7678 %}
7679 
// Count leading zeros of an int without lzcnt (predicate is the negation of
// the lzcnt rule's). Computes 31 - tmp, where tmp is the bsrl result; when the
// not-zero branch is not taken (zero source), tmp is forced to -1 so the
// result becomes 32. dst and tmp are both TEMP; the sequence needs them in
// different registers because dst is loaded with 31 before tmp is subtracted.
7680 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr) %{
7681   predicate(!UseCountLeadingZerosInstruction);
7682   match(Set dst (CountLeadingZerosI src));
7683   effect(TEMP dst, TEMP tmp, KILL cr);
7684 
7685   format %{ "bsrl    $tmp, $src\t# count leading zeros (int)\n\t"
7686             "jnz     skip\n\t"
7687             "movl    $tmp, -1\n"
7688       "skip:\n\t"
7689             "movl    $dst, 31\n\t"
7690             "subl    $dst, $tmp" %}
7691   ins_encode %{
7692     Label skip;
7693     __ bsrl($tmp$$Register, $src$$Register);
7694     __ jccb(Assembler::notZero, skip);
7695     __ movl($tmp$$Register, -1);
7696     __ bind(skip);
7697     __ movl($dst$$Register, BitsPerInt - 1);
7698     __ subl($dst$$Register, $tmp$$Register);
7699   %}
7700   ins_pipe(ialu_reg);
7701 %}
7702 
// Count leading zeros of a long with a single lzcntq; the result is an int
// register. Selected only when UseCountLeadingZerosInstruction is set; flags
// are declared killed.
7703 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7704   predicate(UseCountLeadingZerosInstruction);
7705   match(Set dst (CountLeadingZerosL src));
7706   effect(KILL cr);
7707 
7708   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7709   ins_encode %{
7710     __ lzcntq($dst$$Register, $src$$Register);
7711   %}
7712   ins_pipe(ialu_reg);
7713 %}
7714 
// Count leading zeros of a long without lzcnt. Computes 63 - tmp, where tmp is
// the bsrq result; when the not-zero branch is not taken (zero source), tmp is
// forced to -1 so the result becomes 64. dst and tmp are both TEMP; the
// sequence needs them in different registers because dst is loaded with 63
// before tmp is subtracted.
7715 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rRegI tmp, rFlagsReg cr) %{
7716   predicate(!UseCountLeadingZerosInstruction);
7717   match(Set dst (CountLeadingZerosL src));
7718   effect(TEMP dst, TEMP tmp, KILL cr);
7719 
7720   format %{ "bsrq    $tmp, $src\t# count leading zeros (long)\n\t"
7721             "jnz     skip\n\t"
7722             "movl    $tmp, -1\n"
7723       "skip:\n\t"
7724             "movl    $dst, 63\n\t"
7725             "subl    $dst, $tmp" %}
7726   ins_encode %{
7727     Label skip;
7728     __ bsrq($tmp$$Register, $src$$Register);
7729     __ jccb(Assembler::notZero, skip);
7730     __ movl($tmp$$Register, -1);
7731     __ bind(skip);
7732     __ movl($dst$$Register, BitsPerLong - 1);
7733     __ subl($dst$$Register, $tmp$$Register);
7734   %}
7735   ins_pipe(ialu_reg);
7736 %}
7737 
// Count trailing zeros of an int with bsfl. When the not-zero branch is not
// taken (zero source), the result is set explicitly to BitsPerInt (32 per the
// format string). Flags are declared killed.
7738 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7739   match(Set dst (CountTrailingZerosI src));
7740   effect(KILL cr);
7741 
7742   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7743             "jnz     done\n\t"
7744             "movl    $dst, 32\n"
7745       "done:" %}
7746   ins_encode %{
7747     Label done;
7748     __ bsfl($dst$$Register, $src$$Register);
7749     __ jccb(Assembler::notZero, done);
7750     __ movl($dst$$Register, BitsPerInt);
7751     __ bind(done);
7752   %}
7753   ins_pipe(ialu_reg);
7754 %}
7755 
// Count trailing zeros of a long with bsfq; the result is an int register.
// When the not-zero branch is not taken (zero source), the result is set
// explicitly to BitsPerLong (64 per the format string). Flags are declared
// killed.
7756 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7757   match(Set dst (CountTrailingZerosL src));
7758   effect(KILL cr);
7759 
7760   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7761             "jnz     done\n\t"
7762             "movl    $dst, 64\n"
7763       "done:" %}
7764   ins_encode %{
7765     Label done;
7766     __ bsfq($dst$$Register, $src$$Register);
7767     __ jccb(Assembler::notZero, done);
7768     __ movl($dst$$Register, BitsPerLong);
7769     __ bind(done);
7770   %}
7771   ins_pipe(ialu_reg);
7772 %}
7773 
7774 
7775 //---------- Population Count Instructions -------------------------------------
7776 
// Population count of an int register (popcntl). Selected only when
// UsePopCountInstruction is set.
// NOTE(review): no flags effect is declared on this rule - confirm that is
// intended for popcnt.
7777 instruct popCountI(rRegI dst, rRegI src) %{
7778   predicate(UsePopCountInstruction);
7779   match(Set dst (PopCountI src));
7780 
7781   format %{ "popcnt  $dst, $src" %}
7782   ins_encode %{
7783     __ popcntl($dst$$Register, $src$$Register);
7784   %}
7785   ins_pipe(ialu_reg);
7786 %}
7787 
// Population count of an int loaded directly from memory (popcntl with a
// memory operand); folds the LoadI into the instruction. Selected only when
// UsePopCountInstruction is set.
7788 instruct popCountI_mem(rRegI dst, memory mem) %{
7789   predicate(UsePopCountInstruction);
7790   match(Set dst (PopCountI (LoadI mem)));
7791 
7792   format %{ "popcnt  $dst, $mem" %}
7793   ins_encode %{
7794     __ popcntl($dst$$Register, $mem$$Address);
7795   %}
7796   ins_pipe(ialu_reg);
7797 %}
7798 
7799 // Note: Long.bitCount(long) returns an int.
// Population count of a long register (popcntq) into an int result register.
// Selected only when UsePopCountInstruction is set.
7800 instruct popCountL(rRegI dst, rRegL src) %{
7801   predicate(UsePopCountInstruction);
7802   match(Set dst (PopCountL src));
7803 
7804   format %{ "popcnt  $dst, $src" %}
7805   ins_encode %{
7806     __ popcntq($dst$$Register, $src$$Register);
7807   %}
7808   ins_pipe(ialu_reg);
7809 %}
7810 
7811 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded directly from memory (popcntq with a
// memory operand) into an int result register; folds the LoadL into the
// instruction. Selected only when UsePopCountInstruction is set.
7812 instruct popCountL_mem(rRegI dst, memory mem) %{
7813   predicate(UsePopCountInstruction);
7814   match(Set dst (PopCountL (LoadL mem)));
7815 
7816   format %{ "popcnt  $dst, $mem" %}
7817   ins_encode %{
7818     __ popcntq($dst$$Register, $mem$$Address);
7819   %}
7820   ins_pipe(ialu_reg);
7821 %}
7822 
7823 
7824 //----------MemBar Instructions-----------------------------------------------
7825 // Memory barrier flavors
7826 
// MemBarAcquire: emits no code (size 0, empty encoding, zero cost).
7827 instruct membar_acquire()
7828 %{
7829   match(MemBarAcquire);
7830   ins_cost(0);
7831 
7832   size(0);
7833   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7834   ins_encode();
7835   ins_pipe(empty);
7836 %}
7837 
// MemBarAcquire for which Matcher::prior_fast_lock(n) holds: also emits no
// code. The format string attributes this to the CMPXCHG already issued by the
// preceding FastLock.
7838 instruct membar_acquire_lock()
7839 %{
7840   match(MemBarAcquire);
7841   predicate(Matcher::prior_fast_lock(n));
7842   ins_cost(0);
7843 
7844   size(0);
7845   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7846   ins_encode();
7847   ins_pipe(empty);
7848 %}
7849 
// MemBarRelease: emits no code (size 0, empty encoding, zero cost).
7850 instruct membar_release()
7851 %{
7852   match(MemBarRelease);
7853   ins_cost(0);
7854 
7855   size(0);
7856   format %{ "MEMBAR-release ! (empty encoding)" %}
7857   ins_encode();
7858   ins_pipe(empty);
7859 %}
7860 
// MemBarRelease for which Matcher::post_fast_unlock(n) holds: also emits no
// code. The format string attributes this to the FastUnlock that follows.
7861 instruct membar_release_lock()
7862 %{
7863   match(MemBarRelease);
7864   predicate(Matcher::post_fast_unlock(n));
7865   ins_cost(0);
7866 
7867   size(0);
7868   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7869   ins_encode();
7870   ins_pipe(empty);
7871 %}
7872 
// MemBarVolatile: emits a StoreLoad barrier through the macro assembler and
// declares flags killed. Per the format template, the barrier is shown as
// "lock addl [rsp + #0], 0" on multiprocessor systems and as an empty encoding
// otherwise. The high cost (400) makes unnecessary_membar_volatile preferable
// whenever its predicate holds.
7873 instruct membar_volatile(rFlagsReg cr) %{
7874   match(MemBarVolatile);
7875   effect(KILL cr);
7876   ins_cost(400);
7877 
7878   format %{ 
7879     $$template
7880     if (os::is_MP()) {
7881       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7882     } else {
7883       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7884     }
7885   %}
7886   ins_encode %{
7887     __ membar(Assembler::StoreLoad);
7888   %}
7889   ins_pipe(pipe_slow);
7890 %}
7891 
// MemBarVolatile for which Matcher::post_store_load_barrier(n) holds: emits no
// code (size 0, zero cost), so it wins over membar_volatile when applicable.
7892 instruct unnecessary_membar_volatile()
7893 %{
7894   match(MemBarVolatile);
7895   predicate(Matcher::post_store_load_barrier(n));
7896   ins_cost(0);
7897 
7898   size(0);
7899   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7900   ins_encode();
7901   ins_pipe(empty);
7902 %}
7903 
7904 //----------Move Instructions--------------------------------------------------
7905 
// Reinterpret a long as a pointer: a 64-bit register copy (enc_copy_wide).
7906 instruct castX2P(rRegP dst, rRegL src)
7907 %{
7908   match(Set dst (CastX2P src));
7909 
7910   format %{ "movq    $dst, $src\t# long->ptr" %}
7911   ins_encode(enc_copy_wide(dst, src));
7912   ins_pipe(ialu_reg_reg); // XXX
7913 %}
7914 
// Reinterpret a pointer as a long: a 64-bit register copy (enc_copy_wide).
7915 instruct castP2X(rRegL dst, rRegP src)
7916 %{
7917   match(Set dst (CastP2X src));
7918 
7919   format %{ "movq    $dst, $src\t# ptr -> long" %}
7920   ins_encode(enc_copy_wide(dst, src));
7921   ins_pipe(ialu_reg_reg); // XXX
7922 %}
7923 
7924 
7925 // Convert oop pointer into compressed form
// General case: selected when the node's pointer type is not NotNull. src is
// first copied into dst if they are different registers, then dst is encoded
// in place by the macro-assembler helper. Flags are declared killed.
7926 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7927   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7928   match(Set dst (EncodeP src));
7929   effect(KILL cr);
7930   format %{ "encode_heap_oop $dst,$src" %}
7931   ins_encode %{
7932     Register s = $src$$Register;
7933     Register d = $dst$$Register;
7934     if (s != d) {
7935       __ movq(d, s);
7936     }
7937     __ encode_heap_oop(d);
7938   %}
7939   ins_pipe(ialu_reg_long);
7940 %}
7941 
// Compress an oop whose type is NotNull, using the two-register not-null
// helper of the macro assembler. Flags are declared killed.
7942 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7943   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7944   match(Set dst (EncodeP src));
7945   effect(KILL cr);
7946   format %{ "encode_heap_oop_not_null $dst,$src" %}
7947   ins_encode %{
7948     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7949   %}
7950   ins_pipe(ialu_reg_long);
7951 %}
7952 
// Decompress a narrow oop in the general case: selected when the node's oop
// type is neither NotNull nor Constant. src is first copied into dst if they
// are different registers, then dst is decoded in place. Flags are declared
// killed.
7953 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7954   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7955             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7956   match(Set dst (DecodeN src));
7957   effect(KILL cr);
7958   format %{ "decode_heap_oop $dst,$src" %}
7959   ins_encode %{
7960     Register s = $src$$Register;
7961     Register d = $dst$$Register;
7962     if (s != d) {
7963       __ movq(d, s);
7964     }
7965     __ decode_heap_oop(d);
7966   %}
7967   ins_pipe(ialu_reg_long);
7968 %}
7969 
// Decompress a narrow oop whose type is NotNull or Constant. Uses the
// two-register helper when src and dst are different registers and the
// in-place helper otherwise.
// Flags are declared killed, like in the other encode/decode rules: which
// instructions the macro-assembler helper emits is not visible here, so the
// rule conservatively assumes it may change the condition codes.
7970 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7971   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7972             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7973   match(Set dst (DecodeN src));
       effect(KILL cr);
7974   format %{ "decode_heap_oop_not_null $dst,$src" %}
7975   ins_encode %{
7976     Register s = $src$$Register;
7977     Register d = $dst$$Register;
7978     if (s != d) {
7979       __ decode_heap_oop_not_null(d, s);
7980     } else {
7981       __ decode_heap_oop_not_null(d);
7982     }
7983   %}
7984   ins_pipe(ialu_reg_long);
7985 %}
7986 
7987 
7988 //----------Jump and Conditional Move Instructions-----------------------------
7989 // Jump
7990 // dummy instruction for generating temp registers
// Table jump indexed by (switch_val << shift). predicate(false) means the
// matcher never selects this rule. dest is a TEMP register that, per the
// format string, receives the table base address.
7991 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7992   match(Jump (LShiftL switch_val shift));
7993   ins_cost(350);
7994   predicate(false);
7995   effect(TEMP dest);
7996 
7997   format %{ "leaq    $dest, table_base\n\t"
7998             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7999   ins_encode(jump_enc_offset(switch_val, shift, dest));
8000   ins_pipe(pipe_jmp);
8001   ins_pc_relative(1);
8002 %}
8003 
// Table jump indexed by (switch_val << shift) + offset. dest is a TEMP
// register that, per the format string, receives the table base address.
8004 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8005   match(Jump (AddL (LShiftL switch_val shift) offset));
8006   ins_cost(350);
8007   effect(TEMP dest);
8008 
8009   format %{ "leaq    $dest, table_base\n\t"
8010             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
8011   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
8012   ins_pipe(pipe_jmp);
8013   ins_pc_relative(1);
8014 %}
8015 
// Table jump indexed directly by switch_val. dest is a TEMP register that,
// per the format string, receives the table base address.
8016 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
8017   match(Jump switch_val);
8018   ins_cost(350);
8019   effect(TEMP dest);
8020 
8021   format %{ "leaq    $dest, table_base\n\t"
8022             "jmp     [$dest + $switch_val]\n\t" %}
8023   ins_encode(jump_enc(switch_val, dest));
8024   ins_pipe(pipe_jmp);
8025   ins_pc_relative(1);
8026 %}
8027 
8028 // Conditional move: int, register source, signed condition (cmpOp/rFlagsReg).
// dst keeps its value unless the condition holds; 0F 40 is the base opcode
// passed to enc_cmov together with the condition operand.
8029 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
8030 %{
8031   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8032 
8033   ins_cost(200); // XXX
8034   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8035   opcode(0x0F, 0x40);
8036   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8037   ins_pipe(pipe_cmov_reg);
8038 %}
8039 
// Conditional move: int, register source, unsigned condition
// (cmpOpU/rFlagsRegU). Same encoding scheme as cmovI_reg.
8040 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
8041   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8042 
8043   ins_cost(200); // XXX
8044   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8045   opcode(0x0F, 0x40);
8046   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8047   ins_pipe(pipe_cmov_reg);
8048 %}
8049 
// Conditional move: int, register source, for cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands to cmovI_regU.
8050 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
8051   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8052   ins_cost(200);
8053   expand %{
8054     cmovI_regU(cop, cr, dst, src);
8055   %}
8056 %}
8057 
8058 // Conditional move: int, memory source (folds the LoadI), signed condition.
8059 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
8060   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8061 
8062   ins_cost(250); // XXX
8063   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8064   opcode(0x0F, 0x40);
8065   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
8066   ins_pipe(pipe_cmov_mem);
8067 %}
8068 
8069 // Conditional move: int, memory source (folds the LoadI), unsigned condition.
8070 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
8071 %{
8072   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8073 
8074   ins_cost(250); // XXX
8075   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8076   opcode(0x0F, 0x40);
8077   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
8078   ins_pipe(pipe_cmov_mem);
8079 %}
8080 
// Conditional move: int, memory source, for cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands to cmovI_memU.
8081 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
8082   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8083   ins_cost(250);
8084   expand %{
8085     cmovI_memU(cop, cr, dst, src);
8086   %}
8087 %}
8088 
8089 // Conditional move: compressed pointer, register source, signed condition.
// Encoded as a 32-bit cmovl (non-wide REX prefix).
8090 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
8091 %{
8092   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8093 
8094   ins_cost(200); // XXX
8095   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
8096   opcode(0x0F, 0x40);
8097   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8098   ins_pipe(pipe_cmov_reg);
8099 %}
8100 
8101 // Conditional move: compressed pointer, register source, unsigned condition.
// Encoded as a 32-bit cmovl (non-wide REX prefix).
8102 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
8103 %{
8104   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8105 
8106   ins_cost(200); // XXX
8107   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
8108   opcode(0x0F, 0x40);
8109   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8110   ins_pipe(pipe_cmov_reg);
8111 %}
8112 
// Conditional move: compressed pointer, register source, for
// cmpOpUCF/rFlagsRegUCF operands. Expands to cmovN_regU.
8113 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
8114   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8115   ins_cost(200);
8116   expand %{
8117     cmovN_regU(cop, cr, dst, src);
8118   %}
8119 %}
8120 
8121 // Conditional move: pointer, register source, signed condition.
// Encoded as a 64-bit cmovq (wide REX prefix).
8122 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
8123 %{
8124   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8125 
8126   ins_cost(200); // XXX
8127   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
8128   opcode(0x0F, 0x40);
8129   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8130   ins_pipe(pipe_cmov_reg);  // XXX
8131 %}
8132 
8133 // Conditional move: pointer, register source, unsigned condition.
// Encoded as a 64-bit cmovq (wide REX prefix).
8134 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
8135 %{
8136   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8137 
8138   ins_cost(200); // XXX
8139   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
8140   opcode(0x0F, 0x40);
8141   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8142   ins_pipe(pipe_cmov_reg); // XXX
8143 %}
8144 
// Conditional move: pointer, register source, for cmpOpUCF/rFlagsRegUCF
// operands. Expands to cmovP_regU.
8145 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
8146   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8147   ins_cost(200);
8148   expand %{
8149     cmovP_regU(cop, cr, dst, src);
8150   %}
8151 %}
8152 
8153 // DISABLED: Requires the ADLC to emit a bottom_type call that
8154 // correctly meets the two pointer arguments; one is an incoming
8155 // register but the other is a memory operand.  ALSO appears to
8156 // be buggy with implicit null checks.
8157 //
8158 //// Conditional move
8159 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
8160 //%{
8161 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8162 //  ins_cost(250);
8163 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8164 //  opcode(0x0F,0x40);
8165 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8166 //  ins_pipe( pipe_cmov_mem );
8167 //%}
8168 //
8169 //// Conditional move
8170 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
8171 //%{
8172 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8173 //  ins_cost(250);
8174 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8175 //  opcode(0x0F,0x40);
8176 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8177 //  ins_pipe( pipe_cmov_mem );
8178 //%}
8179 
// Conditional move: long, register source, signed condition (64-bit cmovq,
// wide REX prefix).
8180 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
8181 %{
8182   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8183 
8184   ins_cost(200); // XXX
8185   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8186   opcode(0x0F, 0x40);
8187   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8188   ins_pipe(pipe_cmov_reg);  // XXX
8189 %}
8190 
// Conditional move: long, memory source (folds the LoadL), signed condition.
// NOTE(review): cost is 200, the same as the register form, whereas the int
// memory forms use 250 - confirm whether that is intended.
8191 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
8192 %{
8193   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8194 
8195   ins_cost(200); // XXX
8196   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8197   opcode(0x0F, 0x40);
8198   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8199   ins_pipe(pipe_cmov_mem);  // XXX
8200 %}
8201 
// Conditional move: long, register source, unsigned condition (64-bit cmovq,
// wide REX prefix).
8202 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
8203 %{
8204   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8205 
8206   ins_cost(200); // XXX
8207   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8208   opcode(0x0F, 0x40);
8209   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8210   ins_pipe(pipe_cmov_reg); // XXX
8211 %}
8212 
// Conditional move: long, register source, for cmpOpUCF/rFlagsRegUCF operands.
// Expands to cmovL_regU.
8213 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
8214   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8215   ins_cost(200);
8216   expand %{
8217     cmovL_regU(cop, cr, dst, src);
8218   %}
8219 %}
8220 
// Conditional move: long, memory source (folds the LoadL), unsigned condition.
8221 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
8222 %{
8223   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8224 
8225   ins_cost(200); // XXX
8226   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8227   opcode(0x0F, 0x40);
8228   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8229   ins_pipe(pipe_cmov_mem); // XXX
8230 %}
8231 
// Conditional move: long, memory source, for cmpOpUCF/rFlagsRegUCF operands.
// Expands to cmovL_memU.
8232 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
8233   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8234   ins_cost(200);
8235   expand %{
8236     cmovL_memU(cop, cr, dst, src);
8237   %}
8238 %}
8239 
// Conditional move: float, signed condition. As the format string shows, this
// is a branch on the negated condition around a movss rather than a cmov
// instruction; the bytes come from enc_cmovf_branch.
8240 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
8241 %{
8242   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8243 
8244   ins_cost(200); // XXX
8245   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8246             "movss     $dst, $src\n"
8247     "skip:" %}
8248   ins_encode(enc_cmovf_branch(cop, dst, src));
8249   ins_pipe(pipe_slow);
8250 %}
8251 
8252 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8253 // %{
8254 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8255 
8256 //   ins_cost(200); // XXX
8257 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8258 //             "movss     $dst, $src\n"
8259 //     "skip:" %}
8260 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8261 //   ins_pipe(pipe_slow);
8262 // %}
8263 
// Conditional move: float, unsigned condition. As the format string shows,
// this is a branch on the negated condition around a movss; the bytes come
// from enc_cmovf_branch.
8264 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
8265 %{
8266   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8267 
8268   ins_cost(200); // XXX
8269   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8270             "movss     $dst, $src\n"
8271     "skip:" %}
8272   ins_encode(enc_cmovf_branch(cop, dst, src));
8273   ins_pipe(pipe_slow);
8274 %}
8275 
// Conditional move: float, for cmpOpUCF/rFlagsRegUCF operands. Expands to
// cmovF_regU.
8276 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
8277   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8278   ins_cost(200);
8279   expand %{
8280     cmovF_regU(cop, cr, dst, src);
8281   %}
8282 %}
8283 
// Conditional move: double, signed condition. As the format string shows, this
// is a branch on the negated condition around a movsd; the bytes come from
// enc_cmovd_branch.
8284 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
8285 %{
8286   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8287 
8288   ins_cost(200); // XXX
8289   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8290             "movsd     $dst, $src\n"
8291     "skip:" %}
8292   ins_encode(enc_cmovd_branch(cop, dst, src));
8293   ins_pipe(pipe_slow);
8294 %}
8295 
// Conditional move: double, unsigned condition. As the format string shows,
// this is a branch on the negated condition around a movsd; the bytes come
// from enc_cmovd_branch.
8296 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
8297 %{
8298   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8299 
8300   ins_cost(200); // XXX
8301   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8302             "movsd     $dst, $src\n"
8303     "skip:" %}
8304   ins_encode(enc_cmovd_branch(cop, dst, src));
8305   ins_pipe(pipe_slow);
8306 %}
8307 
// Conditional move: double, for cmpOpUCF/rFlagsRegUCF operands. Expands to
// cmovD_regU.
8308 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
8309   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8310   ins_cost(200);
8311   expand %{
8312     cmovD_regU(cop, cr, dst, src);
8313   %}
8314 %}
8315 
8316 //----------Arithmetic Instructions--------------------------------------------
8317 //----------Addition Instructions----------------------------------------------
8318 
// Int add, register += register (addl, opcode 0x03). Two-address form: dst is
// both an input and the result. Flags are declared killed.
8319 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8320 %{
8321   match(Set dst (AddI dst src));
8322   effect(KILL cr);
8323 
8324   format %{ "addl    $dst, $src\t# int" %}
8325   opcode(0x03);
8326   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8327   ins_pipe(ialu_reg_reg);
8328 %}
8329 
// Int add, register += immediate (addl, 81 /0 base opcode). The enc classes
// OpcSErm/Con8or32 are named for choosing between an 8-bit and a 32-bit
// immediate. Flags are declared killed.
8330 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8331 %{
8332   match(Set dst (AddI dst src));
8333   effect(KILL cr);
8334 
8335   format %{ "addl    $dst, $src\t# int" %}
8336   opcode(0x81, 0x00); /* /0 id */
8337   ins_encode(OpcSErm(dst, src), Con8or32(src));
8338   ins_pipe( ialu_reg );
8339 %}
8340 
// Int add, register += memory (addl, opcode 0x03); folds the LoadI into the
// add. Flags are declared killed.
8341 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8342 %{
8343   match(Set dst (AddI dst (LoadI src)));
8344   effect(KILL cr);
8345 
8346   ins_cost(125); // XXX
8347   format %{ "addl    $dst, $src\t# int" %}
8348   opcode(0x03);
8349   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8350   ins_pipe(ialu_reg_mem);
8351 %}
8352 
// Int read-modify-write add, memory += register (addl, 01 /r). Matches a
// StoreI of (LoadI dst) + src back to the same address. Flags are declared
// killed.
8353 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8354 %{
8355   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8356   effect(KILL cr);
8357 
8358   ins_cost(150); // XXX
8359   format %{ "addl    $dst, $src\t# int" %}
8360   opcode(0x01); /* Opcode 01 /r */
8361   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8362   ins_pipe(ialu_mem_reg);
8363 %}
8364 
// Int read-modify-write add, memory += immediate (addl, 81 /0 base opcode).
// Matches a StoreI of (LoadI dst) + src back to the same address. Flags are
// declared killed.
8365 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
8366 %{
8367   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8368   effect(KILL cr);
8369 
8370   ins_cost(125); // XXX
8371   format %{ "addl    $dst, $src\t# int" %}
8372   opcode(0x81); /* Opcode 81 /0 id */
8373   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8374   ins_pipe(ialu_mem_imm);
8375 %}
8376 
// Int increment of a register (incl, FF /0): AddI with the constant matched
// by immI1. Selected only when UseIncDec is set. Flags are declared killed.
8377 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8378 %{
8379   predicate(UseIncDec);
8380   match(Set dst (AddI dst src));
8381   effect(KILL cr);
8382 
8383   format %{ "incl    $dst\t# int" %}
8384   opcode(0xFF, 0x00); // FF /0
8385   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8386   ins_pipe(ialu_reg);
8387 %}
8388 
// Int increment of a memory location (incl, FF /0): read-modify-write AddI
// with the constant matched by immI1. Selected only when UseIncDec is set.
// Flags are declared killed.
8389 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8390 %{
8391   predicate(UseIncDec);
8392   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8393   effect(KILL cr);
8394 
8395   ins_cost(125); // XXX
8396   format %{ "incl    $dst\t# int" %}
8397   opcode(0xFF); /* Opcode FF /0 */
8398   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8399   ins_pipe(ialu_mem_imm);
8400 %}
8401 
8402 // XXX why does that use AddI
// Int decrement of a register (decl, FF /1). The rule matches AddI with the
// immI_M1 operand, i.e. the decrement is expressed as an add of a constant.
// Selected only when UseIncDec is set. Flags are declared killed.
8403 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8404 %{
8405   predicate(UseIncDec);
8406   match(Set dst (AddI dst src));
8407   effect(KILL cr);
8408 
8409   format %{ "decl    $dst\t# int" %}
8410   opcode(0xFF, 0x01); // FF /1
8411   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8412   ins_pipe(ialu_reg);
8413 %}
8414 
8415 // XXX why does that use AddI
// Int decrement of a memory location (decl, FF /1). The rule matches a
// read-modify-write AddI with the immI_M1 operand, i.e. the decrement is
// expressed as an add of a constant. Selected only when UseIncDec is set.
// Flags are declared killed.
8416 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8417 %{
8418   predicate(UseIncDec);
8419   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8420   effect(KILL cr);
8421 
8422   ins_cost(125); // XXX
8423   format %{ "decl    $dst\t# int" %}
8424   opcode(0xFF); /* Opcode FF /1 */
8425   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8426   ins_pipe(ialu_mem_imm);
8427 %}
8428 
// Three-operand int add of register + immediate via lea (0x8D /r) with a 0x67
// address-size prefix ("addr32" in the format). Unlike the addl rules, dst is
// a separate operand from src0 and no flags effect is declared. The cost of
// 110 is set explicitly.
8429 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8430 %{
8431   match(Set dst (AddI src0 src1));
8432 
8433   ins_cost(110);
8434   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8435   opcode(0x8D); /* 0x8D /r */
8436   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8437   ins_pipe(ialu_reg_reg);
8438 %}
8439 
// Long add, register += register (addq, opcode 0x03 with wide REX).
// Two-address form; flags are declared killed.
8440 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8441 %{
8442   match(Set dst (AddL dst src));
8443   effect(KILL cr);
8444 
8445   format %{ "addq    $dst, $src\t# long" %}
8446   opcode(0x03);
8447   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8448   ins_pipe(ialu_reg_reg);
8449 %}
8450 
// Long add, register += immediate (addq, 81 /0 base opcode). The constant is
// restricted to the immL32 operand. Flags are declared killed.
8451 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8452 %{
8453   match(Set dst (AddL dst src));
8454   effect(KILL cr);
8455 
8456   format %{ "addq    $dst, $src\t# long" %}
8457   opcode(0x81, 0x00); /* /0 id */
8458   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8459   ins_pipe( ialu_reg );
8460 %}
8461 
// Long add, register += memory (addq, opcode 0x03 with wide REX); folds the
// LoadL into the add. Flags are declared killed.
8462 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8463 %{
8464   match(Set dst (AddL dst (LoadL src)));
8465   effect(KILL cr);
8466 
8467   ins_cost(125); // XXX
8468   format %{ "addq    $dst, $src\t# long" %}
8469   opcode(0x03);
8470   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8471   ins_pipe(ialu_reg_mem);
8472 %}
8473 
// Long read-modify-write add, memory += register (addq, 01 /r with wide REX).
// Matches a StoreL of (LoadL dst) + src back to the same address. Flags are
// declared killed.
8474 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8475 %{
8476   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8477   effect(KILL cr);
8478 
8479   ins_cost(150); // XXX
8480   format %{ "addq    $dst, $src\t# long" %}
8481   opcode(0x01); /* Opcode 01 /r */
8482   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8483   ins_pipe(ialu_mem_reg);
8484 %}
8485 
// Long read-modify-write add, memory += immediate (addq, 81 /0 base opcode
// with wide REX). The constant is restricted to the immL32 operand. Flags are
// declared killed.
8486 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8487 %{
8488   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8489   effect(KILL cr);
8490 
8491   ins_cost(125); // XXX
8492   format %{ "addq    $dst, $src\t# long" %}
8493   opcode(0x81); /* Opcode 81 /0 id */
8494   ins_encode(REX_mem_wide(dst),
8495              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8496   ins_pipe(ialu_mem_imm);
8497 %}
8498 
// Long increment of a register (incq, FF /0 with wide REX): AddL with the
// constant matched by immL1. Selected only when UseIncDec is set. Flags are
// declared killed.
// dst must be a long register operand (rRegL, not rRegI): the matched node is
// AddL and incq operates on all 64 bits, mirroring decL_rReg.
8499 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8500 %{
8501   predicate(UseIncDec);
8502   match(Set dst (AddL dst src));
8503   effect(KILL cr);
8504 
8505   format %{ "incq    $dst\t# long" %}
8506   opcode(0xFF, 0x00); // FF /0
8507   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8508   ins_pipe(ialu_reg);
8509 %}
8510 
// Long increment of a memory location (incq, FF /0 with wide REX):
// read-modify-write AddL with the constant matched by immL1. Selected only
// when UseIncDec is set. Flags are declared killed.
8511 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8512 %{
8513   predicate(UseIncDec);
8514   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8515   effect(KILL cr);
8516 
8517   ins_cost(125); // XXX
8518   format %{ "incq    $dst\t# long" %}
8519   opcode(0xFF); /* Opcode FF /0 */
8520   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8521   ins_pipe(ialu_mem_imm);
8522 %}
8523 
8524 // XXX why does that use AddL
// Long decrement of a register (decq, FF /1 with wide REX). The rule matches
// AddL with the immL_M1 operand, i.e. the decrement is expressed as an add of
// a constant. Selected only when UseIncDec is set. Flags are declared killed.
8525 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8526 %{
8527   predicate(UseIncDec);
8528   match(Set dst (AddL dst src));
8529   effect(KILL cr);
8530 
8531   format %{ "decq    $dst\t# long" %}
8532   opcode(0xFF, 0x01); // FF /1
8533   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8534   ins_pipe(ialu_reg);
8535 %}
8536 
8537 // XXX why does that use AddL
// Long decrement of a memory location (decq, FF /1 with wide REX). The rule
// matches a read-modify-write AddL with the immL_M1 operand, i.e. the
// decrement is expressed as an add of a constant. Selected only when
// UseIncDec is set. Flags are declared killed.
8538 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8539 %{
8540   predicate(UseIncDec);
8541   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8542   effect(KILL cr);
8543 
8544   ins_cost(125); // XXX
8545   format %{ "decq    $dst\t# long" %}
8546   opcode(0xFF); /* Opcode FF /1 */
8547   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8548   ins_pipe(ialu_mem_imm);
8549 %}
8550 
// Three-operand long add of register + immediate via leaq (0x8D /r, wide REX).
// dst is a separate operand from src0 and no flags effect is declared. The
// cost of 110 is set explicitly.
8551 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8552 %{
8553   match(Set dst (AddL src0 src1));
8554 
8555   ins_cost(110);
8556   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8557   opcode(0x8D); /* 0x8D /r */
8558   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8559   ins_pipe(ialu_reg_reg);
8560 %}
8561 
// Pointer add, pointer register += long register (addq, opcode 0x03 with wide
// REX). Two-address form; flags are declared killed.
8562 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8563 %{
8564   match(Set dst (AddP dst src));
8565   effect(KILL cr);
8566 
8567   format %{ "addq    $dst, $src\t# ptr" %}
8568   opcode(0x03);
8569   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8570   ins_pipe(ialu_reg_reg);
8571 %}
8572 
// Pointer add, pointer register += immediate (addq, 81 /0 base opcode). The
// constant is restricted to the immL32 operand. Flags are declared killed.
8573 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8574 %{
8575   match(Set dst (AddP dst src));
8576   effect(KILL cr);
8577 
8578   format %{ "addq    $dst, $src\t# ptr" %}
8579   opcode(0x81, 0x00); /* /0 id */
8580   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8581   ins_pipe( ialu_reg );
8582 %}
8583 
8584 // XXX addP mem ops ????
8585 
// Pointer plus immL32 immediate into a separate destination register,
// printed as leaq.  No effect() clause, so no flags kill is declared.
8586 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8587 %{
8588   match(Set dst (AddP src0 src1));
8589 
8590   ins_cost(110);
8591   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8592   opcode(0x8D); /* 0x8D /r */
8593   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8594   ins_pipe(ialu_reg_reg);
8595 %}
8596 
// CheckCastPP on a pointer register: declared with size(0) and an empty
// encoding, so the rule emits no machine code.
8597 instruct checkCastPP(rRegP dst)
8598 %{
8599   match(Set dst (CheckCastPP dst));
8600 
8601   size(0);
8602   format %{ "# checkcastPP of $dst" %}
8603   ins_encode(/* empty encoding */);
8604   ins_pipe(empty);
8605 %}
8606 
// CastPP on a pointer register: declared with size(0) and an empty encoding,
// so the rule emits no machine code.
8607 instruct castPP(rRegP dst)
8608 %{
8609   match(Set dst (CastPP dst));
8610 
8611   size(0);
8612   format %{ "# castPP of $dst" %}
8613   ins_encode(/* empty encoding */);
8614   ins_pipe(empty);
8615 %}
8616 
// CastII on an int register: size(0), empty encoding and an explicit
// ins_cost(0), so the rule emits no machine code and is costed as free.
8617 instruct castII(rRegI dst)
8618 %{
8619   match(Set dst (CastII dst));
8620 
8621   size(0);
8622   format %{ "# castII of $dst" %}
8623   ins_encode(/* empty encoding */);
8624   ins_cost(0);
8625   ins_pipe(empty);
8626 %}
8627 
8628 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked and encodes it as a plain 64-bit movq from memory
// (opcode 0x8B with the wide REX prefix); no lock prefix is emitted here.
8629 instruct loadPLocked(rRegP dst, memory mem)
8630 %{
8631   match(Set dst (LoadPLocked mem));
8632 
8633   ins_cost(125); // XXX
8634   format %{ "movq    $dst, $mem\t# ptr locked" %}
8635   opcode(0x8B);
8636   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8637   ins_pipe(ialu_reg_mem); // XXX
8638 %}
8639 
8640 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked and encodes it as a plain 64-bit movq from memory
// (opcode 0x8B with the wide REX prefix); no lock prefix is emitted here.
8641 instruct loadLLocked(rRegL dst, memory mem)
8642 %{
8643   match(Set dst (LoadLLocked mem));
8644 
8645   ins_cost(125); // XXX
8646   format %{ "movq    $dst, $mem\t# long locked" %}
8647   opcode(0x8B);
8648   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8649   ins_pipe(ialu_reg_mem); // XXX
8650 %}
8651 
8652 // Conditional-store of the updated heap-top.
8653 // Used during allocation of the shared heap.
8654 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8655 
// Pointer conditional store: the result of the rule is the flags register
// (Set cr ...).  oldval is pinned to rax; the encoding is a lock-prefixed
// 0F B1 (cmpxchgq) of newval against heap_top_ptr.
// NOTE(review): unlike storeIConditional / storeLConditional this rule has no
// effect(KILL oldval) -- confirm whether that omission is intentional.
8656 instruct storePConditional(memory heap_top_ptr,
8657                            rax_RegP oldval, rRegP newval,
8658                            rFlagsReg cr)
8659 %{
8660   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8661  
8662   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8663             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8664   opcode(0x0F, 0xB1);
8665   ins_encode(lock_prefix,
8666              REX_reg_mem_wide(newval, heap_top_ptr),
8667              OpcP, OpcS,
8668              reg_mem(newval, heap_top_ptr));
8669   ins_pipe(pipe_cmpxchg);
8670 %}
8671 
8672 // Conditional-store of an int value.
8673 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// Int conditional store producing flags (Set cr ...).  oldval is pinned to
// rax and declared killed; the encoding is a lock-prefixed 0F B1 (cmpxchgl,
// non-wide REX) of newval against mem.
8674 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8675 %{
8676   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8677   effect(KILL oldval);
8678 
8679   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8680   opcode(0x0F, 0xB1);
8681   ins_encode(lock_prefix,
8682              REX_reg_mem(newval, mem),
8683              OpcP, OpcS,
8684              reg_mem(newval, mem));
8685   ins_pipe(pipe_cmpxchg);
8686 %}
8687 
8688 // Conditional-store of a long value.
8689 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// Long conditional store producing flags (Set cr ...).  oldval is pinned to
// rax and declared killed; the encoding is a lock-prefixed 0F B1 (cmpxchgq,
// wide REX) of newval against mem.
8690 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8691 %{
8692   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8693   effect(KILL oldval);
8694 
8695   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8696   opcode(0x0F, 0xB1);
8697   ins_encode(lock_prefix,
8698              REX_reg_mem_wide(newval, mem),
8699              OpcP, OpcS,
8700              reg_mem(newval, mem));
8701   ins_pipe(pipe_cmpxchg);
8702 %}
8703 
8704 
8705 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result register.  Emits a
// lock-prefixed cmpxchgq (wide REX, 0F B1) with oldval pinned to rax, then
// turns the outcome into an int in res with sete followed by movzbl.
// Both the flags operand and oldval are declared killed.
8706 instruct compareAndSwapP(rRegI res,
8707                          memory mem_ptr,
8708                          rax_RegP oldval, rRegP newval,
8709                          rFlagsReg cr)
8710 %{
8711   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8712   effect(KILL cr, KILL oldval);
8713 
8714   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8715             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8716             "sete    $res\n\t"
8717             "movzbl  $res, $res" %}
8718   opcode(0x0F, 0xB1);
8719   ins_encode(lock_prefix,
8720              REX_reg_mem_wide(newval, mem_ptr),
8721              OpcP, OpcS,
8722              reg_mem(newval, mem_ptr),
8723              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8724              REX_reg_breg(res, res), // movzbl
8725              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8726   ins_pipe( pipe_cmpxchg );
8727 %}
8728 
// Long compare-and-swap producing an int result register.  Emits a
// lock-prefixed cmpxchgq (wide REX, 0F B1) with oldval pinned to rax, then
// turns the outcome into an int in res with sete followed by movzbl.
// Both the flags operand and oldval are declared killed.
8729 instruct compareAndSwapL(rRegI res,
8730                          memory mem_ptr,
8731                          rax_RegL oldval, rRegL newval,
8732                          rFlagsReg cr)
8733 %{
8734   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8735   effect(KILL cr, KILL oldval);
8736 
8737   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8738             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8739             "sete    $res\n\t"
8740             "movzbl  $res, $res" %}
8741   opcode(0x0F, 0xB1);
8742   ins_encode(lock_prefix,
8743              REX_reg_mem_wide(newval, mem_ptr),
8744              OpcP, OpcS,
8745              reg_mem(newval, mem_ptr),
8746              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8747              REX_reg_breg(res, res), // movzbl
8748              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8749   ins_pipe( pipe_cmpxchg );
8750 %}
8751 
// Int compare-and-swap producing an int result register.  Emits a
// lock-prefixed cmpxchgl (non-wide REX, 0F B1) with oldval pinned to rax,
// then turns the outcome into an int in res with sete followed by movzbl.
// Both the flags operand and oldval are declared killed.
8752 instruct compareAndSwapI(rRegI res,
8753                          memory mem_ptr,
8754                          rax_RegI oldval, rRegI newval,
8755                          rFlagsReg cr)
8756 %{
8757   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8758   effect(KILL cr, KILL oldval);
8759 
8760   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8761             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8762             "sete    $res\n\t"
8763             "movzbl  $res, $res" %}
8764   opcode(0x0F, 0xB1);
8765   ins_encode(lock_prefix,
8766              REX_reg_mem(newval, mem_ptr),
8767              OpcP, OpcS,
8768              reg_mem(newval, mem_ptr),
8769              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8770              REX_reg_breg(res, res), // movzbl
8771              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8772   ins_pipe( pipe_cmpxchg );
8773 %}
8774 
8775 
// Compare-and-swap on rRegN / rax_RegN operands producing an int result
// register.  Uses the 32-bit form: lock-prefixed cmpxchgl (non-wide REX,
// 0F B1) with oldval pinned to rax, then sete + movzbl into res.
// Both the flags operand and oldval are declared killed.
8776 instruct compareAndSwapN(rRegI res,
8777                           memory mem_ptr,
8778                           rax_RegN oldval, rRegN newval,
8779                           rFlagsReg cr) %{
8780   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8781   effect(KILL cr, KILL oldval);
8782 
8783   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8784             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8785             "sete    $res\n\t"
8786             "movzbl  $res, $res" %}
8787   opcode(0x0F, 0xB1);
8788   ins_encode(lock_prefix,
8789              REX_reg_mem(newval, mem_ptr),
8790              OpcP, OpcS,
8791              reg_mem(newval, mem_ptr),
8792              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8793              REX_reg_breg(res, res), // movzbl
8794              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8795   ins_pipe( pipe_cmpxchg );
8796 %}
8797 
8798 //----------Subtraction Instructions-------------------------------------------
8799 
8800 // Integer Subtraction Instructions
// Int subtract, register minus register, in place (SubI dst src).
// Printed as subl, opcode 0x2B; declares the flags operand as killed.
8801 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8802 %{
8803   match(Set dst (SubI dst src));
8804   effect(KILL cr);
8805 
8806   format %{ "subl    $dst, $src\t# int" %}
8807   opcode(0x2B);
8808   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8809   ins_pipe(ialu_reg_reg);
8810 %}
8811 
// Int subtract of an immI immediate from a register, in place.
// Printed as subl, opcode 81 /5; declares the flags operand as killed.
// OpcSErm / Con8or32 are defined elsewhere; presumably they select the
// 8-bit or 32-bit immediate form -- TODO confirm.
8812 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8813 %{
8814   match(Set dst (SubI dst src));
8815   effect(KILL cr);
8816 
8817   format %{ "subl    $dst, $src\t# int" %}
8818   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8819   ins_encode(OpcSErm(dst, src), Con8or32(src));
8820   ins_pipe(ialu_reg);
8821 %}
8822 
// Int subtract with the subtrahend loaded from memory: folds the LoadI into
// the subl (SubI dst (LoadI src)).  Declares the flags operand as killed.
8823 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8824 %{
8825   match(Set dst (SubI dst (LoadI src)));
8826   effect(KILL cr);
8827 
8828   ins_cost(125);
8829   format %{ "subl    $dst, $src\t# int" %}
8830   opcode(0x2B);
8831   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8832   ins_pipe(ialu_reg_mem);
8833 %}
8834 
// Int read-modify-write subtract on memory: folds load / SubI / store on the
// same address into one subl with a memory destination (opcode 29 /r).
// Declares the flags operand as killed.
8835 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8836 %{
8837   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8838   effect(KILL cr);
8839 
8840   ins_cost(150);
8841   format %{ "subl    $dst, $src\t# int" %}
8842   opcode(0x29); /* Opcode 29 /r */
   // Note the operand order: the register goes first in the encoding classes.
8843   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8844   ins_pipe(ialu_mem_reg);
8845 %}
8846 
// Int read-modify-write subtract of an immI immediate on memory: folds
// load / SubI / store on the same address into one subl (opcode 81 /5).
// Declares the flags operand as killed.
8847 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8848 %{
8849   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8850   effect(KILL cr);
8851 
8852   ins_cost(125); // XXX
8853   format %{ "subl    $dst, $src\t# int" %}
8854   opcode(0x81); /* Opcode 81 /5 id */
   // The /5 opcode extension is passed directly to RM_opc_mem below.
8855   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8856   ins_pipe(ialu_mem_imm);
8857 %}
8858 
// Long subtract, register minus register, in place (SubL dst src).
// Printed as subq, opcode 0x2B with the wide REX prefix; declares the flags
// operand as killed.  Also used as the last step of the divL_10 expansion.
8859 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8860 %{
8861   match(Set dst (SubL dst src));
8862   effect(KILL cr);
8863 
8864   format %{ "subq    $dst, $src\t# long" %}
8865   opcode(0x2B);
8866   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8867   ins_pipe(ialu_reg_reg);
8868 %}
8869 
// Long subtract of an immL32 immediate from a register, in place
// (SubL dst src).  Printed as subq, opcode 81 /5 via the wide encoding
// class; declares the flags operand as killed.
// dst is a long register operand (rRegL), matching the SubL result type and
// the sibling subL_* rules; it was previously declared as rRegI.
// OpcSErm_wide / Con8or32 are defined elsewhere; presumably they select the
// 8-bit or 32-bit immediate form -- TODO confirm.
8870 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8871 %{
8872   match(Set dst (SubL dst src));
8873   effect(KILL cr);
8874 
8875   format %{ "subq    $dst, $src\t# long" %}
8876   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8877   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8878   ins_pipe(ialu_reg);
8879 %}
8880 
// Long subtract with the subtrahend loaded from memory: folds the LoadL into
// the subq (SubL dst (LoadL src)).  Declares the flags operand as killed.
8881 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8882 %{
8883   match(Set dst (SubL dst (LoadL src)));
8884   effect(KILL cr);
8885 
8886   ins_cost(125);
8887   format %{ "subq    $dst, $src\t# long" %}
8888   opcode(0x2B);
8889   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8890   ins_pipe(ialu_reg_mem);
8891 %}
8892 
// Long read-modify-write subtract on memory: folds load / SubL / store on
// the same address into one subq with a memory destination (opcode 29 /r).
// Declares the flags operand as killed.
8893 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8894 %{
8895   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8896   effect(KILL cr);
8897 
8898   ins_cost(150);
8899   format %{ "subq    $dst, $src\t# long" %}
8900   opcode(0x29); /* Opcode 29 /r */
   // Note the operand order: the register goes first in the encoding classes.
8901   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8902   ins_pipe(ialu_mem_reg);
8903 %}
8904 
// Long read-modify-write subtract of an immL32 immediate on memory: folds
// load / SubL / store on the same address into one subq (opcode 81 /5).
// Declares the flags operand as killed.
8905 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8906 %{
8907   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8908   effect(KILL cr);
8909 
8910   ins_cost(125); // XXX
8911   format %{ "subq    $dst, $src\t# long" %}
8912   opcode(0x81); /* Opcode 81 /5 id */
   // The /5 opcode extension is passed directly to RM_opc_mem below.
8913   ins_encode(REX_mem_wide(dst),
8914              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8915   ins_pipe(ialu_mem_imm);
8916 %}
8917 
8918 // Subtract from a pointer
8919 // XXX hmpf???
// Pointer minus int: matches a pointer add of a negated int,
// (AddP dst (SubI zero src)), and prints it as a single subq.
// Declares the flags operand as killed.
// NOTE(review): src is an rRegI but the encoding uses the wide (64-bit)
// REX prefix -- confirm the upper half of src is well defined here.
8920 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8921 %{
8922   match(Set dst (AddP dst (SubI zero src)));
8923   effect(KILL cr);
8924 
8925   format %{ "subq    $dst, $src\t# ptr - int" %}
8926   opcode(0x2B);
8927   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8928   ins_pipe(ialu_reg_reg);
8929 %}
8930 
// Int negate in place: matches (SubI zero dst) with the immI0 operand and
// prints it as negl (opcode F7 /3).  Declares the flags operand as killed.
8931 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8932 %{
8933   match(Set dst (SubI zero dst));
8934   effect(KILL cr);
8935 
8936   format %{ "negl    $dst\t# int" %}
8937   opcode(0xF7, 0x03);  // Opcode F7 /3
8938   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8939   ins_pipe(ialu_reg);
8940 %}
8941 
// Int negate of a memory location: folds load / (SubI zero x) / store on the
// same address into one negl on memory (opcode F7 /3).
// Declares the flags operand as killed.
// NOTE(review): this memory form uses ins_pipe(ialu_reg), while the other
// single-operand memory rules in this section (e.g. decL_mem, salI_mem_1)
// use ialu_mem_imm -- confirm which pipeline class is intended.
8942 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8943 %{
8944   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8945   effect(KILL cr);
8946 
8947   format %{ "negl    $dst\t# int" %}
8948   opcode(0xF7, 0x03);  // Opcode F7 /3
8949   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8950   ins_pipe(ialu_reg);
8951 %}
8952 
// Long negate in place: matches (SubL zero dst) with the immL0 operand and
// prints it as negq (opcode F7 /3, wide REX).  Declares the flags operand as
// killed.
8953 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8954 %{
8955   match(Set dst (SubL zero dst));
8956   effect(KILL cr);
8957 
8958   format %{ "negq    $dst\t# long" %}
8959   opcode(0xF7, 0x03);  // Opcode F7 /3
8960   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8961   ins_pipe(ialu_reg);
8962 %}
8963 
// Long negate of a memory location: folds load / (SubL zero x) / store on
// the same address into one negq on memory (opcode F7 /3, wide REX).
// Declares the flags operand as killed.
// NOTE(review): this memory form uses ins_pipe(ialu_reg), while the other
// single-operand memory rules in this section (e.g. decL_mem, salL_mem_1)
// use ialu_mem_imm -- confirm which pipeline class is intended.
8964 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8965 %{
8966   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8967   effect(KILL cr);
8968 
8969   format %{ "negq    $dst\t# long" %}
8970   opcode(0xF7, 0x03);  // Opcode F7 /3
8971   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8972   ins_pipe(ialu_reg);
8973 %}
8974 
8975 
8976 //----------Multiplication/Division Instructions-------------------------------
8977 // Integer Multiplication Instructions
8978 // Multiply Register
8979 
// Int multiply, register by register, in place (MulI dst src).
// Printed as two-operand imull (0F AF); declares the flags operand as killed.
8980 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8981 %{
8982   match(Set dst (MulI dst src));
8983   effect(KILL cr);
8984 
8985   ins_cost(300);
8986   format %{ "imull   $dst, $src\t# int" %}
8987   opcode(0x0F, 0xAF);
8988   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8989   ins_pipe(ialu_reg_reg_alu0);
8990 %}
8991 
// Int multiply of a register by an immI immediate into a separate
// destination (MulI src imm).  Printed as three-operand imull (69 /r id);
// declares the flags operand as killed.
// OpcSE / Con8or32 are defined elsewhere; presumably they select the 8-bit
// or 32-bit immediate form -- TODO confirm.
8992 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8993 %{
8994   match(Set dst (MulI src imm));
8995   effect(KILL cr);
8996 
8997   ins_cost(300);
8998   format %{ "imull   $dst, $src, $imm\t# int" %}
8999   opcode(0x69); /* 69 /r id */
9000   ins_encode(REX_reg_reg(dst, src),
9001              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9002   ins_pipe(ialu_reg_reg_alu0);
9003 %}
9004 
// Int multiply of a register by a value loaded from memory, in place:
// folds the LoadI into the imull (0F AF).  Declares the flags operand as
// killed.
9005 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
9006 %{
9007   match(Set dst (MulI dst (LoadI src)));
9008   effect(KILL cr);
9009 
9010   ins_cost(350);
9011   format %{ "imull   $dst, $src\t# int" %}
9012   opcode(0x0F, 0xAF);
9013   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
9014   ins_pipe(ialu_reg_mem_alu0);
9015 %}
9016 
// Int multiply of a memory operand by an immI immediate into a register:
// folds the LoadI into a three-operand imull (69 /r id).
// Declares the flags operand as killed.
9017 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
9018 %{
9019   match(Set dst (MulI (LoadI src) imm));
9020   effect(KILL cr);
9021 
9022   ins_cost(300);
9023   format %{ "imull   $dst, $src, $imm\t# int" %}
9024   opcode(0x69); /* 69 /r id */
9025   ins_encode(REX_reg_mem(dst, src),
9026              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9027   ins_pipe(ialu_reg_mem_alu0);
9028 %}
9029 
// Long multiply, register by register, in place (MulL dst src).
// Printed as two-operand imulq (0F AF, wide REX); declares the flags operand
// as killed.
9030 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9031 %{
9032   match(Set dst (MulL dst src));
9033   effect(KILL cr);
9034 
9035   ins_cost(300);
9036   format %{ "imulq   $dst, $src\t# long" %}
9037   opcode(0x0F, 0xAF);
9038   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
9039   ins_pipe(ialu_reg_reg_alu0);
9040 %}
9041 
// Long multiply of a register by an immL32 immediate into a separate
// destination (MulL src imm).  Printed as three-operand imulq (69 /r id,
// wide REX); declares the flags operand as killed.
9042 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
9043 %{
9044   match(Set dst (MulL src imm));
9045   effect(KILL cr);
9046 
9047   ins_cost(300);
9048   format %{ "imulq   $dst, $src, $imm\t# long" %}
9049   opcode(0x69); /* 69 /r id */
9050   ins_encode(REX_reg_reg_wide(dst, src),
9051              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9052   ins_pipe(ialu_reg_reg_alu0);
9053 %}
9054 
// Long multiply of a register by a value loaded from memory, in place:
// folds the LoadL into the imulq (0F AF, wide REX).  Declares the flags
// operand as killed.
9055 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
9056 %{
9057   match(Set dst (MulL dst (LoadL src)));
9058   effect(KILL cr);
9059 
9060   ins_cost(350);
9061   format %{ "imulq   $dst, $src\t# long" %}
9062   opcode(0x0F, 0xAF);
9063   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
9064   ins_pipe(ialu_reg_mem_alu0);
9065 %}
9066 
// Long multiply of a memory operand by an immL32 immediate into a register:
// folds the LoadL into a three-operand imulq (69 /r id, wide REX).
// Declares the flags operand as killed.
9067 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
9068 %{
9069   match(Set dst (MulL (LoadL src) imm));
9070   effect(KILL cr);
9071 
9072   ins_cost(300);
9073   format %{ "imulq   $dst, $src, $imm\t# long" %}
9074   opcode(0x69); /* 69 /r id */
9075   ins_encode(REX_reg_mem_wide(dst, src),
9076              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9077   ins_pipe(ialu_reg_mem_alu0);
9078 %}
9079 
// High half of a long multiply (MulHiL src rax).  The result operand is
// pinned to rdx and one input to rax; src comes from a register class that
// excludes rax.  Uses the one-operand imulq (F7 /5), whose format shows the
// product landing in RDX:RAX, so rax is declared USE_KILL and flags killed.
9080 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9081 %{
9082   match(Set dst (MulHiL src rax));
9083   effect(USE_KILL rax, KILL cr);
9084 
9085   ins_cost(300);
9086   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
9087   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9088   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9089   ins_pipe(ialu_reg_reg_alu0);
9090 %}
9091 
// Int division (DivI rax div): dividend and quotient are pinned to rax, rdx
// and the flags are declared killed, and the divisor comes from a register
// class excluding rax and rdx.
// The format documents the emitted sequence: when rax == 0x80000000 and
// div == -1 the idivl is skipped (rdx is zeroed and rax left unchanged);
// otherwise cdql + idivl run.
// cdql_enc is defined elsewhere; presumably it emits everything up to and
// including cdql, with the trailing REX / F7 /7 being the idivl -- TODO confirm.
9092 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9093                    rFlagsReg cr)
9094 %{
9095   match(Set rax (DivI rax div));
9096   effect(KILL rdx, KILL cr);
9097 
9098   ins_cost(30*100+10*100); // XXX
9099   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9100             "jne,s   normal\n\t"
9101             "xorl    rdx, rdx\n\t"
9102             "cmpl    $div, -1\n\t"
9103             "je,s    done\n"
9104     "normal: cdql\n\t"
9105             "idivl   $div\n"
9106     "done:"        %}
9107   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9108   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9109   ins_pipe(ialu_reg_reg_alu0);
9110 %}
9111 
// Long division (DivL rax div): dividend and quotient are pinned to rax, rdx
// and the flags are declared killed, and the divisor comes from a register
// class excluding rax and rdx.
// The format documents the emitted sequence: when rax == 0x8000000000000000
// and div == -1 the idivq is skipped (rdx is zeroed and rax left unchanged);
// otherwise cdqq + idivq run.
// cdqq_enc is defined elsewhere; presumably it emits everything up to and
// including cdqq, with the trailing REX / F7 /7 being the idivq -- TODO confirm.
9112 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9113                    rFlagsReg cr)
9114 %{
9115   match(Set rax (DivL rax div));
9116   effect(KILL rdx, KILL cr);
9117 
9118   ins_cost(30*100+10*100); // XXX
9119   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9120             "cmpq    rax, rdx\n\t"
9121             "jne,s   normal\n\t"
9122             "xorl    rdx, rdx\n\t"
9123             "cmpq    $div, -1\n\t"
9124             "je,s    done\n"
9125     "normal: cdqq\n\t"
9126             "idivq   $div\n"
9127     "done:"        %}
9128   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9129   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9130   ins_pipe(ialu_reg_reg_alu0);
9131 %}
9132 
9133 // Integer DIVMOD with Register, both quotient and mod results
// Combined int divide/remainder (DivModI rax div).  The match rule has no
// "Set" destination and only the flags are declared killed, so neither rax
// nor rdx is marked as clobbered; the emitted sequence is the same guarded
// cdql + idivl shown for divI_rReg.  Uses the pipe_slow pipeline class.
9134 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9135                              rFlagsReg cr)
9136 %{
9137   match(DivModI rax div);
9138   effect(KILL cr);
9139 
9140   ins_cost(30*100+10*100); // XXX
9141   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9142             "jne,s   normal\n\t"
9143             "xorl    rdx, rdx\n\t"
9144             "cmpl    $div, -1\n\t"
9145             "je,s    done\n"
9146     "normal: cdql\n\t"
9147             "idivl   $div\n"
9148     "done:"        %}
9149   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9150   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9151   ins_pipe(pipe_slow);
9152 %}
9153 
9154 // Long DIVMOD with Register, both quotient and mod results
// Combined long divide/remainder (DivModL rax div).  The match rule has no
// "Set" destination and only the flags are declared killed, so neither rax
// nor rdx is marked as clobbered; the emitted sequence is the same guarded
// cdqq + idivq shown for divL_rReg.  Uses the pipe_slow pipeline class.
9155 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9156                              rFlagsReg cr)
9157 %{
9158   match(DivModL rax div);
9159   effect(KILL cr);
9160 
9161   ins_cost(30*100+10*100); // XXX
9162   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9163             "cmpq    rax, rdx\n\t"
9164             "jne,s   normal\n\t"
9165             "xorl    rdx, rdx\n\t"
9166             "cmpq    $div, -1\n\t"
9167             "je,s    done\n"
9168     "normal: cdqq\n\t"
9169             "idivq   $div\n"
9170     "done:"        %}
9171   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9172   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9173   ins_pipe(pipe_slow);
9174 %}
9175 
9176 //----------- DivL-By-Constant-Expansions--------------------------------------
9177 // DivI cases are handled by the compiler
9178 
9179 // Magic constant, reciprocal of 10
// Helper with no match rule: defines dst by loading the 64-bit constant
// 0x6666666666666667.  It is instantiated only from the divL_10 expansion.
// The format string now prints the full 16-digit constant that the encoding
// actually loads (it previously dropped one '6').
9180 instruct loadConL_0x6666666666666667(rRegL dst)
9181 %{
9182   effect(DEF dst);
9183 
9184   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
9185   ins_encode(load_immL(dst, 0x6666666666666667));
9186   ins_pipe(ialu_reg);
9187 %}
9188 
// Helper with no match rule, instantiated from the divL_10 expansion:
// one-operand imulq (F7 /5) of src by rax.  dst is pinned to rdx and declared
// DEF, rax is USE_KILL, and the flags are killed.
9189 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9190 %{
9191   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
9192 
9193   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
9194   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9195   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9196   ins_pipe(ialu_reg_reg_alu0);
9197 %}
9198 
// Helper with no match rule, instantiated from the divL_10 expansion:
// arithmetic right shift of a long register by the fixed count 63 (0x3F),
// in place (USE_DEF dst).  Declares the flags operand as killed.
9199 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
9200 %{
9201   effect(USE_DEF dst, KILL cr);
9202 
9203   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
9204   opcode(0xC1, 0x7); /* C1 /7 ib */
9205   ins_encode(reg_opc_imm_wide(dst, 0x3F));
9206   ins_pipe(ialu_reg);
9207 %}
9208 
// Helper with no match rule, instantiated from the divL_10 expansion:
// arithmetic right shift of a long register by the fixed count 2, in place
// (USE_DEF dst).  Declares the flags operand as killed.
9209 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
9210 %{
9211   effect(USE_DEF dst, KILL cr);
9212 
9213   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
9214   opcode(0xC1, 0x7); /* C1 /7 ib */
9215   ins_encode(reg_opc_imm_wide(dst, 0x2));
9216   ins_pipe(ialu_reg);
9217 %}
9218 
// Long division by the immL10 constant operand, expanded into a
// multiply-by-reciprocal sequence instead of an idivq:
//   dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
// The result is pinned to rdx and src comes from a class excluding rax,
// because rax is used as the temp holding the magic constant.
// NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so src is overwritten
// by this expansion although the instruct declares no effect on src --
// confirm this is accounted for.
9219 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
9220 %{
9221   match(Set dst (DivL src div));
9222 
9223   ins_cost((5+8)*100);
9224   expand %{
9225     rax_RegL rax;                     // Killed temp
9226     rFlagsReg cr;                     // Killed
9227     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9228     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9229     sarL_rReg_63(src, cr);            // sarq  src, 63
9230     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9231     subL_rReg(dst, src, cr);          // subq  rdx, src
9232   %}
9233 %}
9234 
9235 //-----------------------------------------------------------------------------
9236 
// Int remainder (ModI rax div): the dividend is pinned to rax, the result to
// rdx; rax and the flags are declared killed.  The format documents the same
// guarded cdql + idivl sequence as the int divide: when rax == 0x80000000 and
// div == -1 the idivl is skipped and rdx is left zeroed.
9237 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9238                    rFlagsReg cr)
9239 %{
9240   match(Set rdx (ModI rax div));
9241   effect(KILL rax, KILL cr);
9242 
9243   ins_cost(300); // XXX
9244   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9245             "jne,s   normal\n\t"
9246             "xorl    rdx, rdx\n\t"
9247             "cmpl    $div, -1\n\t"
9248             "je,s    done\n"
9249     "normal: cdql\n\t"
9250             "idivl   $div\n"
9251     "done:"        %}
9252   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9253   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9254   ins_pipe(ialu_reg_reg_alu0);
9255 %}
9256 
// Long remainder (ModL rax div): the dividend is pinned to rax, the result to
// rdx; rax and the flags are declared killed.  The format documents the same
// guarded cdqq + idivq sequence as the long divide: when
// rax == 0x8000000000000000 and div == -1 the idivq is skipped and rdx is
// left zeroed.
9257 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9258                    rFlagsReg cr)
9259 %{
9260   match(Set rdx (ModL rax div));
9261   effect(KILL rax, KILL cr);
9262 
9263   ins_cost(300); // XXX
9264   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9265             "cmpq    rax, rdx\n\t"
9266             "jne,s   normal\n\t"
9267             "xorl    rdx, rdx\n\t"
9268             "cmpq    $div, -1\n\t"
9269             "je,s    done\n"
9270     "normal: cdqq\n\t"
9271             "idivq   $div\n"
9272     "done:"        %}
9273   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9274   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9275   ins_pipe(ialu_reg_reg_alu0);
9276 %}
9277 
9278 // Integer Shift Instructions
9279 // Shift Left by one
// Int left shift of a register by the immI1 operand, in place.  Uses the
// D1 /4 form, which carries no immediate byte in the encoding.
// Declares the flags operand as killed.
9280 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9281 %{
9282   match(Set dst (LShiftI dst shift));
9283   effect(KILL cr);
9284 
9285   format %{ "sall    $dst, $shift" %}
9286   opcode(0xD1, 0x4); /* D1 /4 */
9287   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9288   ins_pipe(ialu_reg);
9289 %}
9290 
9291 // Shift Left by one
// Int left shift of a memory location by the immI1 operand: folds
// load / LShiftI / store on the same address into one sall (D1 /4).
// Declares the flags operand as killed.
9292 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9293 %{
9294   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9295   effect(KILL cr);
9296 
9297   format %{ "sall    $dst, $shift\t" %}
9298   opcode(0xD1, 0x4); /* D1 /4 */
9299   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9300   ins_pipe(ialu_mem_imm);
9301 %}
9302 
9303 // Shift Left by 8-bit immediate
// Int left shift of a register by an immI8 immediate, in place (C1 /4 ib).
// Declares the flags operand as killed.
9304 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9305 %{
9306   match(Set dst (LShiftI dst shift));
9307   effect(KILL cr);
9308 
9309   format %{ "sall    $dst, $shift" %}
9310   opcode(0xC1, 0x4); /* C1 /4 ib */
9311   ins_encode(reg_opc_imm(dst, shift));
9312   ins_pipe(ialu_reg);
9313 %}
9314 
9315 // Shift Left by 8-bit immediate
// Int left shift of a memory location by an immI8 immediate: folds
// load / LShiftI / store on the same address into one sall (C1 /4 ib).
// Declares the flags operand as killed.
9316 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9317 %{
9318   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9319   effect(KILL cr);
9320 
9321   format %{ "sall    $dst, $shift" %}
9322   opcode(0xC1, 0x4); /* C1 /4 ib */
9323   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9324   ins_pipe(ialu_mem_imm);
9325 %}
9326 
9327 // Shift Left by variable
// Int left shift of a register by a variable count, in place (D3 /4).
// The count operand is pinned to the rcx register class and does not appear
// in the encoding.  Declares the flags operand as killed.
9328 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9329 %{
9330   match(Set dst (LShiftI dst shift));
9331   effect(KILL cr);
9332 
9333   format %{ "sall    $dst, $shift" %}
9334   opcode(0xD3, 0x4); /* D3 /4 */
9335   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9336   ins_pipe(ialu_reg_reg);
9337 %}
9338 
9339 // Shift Left by variable
// Int left shift of a memory location by a variable count held in the rcx
// register class: folds load / LShiftI / store into one sall (D3 /4).
// Declares the flags operand as killed.
9340 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9341 %{
9342   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9343   effect(KILL cr);
9344 
9345   format %{ "sall    $dst, $shift" %}
9346   opcode(0xD3, 0x4); /* D3 /4 */
9347   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9348   ins_pipe(ialu_mem_reg);
9349 %}
9350 
9351 // Arithmetic shift right by one
// Int arithmetic right shift of a register by the immI1 operand, in place.
// Uses the D1 /7 form, which carries no immediate byte in the encoding.
// Declares the flags operand as killed.
9352 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9353 %{
9354   match(Set dst (RShiftI dst shift));
9355   effect(KILL cr);
9356 
9357   format %{ "sarl    $dst, $shift" %}
9358   opcode(0xD1, 0x7); /* D1 /7 */
9359   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9360   ins_pipe(ialu_reg);
9361 %}
9362 
9363 // Arithmetic shift right by one
// Int arithmetic right shift of a memory location by the immI1 operand:
// folds load / RShiftI / store on the same address into one sarl (D1 /7).
// Declares the flags operand as killed.
9364 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9365 %{
9366   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9367   effect(KILL cr);
9368 
9369   format %{ "sarl    $dst, $shift" %}
9370   opcode(0xD1, 0x7); /* D1 /7 */
9371   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9372   ins_pipe(ialu_mem_imm);
9373 %}
9374 
9375 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic right shift of a register by an immI8 immediate, in place
// (C1 /7 ib).  Declares the flags operand as killed.
// This is a register-only operation, so it uses the ialu_reg pipeline class
// like the sibling salI_rReg_imm and shrI_rReg_imm rules (it previously
// named the memory class ialu_mem_imm).
9376 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9377 %{
9378   match(Set dst (RShiftI dst shift));
9379   effect(KILL cr);
9380 
9381   format %{ "sarl    $dst, $shift" %}
9382   opcode(0xC1, 0x7); /* C1 /7 ib */
9383   ins_encode(reg_opc_imm(dst, shift));
9384   ins_pipe(ialu_reg);
9385 %}
9386 
9387 // Arithmetic Shift Right by 8-bit immediate
// Int arithmetic right shift of a memory location by an immI8 immediate:
// folds load / RShiftI / store on the same address into one sarl (C1 /7 ib).
// Declares the flags operand as killed.
9388 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9389 %{
9390   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9391   effect(KILL cr);
9392 
9393   format %{ "sarl    $dst, $shift" %}
9394   opcode(0xC1, 0x7); /* C1 /7 ib */
9395   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9396   ins_pipe(ialu_mem_imm);
9397 %}
9398 
9399 // Arithmetic Shift Right by variable
// Int arithmetic right shift of a register by a variable count, in place
// (D3 /7).  The count operand is pinned to the rcx register class and does
// not appear in the encoding.  Declares the flags operand as killed.
9400 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9401 %{
9402   match(Set dst (RShiftI dst shift));
9403   effect(KILL cr);
9404 
9405   format %{ "sarl    $dst, $shift" %}
9406   opcode(0xD3, 0x7); /* D3 /7 */
9407   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9408   ins_pipe(ialu_reg_reg);
9409 %}
9410 
9411 // Arithmetic Shift Right by variable
// Int arithmetic right shift of a memory location by a variable count held
// in the rcx register class: folds load / RShiftI / store into one sarl
// (D3 /7).  Declares the flags operand as killed.
9412 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9413 %{
9414   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9415   effect(KILL cr);
9416 
9417   format %{ "sarl    $dst, $shift" %}
9418   opcode(0xD3, 0x7); /* D3 /7 */
9419   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9420   ins_pipe(ialu_mem_reg);
9421 %}
9422 
9423 // Logical shift right by one
// Int logical (unsigned) right shift of a register by the immI1 operand, in
// place.  Uses the D1 /5 form, which carries no immediate byte.
// Declares the flags operand as killed.
9424 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9425 %{
9426   match(Set dst (URShiftI dst shift));
9427   effect(KILL cr);
9428 
9429   format %{ "shrl    $dst, $shift" %}
9430   opcode(0xD1, 0x5); /* D1 /5 */
9431   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9432   ins_pipe(ialu_reg);
9433 %}
9434 
9435 // Logical shift right by one
// Int logical right shift of a memory location by the immI1 operand: folds
// load / URShiftI / store on the same address into one shrl (D1 /5).
// Declares the flags operand as killed.
9436 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9437 %{
9438   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9439   effect(KILL cr);
9440 
9441   format %{ "shrl    $dst, $shift" %}
9442   opcode(0xD1, 0x5); /* D1 /5 */
9443   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9444   ins_pipe(ialu_mem_imm);
9445 %}
9446 
9447 // Logical Shift Right by 8-bit immediate
// Int logical right shift of a register by an immI8 immediate, in place
// (C1 /5 ib).  Declares the flags operand as killed.
9448 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9449 %{
9450   match(Set dst (URShiftI dst shift));
9451   effect(KILL cr);
9452 
9453   format %{ "shrl    $dst, $shift" %}
9454   opcode(0xC1, 0x5); /* C1 /5 ib */
9455   ins_encode(reg_opc_imm(dst, shift));
9456   ins_pipe(ialu_reg);
9457 %}
9458 
9459 // Logical Shift Right by 8-bit immediate
// Int logical right shift of a memory location by an immI8 immediate: folds
// load / URShiftI / store on the same address into one shrl (C1 /5 ib).
// Declares the flags operand as killed.
9460 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9461 %{
9462   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9463   effect(KILL cr);
9464 
9465   format %{ "shrl    $dst, $shift" %}
9466   opcode(0xC1, 0x5); /* C1 /5 ib */
9467   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9468   ins_pipe(ialu_mem_imm);
9469 %}
9470 
9471 // Logical Shift Right by variable
// Int logical right shift of a register by a variable count, in place
// (D3 /5).  The count operand is pinned to the rcx register class and does
// not appear in the encoding.  Declares the flags operand as killed.
9472 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9473 %{
9474   match(Set dst (URShiftI dst shift));
9475   effect(KILL cr);
9476 
9477   format %{ "shrl    $dst, $shift" %}
9478   opcode(0xD3, 0x5); /* D3 /5 */
9479   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9480   ins_pipe(ialu_reg_reg);
9481 %}
9482 
9483 // Logical Shift Right by variable
// Int logical right shift of a memory location by a variable count held in
// the rcx register class: folds load / URShiftI / store into one shrl
// (D3 /5).  Declares the flags operand as killed.
9484 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9485 %{
9486   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9487   effect(KILL cr);
9488 
9489   format %{ "shrl    $dst, $shift" %}
9490   opcode(0xD3, 0x5); /* D3 /5 */
9491   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9492   ins_pipe(ialu_mem_reg);
9493 %}
9494 
9495 // Long Shift Instructions
9496 // Shift Left by one
// Long left shift of a register by the immI1 operand, in place.  Uses the
// D1 /4 form behind the wide REX prefix; no immediate byte is encoded.
// Declares the flags operand as killed.
9497 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9498 %{
9499   match(Set dst (LShiftL dst shift));
9500   effect(KILL cr);
9501 
9502   format %{ "salq    $dst, $shift" %}
9503   opcode(0xD1, 0x4); /* D1 /4 */
9504   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9505   ins_pipe(ialu_reg);
9506 %}
9507 
9508 // Shift Left by one
// Long left shift of a memory location by the immI1 operand: folds
// load / LShiftL / store on the same address into one salq (D1 /4, wide
// REX).  Declares the flags operand as killed.
9509 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9510 %{
9511   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9512   effect(KILL cr);
9513 
9514   format %{ "salq    $dst, $shift" %}
9515   opcode(0xD1, 0x4); /* D1 /4 */
9516   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9517   ins_pipe(ialu_mem_imm);
9518 %}
9519 
9520 // Shift Left by 8-bit immediate
// Long left shift of a register by an immI8 immediate, in place
// (C1 /4 ib via the wide encoding class).  Declares the flags operand as
// killed.
9521 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9522 %{
9523   match(Set dst (LShiftL dst shift));
9524   effect(KILL cr);
9525 
9526   format %{ "salq    $dst, $shift" %}
9527   opcode(0xC1, 0x4); /* C1 /4 ib */
9528   ins_encode(reg_opc_imm_wide(dst, shift));
9529   ins_pipe(ialu_reg);
9530 %}
9531 
9532 // Shift Left by 8-bit immediate
// Long left shift of a memory location by an immI8 immediate: folds
// load / LShiftL / store on the same address into one salq (C1 /4 ib, wide
// REX).  Declares the flags operand as killed.
9533 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9534 %{
9535   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9536   effect(KILL cr);
9537 
9538   format %{ "salq    $dst, $shift" %}
9539   opcode(0xC1, 0x4); /* C1 /4 ib */
9540   ins_encode(REX_mem_wide(dst), OpcP,
9541              RM_opc_mem(secondary, dst), Con8or32(shift));
9542   ins_pipe(ialu_mem_imm);
9543 %}
9544 
9545 // Shift Left by variable (long, register form)
// Matches (LShiftL dst shift) in place with the count in an rcx_RegI
// operand and kills cr. The count register is implied by opcode D3 /4 and
// therefore does not appear in ins_encode.
9546 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9547 %{
9548   match(Set dst (LShiftL dst shift));
9549   effect(KILL cr);
9550 
9551   format %{ "salq    $dst, $shift" %}
9552   opcode(0xD3, 0x4); /* D3 /4 */
9553   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9554   ins_pipe(ialu_reg_reg);
9555 %}
9556 
9557 // Shift Left by variable (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) << $shift) with the count
// in an rcx_RegI operand (read-modify-write of one memory operand).
// Kills cr. Encoded as D3 /4 on the memory operand.
9558 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9559 %{
9560   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9561   effect(KILL cr);
9562 
9563   format %{ "salq    $dst, $shift" %}
9564   opcode(0xD3, 0x4); /* D3 /4 */
9565   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9566   ins_pipe(ialu_mem_reg);
9567 %}
9568 
9569 // Arithmetic shift right by one (long, register form)
// Matches (RShiftL dst shift) in place with an immI1 count and kills cr.
// No immediate byte is emitted; the count is implied by opcode D1 /7.
9570 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9571 %{
9572   match(Set dst (RShiftL dst shift));
9573   effect(KILL cr);
9574 
9575   format %{ "sarq    $dst, $shift" %}
9576   opcode(0xD1, 0x7); /* D1 /7 */
9577   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9578   ins_pipe(ialu_reg);
9579 %}
9580 
9581 // Arithmetic shift right by one (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >> $shift) with an immI1
// count (read-modify-write of one memory operand). Kills cr.
// No immediate byte is emitted; the count is implied by opcode D1 /7.
9582 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9583 %{
9584   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9585   effect(KILL cr);
9586 
9587   format %{ "sarq    $dst, $shift" %}
9588   opcode(0xD1, 0x7); /* D1 /7 */
9589   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9590   ins_pipe(ialu_mem_imm);
9591 %}
9592 
9593 // Arithmetic Shift Right by 8-bit immediate (long, register form)
// Matches (RShiftL dst shift) in place with an immI8 count and kills cr.
// The whole instruction is produced by reg_opc_imm_wide(dst, shift), an
// enc_class defined elsewhere (presumably prefix, C1 /7 and the count byte).
// Scheduled with the ialu_reg pipeline class, like the sibling
// register/immediate long shifts salL_rReg_imm and shrL_rReg_imm: the
// destination is a register, so the memory pipeline class does not apply.
9594 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9595 %{
9596   match(Set dst (RShiftL dst shift));
9597   effect(KILL cr);
9598 
9599   format %{ "sarq    $dst, $shift" %}
9600   opcode(0xC1, 0x7); /* C1 /7 ib */
9601   ins_encode(reg_opc_imm_wide(dst, shift));
9602   ins_pipe(ialu_reg);
9603 %}
9604 
9605 // Arithmetic Shift Right by 8-bit immediate (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >> $shift) with an immI8
// count (read-modify-write of one memory operand). Kills cr.
// Con8or32(shift) appends the count after the ModRM/addressing bytes.
9606 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9607 %{
9608   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9609   effect(KILL cr);
9610 
9611   format %{ "sarq    $dst, $shift" %}
9612   opcode(0xC1, 0x7); /* C1 /7 ib */
9613   ins_encode(REX_mem_wide(dst), OpcP,
9614              RM_opc_mem(secondary, dst), Con8or32(shift));
9615   ins_pipe(ialu_mem_imm);
9616 %}
9617 
9618 // Arithmetic Shift Right by variable (long, register form)
// Matches (RShiftL dst shift) in place with the count in an rcx_RegI
// operand and kills cr. The count register is implied by opcode D3 /7 and
// therefore does not appear in ins_encode.
9619 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9620 %{
9621   match(Set dst (RShiftL dst shift));
9622   effect(KILL cr);
9623 
9624   format %{ "sarq    $dst, $shift" %}
9625   opcode(0xD3, 0x7); /* D3 /7 */
9626   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9627   ins_pipe(ialu_reg_reg);
9628 %}
9629 
9630 // Arithmetic Shift Right by variable (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >> $shift) with the count
// in an rcx_RegI operand (read-modify-write of one memory operand).
// Kills cr. Encoded as D3 /7 on the memory operand.
9631 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9632 %{
9633   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9634   effect(KILL cr);
9635 
9636   format %{ "sarq    $dst, $shift" %}
9637   opcode(0xD3, 0x7); /* D3 /7 */
9638   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9639   ins_pipe(ialu_mem_reg);
9640 %}
9641 
9642 // Logical shift right by one (long, register form)
// Matches (URShiftL dst shift) in place with an immI1 count and kills cr.
// No immediate byte is emitted; the count is implied by opcode D1 /5.
9643 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9644 %{
9645   match(Set dst (URShiftL dst shift));
9646   effect(KILL cr);
9647 
9648   format %{ "shrq    $dst, $shift" %}
9649   opcode(0xD1, 0x5); /* D1 /5 */
9650   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9651   ins_pipe(ialu_reg);
9652 %}
9653 
9654 // Logical shift right by one (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >>> $shift) with an immI1
// count (read-modify-write of one memory operand). Kills cr.
// No immediate byte is emitted; the count is implied by opcode D1 /5.
9655 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9656 %{
9657   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9658   effect(KILL cr);
9659 
9660   format %{ "shrq    $dst, $shift" %}
9661   opcode(0xD1, 0x5); /* D1 /5 */
9662   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9663   ins_pipe(ialu_mem_imm);
9664 %}
9665 
9666 // Logical Shift Right by 8-bit immediate (long, register form)
// Matches (URShiftL dst shift) in place with an immI8 count and kills cr.
// The whole instruction is produced by reg_opc_imm_wide(dst, shift), an
// enc_class defined elsewhere (presumably prefix, C1 /5 and the count byte).
9667 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9668 %{
9669   match(Set dst (URShiftL dst shift));
9670   effect(KILL cr);
9671 
9672   format %{ "shrq    $dst, $shift" %}
9673   opcode(0xC1, 0x5); /* C1 /5 ib */
9674   ins_encode(reg_opc_imm_wide(dst, shift));
9675   ins_pipe(ialu_reg);
9676 %}
9677 
9678 
9679 // Logical Shift Right by 8-bit immediate (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >>> $shift) with an immI8
// count (read-modify-write of one memory operand). Kills cr.
// Con8or32(shift) appends the count after the ModRM/addressing bytes.
9680 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9681 %{
9682   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9683   effect(KILL cr);
9684 
9685   format %{ "shrq    $dst, $shift" %}
9686   opcode(0xC1, 0x5); /* C1 /5 ib */
9687   ins_encode(REX_mem_wide(dst), OpcP,
9688              RM_opc_mem(secondary, dst), Con8or32(shift));
9689   ins_pipe(ialu_mem_imm);
9690 %}
9691 
9692 // Logical Shift Right by variable (long, register form)
// Matches (URShiftL dst shift) in place with the count in an rcx_RegI
// operand and kills cr. The count register is implied by opcode D3 /5 and
// therefore does not appear in ins_encode.
9693 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9694 %{
9695   match(Set dst (URShiftL dst shift));
9696   effect(KILL cr);
9697 
9698   format %{ "shrq    $dst, $shift" %}
9699   opcode(0xD3, 0x5); /* D3 /5 */
9700   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9701   ins_pipe(ialu_reg_reg);
9702 %}
9703 
9704 // Logical Shift Right by variable (long, memory form)
// Matches a StoreL back to $dst of ((LoadL $dst) >>> $shift) with the count
// in an rcx_RegI operand (read-modify-write of one memory operand).
// Kills cr. Encoded as D3 /5 on the memory operand.
9705 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9706 %{
9707   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9708   effect(KILL cr);
9709 
9710   format %{ "shrq    $dst, $shift" %}
9711   opcode(0xD3, 0x5); /* D3 /5 */
9712   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9713   ins_pipe(ialu_mem_reg);
9714 %}
9715 
9716 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
9717 // This idiom is used by the compiler for the i2b bytecode.
9718 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9719 %{
9720   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9721 
9722   format %{ "movsbl  $dst, $src\t# i2b" %}
9723   opcode(0x0F, 0xBE);
9724   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9725   ins_pipe(ialu_reg_reg);
9726 %}
9727 
9728 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
9729 // This idiom is used by the compiler the i2s bytecode.
9730 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9731 %{
9732   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9733 
9734   format %{ "movswl  $dst, $src\t# i2s" %}
9735   opcode(0x0F, 0xBF);
9736   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9737   ins_pipe(ialu_reg_reg);
9738 %}
9739 
9740 // ROL/ROR instructions
9741 
9742 // ROL expand
// rolI_rReg_imm1: int rotate-left by one, register form. It has no match
// rule and is only instantiated from expand blocks (see rolI_rReg_i1).
// dst is both read and written (USE_DEF) and cr is killed. Encoded as
// D1 /0 with no immediate byte.
9743 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9744   effect(KILL cr, USE_DEF dst);
9745 
9746   format %{ "roll    $dst" %}
9747   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9748   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9749   ins_pipe(ialu_reg);
9750 %}
9751 
// rolI_rReg_imm8: int rotate-left by an 8-bit immediate, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rolI_rReg_i8). dst is USE_DEF, shift is USE, cr is killed. Encoded by
// reg_opc_imm(dst, shift) with opcode C1 /0 (enc_class defined elsewhere).
9752 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9753   effect(USE_DEF dst, USE shift, KILL cr);
9754 
9755   format %{ "roll    $dst, $shift" %}
9756   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9757   ins_encode( reg_opc_imm(dst, shift) );
9758   ins_pipe(ialu_reg);
9759 %}
9760 
// rolI_rReg_CL: int rotate-left by a variable count, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rolI_rReg_Var_C0 / rolI_rReg_Var_C32). dst is drawn from no_rcx_RegI, so
// it is a different register class from the rcx_RegI count operand.
// dst is USE_DEF, shift is USE, cr is killed. Encoded as D3 /0.
9761 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9762 %{
9763   effect(USE_DEF dst, USE shift, KILL cr);
9764 
9765   format %{ "roll    $dst, $shift" %}
9766   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9767   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9768   ins_pipe(ialu_reg_reg);
9769 %}
9770 // end of ROL expand
9771 
9772 // Rotate Left by one
// Matches the int idiom (dst << lshift) | (dst >>> rshift) where lshift is
// an immI1 and rshift an immI_M1 operand (presumably the constants 1 and
// -1; operands are defined elsewhere), and expands to rolI_rReg_imm1.
9773 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9774 %{
9775   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9776 
9777   expand %{
9778     rolI_rReg_imm1(dst, cr);
9779   %}
9780 %}
9781 
9782 // Rotate Left by 8-bit immediate
// Matches the int idiom (dst << lshift) | (dst >>> rshift) with two immI8
// counts. The predicate reads both shift constants from the Or node's
// inputs and accepts the match only when their sum is a multiple of 32
// (low five bits zero), so the two shifts together form a rotate.
// Expands to rolI_rReg_imm8 using the left-shift count.
9783 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9784 %{
9785   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9786   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9787 
9788   expand %{
9789     rolI_rReg_imm8(dst, lshift, cr);
9790   %}
9791 %}
9792 
9793 // Rotate Left by variable
// Matches the int idiom (dst << shift) | (dst >>> (zero - shift)), where
// zero is an immI0 operand, and expands to rolI_rReg_CL. dst is taken from
// no_rcx_RegI because the count occupies the rcx_RegI operand.
9794 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9795 %{
9796   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9797 
9798   expand %{
9799     rolI_rReg_CL(dst, shift, cr);
9800   %}
9801 %}
9802 
9803 // Rotate Left by variable
// Matches the int idiom (dst << shift) | (dst >>> (c32 - shift)), where
// c32 is an immI_32 operand, and expands to rolI_rReg_CL. dst is taken from
// no_rcx_RegI because the count occupies the rcx_RegI operand.
9804 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9805 %{
9806   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9807 
9808   expand %{
9809     rolI_rReg_CL(dst, shift, cr);
9810   %}
9811 %}
9812 
9813 // ROR expand
// rorI_rReg_imm1: int rotate-right by one, register form. It has no match
// rule and is only instantiated from expand blocks (see rorI_rReg_i1).
// dst is USE_DEF and cr is killed. Encoded as D1 /1 with no immediate byte.
9814 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9815 %{
9816   effect(USE_DEF dst, KILL cr);
9817 
9818   format %{ "rorl    $dst" %}
9819   opcode(0xD1, 0x1); /* D1 /1 */
9820   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9821   ins_pipe(ialu_reg);
9822 %}
9823 
// rorI_rReg_imm8: int rotate-right by an 8-bit immediate, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rorI_rReg_i8). dst is USE_DEF, shift is USE, cr is killed. Encoded by
// reg_opc_imm(dst, shift) with opcode C1 /1 (enc_class defined elsewhere).
9824 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9825 %{
9826   effect(USE_DEF dst, USE shift, KILL cr);
9827 
9828   format %{ "rorl    $dst, $shift" %}
9829   opcode(0xC1, 0x1); /* C1 /1 ib */
9830   ins_encode(reg_opc_imm(dst, shift));
9831   ins_pipe(ialu_reg);
9832 %}
9833 
// rorI_rReg_CL: int rotate-right by a variable count, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rorI_rReg_Var_C0 / rorI_rReg_Var_C32). dst is drawn from no_rcx_RegI, so
// it is a different register class from the rcx_RegI count operand.
// dst is USE_DEF, shift is USE, cr is killed. Encoded as D3 /1.
9834 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9835 %{
9836   effect(USE_DEF dst, USE shift, KILL cr);
9837 
9838   format %{ "rorl    $dst, $shift" %}
9839   opcode(0xD3, 0x1); /* D3 /1 */
9840   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9841   ins_pipe(ialu_reg_reg);
9842 %}
9843 // end of ROR expand
9844 
9845 // Rotate Right by one
// Matches the int idiom (dst >>> rshift) | (dst << lshift) where rshift is
// an immI1 and lshift an immI_M1 operand (presumably the constants 1 and
// -1; operands are defined elsewhere), and expands to rorI_rReg_imm1.
9846 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9847 %{
9848   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9849 
9850   expand %{
9851     rorI_rReg_imm1(dst, cr);
9852   %}
9853 %}
9854 
9855 // Rotate Right by 8-bit immediate
// Matches the int idiom (dst >>> rshift) | (dst << lshift) with two immI8
// counts. The predicate reads both shift constants from the Or node's
// inputs and accepts the match only when their sum is a multiple of 32
// (low five bits zero), so the two shifts together form a rotate.
// Expands to rorI_rReg_imm8 using the right-shift count.
9856 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9857 %{
9858   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9859   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9860 
9861   expand %{
9862     rorI_rReg_imm8(dst, rshift, cr);
9863   %}
9864 %}
9865 
9866 // Rotate Right by variable
// Matches the int idiom (dst >>> shift) | (dst << (zero - shift)), where
// zero is an immI0 operand, and expands to rorI_rReg_CL. dst is taken from
// no_rcx_RegI because the count occupies the rcx_RegI operand.
9867 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9868 %{
9869   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9870 
9871   expand %{
9872     rorI_rReg_CL(dst, shift, cr);
9873   %}
9874 %}
9875 
9876 // Rotate Right by variable
// Matches the int idiom (dst >>> shift) | (dst << (c32 - shift)), where
// c32 is an immI_32 operand, and expands to rorI_rReg_CL. dst is taken from
// no_rcx_RegI because the count occupies the rcx_RegI operand.
9877 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9878 %{
9879   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9880 
9881   expand %{
9882     rorI_rReg_CL(dst, shift, cr);
9883   %}
9884 %}
9885 
9886 // for long rotate
9887 // ROL expand
// rolL_rReg_imm1: long rotate-left by one, register form. It has no match
// rule and is only instantiated from expand blocks (see rolL_rReg_i1).
// dst is USE_DEF and cr is killed. Encoded as D1 /0 behind the wide
// (64-bit) prefix enc_class REX_reg_wide, with no immediate byte.
9888 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9889   effect(USE_DEF dst, KILL cr);
9890 
9891   format %{ "rolq    $dst" %}
9892   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9893   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9894   ins_pipe(ialu_reg);
9895 %}
9896 
// rolL_rReg_imm8: long rotate-left by an 8-bit immediate, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rolL_rReg_i8). dst is USE_DEF, shift is USE, cr is killed. Encoded by
// reg_opc_imm_wide(dst, shift) with opcode C1 /0 (enc_class defined
// elsewhere).
9897 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9898   effect(USE_DEF dst, USE shift, KILL cr);
9899 
9900   format %{ "rolq    $dst, $shift" %}
9901   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9902   ins_encode( reg_opc_imm_wide(dst, shift) );
9903   ins_pipe(ialu_reg);
9904 %}
9905 
// rolL_rReg_CL: long rotate-left by a variable count, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rolL_rReg_Var_C0 / rolL_rReg_Var_C64). dst is drawn from no_rcx_RegL, so
// it is a different register class from the rcx_RegI count operand.
// dst is USE_DEF, shift is USE, cr is killed. Encoded as wide D3 /0.
9906 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9907 %{
9908   effect(USE_DEF dst, USE shift, KILL cr);
9909 
9910   format %{ "rolq    $dst, $shift" %}
9911   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9912   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9913   ins_pipe(ialu_reg_reg);
9914 %}
9915 // end of ROL expand
9916 
9917 // Rotate Left by one
// Matches the long idiom (dst << lshift) | (dst >>> rshift) where lshift is
// an immI1 and rshift an immI_M1 operand (presumably the constants 1 and
// -1; operands are defined elsewhere), and expands to rolL_rReg_imm1.
9918 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9919 %{
9920   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9921 
9922   expand %{
9923     rolL_rReg_imm1(dst, cr);
9924   %}
9925 %}
9926 
9927 // Rotate Left by 8-bit immediate
// Matches the long idiom (dst << lshift) | (dst >>> rshift) with two immI8
// counts. The predicate reads both shift constants from the Or node's
// inputs and accepts the match only when their sum is a multiple of 64
// (low six bits zero), so the two shifts together form a rotate.
// Expands to rolL_rReg_imm8 using the left-shift count.
9928 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9929 %{
9930   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9931   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9932 
9933   expand %{
9934     rolL_rReg_imm8(dst, lshift, cr);
9935   %}
9936 %}
9937 
9938 // Rotate Left by variable
// Matches the long idiom (dst << shift) | (dst >>> (zero - shift)), where
// zero is an immI0 operand, and expands to rolL_rReg_CL. dst is taken from
// no_rcx_RegL because the count occupies the rcx_RegI operand.
9939 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9940 %{
9941   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9942 
9943   expand %{
9944     rolL_rReg_CL(dst, shift, cr);
9945   %}
9946 %}
9947 
9948 // Rotate Left by variable
// Matches the long idiom (dst << shift) | (dst >>> (c64 - shift)), where
// c64 is an immI_64 operand, and expands to rolL_rReg_CL. dst is taken from
// no_rcx_RegL because the count occupies the rcx_RegI operand.
9949 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9950 %{
9951   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9952 
9953   expand %{
9954     rolL_rReg_CL(dst, shift, cr);
9955   %}
9956 %}
9957 
9958 // ROR expand
// rorL_rReg_imm1: long rotate-right by one, register form. It has no match
// rule and is only instantiated from expand blocks (see rorL_rReg_i1).
// dst is USE_DEF and cr is killed. Encoded as wide D1 /1 with no
// immediate byte.
9959 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9960 %{
9961   effect(USE_DEF dst, KILL cr);
9962 
9963   format %{ "rorq    $dst" %}
9964   opcode(0xD1, 0x1); /* D1 /1 */
9965   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9966   ins_pipe(ialu_reg);
9967 %}
9968 
// rorL_rReg_imm8: long rotate-right by an 8-bit immediate, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rorL_rReg_i8). dst is USE_DEF, shift is USE, cr is killed. Encoded by
// reg_opc_imm_wide(dst, shift) with opcode C1 /1 (enc_class defined
// elsewhere).
9969 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9970 %{
9971   effect(USE_DEF dst, USE shift, KILL cr);
9972 
9973   format %{ "rorq    $dst, $shift" %}
9974   opcode(0xC1, 0x1); /* C1 /1 ib */
9975   ins_encode(reg_opc_imm_wide(dst, shift));
9976   ins_pipe(ialu_reg);
9977 %}
9978 
// rorL_rReg_CL: long rotate-right by a variable count, register form.
// It has no match rule and is only instantiated from expand blocks (see
// rorL_rReg_Var_C0 / rorL_rReg_Var_C64). dst is drawn from no_rcx_RegL, so
// it is a different register class from the rcx_RegI count operand.
// dst is USE_DEF, shift is USE, cr is killed. Encoded as wide D3 /1.
9979 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9980 %{
9981   effect(USE_DEF dst, USE shift, KILL cr);
9982 
9983   format %{ "rorq    $dst, $shift" %}
9984   opcode(0xD3, 0x1); /* D3 /1 */
9985   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9986   ins_pipe(ialu_reg_reg);
9987 %}
9988 // end of ROR expand
9989 
9990 // Rotate Right by one
// Matches the long idiom (dst >>> rshift) | (dst << lshift) where rshift is
// an immI1 and lshift an immI_M1 operand (presumably the constants 1 and
// -1; operands are defined elsewhere), and expands to rorL_rReg_imm1.
9991 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9992 %{
9993   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9994 
9995   expand %{
9996     rorL_rReg_imm1(dst, cr);
9997   %}
9998 %}
9999 
10000 // Rotate Right by 8-bit immediate
// Matches the long idiom (dst >>> rshift) | (dst << lshift) with two immI8
// counts. The predicate reads both shift constants from the Or node's
// inputs and accepts the match only when their sum is a multiple of 64
// (low six bits zero), so the two shifts together form a rotate.
// Expands to rorL_rReg_imm8 using the right-shift count.
10001 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
10002 %{
10003   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
10004   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
10005 
10006   expand %{
10007     rorL_rReg_imm8(dst, rshift, cr);
10008   %}
10009 %}
10010 
10011 // Rotate Right by variable
// Matches the long idiom (dst >>> shift) | (dst << (zero - shift)), where
// zero is an immI0 operand, and expands to rorL_rReg_CL. dst is taken from
// no_rcx_RegL because the count occupies the rcx_RegI operand.
10012 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10013 %{
10014   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
10015 
10016   expand %{
10017     rorL_rReg_CL(dst, shift, cr);
10018   %}
10019 %}
10020 
10021 // Rotate Right by variable
// Matches the long idiom (dst >>> shift) | (dst << (c64 - shift)), where
// c64 is an immI_64 operand, and expands to rorL_rReg_CL. dst is taken from
// no_rcx_RegL because the count occupies the rcx_RegI operand.
10022 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
10023 %{
10024   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
10025 
10026   expand %{
10027     rorL_rReg_CL(dst, shift, cr);
10028   %}
10029 %}
10030 
10031 // Logical Instructions
10032 
10033 // Integer Logical Instructions
10034 
10035 // And Instructions
10036 // And Register with Register
// Matches (AndI dst src) in place on $dst with both operands in int
// registers; declares cr killed. Encoded as opcode 0x23 followed by the
// reg_reg(dst, src) ModRM byte.
10037 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10038 %{
10039   match(Set dst (AndI dst src));
10040   effect(KILL cr);
10041 
10042   format %{ "andl    $dst, $src\t# int" %}
10043   opcode(0x23);
10044   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10045   ins_pipe(ialu_reg_reg);
10046 %}
10047 
10048 // And Register with Immediate 255
// Matches (AndI dst src) where src is an immI_255 operand and implements
// it as a zero-extending byte move of $dst onto itself (movzbl, 0F B6)
// instead of an andl. No flags operand is declared for this instruct.
10049 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
10050 %{
10051   match(Set dst (AndI dst src));
10052 
10053   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
10054   opcode(0x0F, 0xB6);
10055   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10056   ins_pipe(ialu_reg);
10057 %}
10058 
10059 // And Register with Immediate 255 and promote to long
// Matches (ConvI2L (AndI src mask)) with an immI_255 mask and folds the
// mask and the int-to-long conversion into one movzbl (0F B6) from $src
// into the long register $dst. No flags operand is declared.
10060 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
10061 %{
10062   match(Set dst (ConvI2L (AndI src mask)));
10063 
10064   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
10065   opcode(0x0F, 0xB6);
10066   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10067   ins_pipe(ialu_reg);
10068 %}
10069 
10070 // And Register with Immediate 65535
// Matches (AndI dst src) where src is an immI_65535 operand and implements
// it as a zero-extending 16-bit move of $dst onto itself (movzwl, 0F B7)
// instead of an andl. No flags operand is declared for this instruct.
10071 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
10072 %{
10073   match(Set dst (AndI dst src));
10074 
10075   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
10076   opcode(0x0F, 0xB7);
10077   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10078   ins_pipe(ialu_reg);
10079 %}
10080 
10081 // And Register with Immediate 65535 and promote to long
// Matches (ConvI2L (AndI src mask)) with an immI_65535 mask and folds the
// mask and the int-to-long conversion into one movzwl (0F B7) from $src
// into the long register $dst. No flags operand is declared.
10082 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
10083 %{
10084   match(Set dst (ConvI2L (AndI src mask)));
10085 
10086   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
10087   opcode(0x0F, 0xB7);
10088   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
10089   ins_pipe(ialu_reg);
10090 %}
10091 
10092 // And Register with Immediate
// Matches (AndI dst src) in place with a general immI constant and kills
// cr. OpcSErm / Con8or32 are enc_classes defined elsewhere; presumably they
// choose between the sign-extended 8-bit and the 32-bit immediate form of
// opcode 81 /4 depending on the constant -- confirm against their bodies.
10093 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10094 %{
10095   match(Set dst (AndI dst src));
10096   effect(KILL cr);
10097 
10098   format %{ "andl    $dst, $src\t# int" %}
10099   opcode(0x81, 0x04); /* Opcode 81 /4 */
10100   ins_encode(OpcSErm(dst, src), Con8or32(src));
10101   ins_pipe(ialu_reg);
10102 %}
10103 
10104 // And Register with Memory
// Matches (AndI dst (LoadI src)), folding the load into the andl so the
// second operand is read directly from memory. Kills cr. The ins_cost of
// 125 is this rule's matcher cost for the load-folding form.
10105 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10106 %{
10107   match(Set dst (AndI dst (LoadI src)));
10108   effect(KILL cr);
10109 
10110   ins_cost(125);
10111   format %{ "andl    $dst, $src\t# int" %}
10112   opcode(0x23);
10113   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10114   ins_pipe(ialu_reg_mem);
10115 %}
10116 
10117 // And Memory with Register
// Matches a StoreI back to $dst of ((LoadI $dst) & $src), i.e. a
// read-modify-write of one memory operand. Kills cr. Note the operand order
// in the encoding: the register $src goes first and the memory operand
// $dst second, matching opcode 21 /r.
10118 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10119 %{
10120   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
10121   effect(KILL cr);
10122 
10123   ins_cost(150);
10124   format %{ "andl    $dst, $src\t# int" %}
10125   opcode(0x21); /* Opcode 21 /r */
10126   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10127   ins_pipe(ialu_mem_reg);
10128 %}
10129 
10130 // And Memory with Immediate
// Matches a StoreI back to $dst of ((LoadI $dst) & $src) with an immI
// constant (read-modify-write of one memory operand). Kills cr.
// OpcSE / Con8or32 are enc_classes defined elsewhere; presumably they pick
// the 8-bit or 32-bit immediate form of 81 /4 -- confirm against them.
10131 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
10132 %{
10133   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
10134   effect(KILL cr);
10135 
10136   ins_cost(125);
10137   format %{ "andl    $dst, $src\t# int" %}
10138   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10139   ins_encode(REX_mem(dst), OpcSE(src),
10140              RM_opc_mem(secondary, dst), Con8or32(src));
10141   ins_pipe(ialu_mem_imm);
10142 %}
10143 
10144 // Or Instructions
10145 // Or Register with Register
// Matches (OrI dst src) in place on $dst with both operands in int
// registers; declares cr killed. Encoded as opcode 0x0B followed by the
// reg_reg(dst, src) ModRM byte.
10146 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10147 %{
10148   match(Set dst (OrI dst src));
10149   effect(KILL cr);
10150 
10151   format %{ "orl     $dst, $src\t# int" %}
10152   opcode(0x0B);
10153   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10154   ins_pipe(ialu_reg_reg);
10155 %}
10156 
10157 // Or Register with Immediate
// Matches (OrI dst src) in place with a general immI constant and kills
// cr. OpcSErm / Con8or32 are enc_classes defined elsewhere; presumably they
// choose between the 8-bit and 32-bit immediate form of opcode 81 /1
// depending on the constant -- confirm against their bodies.
10158 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10159 %{
10160   match(Set dst (OrI dst src));
10161   effect(KILL cr);
10162 
10163   format %{ "orl     $dst, $src\t# int" %}
10164   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10165   ins_encode(OpcSErm(dst, src), Con8or32(src));
10166   ins_pipe(ialu_reg);
10167 %}
10168 
10169 // Or Register with Memory
// Matches (OrI dst (LoadI src)), folding the load into the orl so the
// second operand is read directly from memory. Kills cr. The ins_cost of
// 125 is this rule's matcher cost for the load-folding form.
10170 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10171 %{
10172   match(Set dst (OrI dst (LoadI src)));
10173   effect(KILL cr);
10174 
10175   ins_cost(125);
10176   format %{ "orl     $dst, $src\t# int" %}
10177   opcode(0x0B);
10178   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10179   ins_pipe(ialu_reg_mem);
10180 %}
10181 
10182 // Or Memory with Register
// Matches a StoreI back to $dst of ((LoadI $dst) | $src), i.e. a
// read-modify-write of one memory operand. Kills cr. Note the operand order
// in the encoding: the register $src goes first and the memory operand
// $dst second, matching opcode 09 /r.
10183 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10184 %{
10185   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
10186   effect(KILL cr);
10187 
10188   ins_cost(150);
10189   format %{ "orl     $dst, $src\t# int" %}
10190   opcode(0x09); /* Opcode 09 /r */
10191   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10192   ins_pipe(ialu_mem_reg);
10193 %}
10194 
10195 // Or Memory with Immediate
// Matches a StoreI back to $dst of ((LoadI $dst) | $src) with an immI
// constant (read-modify-write of one memory operand). Kills cr.
// OpcSE / Con8or32 are enc_classes defined elsewhere; presumably they pick
// the 8-bit or 32-bit immediate form of 81 /1 -- confirm against them.
10196 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10197 %{
10198   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
10199   effect(KILL cr);
10200 
10201   ins_cost(125);
10202   format %{ "orl     $dst, $src\t# int" %}
10203   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10204   ins_encode(REX_mem(dst), OpcSE(src),
10205              RM_opc_mem(secondary, dst), Con8or32(src));
10206   ins_pipe(ialu_mem_imm);
10207 %}
10208 
10209 // Xor Instructions
10210 // Xor Register with Register
// Matches (XorI dst src) in place on $dst with both operands in int
// registers; declares cr killed. Encoded as opcode 0x33 followed by the
// reg_reg(dst, src) ModRM byte.
10211 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10212 %{
10213   match(Set dst (XorI dst src));
10214   effect(KILL cr);
10215 
10216   format %{ "xorl    $dst, $src\t# int" %}
10217   opcode(0x33);
10218   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10219   ins_pipe(ialu_reg_reg);
10220 %}
10221 
10222 // Xor Register with Immediate -1
// Matches (XorI dst imm) where imm is an immI_M1 operand and implements it
// as a bitwise NOT of $dst, emitted through the macro assembler (notl).
// No flags operand is declared: the x86 NOT instruction leaves the flags
// unchanged. The format text names the instruction actually emitted (notl),
// mirroring the "notq" text of the long variant xorL_rReg_im1.
10223 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10224   match(Set dst (XorI dst imm));
10225 
10226   format %{ "notl   $dst" %}
10227   ins_encode %{
10228      __ notl($dst$$Register);
10229   %}
10230   ins_pipe(ialu_reg);
10231 %}
10232 
10233 // Xor Register with Immediate
// Matches (XorI dst src) in place with a general immI constant and kills
// cr. OpcSErm / Con8or32 are enc_classes defined elsewhere; presumably they
// choose between the 8-bit and 32-bit immediate form of opcode 81 /6
// depending on the constant -- confirm against their bodies.
10234 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10235 %{
10236   match(Set dst (XorI dst src));
10237   effect(KILL cr);
10238 
10239   format %{ "xorl    $dst, $src\t# int" %}
10240   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10241   ins_encode(OpcSErm(dst, src), Con8or32(src));
10242   ins_pipe(ialu_reg);
10243 %}
10244 
10245 // Xor Register with Memory
// Matches (XorI dst (LoadI src)), folding the load into the xorl so the
// second operand is read directly from memory. Kills cr. The ins_cost of
// 125 is this rule's matcher cost for the load-folding form.
10246 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10247 %{
10248   match(Set dst (XorI dst (LoadI src)));
10249   effect(KILL cr);
10250 
10251   ins_cost(125);
10252   format %{ "xorl    $dst, $src\t# int" %}
10253   opcode(0x33);
10254   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10255   ins_pipe(ialu_reg_mem);
10256 %}
10257 
10258 // Xor Memory with Register
// Matches a StoreI back to $dst of ((LoadI $dst) ^ $src), i.e. a
// read-modify-write of one memory operand. Kills cr. Note the operand order
// in the encoding: the register $src goes first and the memory operand
// $dst second, matching opcode 31 /r.
10259 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10260 %{
10261   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10262   effect(KILL cr);
10263 
10264   ins_cost(150);
10265   format %{ "xorl    $dst, $src\t# int" %}
10266   opcode(0x31); /* Opcode 31 /r */
10267   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10268   ins_pipe(ialu_mem_reg);
10269 %}
10270 
10271 // Xor Memory with Immediate
// Matches a StoreI back to $dst of ((LoadI $dst) ^ $src) with an immI
// constant (read-modify-write of one memory operand). Kills cr.
// OpcSE / Con8or32 are enc_classes defined elsewhere; presumably they pick
// the 8-bit or 32-bit immediate form of 81 /6 -- confirm against them.
10272 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10273 %{
10274   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10275   effect(KILL cr);
10276 
10277   ins_cost(125);
10278   format %{ "xorl    $dst, $src\t# int" %}
10279   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10280   ins_encode(REX_mem(dst), OpcSE(src),
10281              RM_opc_mem(secondary, dst), Con8or32(src));
10282   ins_pipe(ialu_mem_imm);
10283 %}
10284 
10285 
10286 // Long Logical Instructions
10287 
10288 // And Instructions
10289 // And Register with Register
// Long variant: matches (AndL dst src) in place on $dst with both operands
// in long registers; declares cr killed. Encoded as opcode 0x23 behind the
// wide (64-bit) prefix enc_class REX_reg_reg_wide.
10290 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10291 %{
10292   match(Set dst (AndL dst src));
10293   effect(KILL cr);
10294 
10295   format %{ "andq    $dst, $src\t# long" %}
10296   opcode(0x23);
10297   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10298   ins_pipe(ialu_reg_reg);
10299 %}
10300 
10301 // And Register with Immediate 255
// Long variant: matches (AndL dst src) where src is an immL_255 operand and
// implements it as a zero-extending byte move of $dst onto itself
// (movzbq, wide 0F B6) instead of an andq. No flags operand is declared.
10302 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10303 %{
10304   match(Set dst (AndL dst src));
10305 
10306   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10307   opcode(0x0F, 0xB6);
10308   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10309   ins_pipe(ialu_reg);
10310 %}
10311 
10312 // And Register with Immediate 65535
// Long variant: matches (AndL dst src) where src is an immL_65535 operand
// and implements it as a zero-extending 16-bit move of $dst onto itself
// (movzwq, wide 0F B7) instead of an andq. No flags operand is declared.
10313 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10314 %{
10315   match(Set dst (AndL dst src));
10316 
10317   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10318   opcode(0x0F, 0xB7);
10319   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10320   ins_pipe(ialu_reg);
10321 %}
10322 
10323 // And Register with Immediate
// Long variant: matches (AndL dst src) in place where the constant is an
// immL32 operand (presumably a long that fits a sign-extended 32-bit
// immediate; operand defined elsewhere). Kills cr. Encoded through
// OpcSErm_wide / Con8or32 with opcode 81 /4.
10324 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10325 %{
10326   match(Set dst (AndL dst src));
10327   effect(KILL cr);
10328 
10329   format %{ "andq    $dst, $src\t# long" %}
10330   opcode(0x81, 0x04); /* Opcode 81 /4 */
10331   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10332   ins_pipe(ialu_reg);
10333 %}
10334 
10335 // And Register with Memory
// Long variant: matches (AndL dst (LoadL src)), folding the load into the
// andq so the second operand is read directly from memory. Kills cr.
// The ins_cost of 125 is this rule's matcher cost.
10336 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10337 %{
10338   match(Set dst (AndL dst (LoadL src)));
10339   effect(KILL cr);
10340 
10341   ins_cost(125);
10342   format %{ "andq    $dst, $src\t# long" %}
10343   opcode(0x23);
10344   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10345   ins_pipe(ialu_reg_mem);
10346 %}
10347 
10348 // And Memory with Register
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) & $src),
// i.e. a read-modify-write of one memory operand. Kills cr. In the encoding
// the register $src goes first and the memory operand $dst second,
// matching opcode 21 /r.
10349 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10350 %{
10351   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10352   effect(KILL cr);
10353 
10354   ins_cost(150);
10355   format %{ "andq    $dst, $src\t# long" %}
10356   opcode(0x21); /* Opcode 21 /r */
10357   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10358   ins_pipe(ialu_mem_reg);
10359 %}
10360 
10361 // And Memory with Immediate
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) & $src)
// with an immL32 constant (read-modify-write of one memory operand).
// Kills cr. OpcSE / Con8or32 are enc_classes defined elsewhere; presumably
// they pick the 8-bit or 32-bit immediate form of 81 /4 -- confirm.
10362 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10363 %{
10364   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10365   effect(KILL cr);
10366 
10367   ins_cost(125);
10368   format %{ "andq    $dst, $src\t# long" %}
10369   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10370   ins_encode(REX_mem_wide(dst), OpcSE(src),
10371              RM_opc_mem(secondary, dst), Con8or32(src));
10372   ins_pipe(ialu_mem_imm);
10373 %}
10374 
10375 // Or Instructions
10376 // Or Register with Register
// Long variant: matches (OrL dst src) in place on $dst with both operands
// in long registers; declares cr killed. Encoded as opcode 0x0B behind the
// wide (64-bit) prefix enc_class REX_reg_reg_wide.
10377 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10378 %{
10379   match(Set dst (OrL dst src));
10380   effect(KILL cr);
10381 
10382   format %{ "orq     $dst, $src\t# long" %}
10383   opcode(0x0B);
10384   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10385   ins_pipe(ialu_reg_reg);
10386 %}
10387 
10388 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches (OrL dst (CastP2X src)): ORs a pointer, reinterpreted as a long,
// into $dst without a separate cast instruction. Kills cr. The encoding is
// the same wide 0x0B register-register form used by orL_rReg.
10389 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10390   match(Set dst (OrL dst (CastP2X src)));
10391   effect(KILL cr);
10392 
10393   format %{ "orq     $dst, $src\t# long" %}
10394   opcode(0x0B);
10395   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10396   ins_pipe(ialu_reg_reg);
10397 %}
10398 
10399 
10400 // Or Register with Immediate
// Long variant: matches (OrL dst src) in place where the constant is an
// immL32 operand (presumably a long that fits a sign-extended 32-bit
// immediate; operand defined elsewhere). Kills cr. Encoded through
// OpcSErm_wide / Con8or32 with opcode 81 /1.
10401 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10402 %{
10403   match(Set dst (OrL dst src));
10404   effect(KILL cr);
10405 
10406   format %{ "orq     $dst, $src\t# long" %}
10407   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10408   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10409   ins_pipe(ialu_reg);
10410 %}
10411 
10412 // Or Register with Memory
// Long variant: matches (OrL dst (LoadL src)), folding the load into the
// orq so the second operand is read directly from memory. Kills cr.
// The ins_cost of 125 is this rule's matcher cost.
10413 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10414 %{
10415   match(Set dst (OrL dst (LoadL src)));
10416   effect(KILL cr);
10417 
10418   ins_cost(125);
10419   format %{ "orq     $dst, $src\t# long" %}
10420   opcode(0x0B);
10421   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10422   ins_pipe(ialu_reg_mem);
10423 %}
10424 
10425 // Or Memory with Register
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) | $src),
// i.e. a read-modify-write of one memory operand. Kills cr. In the encoding
// the register $src goes first and the memory operand $dst second,
// matching opcode 09 /r.
10426 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10427 %{
10428   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10429   effect(KILL cr);
10430 
10431   ins_cost(150);
10432   format %{ "orq     $dst, $src\t# long" %}
10433   opcode(0x09); /* Opcode 09 /r */
10434   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10435   ins_pipe(ialu_mem_reg);
10436 %}
10437 
10438 // Or Memory with Immediate
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) | $src)
// with an immL32 constant (read-modify-write of one memory operand).
// Kills cr. OpcSE / Con8or32 are enc_classes defined elsewhere; presumably
// they pick the 8-bit or 32-bit immediate form of 81 /1 -- confirm.
10439 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10440 %{
10441   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10442   effect(KILL cr);
10443 
10444   ins_cost(125);
10445   format %{ "orq     $dst, $src\t# long" %}
10446   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10447   ins_encode(REX_mem_wide(dst), OpcSE(src),
10448              RM_opc_mem(secondary, dst), Con8or32(src));
10449   ins_pipe(ialu_mem_imm);
10450 %}
10451 
10452 // Xor Instructions
10453 // Xor Register with Register
// Long variant: matches (XorL dst src) in place on $dst with both operands
// in long registers; declares cr killed. Encoded as opcode 0x33 behind the
// wide (64-bit) prefix enc_class REX_reg_reg_wide.
10454 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10455 %{
10456   match(Set dst (XorL dst src));
10457   effect(KILL cr);
10458 
10459   format %{ "xorq    $dst, $src\t# long" %}
10460   opcode(0x33);
10461   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10462   ins_pipe(ialu_reg_reg);
10463 %}
10464 
10465 // Xor Register with Immediate -1
// Long variant: matches (XorL dst imm) where imm is an immL_M1 operand and
// implements it as a bitwise NOT of $dst, emitted through the macro
// assembler (notq). No flags operand is declared: the x86 NOT instruction
// leaves the flags unchanged.
10466 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10467   match(Set dst (XorL dst imm));  
10468 
10469   format %{ "notq   $dst" %}  
10470   ins_encode %{
10471      __ notq($dst$$Register);
10472   %}
10473   ins_pipe(ialu_reg);
10474 %}
10475 
10476 // Xor Register with Immediate
// Long variant: matches (XorL dst src) in place where the constant is an
// immL32 operand (presumably a long that fits a sign-extended 32-bit
// immediate; operand defined elsewhere). Kills cr. Encoded through
// OpcSErm_wide / Con8or32 with opcode 81 /6.
10477 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10478 %{
10479   match(Set dst (XorL dst src));
10480   effect(KILL cr);
10481 
10482   format %{ "xorq    $dst, $src\t# long" %}
10483   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10484   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10485   ins_pipe(ialu_reg);
10486 %}
10487 
10488 // Xor Register with Memory
// Long variant: matches (XorL dst (LoadL src)), folding the load into the
// xorq so the second operand is read directly from memory. Kills cr.
// The ins_cost of 125 is this rule's matcher cost.
10489 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10490 %{
10491   match(Set dst (XorL dst (LoadL src)));
10492   effect(KILL cr);
10493 
10494   ins_cost(125);
10495   format %{ "xorq    $dst, $src\t# long" %}
10496   opcode(0x33);
10497   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10498   ins_pipe(ialu_reg_mem);
10499 %}
10500 
10501 // Xor Memory with Register
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) ^ $src),
// i.e. a read-modify-write of one memory operand. Kills cr. In the encoding
// the register $src goes first and the memory operand $dst second,
// matching opcode 31 /r.
10502 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10503 %{
10504   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10505   effect(KILL cr);
10506 
10507   ins_cost(150);
10508   format %{ "xorq    $dst, $src\t# long" %}
10509   opcode(0x31); /* Opcode 31 /r */
10510   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10511   ins_pipe(ialu_mem_reg);
10512 %}
10513 
10514 // Xor Memory with Immediate
// Long variant: matches a StoreL back to $dst of ((LoadL $dst) ^ $src)
// with an immL32 constant (read-modify-write of one memory operand).
// Kills cr. OpcSE / Con8or32 are enc_classes defined elsewhere; presumably
// they pick the 8-bit or 32-bit immediate form of 81 /6 -- confirm.
10515 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10516 %{
10517   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10518   effect(KILL cr);
10519 
10520   ins_cost(125);
10521   format %{ "xorq    $dst, $src\t# long" %}
10522   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10523   ins_encode(REX_mem_wide(dst), OpcSE(src),
10524              RM_opc_mem(secondary, dst), Con8or32(src));
10525   ins_pipe(ialu_mem_imm);
10526 %}
10527 
10528 // Convert Int to Boolean
// Matches (Conv2B src) for an int source. Emits a three-instruction
// sequence: testl $src,$src sets the flags, setnz writes the "non-zero"
// condition into the low byte of $dst, and movzbl zero-extends that byte,
// so $dst ends up 0 or 1. The testl is why cr is declared killed.
// Scheduled with the catch-all pipe_slow class (marked XXX by the author).
10529 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10530 %{
10531   match(Set dst (Conv2B src));
10532   effect(KILL cr);
10533 
10534   format %{ "testl   $src, $src\t# ci2b\n\t"
10535             "setnz   $dst\n\t"
10536             "movzbl  $dst, $dst" %}
10537   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10538              setNZ_reg(dst),
10539              REX_reg_breg(dst, dst), // movzbl
10540              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10541   ins_pipe(pipe_slow); // XXX
10542 %}
10543 
// Convert Pointer to Boolean. Same sequence as the int form, but the test is
// 64 bits wide (REX_reg_reg_wide / "testq"): dst becomes 0 for a zero pointer
// and 1 otherwise, per the format listing. The flags register is killed.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src),
             opc_reg_reg(0x85, src, src), // testq
             setNZ_reg(dst),
             REX_reg_breg(dst, dst),      // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10559 
// CmpLTMask of two int registers. Per the format listing: compare p with q,
// set dst to 1 when "less than" holds (else 0), zero-extend, then negate,
// which leaves dst as -1 or 0. The flags register is killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(REX_reg_reg(p, q),
             opc_reg_reg(0x3B, p, q), // cmpl
             setLT_reg(dst),
             REX_reg_breg(dst, dst),  // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst),
             neg_reg(dst));
  ins_pipe(pipe_slow);
%}
10577 
// CmpLTMask against the constant zero, done in place on dst with a single
// arithmetic right shift by 31 (immediate 0x1F), which replicates the sign
// bit across the register. The flags register is killed.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode(reg_opc_imm(dst, 0x1F));
  ins_pipe(ialu_reg);
%}
10589 
10590 
// Conditional add fused with CmpLTMask:
//   p = ((p < q ? -1 : 0) & y) + (p - q)
// Per the format listing this is sub / sbb / and / add, with tmp holding the
// mask produced by sbb. tmp is a TEMP and the flags register is killed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_cost(400); // XXX
  ins_encode(enc_cmpLTP(p, q, y, tmp));
  ins_pipe(pipe_cmplt);
%}
10606 
10607 /* If I enable this, I encourage spilling in the inner loop of compress.
10608 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10609 %{
10610   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10611   effect( TEMP tmp, KILL cr );
10612   ins_cost(400);
10613 
10614   format %{ "SUB    $p,$q\n\t"
10615             "SBB    RCX,RCX\n\t"
10616             "AND    RCX,$y\n\t"
10617             "ADD    $p,RCX" %}
10618   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10619 %}
10620 */
10621 
10622 //---------- FP Instructions------------------------------------------------
10623 
// Float compare of two registers into rFlagsRegU. Per the format listing,
// ucomiss is followed by a fixup that runs only when the parity flag is set
// (the "saw NaN" case): the flags are pushed, masked with 0xffffff2b on the
// stack and popped back. The trailing nop is the branch target.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10640 
// Float compare of two registers into rFlagsRegUCF: a bare ucomiss with no
// NaN fixup sequence after it.
//
// Costed at 100, matching the other fixup-free *_CF compare rules in this
// file (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_reg_CF, ...). It was previously
// 145, the same as the fixup-carrying cmpF_cc_reg, which did not reflect
// that this form emits a single instruction.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10651 
// Float compare of a register against a float loaded from memory, into
// rFlagsRegU. As in the register form, the format listing shows ucomiss
// followed by the parity-guarded pushfq / andq 0xffffff2b / popfq fixup.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10668 
// Float compare of a register against a float loaded from memory, into
// rFlagsRegUCF: ucomiss only, no fixup encoding is appended.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2" %}
  ins_cost(100);
  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10678 
// Float compare of a register against a float constant, into rFlagsRegU.
// The constant is encoded through load_immF with a reg/mem REX prefix, so
// the listing shows it in brackets as a memory operand, the same way
// cmpD_cc_imm and cmpF_imm print theirs (previously it was printed as a
// bare "$src2"). ucomiss is followed by the parity-guarded
// pushfq / andq 0xffffff2b / popfq fixup.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10695 
// Float compare of a register against a float constant, into rFlagsRegUCF:
// ucomiss only, no fixup. The constant is encoded through load_immF with a
// reg/mem REX prefix, so the listing shows it in brackets like
// cmpD_cc_immCF does (previously it was printed as a bare "$src2").
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, [$src2]" %}
  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
  ins_pipe(pipe_slow);
%}
10705 
// Double compare of two registers into rFlagsRegU. The 0x66 primary opcode
// byte is emitted ahead of the REX prefix. Per the format listing, ucomisd
// is followed by the parity-guarded pushfq / andq 0xffffff2b / popfq fixup.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10722 
// Double compare of two registers into rFlagsRegUCF: a bare ucomisd with no
// NaN fixup sequence after it. The listing text no longer carries the stray
// " test" suffix, which was not part of the emitted instruction.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10733 
// Double compare of a register against a double loaded from memory, into
// rFlagsRegU. ucomisd is followed by the parity-guarded
// pushfq / andq 0xffffff2b / popfq fixup shown in the format listing.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10750 
// Double compare of a register against a double loaded from memory, into
// rFlagsRegUCF: ucomisd only, no fixup encoding is appended.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2" %}
  ins_cost(100);
  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10760 
// Double compare of a register against a double constant, into rFlagsRegU.
// The constant goes through load_immD and is listed as a memory operand.
// ucomisd is followed by the parity-guarded
// pushfq / andq 0xffffff2b / popfq fixup.
instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2) %{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             load_immD(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10777 
// Double compare of a register against a double constant, into
// rFlagsRegUCF: ucomisd only, no fixup encoding is appended.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2)
%{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, [$src2]" %}
  ins_cost(100);
  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             load_immD(src1, src2));
  ins_pipe(pipe_slow);
%}
10787 
// Three-way float compare of two registers into an int (-1, 0, 1).
// Per the format listing: dst is preset to -1 and kept when the compare is
// unordered (jp) or below (jb); otherwise setne / movzbl leave 0 for equal
// and 1 for not equal. The flags register is killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10808 
// Three-way float compare (-1, 0, 1) of a register against a float loaded
// from memory. Per the format listing the result is -1 when unordered or
// below, 0 when equal and 1 otherwise. The flags register is killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10829 
// Three-way float compare (-1, 0, 1) of a register against a float constant
// (encoded through load_immF, listed as a memory operand). Per the format
// listing the result is -1 when unordered or below, 0 when equal and 1
// otherwise. The flags register is killed.
instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E); // ucomiss
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             load_immF(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10850 
// Three-way double compare of two registers into an int (-1, 0, 1).
// Per the format listing: dst is preset to -1 and kept when the compare is
// unordered (jp) or below (jb); otherwise setne / movzbl leave 0 for equal
// and 1 for not equal. The flags register is killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10871 
// Three-way double compare (-1, 0, 1) of a register against a double loaded
// from memory. Per the format listing the result is -1 when unordered or
// below, 0 when equal and 1 otherwise. The flags register is killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10892 
// Three-way double compare (-1, 0, 1) of a register against a double
// constant (encoded through load_immD, listed as a memory operand). Per the
// format listing the result is -1 when unordered or below, 0 when equal and
// 1 otherwise. The flags register is killed.
instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E); // ucomisd
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             load_immD(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10913 
// Float add, register source: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58); // addss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10924 
// Float add with the second operand loaded from memory:
// dst = dst + LoadF(src).
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58); // addss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10935 
// Float add of a float constant: dst = dst + src. The constant is encoded
// through load_immF and listed as a memory operand.
instruct addF_imm(regF dst, immF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x58); // addss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
10946 
// Double add, register source: dst = dst + src.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58); // addsd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10957 
// Double add with the second operand loaded from memory:
// dst = dst + LoadD(src).
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58); // addsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10968 
// Double add of a double constant: dst = dst + src. The constant is encoded
// through load_immD and listed as a memory operand.
instruct addD_imm(regD dst, immD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x58); // addsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
10979 
// Float subtract, register source: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C); // subss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10990 
// Float subtract with the subtrahend loaded from memory:
// dst = dst - LoadF(src).
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C); // subss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11001 
// Float subtract of a float constant: dst = dst - src. The constant is
// encoded through load_immF and listed as a memory operand.
instruct subF_imm(regF dst, immF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5C); // subss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11012 
// Double subtract, register source: dst = dst - src.
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C); // subsd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11023 
// Double subtract with the subtrahend loaded from memory:
// dst = dst - LoadD(src).
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C); // subsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11034 
// Double subtract of a double constant: dst = dst - src. The constant is
// encoded through load_immD and listed as a memory operand.
instruct subD_imm(regD dst, immD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5C); // subsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11045 
// Float multiply, register source: dst = dst * src.
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59); // mulss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11056 
// Float multiply with the second operand loaded from memory:
// dst = dst * LoadF(src).
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59); // mulss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11067 
// Float multiply by a float constant: dst = dst * src. The constant is
// encoded through load_immF and listed as a memory operand.
instruct mulF_imm(regF dst, immF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x59); // mulss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11078 
// Double multiply, register source: dst = dst * src.
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59); // mulsd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11089 
// Double multiply with the second operand loaded from memory:
// dst = dst * LoadD(src).
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59); // mulsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11100 
// Double multiply by a double constant: dst = dst * src. The constant is
// encoded through load_immD and listed as a memory operand.
instruct mulD_imm(regD dst, immD src) %{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x59); // mulsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11111 
// Float divide, register source: dst = dst / src.
instruct divF_reg(regF dst, regF src) %{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E); // divss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11122 
// Float divide with the divisor loaded from memory:
// dst = dst / LoadF(src).
instruct divF_mem(regF dst, memory src) %{
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E); // divss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11133 
// Float divide by a float constant: dst = dst / src. The constant is
// encoded through load_immF and listed as a memory operand.
instruct divF_imm(regF dst, immF src) %{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5E); // divss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11144 
// Double divide, register source: dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E); // divsd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11155 
// Double divide with the divisor loaded from memory:
// dst = dst / LoadD(src).
instruct divD_mem(regD dst, memory src) %{
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E); // divsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11166 
// Double divide by a double constant: dst = dst / src. The constant is
// encoded through load_immD and listed as a memory operand.
instruct divD_imm(regD dst, immD src) %{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5E); // divsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11177 
// Float square root. The matched tree is a double sqrt of a widened float
// narrowed back to float (ConvD2F (SqrtD (ConvF2D src))); the whole tree is
// covered by one sqrtss.
instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51); // sqrtss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11188 
// Float square root of a float loaded from memory. Matches
// (ConvD2F (SqrtD (ConvF2D (LoadF src)))) and covers it with one sqrtss.
instruct sqrtF_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51); // sqrtss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11199 
// Float square root of a float constant. Matches
// (ConvD2F (SqrtD (ConvF2D src))) with an immF source; the constant is
// encoded through load_immF and listed as a memory operand.
instruct sqrtF_imm(regF dst, immF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x51); // sqrtss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11210 
// Double square root, register source: dst = sqrt(src).
instruct sqrtD_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51); // sqrtsd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11221 
// Double square root of a double loaded from memory:
// dst = sqrt(LoadD(src)).
instruct sqrtD_mem(regD dst, memory src) %{
  match(Set dst (SqrtD (LoadD src)));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51); // sqrtsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11232 
// Double square root of a double constant: dst = sqrt(src). The constant is
// encoded through load_immD and listed as a memory operand.
instruct sqrtD_imm(regD dst, immD src) %{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x51); // sqrtsd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11243 
// Float absolute value, in place on dst. The format listing describes it as
// an andps with a 0x7fffffff mask (sign masking); the actual bytes come from
// the absF_encoding class.
instruct absF_reg(regF dst) %{
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode(
    absF_encoding(dst)
  );
  ins_pipe(pipe_slow);
%}
11252 
// Double absolute value, in place on dst. The format listing describes it
// as an andpd with a 0x7fffffffffffffff mask (sign masking); the actual
// bytes come from the absD_encoding class.
instruct absD_reg(regD dst) %{
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode(
    absD_encoding(dst)
  );
  ins_pipe(pipe_slow);
%}
11262 
// Float negate, in place on dst. The format listing describes it as an
// xorps with a 0x80000000 mask (sign flipping); the actual bytes come from
// the negF_encoding class.
instruct negF_reg(regF dst) %{
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode(
    negF_encoding(dst)
  );
  ins_pipe(pipe_slow);
%}
11271 
// Double negate, in place on dst. The format listing describes it as an
// xorpd with a 0x8000000000000000 mask (sign flipping); the actual bytes
// come from the negD_encoding class.
instruct negD_reg(regD dst) %{
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode(
    negD_encoding(dst)
  );
  ins_pipe(pipe_slow);
%}
11281 
// -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place on dst. The two opcode bytes D9 FF (x87 fcos) are
// bracketed by Push_SrcXD / Push_ResultXD.
// NOTE(review): those helpers presumably move the value between the XMM
// register and the x87 stack -- confirm against their enc_class definitions.
instruct cosD_reg(regD dst)
%{
  match(Set dst (CosD dst));

  format %{ "dcos   $dst\n\t" %}
  opcode(0xD9, 0xFF); // fcos
  ins_encode(Push_SrcXD(dst),
             OpcP, OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11291 
// Double sine, in place on dst. The two opcode bytes D9 FE (x87 fsin) are
// bracketed by Push_SrcXD / Push_ResultXD.
// NOTE(review): those helpers presumably move the value between the XMM
// register and the x87 stack -- confirm against their enc_class definitions.
instruct sinD_reg(regD dst)
%{
  match(Set dst (SinD dst));

  format %{ "dsin   $dst\n\t" %}
  opcode(0xD9, 0xFE); // fsin
  ins_encode(Push_SrcXD(dst),
             OpcP, OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11300 
// Double tangent, in place on dst. Between Push_SrcXD and Push_ResultXD the
// encoding emits fptan (D9 F2) followed by an fstp st (DD D8).
// NOTE(review): the fstp presumably discards the extra value fptan leaves
// on the x87 stack -- confirm against the instruction reference.
instruct tanD_reg(regD dst)
%{
  match(Set dst (TanD dst));

  format %{ "dtan   $dst\n\t" %}
  ins_encode(Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF2),   //fptan
             Opcode(0xDD), Opcode(0xD8),   //fstp st
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11311 
// Base-10 logarithm of a double, in place on dst.
// Source and result are double operands in XMM registers. Sequence:
//   fldlg2   ; push log_10(2) on the FPU stack; full 80-bit number
//   <push dst via Push_SrcXD>
//   fyl2x    ; compute log_10(2) * log_2(x)
//   <result back to dst via Push_ResultXD>
instruct log10D_reg(regD dst)
%{
  match(Set dst (Log10D dst));

  format %{ "fldlg2\t\t\t#Log10\n\t"
            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
         %}
  ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
             Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF1),   // fyl2x
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11327 
// Natural logarithm of a double, in place on dst.
// Source and result are double operands in XMM registers. Sequence:
//   fldln2   ; push log_e(2) on the FPU stack; full 80-bit number
//   <push dst via Push_SrcXD>
//   fyl2x    ; compute log_e(2) * log_2(x)
//   <result back to dst via Push_ResultXD>
instruct logD_reg(regD dst)
%{
  match(Set dst (LogD dst));

  format %{ "fldln2\t\t\t#Log_e\n\t"
            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
         %}
  ins_encode(Opcode(0xD9), Opcode(0xED),   // fldln2
             Push_SrcXD(dst),
             Opcode(0xD9), Opcode(0xF1),   // fyl2x
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11342 
11343 
11344 
11345 //----------Arithmetic Conversion Instructions---------------------------------
11346 
// RoundFloat on a float register is a no-op here: the rule matches in place,
// costs nothing and has an empty encoding.
instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
11355 
// RoundDouble on a double register is a no-op here: the rule matches in
// place, costs nothing and has an empty encoding.
instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
11364 
// Float to double conversion, register source.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A); // cvtss2sd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11374 
// Float to double conversion of a float loaded from memory.
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A); // cvtss2sd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11384 
// Double to float conversion, register source.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A); // cvtsd2ss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11394 
// Double to float conversion of a double loaded from memory.
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A); // cvtsd2ss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11404 
// XXX do mem variants
//
// Float to int conversion. Per the format listing: cvttss2si produces the
// result; if it equals 0x80000000 the slow path stores src into a fresh
// 8-byte stack slot, calls f2i_fixup and pops the answer into dst.
// The flags register is killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2i_fixup\n\t"
            "popq    $dst\n"
            "done:   "%}
  opcode(0xF3, 0x0F, 0x2C); // cvttss2si
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src),
             f2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11424 
// Float to long conversion (wide REX form). Per the format listing:
// cvttss2siq produces the result; if it equals 0x8000000000000000 the slow
// path stores src into a fresh 8-byte stack slot, calls f2l_fixup and pops
// the answer into dst. The flags register is killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
            "done:   "%}
  opcode(0xF3, 0x0F, 0x2C); // cvttss2si
  ins_encode(OpcP,
             REX_reg_reg_wide(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src),
             f2l_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11443 
// Double to int conversion. Per the format listing: cvttsd2si produces the
// result; if it equals 0x80000000 the slow path stores src into a fresh
// 8-byte stack slot, calls d2i_fixup and pops the answer into dst.
// The flags register is killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
            "done:   "%}
  opcode(0xF2, 0x0F, 0x2C); // cvttsd2si
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src),
             d2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11462 
// Double to long conversion (wide REX form). Per the format listing:
// cvttsd2siq produces the result; if it equals 0x8000000000000000 the slow
// path stores src into a fresh 8-byte stack slot, calls d2l_fixup and pops
// the answer into dst. The flags register is killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2l_fixup\n\t"
            "popq    $dst\n"
            "done:   "%}
  opcode(0xF2, 0x0F, 0x2C); // cvttsd2si
  ins_encode(OpcP,
             REX_reg_reg_wide(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src),
             d2l_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11481 
// Int to float conversion, register source. Selected only when UseXmmI2F is
// off; convXI2F_reg handles the same tree when it is on.
instruct convI2F_reg_reg(regF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  opcode(0xF3, 0x0F, 0x2A); // cvtsi2ss
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11492 
// Int to float conversion of an int loaded from memory.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  opcode(0xF3, 0x0F, 0x2A); // cvtsi2ss
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11502 
// Int to double conversion, register source. Selected only when UseXmmI2D
// is off; convXI2D_reg handles the same tree when it is on.
instruct convI2D_reg_reg(regD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  opcode(0xF2, 0x0F, 0x2A); // cvtsi2sd
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11513 
// Int to double conversion of an int loaded from memory.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  opcode(0xF2, 0x0F, 0x2A); // cvtsi2sd
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11523 
// Int to float conversion used when UseXmmI2F is on: the int is first moved
// into the XMM destination with movdl, then converted in place with
// cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    // Step 1: general register -> XMM register.
    __ movdl($dst$$XMMRegister, $src$$Register);
    // Step 2: convert in place.
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11537 
11538 instruct convXI2D_reg(regD dst, rRegI src)
11539 %{
        // UseXmmI2D flavour of ConvI2D (the complement of convI2D_reg_reg's predicate).
        // Two-step sequence: copy the int into dst with movdl, then convert dst in
        // place with cvtdq2pd.
11540   predicate(UseXmmI2D);
11541   match(Set dst (ConvI2D src));
11542 
11543   format %{ "movdl $dst, $src\n\t"
11544             "cvtdq2pdl $dst, $dst\t# i2d" %}
11545   ins_encode %{
11546     __ movdl($dst$$XMMRegister, $src$$Register);
11547     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11548   %}
11549   ins_pipe(pipe_slow); // XXX
11550 %}
11551 
11552 instruct convL2F_reg_reg(regF dst, rRegL src)
11553 %{
        // ConvL2F with the long source in a general register.
11554   match(Set dst (ConvL2F src));
11555 
11556   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
        // Same opcode bytes as the int->float rule; the long form differs only in
        // using the _wide REX encoder.
11557   opcode(0xF3, 0x0F, 0x2A);
11558   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11559   ins_pipe(pipe_slow); // XXX
11560 %}
11561 
11562 instruct convL2F_reg_mem(regF dst, memory src)
11563 %{
        // ConvL2F whose input is a LoadL: the load is folded into the conversion's
        // memory operand.
11564   match(Set dst (ConvL2F (LoadL src)));
11565 
11566   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11567   opcode(0xF3, 0x0F, 0x2A);
11568   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11569   ins_pipe(pipe_slow); // XXX
11570 %}
11571 
11572 instruct convL2D_reg_reg(regD dst, rRegL src)
11573 %{
        // ConvL2D with the long source in a general register.
11574   match(Set dst (ConvL2D src));
11575 
11576   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
        // Same opcode bytes as the int->double rule; the long form differs only in
        // using the _wide REX encoder.
11577   opcode(0xF2, 0x0F, 0x2A);
11578   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11579   ins_pipe(pipe_slow); // XXX
11580 %}
11581 
11582 instruct convL2D_reg_mem(regD dst, memory src)
11583 %{
        // ConvL2D whose input is a LoadL: the load is folded into the conversion's
        // memory operand.
11584   match(Set dst (ConvL2D (LoadL src)));
11585 
11586   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11587   opcode(0xF2, 0x0F, 0x2A);
11588   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11589   ins_pipe(pipe_slow); // XXX
11590 %}
11591 
11592 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11593 %{
        // Plain ConvI2L, register to register; displayed as movslq.
        // Carries an explicit cost of 125, unlike the zero-extending variants that
        // follow, which declare no ins_cost.
11594   match(Set dst (ConvI2L src));
11595 
11596   ins_cost(125);
11597   format %{ "movslq  $dst, $src\t# i2l" %}
11598   opcode(0x63); // needs REX.W
        // Here the REX prefix comes first, before the single opcode byte.
11599   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11600   ins_pipe(ialu_reg_reg);
11601 %}
11602 
11603 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11604 // %{
11605 //   match(Set dst (ConvI2L src));
11606 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11607 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11608 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11609 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11610 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11611 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11612 
11613 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11614 //   ins_encode(enc_copy(dst, src));
11615 // //   opcode(0x63); // needs REX.W
11616 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11617 //   ins_pipe(ialu_reg_reg);
11618 // %}
11619 
11620 // Zero-extend convert int to long: an AndL of a ConvI2L with an immL_32bits
      // mask is matched as a single 32-bit register move.
11621 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11622 %{
        // mask only takes part in matching; it does not appear in format or encoding.
11623   match(Set dst (AndL (ConvI2L src) mask));
11624 
11625   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
        // NOTE(review): this rule uses enc_copy while zerox_long_reg_reg and
        // convL2I_reg_reg use enc_copy_always -- confirm the difference is intended.
11626   ins_encode(enc_copy(dst, src));
11627   ins_pipe(ialu_reg_reg);
11628 %}
11629 
11630 // Zero-extend convert int to long, memory form: the LoadI feeding the
      // ConvI2L/AndL pair is folded into a single 32-bit load.
11631 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11632 %{
        // mask only takes part in matching; it does not appear in format or encoding.
11633   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11634 
11635   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
        // Non-wide REX encoder, i.e. the 32-bit form of the 0x8B load.
11636   opcode(0x8B);
11637   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11638   ins_pipe(ialu_reg_mem);
11639 %}
11640 
11641 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11642 %{
        // AndL of a long register with an immL_32bits mask, displayed as a 32-bit
        // movl instead of an and. mask is used for matching only.
11643   match(Set dst (AndL src mask));
11644 
11645   format %{ "movl    $dst, $src\t# zero-extend long" %}
        // Uses the _always copy encoder (presumably so the move is emitted even
        // when dst and src are the same register -- confirm against enc_copy_always).
11646   ins_encode(enc_copy_always(dst, src));
11647   ins_pipe(ialu_reg_reg);
11648 %}
11649 
11650 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11651 %{
        // ConvL2I, register to register; displayed as a 32-bit movl.
11652   match(Set dst (ConvL2I src));
11653 
11654   format %{ "movl    $dst, $src\t# l2i" %}
        // Same encoder as zerox_long_reg_reg (enc_copy_always).
11655   ins_encode(enc_copy_always(dst, src));
11656   ins_pipe(ialu_reg_reg);
11657 %}
11658 
11659 
11660 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I with the float in a stack slot and the int result in a general
        // register: displayed as a plain 32-bit movl load.
11661   match(Set dst (MoveF2I src));
11662   effect(DEF dst, USE src);
11663 
11664   ins_cost(125);
11665   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11666   opcode(0x8B);
11667   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11668   ins_pipe(ialu_reg_mem);
11669 %}
11670 
11671 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F with the int in a stack slot and the float result in an XMM
        // register: displayed as a movss load.
11672   match(Set dst (MoveI2F src));
11673   effect(DEF dst, USE src);
11674 
11675   ins_cost(125);
11676   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11677   opcode(0xF3, 0x0F, 0x10);
11678   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11679   ins_pipe(pipe_slow);
11680 %}
11681 
11682 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L with the double in a stack slot and the long result in a general
        // register: displayed as a 64-bit movq load (wide REX encoder).
11683   match(Set dst (MoveD2L src));
11684   effect(DEF dst, USE src);
11685 
11686   ins_cost(125);
11687   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11688   opcode(0x8B);
11689   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11690   ins_pipe(ialu_reg_mem);
11691 %}
11692 
11693 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a stack slot into an XMM register, movlpd flavour.
        // Chosen when UseXmmLoadAndClearUpper is off; MoveL2D_stack_reg is the
        // movsd flavour used when the flag is on.
11694   predicate(!UseXmmLoadAndClearUpper);
11695   match(Set dst (MoveL2D src));
11696   effect(DEF dst, USE src);
11697 
11698   ins_cost(125);
        // Note: the tag printed here is "MoveL2D_stack_reg", without "_partial".
11699   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11700   opcode(0x66, 0x0F, 0x12);
11701   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11702   ins_pipe(pipe_slow);
11703 %}
11704 
11705 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a stack slot into an XMM register, movsd flavour.
        // Chosen when UseXmmLoadAndClearUpper is on; MoveL2D_stack_reg_partial is
        // the movlpd flavour used when the flag is off.
11706   predicate(UseXmmLoadAndClearUpper);
11707   match(Set dst (MoveL2D src));
11708   effect(DEF dst, USE src);
11709 
11710   ins_cost(125);
11711   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11712   opcode(0xF2, 0x0F, 0x10);
11713   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11714   ins_pipe(pipe_slow);
11715 %}
11716 
11717 
11718 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I with the float in an XMM register and the int result in a stack
        // slot: displayed as a movss store.
11719   match(Set dst (MoveF2I src));
11720   effect(DEF dst, USE src);
11721 
11722   ins_cost(95); // XXX
11723   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11724   opcode(0xF3, 0x0F, 0x11);
        // Store form: the register operand (src) is passed first, the memory operand (dst) second.
11725   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11726   ins_pipe(pipe_slow);
11727 %}
11728 
11729 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F with the int in a general register and the float result in a
        // stack slot: displayed as a 32-bit movl store.
11730   match(Set dst (MoveI2F src));
11731   effect(DEF dst, USE src);
11732 
11733   ins_cost(100);
11734   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11735   opcode(0x89);
        // Store form: the register operand (src) is passed first, the memory operand (dst) second.
11736   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11737   ins_pipe( ialu_mem_reg );
11738 %}
11739 
11740 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L with the double in an XMM register and the long result in a stack
        // slot: displayed as a movsd store.
11741   match(Set dst (MoveD2L src));
11742   effect(DEF dst, USE src);
11743 
11744   ins_cost(95); // XXX
        // The trailing tag must name this rule. It used to read "MoveL2D_reg_stack",
        // which is the name of a different rule (the movq store for MoveL2D).
11745   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11746   opcode(0xF2, 0x0F, 0x11);
        // Store form: the register operand (src) is passed first, the memory operand (dst) second.
11747   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11748   ins_pipe(pipe_slow);
11749 %}
11750 
11751 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D with the long in a general register and the double result in a
        // stack slot: displayed as a 64-bit movq store (wide REX encoder).
11752   match(Set dst (MoveL2D src));
11753   effect(DEF dst, USE src);
11754 
11755   ins_cost(100);
11756   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11757   opcode(0x89);
        // Store form: the register operand (src) is passed first, the memory operand (dst) second.
11758   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11759   ins_pipe(ialu_mem_reg);
11760 %}
11761 
11762 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to a general register via movdl.
        // Cost 85 is lower than the stack-slot forms of MoveF2I (95 and 125).
11763   match(Set dst (MoveF2I src));
11764   effect(DEF dst, USE src);
11765   ins_cost(85);
11766   format %{ "movd    $dst,$src\t# MoveF2I" %}
11767   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11768   ins_pipe( pipe_slow );
11769 %}
11770 
11771 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L directly from an XMM register to a general register via movdq.
        // Cost 85 is lower than the stack-slot forms of MoveD2L (95 and 125).
11772   match(Set dst (MoveD2L src));
11773   effect(DEF dst, USE src);
11774   ins_cost(85);
        // Note: the format prints "movd" although the assembler call is movdq.
11775   format %{ "movd    $dst,$src\t# MoveD2L" %}
11776   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11777   ins_pipe( pipe_slow );
11778 %}
11779 
11780 // The next instructions have long latency and use Int unit. Set high cost.
11781 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F directly from a general register to an XMM register via movdl.
        // Cost 300 is well above the stack-slot forms of MoveI2F (100 and 125).
11782   match(Set dst (MoveI2F src));
11783   effect(DEF dst, USE src);
11784   ins_cost(300);
11785   format %{ "movd    $dst,$src\t# MoveI2F" %}
11786   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11787   ins_pipe( pipe_slow );
11788 %}
11789 
11790 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D directly from a general register to an XMM register via movdq.
        // Cost 300 is well above the stack-slot forms of MoveL2D (100 and 125).
11791   match(Set dst (MoveL2D src));
11792   effect(DEF dst, USE src);
11793   ins_cost(300);
        // Note: the format prints "movd" although the assembler call is movdq.
11794   format %{ "movd    $dst,$src\t# MoveL2D" %}
11795   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11796   ins_pipe( pipe_slow );
11797 %}
11798 
11799 // Replicate scalar to packed byte (1 byte) values in xmm; source already in xmm
11800 instruct Repl8B_reg(regD dst, regD src) %{
11801   match(Set dst (Replicate8B src));
        // Displayed as a three-instruction sequence; all of it is emitted by the
        // single pshufd_8x8 encoder.
11802   format %{ "MOVDQA  $dst,$src\n\t"
11803             "PUNPCKLBW $dst,$dst\n\t"
11804             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11805   ins_encode( pshufd_8x8(dst, src));
11806   ins_pipe( pipe_slow );
11807 %}
11808 
11809 // Replicate scalar to packed byte (1 byte) values in xmm; source in a general register
11810 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11811   match(Set dst (Replicate8B src));
11812   format %{ "MOVD    $dst,$src\n\t"
11813             "PUNPCKLBW $dst,$dst\n\t"
11814             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
        // First move the int into dst (mov_i2x), then replicate within dst.
11815   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11816   ins_pipe( pipe_slow );
11817 %}
11818 
11819 // Replicate scalar zero to packed byte (1 byte) values in xmm
11820 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11821   match(Set dst (Replicate8B zero));
        // Replicating zero is done by xor-ing dst with itself; the zero operand is
        // used for matching only.
11822   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11823   ins_encode( pxor(dst, dst));
11824   ins_pipe( fpu_reg_reg );
11825 %}
11826 
11827 // Replicate scalar to packed short (2 byte) values in xmm; source already in xmm
11828 instruct Repl4S_reg(regD dst, regD src) %{
11829   match(Set dst (Replicate4S src));
11830   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
        // Same encoder as the Replicate4C register rule (pshufd_4x16).
11831   ins_encode( pshufd_4x16(dst, src));
11832   ins_pipe( fpu_reg_reg );
11833 %}
11834 
11835 // Replicate scalar to packed short (2 byte) values in xmm; source in a general register
11836 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11837   match(Set dst (Replicate4S src));
11838   format %{ "MOVD    $dst,$src\n\t"
11839             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
        // First move the int into dst (mov_i2x), then replicate within dst.
11840   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11841   ins_pipe( fpu_reg_reg );
11842 %}
11843 
11844 // Replicate scalar zero to packed short (2 byte) values in xmm
11845 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11846   match(Set dst (Replicate4S zero));
        // Replicating zero is done by xor-ing dst with itself; the zero operand is
        // used for matching only.
11847   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11848   ins_encode( pxor(dst, dst));
11849   ins_pipe( fpu_reg_reg );
11850 %}
11851 
11852 // Replicate scalar to packed char (2 byte) values in xmm; source already in xmm
11853 instruct Repl4C_reg(regD dst, regD src) %{
11854   match(Set dst (Replicate4C src));
11855   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
        // Same encoder as the Replicate4S register rule (pshufd_4x16).
11856   ins_encode( pshufd_4x16(dst, src));
11857   ins_pipe( fpu_reg_reg );
11858 %}
11859 
11860 // Replicate scalar to packed char (2 byte) values in xmm; source in a general register
11861 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11862   match(Set dst (Replicate4C src));
11863   format %{ "MOVD    $dst,$src\n\t"
11864             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
        // First move the int into dst (mov_i2x), then replicate within dst.
11865   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11866   ins_pipe( fpu_reg_reg );
11867 %}
11868 
11869 // Replicate scalar zero to packed char (2 byte) values in xmm
11870 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11871   match(Set dst (Replicate4C zero));
        // Replicating zero is done by xor-ing dst with itself; the zero operand is
        // used for matching only.
11872   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11873   ins_encode( pxor(dst, dst));
11874   ins_pipe( fpu_reg_reg );
11875 %}
11876 
11877 // Replicate scalar to packed integer (4 byte) values in xmm; source already in xmm
11878 instruct Repl2I_reg(regD dst, regD src) %{
11879   match(Set dst (Replicate2I src));
11880   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
        // The shuffle selector 0x00 is passed to the encoder as a literal.
11881   ins_encode( pshufd(dst, src, 0x00));
11882   ins_pipe( fpu_reg_reg );
11883 %}
11884 
11885 // Replicate scalar to packed integer (4 byte) values in xmm; source in a general register
11886 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11887   match(Set dst (Replicate2I src));
11888   format %{ "MOVD   $dst,$src\n\t"
11889             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
        // First move the int into dst (mov_i2x), then shuffle dst onto itself.
11890   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11891   ins_pipe( fpu_reg_reg );
11892 %}
11893 
11894 // Replicate scalar zero to packed integer (4 byte) values in xmm
11895 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11896   match(Set dst (Replicate2I zero));
        // Replicating zero is done by xor-ing dst with itself; the zero operand is
        // used for matching only.
11897   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11898   ins_encode( pxor(dst, dst));
11899   ins_pipe( fpu_reg_reg );
11900 %}
11901 
11902 // Replicate scalar to packed single precision floating point values in xmm;
      // source typed as regD
11903 instruct Repl2F_reg(regD dst, regD src) %{
11904   match(Set dst (Replicate2F src));
11905   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
        // Uses selector 0xe0, unlike the 0x00 selector of the Replicate2I rules.
11906   ins_encode( pshufd(dst, src, 0xe0));
11907   ins_pipe( fpu_reg_reg );
11908 %}
11909 
11910 // Replicate scalar to packed single precision floating point values in xmm;
      // source typed as regF
11911 instruct Repl2F_regF(regD dst, regF src) %{
11912   match(Set dst (Replicate2F src));
11913   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
        // Identical format and encoding to Repl2F_reg; only the source operand class differs.
11914   ins_encode( pshufd(dst, src, 0xe0));
11915   ins_pipe( fpu_reg_reg );
11916 %}
11917 
11918 // Replicate scalar zero to packed single precision floating point values in xmm
11919 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11920   match(Set dst (Replicate2F zero));
        // Replicating zero is done by xor-ing dst with itself; the zero operand is
        // used for matching only.
11921   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11922   ins_encode( pxor(dst, dst));
11923   ins_pipe( fpu_reg_reg );
11924 %}
11925 
11926 
11927 // =======================================================================
11928 // Fast clearing of an array (ClearArray), displayed as xorl rax,rax + rep stosq.
11929 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11930                   rFlagsReg cr)
11931 %{
11932   match(Set dummy (ClearArray cnt base));
        // cnt and base are consumed and destroyed; zero (rax operand class) and
        // the flags are clobbered.
11933   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11934 
11935   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11936             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
        // Raw byte encoding: opcode 0x33 on RAX,RAX, then bytes F3 48 AB.
11937   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11938              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11939   ins_pipe(pipe_slow);
11940 %}
11941 
11942 instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
11943                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11944 %{
        // StrComp intrinsic. All operands use fixed-register operand classes
        // (rdi/rsi inputs, rax/rbx scratch, rcx result); the real work is done by
        // the enc_String_Compare encoder.
11945   match(Set result (StrComp str1 str2));
        // Both string pointers are consumed and destroyed; tmp1/tmp2 are XMM
        // temporaries; tmp3, tmp4 and the flags are clobbered.
11946   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11947   //ins_cost(300);
11948 
11949   format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
11950   ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11951   ins_pipe( pipe_slow );
11952 %}
11953 
11954 instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
11955                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
11956 %{
        // StrIndexOf intrinsic, available only when UseSSE42Intrinsics is set.
        // Note the register assignment differs from string_compare: str1 is in
        // rsi and str2 in rdi here, and the result is in rbx.
11957   predicate(UseSSE42Intrinsics);
11958   match(Set result (StrIndexOf str1 str2));
        // Both string pointers are consumed and destroyed; tmp1 is an XMM
        // temporary; tmp2, tmp3, tmp4 and the flags are clobbered.
11959   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
11960 
11961   format %{ "String IndexOf $str1,$str2 -> $result   // KILL RAX, RCX, RDX" %}
11962   ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11963   ins_pipe( pipe_slow );
11964 %}
11965 
11966 // Fast string equals (StrEquals intrinsic); result is produced in the rax operand.
11967 instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
11968                        rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
11969 %{
11970   match(Set result (StrEquals str1 str2));
        // Both string pointers are consumed and destroyed; tmp1/tmp2 are XMM
        // temporaries; tmp3 (rbx), tmp4 (rcx) and the flags are clobbered.
11971   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
11972 
11973   format %{ "String Equals $str1,$str2 -> $result    // KILL RBX, RCX" %}
11974   ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
11975   ins_pipe( pipe_slow );
11976 %}
11977 
11978 // Fast array equals (AryEq intrinsic); result is produced in the rcx operand.
11979 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
11980                       rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
11981 %{
11982   match(Set result (AryEq ary1 ary2));
        // Both array pointers are consumed and destroyed; tmp1/tmp2 are XMM
        // temporaries; tmp3 (rax), tmp4 (rbx) and the flags are clobbered.
11983   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11984   //ins_cost(300);
11985 
11986   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
11987   ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
11988   ins_pipe( pipe_slow );
11989 %}
11990 
11991 //----------Control Flow Instructions------------------------------------------
11992 // Signed compare Instructions
11993 
11994 // XXX more variants!!
11995 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11996 %{
        // Signed int compare of two registers, producing flags in cr.
        // Also instantiated from the expand blocks of minI_rReg and maxI_rReg.
11997   match(Set cr (CmpI op1 op2));
11998   effect(DEF cr, USE op1, USE op2);
11999 
12000   format %{ "cmpl    $op1, $op2" %}
12001   opcode(0x3B);  /* Opcode 3B /r */
12002   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12003   ins_pipe(ialu_cr_reg_reg);
12004 %}
12005 
12006 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12007 %{
        // Signed int compare of a register against an immediate.
12008   match(Set cr (CmpI op1 op2));
12009 
12010   format %{ "cmpl    $op1, $op2" %}
12011   opcode(0x81, 0x07); /* Opcode 81 /7 */
        // OpcSErm/Con8or32 presumably pick the short (sign-extended 8-bit) form
        // when the constant fits -- confirm against the enc_class definitions.
12012   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12013   ins_pipe(ialu_cr_reg_imm);
12014 %}
12015 
12016 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12017 %{
        // Signed int compare of a register against a value loaded from memory
        // (the LoadI is folded into the compare).
12018   match(Set cr (CmpI op1 (LoadI op2)));
12019 
12020   ins_cost(500); // XXX
12021   format %{ "cmpl    $op1, $op2" %}
12022   opcode(0x3B); /* Opcode 3B /r */
12023   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12024   ins_pipe(ialu_cr_reg_mem);
12025 %}
12026 
12027 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
12028 %{
        // Compare of an int register against zero, displayed as testl src,src.
        // The zero operand is used for matching only.
12029   match(Set cr (CmpI src zero));
12030 
12031   format %{ "testl   $src, $src" %}
12032   opcode(0x85);
12033   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12034   ins_pipe(ialu_cr_reg_imm);
12035 %}
12036 
12037 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
12038 %{
        // (src & con) compared against zero, folded into one testl with an
        // immediate. The zero operand is used for matching only.
12039   match(Set cr (CmpI (AndI src con) zero));
12040 
12041   format %{ "testl   $src, $con" %}
12042   opcode(0xF7, 0x00);
        // The constant is always emitted through Con32 (no Con8or32 here).
12043   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
12044   ins_pipe(ialu_cr_reg_imm);
12045 %}
12046 
12047 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
12048 %{
        // (src & LoadI(mem)) compared against zero, folded into one testl with a
        // memory operand. The zero operand is used for matching only.
12049   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12050 
12051   format %{ "testl   $src, $mem" %}
12052   opcode(0x85);
12053   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
12054   ins_pipe(ialu_cr_reg_mem);
12055 %}
12056 
12057 // Unsigned compare Instructions; really, same as signed except they
12058 // produce an rFlagsRegU instead of rFlagsReg.
12059 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12060 %{
        // Register/register CmpU; opcode and encoders are the same as compI_rReg.
12061   match(Set cr (CmpU op1 op2));
12062 
12063   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12064   opcode(0x3B); /* Opcode 3B /r */
12065   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12066   ins_pipe(ialu_cr_reg_reg);
12067 %}
12068 
12069 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12070 %{
        // Register/immediate CmpU; opcode and encoders are the same as compI_rReg_imm.
12071   match(Set cr (CmpU op1 op2));
12072 
12073   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12074   opcode(0x81,0x07); /* Opcode 81 /7 */
12075   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12076   ins_pipe(ialu_cr_reg_imm);
12077 %}
12078 
12079 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12080 %{
        // Register/memory CmpU (LoadI folded in); opcode and encoders are the same
        // as compI_rReg_mem.
12081   match(Set cr (CmpU op1 (LoadI op2)));
12082 
12083   ins_cost(500); // XXX
12084   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12085   opcode(0x3B); /* Opcode 3B /r */
12086   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12087   ins_pipe(ialu_cr_reg_mem);
12088 %}
12089 
12090 // // // Cisc-spilled version of cmpU_rReg
12091 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12092 // //%{
12093 // //  match(Set cr (CmpU (LoadI op1) op2));
12094 // //
12095 // //  format %{ "CMPu   $op1,$op2" %}
12096 // //  ins_cost(500);
12097 // //  opcode(0x39);  /* Opcode 39 /r */
12098 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12099 // //%}
12100 
12101 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
12102 %{
        // CmpU of a register against zero, displayed as testl src,src.
        // The zero operand is used for matching only.
12103   match(Set cr (CmpU src zero));
12104 
12105   format %{ "testl  $src, $src\t# unsigned" %}
12106   opcode(0x85);
12107   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12108   ins_pipe(ialu_cr_reg_imm);
12109 %}
12110 
12111 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12112 %{
        // Pointer compare of two registers; produces unsigned flags (rFlagsRegU)
        // and uses the wide REX encoder for a 64-bit cmpq.
12113   match(Set cr (CmpP op1 op2));
12114 
12115   format %{ "cmpq    $op1, $op2\t# ptr" %}
12116   opcode(0x3B); /* Opcode 3B /r */
12117   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12118   ins_pipe(ialu_cr_reg_reg);
12119 %}
12120 
12121 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12122 %{
        // Pointer compare of a register against a pointer loaded from memory
        // (LoadP folded in). Unlike compP_mem_rReg, it carries no predicate
        // and has an explicit cost of 500.
12123   match(Set cr (CmpP op1 (LoadP op2)));
12124 
12125   ins_cost(500); // XXX
12126   format %{ "cmpq    $op1, $op2\t# ptr" %}
12127   opcode(0x3B); /* Opcode 3B /r */
12128   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12129   ins_pipe(ialu_cr_reg_mem);
12130 %}
12131 
12132 // // // Cisc-spilled version of cmpP_rReg
12133 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12134 // //%{
12135 // //  match(Set cr (CmpP (LoadP op1) op2));
12136 // //
12137 // //  format %{ "CMPu   $op1,$op2" %}
12138 // //  ins_cost(500);
12139 // //  opcode(0x39);  /* Opcode 39 /r */
12140 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12141 // //%}
12142 
12143 // XXX this is generalized by compP_rReg_mem???
12144 // Compare raw pointer (used in out-of-heap check).
12145 // Only works because non-oop pointers must be raw pointers
12146 // and raw pointers have no anti-dependencies.
12147 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12148 %{
        // Same match rule, format opcode and encoders as compP_rReg_mem, but with
        // no explicit ins_cost and restricted to non-oop pointers: the predicate
        // rejects the node when n->in(2)->in(2) has an oop pointer type.
        // NOTE(review): n->in(2) is presumably the LoadP and its in(2) the
        // address input -- confirm.
12149   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
12150   match(Set cr (CmpP op1 (LoadP op2)));
12151 
12152   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12153   opcode(0x3B); /* Opcode 3B /r */
12154   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12155   ins_pipe(ialu_cr_reg_mem);
12156 %}
12157 
12158 // This will generate a signed flags result. This should be OK since
12159 // any compare to a zero should be eq/neq.
12160 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12161 %{
        // Pointer register compared against null, displayed as testq src,src.
        // The zero operand is used for matching only.
12162   match(Set cr (CmpP src zero));
12163 
12164   format %{ "testq   $src, $src\t# ptr" %}
12165   opcode(0x85);
12166   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12167   ins_pipe(ialu_cr_reg_imm);
12168 %}
12169 
12170 // This will generate a signed flags result. This should be OK since
12171 // any compare to a zero should be eq/neq.
12172 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12173 %{
        // Null test of a pointer loaded from memory. This predicate is the exact
        // complement of testP_mem_reg0's, so exactly one of the two rules applies.
12174   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
12175   match(Set cr (CmpP (LoadP op) zero));
12176 
12177   ins_cost(500); // XXX
12178   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12179   opcode(0xF7); /* Opcode F7 /0 */
        // Only a 32-bit immediate is emitted; with the wide REX prefix it is
        // presumably sign-extended to the all-ones mask shown in format -- confirm.
12180   ins_encode(REX_mem_wide(op),
12181              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
12182   ins_pipe(ialu_cr_reg_imm);
12183 %}
12184 
12185 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12186 %{
        // Null test of a pointer loaded from memory, for compressed oops with a
        // NULL narrow-oop base. Per the format string, R12 (the heap base
        // register) is then zero, so it is compared directly against memory.
12187   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
12188   match(Set cr (CmpP (LoadP mem) zero));
12189 
12190   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12191   ins_encode %{
12192     __ cmpq(r12, $mem$$Address);
12193   %}
12194   ins_pipe(ialu_cr_reg_mem);
12195 %}
12196 
12197 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12198 %{
        // Compare of two compressed pointers held in registers; a 32-bit cmpl
        // producing unsigned flags.
12199   match(Set cr (CmpN op1 op2));
12200 
12201   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12202   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12203   ins_pipe(ialu_cr_reg_reg);
12204 %}
12205 
12206 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12207 %{
        // Compare of a compressed pointer register against a compressed pointer
        // loaded from memory (LoadN folded into the cmpl).
12208   match(Set cr (CmpN src (LoadN mem)));
12209 
12210   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12211   ins_encode %{
12212     __ cmpl($src$$Register, $mem$$Address);
12213   %}
12214   ins_pipe(ialu_cr_reg_mem);
12215 %}
12216 
12217 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a compressed pointer register against a compressed-pointer
        // constant; the constant is handed to cmp_narrow_oop as a jobject.
12218   match(Set cr (CmpN op1 op2));
12219 
12220   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12221   ins_encode %{
12222     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12223   %}
12224   ins_pipe(ialu_cr_reg_imm);
12225 %}
12226 
12227 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12228 %{
        // Compare of a compressed-pointer constant against a compressed pointer
        // loaded from memory.
        // NOTE(review): the match rule has the constant as the first CmpN input,
        // but the emitted compare has memory as the first operand -- presumably
        // harmless if only eq/ne are tested on the result; confirm.
12229   match(Set cr (CmpN src (LoadN mem)));
12230 
12231   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12232   ins_encode %{
12233     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12234   %}
12235   ins_pipe(ialu_cr_reg_mem);
12236 %}
12237 
12238 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Compressed pointer register compared against null, emitted as
        // testl src,src. The zero operand is used for matching only.
12239   match(Set cr (CmpN src zero));
12240 
12241   format %{ "testl   $src, $src\t# compressed ptr" %}
12242   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12243   ins_pipe(ialu_cr_reg_imm);
12244 %}
12245 
12246 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12247 %{
        // Null test of a compressed pointer loaded from memory, used when the
        // narrow-oop base is non-NULL (testN_mem_reg0 covers the NULL-base case).
12248   predicate(Universe::narrow_oop_base() != NULL);
12249   match(Set cr (CmpN (LoadN mem) zero));
12250 
12251   ins_cost(500); // XXX
12252   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
        // Emit the TEST advertised by format (F7 /0 with a 32-bit immediate), in
        // the same way testP_mem does for full-width pointers. The previous
        // encoding emitted cmpl mem, 0xFFFFFFFF, which sets ZF when the field
        // equals -1 rather than when it is zero.
12253   opcode(0xF7); /* Opcode F7 /0 */
12254   ins_encode(REX_mem(mem),
12255              OpcP, RM_opc_mem(0x00, mem), Con_d32(0xFFFFFFFF));
12256   ins_pipe(ialu_cr_reg_mem);
12257 %}
12258 
12259 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12260 %{
        // Null test of a compressed pointer loaded from memory when the
        // narrow-oop base is NULL. Per the format string, R12 (the heap base
        // register) is then zero, so it is compared directly against memory.
12261   predicate(Universe::narrow_oop_base() == NULL);
12262   match(Set cr (CmpN (LoadN mem) zero));
12263 
12264   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12265   ins_encode %{
12266     __ cmpl(r12, $mem$$Address);
12267   %}
12268   ins_pipe(ialu_cr_reg_mem);
12269 %}
12270 
12271 // Yanked all unsigned pointer compare operations.
12272 // Pointer compares are done with CmpP which is already unsigned.
12273 
12274 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12275 %{
        // Signed long compare of two registers; same opcode as the int compare,
        // with the wide REX encoder.
12276   match(Set cr (CmpL op1 op2));
12277 
12278   format %{ "cmpq    $op1, $op2" %}
12279   opcode(0x3B);  /* Opcode 3B /r */
12280   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12281   ins_pipe(ialu_cr_reg_reg);
12282 %}
12283 
12284 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12285 %{
        // Signed long compare of a register against an immL32 constant.
12286   match(Set cr (CmpL op1 op2));
12287 
12288   format %{ "cmpq    $op1, $op2" %}
12289   opcode(0x81, 0x07); /* Opcode 81 /7 */
        // Wide counterpart of the encoder used by compI_rReg_imm.
12290   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12291   ins_pipe(ialu_cr_reg_imm);
12292 %}
12293 
12294 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12295 %{
        // Signed long compare of a register against a value loaded from memory
        // (LoadL folded in). Unlike compI_rReg_mem, no explicit ins_cost is set.
12296   match(Set cr (CmpL op1 (LoadL op2)));
12297 
12298   format %{ "cmpq    $op1, $op2" %}
12299   opcode(0x3B); /* Opcode 3B /r */
12300   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12301   ins_pipe(ialu_cr_reg_mem);
12302 %}
12303 
12304 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12305 %{
        // Compare of a long register against zero, displayed as testq src,src.
        // The zero operand is used for matching only.
12306   match(Set cr (CmpL src zero));
12307 
12308   format %{ "testq   $src, $src" %}
12309   opcode(0x85);
12310   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12311   ins_pipe(ialu_cr_reg_imm);
12312 %}
12313 
12314 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12315 %{
        // (src & con) compared against zero for longs, folded into one testq with
        // an immL32 constant. The zero operand is used for matching only.
12316   match(Set cr (CmpL (AndL src con) zero));
12317 
12318   format %{ "testq   $src, $con\t# long" %}
12319   opcode(0xF7, 0x00);
12320   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12321   ins_pipe(ialu_cr_reg_imm);
12322 %}
12323 
12324 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12325 %{
        // (src & LoadL(mem)) compared against zero, folded into one testq with a
        // memory operand. The zero operand is used for matching only.
12326   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12327 
12328   format %{ "testq   $src, $mem" %}
12329   opcode(0x85);
12330   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12331   ins_pipe(ialu_cr_reg_mem);
12332 %}
12333 
12334 // Manifest a CmpL result in an integer register.  Very painful.
12335 // This is the test to avoid.
12336 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12337 %{
12338   match(Set dst (CmpL3 src1 src2));
        // The flags register is clobbered by the internal compare.
12339   effect(KILL flags);
12340 
12341   ins_cost(275); // XXX
        // Displayed sequence: dst starts at -1 and is kept if src1 < src2;
        // otherwise setne/movzbl leave 1 when the operands differ and 0 when equal.
12342   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12343             "movl    $dst, -1\n\t"
12344             "jl,s    done\n\t"
12345             "setne   $dst\n\t"
12346             "movzbl  $dst, $dst\n\t"
12347     "done:" %}
        // The whole sequence is emitted by the single cmpl3_flag encoder.
12348   ins_encode(cmpl3_flag(src1, src2, dst));
12349   ins_pipe(pipe_slow);
12350 %}
12351 
12352 //----------Max and Min--------------------------------------------------------
12353 // Min Instructions
12354 
12355 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12356 %{
        // Helper with no match rule: it is only instantiated from the expand block
        // of minI_rReg. Conditionally replaces dst with src on "greater", using
        // flags produced by a preceding compare.
12357   effect(USE_DEF dst, USE src, USE cr);
12358 
12359   format %{ "cmovlgt $dst, $src\t# min" %}
12360   opcode(0x0F, 0x4F);
12361   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12362   ins_pipe(pipe_cmov_reg);
12363 %}
12364 
12365 
12366 instruct minI_rReg(rRegI dst, rRegI src)
12367 %{
        // Integer minimum, computed in place in dst.
12368   match(Set dst (MinI dst src));
12369 
12370   ins_cost(200);
        // Expands to: compare dst with src, then move src into dst if dst was
        // greater. cr is a flags temporary local to the expansion.
12371   expand %{
12372     rFlagsReg cr;
12373     compI_rReg(cr, dst, src);
12374     cmovI_reg_g(dst, src, cr);
12375   %}
12376 %}
12377 
12378 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12379 %{
        // Helper with no match rule: it is only instantiated from the expand block
        // of maxI_rReg. Conditionally replaces dst with src on "less", using flags
        // produced by a preceding compare.
12380   effect(USE_DEF dst, USE src, USE cr);
12381 
12382   format %{ "cmovllt $dst, $src\t# max" %}
12383   opcode(0x0F, 0x4C);
12384   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12385   ins_pipe(pipe_cmov_reg);
12386 %}
12387 
12388 
12389 instruct maxI_rReg(rRegI dst, rRegI src)
12390 %{
        // Integer maximum, computed in place in dst.
12391   match(Set dst (MaxI dst src));
12392 
12393   ins_cost(200);
        // Expands to: compare dst with src, then move src into dst if dst was
        // less. cr is a flags temporary local to the expansion.
12394   expand %{
12395     rFlagsReg cr;
12396     compI_rReg(cr, dst, src);
12397     cmovI_reg_l(dst, src, cr);
12398   %}
12399 %}
12400 
12401 // ============================================================================
12402 // Branch Instructions
12403 
12404 // Jump Direct - Label defines a relative address from JMP+1
12405 instruct jmpDir(label labl)
12406 %{
        // Unconditional branch for the Goto node.
12407   match(Goto);
12408   effect(USE labl);
12409 
12410   ins_cost(300);
12411   format %{ "jmp     $labl" %}
        // Declared as 5 bytes: the single opcode byte plus the label
        // displacement emitted by Lbl.
12412   size(5);
12413   opcode(0xE9);
12414   ins_encode(OpcP, Lbl(labl));
12415   ins_pipe(pipe_jmp);
12416   ins_pc_relative(1);
12417 %}
12418 
12419 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12420 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12421 %{
        // Conditional branch for an If node on signed flags (rFlagsReg / cmpOp).
12422   match(If cop cr);
12423   effect(USE labl);
12424 
12425   ins_cost(300);
12426   format %{ "j$cop     $labl" %}
        // Declared as 6 bytes; the Jcc encoder combines the 0F 80 base opcode
        // with the condition from cop (presumably by adding the condition code
        // to the second byte -- confirm against enc_class Jcc).
12427   size(6);
12428   opcode(0x0F, 0x80);
12429   ins_encode(Jcc(cop, labl));
12430   ins_pipe(pipe_jcc);
12431   ins_pc_relative(1);
12432 %}
12433 
12434 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12435 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12436 %{
        // Same encoding as jmpCon, but matching the CountedLoopEnd node.
12437   match(CountedLoopEnd cop cr);
12438   effect(USE labl);
12439 
12440   ins_cost(300);
12441   format %{ "j$cop     $labl\t# loop end" %}
12442   size(6);
12443   opcode(0x0F, 0x80);
12444   ins_encode(Jcc(cop, labl));
12445   ins_pipe(pipe_jcc);
12446   ins_pc_relative(1);
12447 %}
12448 
12449 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch for unsigned comparisons: takes a cmpOpU condition
// and an rFlagsRegU flags operand (both defined elsewhere in this file).
// Opcode, size and encoding class are the same as for jmpLoopEnd.
12450 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12451   match(CountedLoopEnd cop cmp);
12452   effect(USE labl);
12453 
12454   ins_cost(300);
12455   format %{ "j$cop,u   $labl\t# loop end" %}
12456   size(6);
12457   opcode(0x0F, 0x80);
12458   ins_encode(Jcc(cop, labl));
12459   ins_pipe(pipe_jcc);
12460   ins_pc_relative(1);
12461 %}
12462 
// Counted-loop back-branch taking a cmpOpUCF condition on an rFlagsRegUCF
// flags operand (both defined elsewhere in this file).  Encoding is identical
// to jmpLoopEndU; only the operand classes and the cost differ.
// NOTE(review): the lower ins_cost (200 vs. 300) presumably makes the matcher
// prefer this rule when both apply - confirm against the operand definitions.
12463 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12464   match(CountedLoopEnd cop cmp);
12465   effect(USE labl);
12466 
12467   ins_cost(200);
12468   format %{ "j$cop,u   $labl\t# loop end" %}
12469   size(6);
12470   opcode(0x0F, 0x80);
12471   ins_encode(Jcc(cop, labl));
12472   ins_pipe(pipe_jcc);
12473   ins_pc_relative(1);
12474 %}
12475 
12476 // Jump Direct Conditional - using unsigned comparison
// Matched for If nodes whose condition is a cmpOpU on rFlagsRegU.  Long form:
// opcode bytes based on 0F 80 plus a 32-bit displacement (6 bytes), emitted
// through the Jcc encoding class defined elsewhere in this file.
12477 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12478   match(If cop cmp);
12479   effect(USE labl);
12480 
12481   ins_cost(300);
12482   format %{ "j$cop,u  $labl" %}
12483   size(6);
12484   opcode(0x0F, 0x80);
12485   ins_encode(Jcc(cop, labl));
12486   ins_pipe(pipe_jcc);
12487   ins_pc_relative(1);
12488 %}
12489 
// Conditional jump for If nodes taking a cmpOpUCF condition on an
// rFlagsRegUCF flags operand (both defined elsewhere in this file).  Encoding
// is identical to jmpConU; only the operand classes and the cost differ.
// NOTE(review): the lower ins_cost (200 vs. 300) presumably makes the matcher
// prefer this rule when both apply - confirm against the operand definitions.
12490 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12491   match(If cop cmp);
12492   effect(USE labl);
12493 
12494   ins_cost(200);
12495   format %{ "j$cop,u  $labl" %}
12496   size(6);
12497   opcode(0x0F, 0x80);
12498   ins_encode(Jcc(cop, labl));
12499   ins_pipe(pipe_jcc);
12500   ins_pc_relative(1);
12501 %}
12502 
// Conditional jump that needs two branch instructions (cmpOpUCF2 condition on
// an rFlagsRegUCF flags operand).  Only two condition codes are handled:
//   notEqual:  jp  labl ; jne labl          (either jump reaches the label)
//   equal:     jp  done ; je  labl ; done:  (the parity jump skips the 2nd jump)
// Any other condition code hits ShouldNotReachHere().
// Both jumps use the long form (0F 8x + disp32, 6 bytes each), hence size(12).
// NOTE(review): a set parity flag presumably denotes an unordered (NaN)
// floating-point compare result - confirm against the rFlagsRegUCF definition.
12503 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12504   match(If cop cmp);
12505   effect(USE labl);
12506 
12507   ins_cost(200);
12508   format %{ $$template
12509     if ($cop$$cmpcode == Assembler::notEqual) {
12510       $$emit$$"jp,u   $labl\n\t"
12511       $$emit$$"j$cop,u   $labl"
12512     } else {
12513       $$emit$$"jp,u   done\n\t"
12514       $$emit$$"j$cop,u   $labl\n\t"
12515       $$emit$$"done:"
12516     }
12517   %}
12518   size(12);  // two 6-byte conditional jumps
12519   opcode(0x0F, 0x80);
12520   ins_encode %{
12521     Label* l = $labl$$label;
12522     $$$emit8$primary;  // first jump: primary opcode byte
12523     emit_cc(cbuf, $secondary, Assembler::parity);  // secondary opcode with the parity condition
12524     int parity_disp = -1;
12525     if ($cop$$cmpcode == Assembler::notEqual) {
12526        // the two jumps 6 bytes apart so the jump distances are too
12527        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;  // measured from the end of this jump; 0 when there is no label
12528     } else if ($cop$$cmpcode == Assembler::equal) {
12529        parity_disp = 6;  // hop over the second 6-byte jump
12530     } else {
12531        ShouldNotReachHere();
12532     }
12533     emit_d32(cbuf, parity_disp);
12534     $$$emit8$primary;  // second jump: primary opcode byte
12535     emit_cc(cbuf, $secondary, $cop$$cmpcode);  // secondary opcode with the requested condition
12536     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;  // measured from the end of this jump; 0 when there is no label
12537     emit_d32(cbuf, disp);
12538   %}
12539   ins_pipe(pipe_jcc);
12540   ins_pc_relative(1);
12541 %}
12542 
12543 // ============================================================================
12544 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12545 // superklass array for an instance of the superklass.  Set a hidden
12546 // internal cache on a hit (cache is checked with exposed code in
12547 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12548 // encoding ALSO sets flags.
12549 
// Slow-path subtype check that delivers its result in a register.
// Operands are pinned by their operand classes: sub in RSI, super in RAX,
// the scan counter in RCX (killed) and the result in RDI; the flags are
// killed as well.  The format string below describes the emitted sequence:
// scan the secondary supers array of sub for super, update the secondary
// super cache on a hit and clear the result register; on a miss the result
// register is left non-zero.
// opcode(0x1) tells the shared enc_PartialSubtypeCheck encoding (defined
// elsewhere in this file) to emit the XOR of RDI.
12550 instruct partialSubtypeCheck(rdi_RegP result,
12551                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12552                              rFlagsReg cr)
12553 %{
12554   match(Set result (PartialSubtypeCheck sub super));
12555   effect(KILL rcx, KILL cr);
12556 
12557   ins_cost(1100);  // slightly larger than the next version
12558   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12559             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12560             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12561             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12562             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12563             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12564             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12565     "miss:\t" %}
12566 
12567   opcode(0x1); // Force a XOR of RDI
12568   ins_encode(enc_PartialSubtypeCheck());
12569   ins_pipe(pipe_slow);
12570 %}
12571 
// Slow-path subtype check whose result is only compared against null.
// Matches CmpP(PartialSubtypeCheck, zero) and produces the flags directly, so
// the result register (RDI) is merely killed, together with RCX.  Operands
// are pinned by their operand classes: sub in RSI, super in RAX.
// The encoding is shared with partialSubtypeCheck; opcode(0x0) tells
// enc_PartialSubtypeCheck (defined elsewhere in this file) to omit the XOR of
// RDI.  The cost is lower than that of the register-result variant.
12572 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12573                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12574                                      immP0 zero,
12575                                      rdi_RegP result)
12576 %{
12577   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12578   effect(KILL rcx, KILL result);
12579 
12580   ins_cost(1000);
12581   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12582             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12583             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12584             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx-- != 0\n\t"
12585             "jne,s   miss\t\t# Missed: flags nz\n\t"
12586             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12587     "miss:\t" %}
12588 
12589   opcode(0x0); // No need to XOR RDI
12590   ins_encode(enc_PartialSubtypeCheck());
12591   ins_pipe(pipe_slow);
12592 %}
12593 
12594 // ============================================================================
12595 // Branch Instructions -- short offset versions
12596 //
12597 // These instructions are used to replace jumps of a long offset (the default
12598 // match) with jumps of a shorter offset.  These instructions are all tagged
12599 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12600 // match rules in general matching.  Instead, the ADLC generates a conversion
12601 // method in the MachNode which can be used to do in-place replacement of the
12602 // long variant with the shorter variant.  The compiler determines whether a
12603 // branch can use the short form via the is_short_branch_offset() predicate in
12604 // the machine-specific code section of the file.
12605 
12606 // Jump Direct - Label defines a relative address from JMP+1
// Short form of jmpDir: opcode EB (JMP rel8), declared as 2 bytes by size(2).
// Tagged with ins_short_branch(1), so it is only used as an in-place
// replacement for the long variant.  The LblShort encoding class is defined
// elsewhere in this file; presumably it emits the 8-bit displacement.
12607 instruct jmpDir_short(label labl) %{
12608   match(Goto);
12609   effect(USE labl);
12610 
12611   ins_cost(300);
12612   format %{ "jmp,s   $labl" %}
12613   size(2);
12614   opcode(0xEB);
12615   ins_encode(OpcP, LblShort(labl));
12616   ins_pipe(pipe_jmp);
12617   ins_pc_relative(1);
12618   ins_short_branch(1);
12619 %}
12620 
12621 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon: one opcode byte based on 0x70 plus an 8-bit
// displacement, declared as 2 bytes by size(2).  Tagged with
// ins_short_branch(1), so it only replaces the long variant in place.  The
// JccShort encoding class is defined elsewhere in this file.
12622 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12623   match(If cop cr);
12624   effect(USE labl);
12625 
12626   ins_cost(300);
12627   format %{ "j$cop,s   $labl" %}
12628   size(2);
12629   opcode(0x70);
12630   ins_encode(JccShort(cop, labl));
12631   ins_pipe(pipe_jcc);
12632   ins_pc_relative(1);
12633   ins_short_branch(1);
12634 %}
12635 
12636 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd (counted-loop back-branch): same 2-byte encoding
// as jmpCon_short, matched for CountedLoopEnd and tagged with
// ins_short_branch(1).
12637 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12638   match(CountedLoopEnd cop cr);
12639   effect(USE labl);
12640 
12641   ins_cost(300);
12642   format %{ "j$cop,s   $labl\t# loop end" %}
12643   size(2);
12644   opcode(0x70);
12645   ins_encode(JccShort(cop, labl));
12646   ins_pipe(pipe_jcc);
12647   ins_pc_relative(1);
12648   ins_short_branch(1);
12649 %}
12650 
12651 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEndU (counted-loop back-branch on an unsigned
// comparison): 2-byte encoding via JccShort, tagged with ins_short_branch(1).
12652 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12653   match(CountedLoopEnd cop cmp);
12654   effect(USE labl);
12655 
12656   ins_cost(300);
12657   format %{ "j$cop,us  $labl\t# loop end" %}
12658   size(2);
12659   opcode(0x70);
12660   ins_encode(JccShort(cop, labl));
12661   ins_pipe(pipe_jcc);
12662   ins_pc_relative(1);
12663   ins_short_branch(1);
12664 %}
12665 
// Short form of jmpLoopEndUCF: counted-loop back-branch taking a cmpOpUCF
// condition on rFlagsRegUCF, encoded in 2 bytes via JccShort and tagged with
// ins_short_branch(1).
12666 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12667   match(CountedLoopEnd cop cmp);
12668   effect(USE labl);
12669 
12670   ins_cost(300);
12671   format %{ "j$cop,us  $labl\t# loop end" %}
12672   size(2);
12673   opcode(0x70);
12674   ins_encode(JccShort(cop, labl));
12675   ins_pipe(pipe_jcc);
12676   ins_pc_relative(1);
12677   ins_short_branch(1);
12678 %}
12679 
12680 // Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU: 2-byte encoding (opcode based on 0x70 plus an 8-bit
// displacement) via JccShort, tagged with ins_short_branch(1).
12681 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12682   match(If cop cmp);
12683   effect(USE labl);
12684 
12685   ins_cost(300);
12686   format %{ "j$cop,us  $labl" %}
12687   size(2);
12688   opcode(0x70);
12689   ins_encode(JccShort(cop, labl));
12690   ins_pipe(pipe_jcc);
12691   ins_pc_relative(1);
12692   ins_short_branch(1);
12693 %}
12694 
// Short form of jmpConUCF: conditional jump taking a cmpOpUCF condition on
// rFlagsRegUCF, encoded in 2 bytes via JccShort and tagged with
// ins_short_branch(1).
12695 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12696   match(If cop cmp);
12697   effect(USE labl);
12698 
12699   ins_cost(300);
12700   format %{ "j$cop,us  $labl" %}
12701   size(2);
12702   opcode(0x70);
12703   ins_encode(JccShort(cop, labl));
12704   ins_pipe(pipe_jcc);
12705   ins_pc_relative(1);
12706   ins_short_branch(1);
12707 %}
12708 
// Short form of jmpConUCF2: the same two-jump scheme, but each jump is a
// 2-byte short conditional jump (opcode based on 0x70 plus an 8-bit
// displacement), hence size(4).  Only two condition codes are handled:
//   notEqual:  jp  labl ; jne labl          (either jump reaches the label)
//   equal:     jp  done ; je  labl ; done:  (the parity jump skips the 2nd jump)
// Any other condition code hits ShouldNotReachHere().  Tagged with
// ins_short_branch(1), so it only replaces the long variant in place.
12709 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12710   match(If cop cmp);
12711   effect(USE labl);
12712 
12713   ins_cost(300);
12714   format %{ $$template
12715     if ($cop$$cmpcode == Assembler::notEqual) {
12716       $$emit$$"jp,u,s   $labl\n\t"
12717       $$emit$$"j$cop,u,s   $labl"
12718     } else {
12719       $$emit$$"jp,u,s   done\n\t"
12720       $$emit$$"j$cop,u,s  $labl\n\t"
12721       $$emit$$"done:"
12722     }
12723   %}
12724   size(4);  // two 2-byte conditional jumps
12725   opcode(0x70);
12726   ins_encode %{
12727     Label* l = $labl$$label;
12728     emit_cc(cbuf, $primary, Assembler::parity);  // first jump: opcode with the parity condition
12729     int parity_disp = -1;
12730     if ($cop$$cmpcode == Assembler::notEqual) {
12731       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;  // measured from the end of this jump; 0 when there is no label
12732     } else if ($cop$$cmpcode == Assembler::equal) {
12733       parity_disp = 2;  // hop over the second 2-byte jump
12734     } else {
12735       ShouldNotReachHere();
12736     }
12737     emit_d8(cbuf, parity_disp);
12738     emit_cc(cbuf, $primary, $cop$$cmpcode);  // second jump: opcode with the requested condition
12739     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;  // measured from the end of this jump; 0 when there is no label
12740     emit_d8(cbuf, disp);
12741     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");  // range checks run after both bytes were emitted
12742     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12743   %}
12744   ins_pipe(pipe_jcc);
12745   ins_pc_relative(1);
12746   ins_short_branch(1);
12747 %}
12748 
12749 // ============================================================================
12750 // inlined locking and unlocking
12751 
// Inlined monitor-enter fast path, matched for FastLock nodes.  The outcome
// is delivered in the flags operand cr.  tmp (constrained to RAX by its
// operand class) and scr are scratch registers reserved through TEMP effects.
// The emitted code comes from the Fast_Lock encoding class defined elsewhere
// in this file.
12752 instruct cmpFastLock(rFlagsReg cr,
12753                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12754 %{
12755   match(Set cr (FastLock object box));
12756   effect(TEMP tmp, TEMP scr);
12757 
12758   ins_cost(300);
12759   format %{ "fastlock $object,$box,$tmp,$scr" %}
12760   ins_encode(Fast_Lock(object, box, tmp, scr));
12761   ins_pipe(pipe_slow);
12762   ins_pc_relative(1);
12763 %}
12764 
// Inlined monitor-exit fast path, matched for FastUnlock nodes.  The outcome
// is delivered in the flags operand cr.  box is constrained to RAX by its
// operand class; tmp is a scratch register reserved through a TEMP effect.
// The emitted code comes from the Fast_Unlock encoding class defined
// elsewhere in this file.
12765 instruct cmpFastUnlock(rFlagsReg cr,
12766                        rRegP object, rax_RegP box, rRegP tmp)
12767 %{
12768   match(Set cr (FastUnlock object box));
12769   effect(TEMP tmp);
12770 
12771   ins_cost(300);
12772   format %{ "fastunlock $object, $box, $tmp" %}
12773   ins_encode(Fast_Unlock(object, box, tmp));
12774   ins_pipe(pipe_slow);
12775   ins_pc_relative(1);
12776 %}
12777 
12778 
12779 // ============================================================================
12780 // Safepoint Instructions
// Safepoint poll, matched for SafePoint nodes.  Per the format string it is
// a RIP-relative testl against the polling page; the test clobbers the
// flags, hence KILL cr.  The fixed size(6) must stay in sync with what the
// enc_safepoint_poll encoding class (defined elsewhere in this file) emits.
12781 instruct safePoint_poll(rFlagsReg cr)
12782 %{
12783   match(SafePoint);
12784   effect(KILL cr);
12785 
12786   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12787             "# Safepoint: poll for GC" %}
12788   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12789   ins_cost(125);
12790   ins_encode(enc_safepoint_poll);
12791   ins_pipe(ialu_reg_mem);
12792 %}
12793 
12794 // ============================================================================
12795 // Procedure Call/Return Instructions
12796 // Call Java Static Instruction
12797 // Note: If this code changes, the corresponding ret_addr_offset() and
12798 //       compute_padding() functions will have to be adjusted.
// Matched for CallStaticJava nodes.  Uses opcode E8 (CALL rel32) through the
// Java_Static_Call encoding class, followed by call_epilog (both defined
// elsewhere in this file).  ins_alignment(4) requests 4-byte alignment for
// this instruction.
12799 instruct CallStaticJavaDirect(method meth)
12800 %{
12801   match(CallStaticJava);
12802   effect(USE meth);
12803 
12804   ins_cost(300);
12805   format %{ "call,static " %}
12806   opcode(0xE8); /* E8 cd */
12807   ins_encode(Java_Static_Call(meth), call_epilog);
12808   ins_pipe(pipe_slow);
12809   ins_pc_relative(1);
12810   ins_alignment(4);
12811 %}
12812 
12813 // Call Java Dynamic Instruction
12814 // Note: If this code changes, the corresponding ret_addr_offset() and
12815 //       compute_padding() functions will have to be adjusted.
// Matched for CallDynamicJava nodes.  Per the format string the sequence is
// a load of Universe::non_oop_word() into RAX followed by the call (opcode
// E8, CALL rel32).  The bytes are produced by the Java_Dynamic_Call encoding
// class followed by call_epilog (both defined elsewhere in this file).
// ins_alignment(4) requests 4-byte alignment for this instruction.
12816 instruct CallDynamicJavaDirect(method meth)
12817 %{
12818   match(CallDynamicJava);
12819   effect(USE meth);
12820 
12821   ins_cost(300);
12822   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12823             "call,dynamic " %}
12824   opcode(0xE8); /* E8 cd */
12825   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12826   ins_pipe(pipe_slow);
12827   ins_pc_relative(1);
12828   ins_alignment(4);
12829 %}
12830 
12831 // Call Runtime Instruction
// Matched for CallRuntime nodes.  Uses opcode E8 (CALL rel32) through the
// Java_To_Runtime encoding class defined elsewhere in this file.  Unlike the
// Java call instructs, no call_epilog and no ins_alignment are specified.
12832 instruct CallRuntimeDirect(method meth)
12833 %{
12834   match(CallRuntime);
12835   effect(USE meth);
12836 
12837   ins_cost(300);
12838   format %{ "call,runtime " %}
12839   opcode(0xE8); /* E8 cd */
12840   ins_encode(Java_To_Runtime(meth));
12841   ins_pipe(pipe_slow);
12842   ins_pc_relative(1);
12843 %}
12844 
12845 // Call runtime without safepoint
// Matched for CallLeaf nodes.  The encoding is the same as for
// CallRuntimeDirect (opcode E8 via Java_To_Runtime); only the matched node
// and the printed mnemonic differ.
12846 instruct CallLeafDirect(method meth)
12847 %{
12848   match(CallLeaf);
12849   effect(USE meth);
12850 
12851   ins_cost(300);
12852   format %{ "call_leaf,runtime " %}
12853   opcode(0xE8); /* E8 cd */
12854   ins_encode(Java_To_Runtime(meth));
12855   ins_pipe(pipe_slow);
12856   ins_pc_relative(1);
12857 %}
12858 
12859 // Call runtime without safepoint
// Matched for CallLeafNoFP nodes.  The encoding is the same as for
// CallLeafDirect (opcode E8 via Java_To_Runtime); only the matched node and
// the printed mnemonic differ.
12860 instruct CallLeafNoFPDirect(method meth)
12861 %{
12862   match(CallLeafNoFP);
12863   effect(USE meth);
12864 
12865   ins_cost(300);
12866   format %{ "call_leaf_nofp,runtime " %}
12867   opcode(0xE8); /* E8 cd */
12868   ins_encode(Java_To_Runtime(meth));
12869   ins_pipe(pipe_slow);
12870   ins_pc_relative(1);
12871 %}
12872 
12873 // Return Instruction
12874 // Remove the return address & jump to it.
12875 // Notice: We always emit a nop after a ret to make sure there is room
12876 // for safepoint patching
// NOTE(review): the encoding below consists of OpcP only, i.e. the single
// opcode byte C3 (RET); this instruct itself emits no nop, so the notice
// above may be stale or refer to code elsewhere - confirm.
12877 instruct Ret()
12878 %{
12879   match(Return);
12880 
12881   format %{ "ret" %}
12882   opcode(0xC3);
12883   ins_encode(OpcP);
12884   ins_pipe(pipe_jmp);
12885 %}
12886 
12887 // Tail Call; Jump from runtime stub to Java code.
12888 // Also known as an 'interprocedural jump'.
12889 // Target of jump will eventually return to caller.
12890 // TailJump below removes the return address.
// Matched for TailCall nodes.  Encoded as an indirect jump through a
// register (opcode FF /4), preceded by whatever the REX_reg encoding class
// (defined elsewhere in this file) emits for jump_target.  jump_target uses
// the no_rbp_RegP operand class; method_oop is constrained to RBX by its
// operand class and is not referenced by the encoding.
12891 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12892 %{
12893   match(TailCall jump_target method_oop);
12894 
12895   ins_cost(300);
12896   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12897   opcode(0xFF, 0x4); /* Opcode FF /4 */
12898   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12899   ins_pipe(pipe_jmp);
12900 %}
12901 
12902 // Tail Jump; remove the return address; jump to target.
12903 // TailCall above leaves the return address around.
// Matched for TailJump nodes.  The encoding first emits the byte 5A
// (popq rdx), which discards the return address into RDX, and then the same
// indirect register jump as TailCalljmpInd (opcode FF /4).  ex_oop is
// constrained to RAX by its operand class and is not referenced by the
// encoding.
12904 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12905 %{
12906   match(TailJump jump_target ex_oop);
12907 
12908   ins_cost(300);
12909   format %{ "popq    rdx\t# pop return address\n\t"
12910             "jmp     $jump_target" %}
12911   opcode(0xFF, 0x4); /* Opcode FF /4 */
12912   ins_encode(Opcode(0x5a), // popq rdx
12913              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12914   ins_pipe(pipe_jmp);
12915 %}
12916 
12917 // Create exception oop: created by stack-crawling runtime code.
12918 // Created exception is now available to this handler, and is setup
12919 // just prior to jumping to this handler.  No code emitted.
// Matched for CreateEx nodes.  The result operand is constrained to RAX by
// its operand class; size(0) and the empty ins_encode() mean that nothing is
// emitted for this instruct.
12920 instruct CreateException(rax_RegP ex_oop)
12921 %{
12922   match(Set ex_oop (CreateEx));
12923 
12924   size(0);
12925   // use the following format syntax
12926   format %{ "# exception oop is in rax; no code emitted" %}
12927   ins_encode();
12928   ins_pipe(empty);
12929 %}
12930 
12931 // Rethrow exception:
12932 // The exception oop will come in the first argument position.
12933 // Then JUMP (not call) to the rethrow stub code.
// Matched for Rethrow nodes; takes no operands.  The jump itself is produced
// by the enc_rethrow encoding class defined elsewhere in this file.
12934 instruct RethrowException()
12935 %{
12936   match(Rethrow);
12937 
12938   // use the following format syntax
12939   format %{ "jmp     rethrow_stub" %}
12940   ins_encode(enc_rethrow);
12941   ins_pipe(pipe_jmp);
12942 %}
12943 
12944 
12945 //----------PEEPHOLE RULES-----------------------------------------------------
12946 // These must follow all instruction definitions as they use the names
12947 // defined in the instruction definitions.
12948 //
12949 // peepmatch ( root_instr_name [preceding_instruction]* );
12950 //
12951 // peepconstraint %{
12952 // (instruction_number.operand_name relational_op instruction_number.operand_name
12953 //  [, ...] );
12954 // // instruction numbers are zero-based using left to right order in peepmatch
12955 //
12956 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12957 // // provide an instruction_number.operand_name for each operand that appears
12958 // // in the replacement instruction's match rule
12959 //
12960 // ---------VM FLAGS---------------------------------------------------------
12961 //
12962 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12963 //
12964 // Each peephole rule is given an identifying number starting with zero and
12965 // increasing by one in the order seen by the parser.  An individual peephole
12966 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12967 // on the command-line.
12968 //
12969 // ---------CURRENT LIMITATIONS----------------------------------------------
12970 //
12971 // Only match adjacent instructions in same basic block
12972 // Only equality constraints
12973 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12974 // Only one replacement instruction
12975 //
12976 // ---------EXAMPLE----------------------------------------------------------
12977 //
12978 // // pertinent parts of existing instructions in architecture description
12979 // instruct movI(rRegI dst, rRegI src)
12980 // %{
12981 //   match(Set dst (CopyI src));
12982 // %}
12983 //
12984 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12985 // %{
12986 //   match(Set dst (AddI dst src));
12987 //   effect(KILL cr);
12988 // %}
12989 //
12990 // // Change (inc mov) to lea
12991 // peephole %{
12992 //   // increment preceded by register-register move
12993 //   peepmatch ( incI_rReg movI );
12994 //   // require that the destination register of the increment
12995 //   // match the destination register of the move
12996 //   peepconstraint ( 0.dst == 1.dst );
12997 //   // construct a replacement instruction that sets
12998 //   // the destination to ( move's source register + one )
12999 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13000 // %}
13001 //
13002 
13003 // Implementation no longer uses movX instructions since
13004 // machine-independent system no longer uses CopyX nodes.
13005 //
13006 // peephole
13007 // %{
13008 //   peepmatch (incI_rReg movI);
13009 //   peepconstraint (0.dst == 1.dst);
13010 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13011 // %}
13012 
13013 // peephole
13014 // %{
13015 //   peepmatch (decI_rReg movI);
13016 //   peepconstraint (0.dst == 1.dst);
13017 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13018 // %}
13019 
13020 // peephole
13021 // %{
13022 //   peepmatch (addI_rReg_imm movI);
13023 //   peepconstraint (0.dst == 1.dst);
13024 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13025 // %}
13026 
13027 // peephole
13028 // %{
13029 //   peepmatch (incL_rReg movL);
13030 //   peepconstraint (0.dst == 1.dst);
13031 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13032 // %}
13033 
13034 // peephole
13035 // %{
13036 //   peepmatch (decL_rReg movL);
13037 //   peepconstraint (0.dst == 1.dst);
13038 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13039 // %}
13040 
13041 // peephole
13042 // %{
13043 //   peepmatch (addL_rReg_imm movL);
13044 //   peepconstraint (0.dst == 1.dst);
13045 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13046 // %}
13047 
13048 // peephole
13049 // %{
13050 //   peepmatch (addP_rReg_imm movP);
13051 //   peepconstraint (0.dst == 1.dst);
13052 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13053 // %}
13054 
13055 // // Change load of spilled value to only a spill
13056 // instruct storeI(memory mem, rRegI src)
13057 // %{
13058 //   match(Set mem (StoreI mem src));
13059 // %}
13060 //
13061 // instruct loadI(rRegI dst, memory mem)
13062 // %{
13063 //   match(Set dst (LoadI mem));
13064 // %}
13065 //
13066 
// Peephole: drop a reload that directly follows a spill of the same value.
// Instruction 0 is the root (loadI) and instruction 1 the instruction that
// precedes it (storeI).  When the store's source register equals the load's
// destination and both use the same memory operand, the pair is replaced by
// the storeI alone.
13067 peephole
13068 %{
13069   peepmatch (loadI storeI);
13070   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13071   peepreplace (storeI(1.mem 1.mem 1.src));
13072 %}
13073 
// Peephole: 64-bit counterpart of the loadI/storeI rule.  A loadL
// (instruction 0, the root) directly preceded by a storeL (instruction 1) of
// the same register to the same memory operand is replaced by the storeL
// alone.
13074 peephole
13075 %{
13076   peepmatch (loadL storeL);
13077   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13078   peepreplace (storeL(1.mem 1.mem 1.src));
13079 %}
13080 
13081 //----------SMARTSPILL RULES---------------------------------------------------
13082 // These must follow all instruction definitions as they use the names
13083 // defined in the instruction definitions.