1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
// Each general register is described by a pair of reg_defs: NAME is bound to
// reg->as_VMReg() and NAME_H to the VMReg that follows it (->next()).  Both
// entries of a pair carry the same hardware encoding.  The first two
// arguments are the register save type and the C convention save type.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// The stack pointer is never saved by the allocator (NS in both columns).
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI/RDI: the C convention save type is SOE when built for _WIN64 and SOC
// otherwise; the register save type (first column) is SOC in both cases.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call under both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-entry under the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
// Each XMM register is described by a pair of reg_defs: XMMn is bound to
// xmmN->as_VMReg() and XMMn_H to the VMReg that follows it (->next()).
// XMM0-XMM5 are save-on-call under both conventions.
reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());

reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());

reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());

reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());

reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());

reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());

// XMM6-XMM15: the C convention save type is SOE when built for _WIN64 and
// SOC otherwise; the register save type (first column) is SOC in both cases.
#ifdef _WIN64

reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());

#else

reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());

reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());

reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());

reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());

reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());

reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());

reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());

reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());

reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());

reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());

#endif // _WIN64
 227 
// Flags register: ideal register type 0, encoding 16, and no backing VMReg
// (VMRegImpl::Bad()).
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
// chunk0: every general register as a (low, _H) pair, in selection-priority
// order.  R10/R11 come first and RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
 254 
// chunk1: every XMM register as a (low, _H) pair, in ascending numeric order.
// XXX probably use 8-15 first on Linux
alloc_class chunk1(XMM0,  XMM0_H,
                   XMM1,  XMM1_H,
                   XMM2,  XMM2_H,
                   XMM3,  XMM3_H,
                   XMM4,  XMM4_H,
                   XMM5,  XMM5_H,
                   XMM6,  XMM6_H,
                   XMM7,  XMM7_H,
                   XMM8,  XMM8_H,
                   XMM9,  XMM9_H,
                   XMM10, XMM10_H,
                   XMM11, XMM11_H,
                   XMM12, XMM12_H,
                   XMM13, XMM13_H,
                   XMM14, XMM14_H,
                   XMM15, XMM15_H);
 272 
// chunk2: the flags register on its own.
alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
// Class for all pointer registers (including RSP): all sixteen general
// registers, each listed as a (low, _H) pair.
reg_class any_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
 302 
// Class for all pointer registers except RSP.
// Note that R12 and R15 are also absent from this list.
reg_class ptr_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R13, R13_H,
                  R14, R14_H);
 317 
// Class for all pointer registers except RAX and RSP.
// Note that R12 and R15 are also absent from this list.
reg_class ptr_no_rax_reg(RDX, RDX_H,
                         RBP, RBP_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 331 
// Class for all pointer registers except RBP and RSP.
// Note that R12 and R15 are also absent from this list.
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
// Class for all pointer registers except RAX, RBX and RSP.
// Note that R12 and R15 are also absent from this list.
reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
                             RBP, RBP_H,
                             RDI, RDI_H,
                             RSI, RSI_H,
                             RCX, RCX_H,
                             R8,  R8_H,
                             R9,  R9_H,
                             R10, R10_H,
                             R11, R11_H,
                             R13, R13_H,
                             R14, R14_H);
 357 
// Single-register pointer classes.  Each one holds exactly one register,
// listed as its (low, _H) pair.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for stack pointer (RSP)
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15)
reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers (except RSP).
// Note that R12 and R15 are also absent from this list.
reg_class long_reg(RAX, RAX_H,
                   RDX, RDX_H,
                   RBP, RBP_H,
                   RDI, RDI_H,
                   RSI, RSI_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
                   R13, R13_H,
                   R14, R14_H);
 393 
// Class for all long registers except RAX, RDX (and RSP).
// Note that R12 and R15 are also absent from this list.
reg_class long_no_rax_rdx_reg(RBP, RBP_H,
                              RDI, RDI_H,
                              RSI, RSI_H,
                              RCX, RCX_H,
                              RBX, RBX_H,
                              R8,  R8_H,
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
                              R13, R13_H,
                              R14, R14_H);
 406 
// Class for all long registers except RCX (and RSP).
// Note that R12 and R15 are also absent from this list.
reg_class long_no_rcx_reg(RBP, RBP_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RAX, RAX_H,
                          RDX, RDX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 420 
// Class for all long registers except RAX (and RSP).
// Note that R12 and R15 are also absent from this list.
reg_class long_no_rax_reg(RBP, RBP_H,
                          RDX, RDX_H,
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
                          RBX, RBX_H,
                          R8,  R8_H,
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
                          R13, R13_H,
                          R14, R14_H);
 434 
// Single-register long classes; each lists one register as its (low, _H) pair.

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers (except RSP).  Only the low half of each
// register is listed (no _H entries).
// Note that R12 and R15 are also absent from this list.
reg_class int_reg(RAX,
                  RDX,
                  RBP,
                  RDI,
                  RSI,
                  RCX,
                  RBX,
                  R8,
                  R9,
                  R10,
                  R11,
                  R13,
                  R14);
 458 
// Class for all int registers except RCX (and RSP).  Only the low half of
// each register is listed (no _H entries).
// Note that R12 and R15 are also absent from this list.
reg_class int_no_rcx_reg(RAX,
                         RDX,
                         RBP,
                         RDI,
                         RSI,
                         RBX,
                         R8,
                         R9,
                         R10,
                         R11,
                         R13,
                         R14);
 472 
// Class for all int registers except RAX, RDX (and RSP).  Only the low half
// of each register is listed (no _H entries).
// Note that R12 and R15 are also absent from this list.
reg_class int_no_rax_rdx_reg(RBP,
                             RDI,
                             RSI,
                             RCX,
                             RBX,
                             R8,
                             R9,
                             R10,
                             R11,
                             R13,
                             R14);
 485 
// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
// Singleton class for condition codes (the RFLAGS register)
reg_class int_flags(RFLAGS);
 506 
// Class for all float registers.  Only the low entry of each XMM register is
// listed (no _H entries).
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7,
                    XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15);
 524 
// Class for all double registers.  Each XMM register is listed as its
// (low, _H) pair.
reg_class double_reg(XMM0,  XMM0_H,
                     XMM1,  XMM1_H,
                     XMM2,  XMM2_H,
                     XMM3,  XMM3_H,
                     XMM4,  XMM4_H,
                     XMM5,  XMM5_H,
                     XMM6,  XMM6_H,
                     XMM7,  XMM7_H,
                     XMM8,  XMM8_H,
                     XMM9,  XMM9_H,
                     XMM10, XMM10_H,
                     XMM11, XMM11_H,
                     XMM12, XMM12_H,
                     XMM13, XMM13_H,
                     XMM14, XMM14_H,
                     XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short names for the relocation operand formats passed as the `format`
// argument of the emit_*_reloc helpers below.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ insn(...)" expands to "_masm.insn(...)", so an object named _masm must
// be in scope wherever this shorthand is used.
#define __ _masm.
 553 
 554 // !!!!! Special hack to get all types of calls to specify the byte offset
 555 //       from the start of the call to the point where the return address
 556 //       will point.
 557 int MachCallStaticJavaNode::ret_addr_offset()
 558 {
 559   return 5; // 5 bytes from start of call to where return address points
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset()
 563 {
 564   return 15; // 15 bytes from start of call to where return address points
 565 }
 566 
 567 // In os_cpu .ad file
 568 // int MachCallRuntimeNode::ret_addr_offset()
 569 
 570 // Indicate if the safepoint node needs the polling page as an input.
 571 // Since amd64 does not have absolute addressing but RIP-relative
 572 // addressing and the polling page is within 2G, it doesn't.
 573 bool SafePointNode::needs_polling_address_input()
 574 {
 575   return false;
 576 }
 577 
 578 //
 579 // Compute padding required for nodes which need alignment
 580 //
 581 
 582 // The address of the call instruction needs to be 4-byte aligned to
 583 // ensure that it does not span a cache line so that it can be patched.
 584 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 585 {
 586   current_offset += 1; // skip call opcode byte
 587   return round_to(current_offset, alignment_required()) - current_offset;
 588 }
 589 
 590 // The address of the call instruction needs to be 4-byte aligned to
 591 // ensure that it does not span a cache line so that it can be patched.
 592 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 593 {
 594   current_offset += 11; // skip movq instruction + call opcode byte
 595   return round_to(current_offset, alignment_required()) - current_offset;
 596 }
 597 
 598 #ifndef PRODUCT
 599 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 600 {
 601   st->print("INT3");
 602 }
 603 #endif
 604 
 605 // EMIT_RM()
 606 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 607 {
 608   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 609   *(cbuf.code_end()) = c;
 610   cbuf.set_code_end(cbuf.code_end() + 1);
 611 }
 612 
 613 // EMIT_CC()
 614 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 615 {
 616   unsigned char c = (unsigned char) (f1 | f2);
 617   *(cbuf.code_end()) = c;
 618   cbuf.set_code_end(cbuf.code_end() + 1);
 619 }
 620 
 621 // EMIT_OPCODE()
 622 void emit_opcode(CodeBuffer &cbuf, int code)
 623 {
 624   *(cbuf.code_end()) = (unsigned char) code;
 625   cbuf.set_code_end(cbuf.code_end() + 1);
 626 }
 627 
 628 // EMIT_OPCODE() w/ relocation information
 629 void emit_opcode(CodeBuffer &cbuf,
 630                  int code, relocInfo::relocType reloc, int offset, int format)
 631 {
 632   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8()
 637 void emit_d8(CodeBuffer &cbuf, int d8)
 638 {
 639   *(cbuf.code_end()) = (unsigned char) d8;
 640   cbuf.set_code_end(cbuf.code_end() + 1);
 641 }
 642 
 643 // EMIT_D16()
 644 void emit_d16(CodeBuffer &cbuf, int d16)
 645 {
 646   *((short *)(cbuf.code_end())) = d16;
 647   cbuf.set_code_end(cbuf.code_end() + 2);
 648 }
 649 
 650 // EMIT_D32()
 651 void emit_d32(CodeBuffer &cbuf, int d32)
 652 {
 653   *((int *)(cbuf.code_end())) = d32;
 654   cbuf.set_code_end(cbuf.code_end() + 4);
 655 }
 656 
 657 // EMIT_D64()
 658 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 659 {
 660   *((int64_t*) (cbuf.code_end())) = d64;
 661   cbuf.set_code_end(cbuf.code_end() + 8);
 662 }
 663 
 664 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 665 void emit_d32_reloc(CodeBuffer& cbuf,
 666                     int d32,
 667                     relocInfo::relocType reloc,
 668                     int format)
 669 {
 670   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 671   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 672 
 673   *((int*) (cbuf.code_end())) = d32;
 674   cbuf.set_code_end(cbuf.code_end() + 4);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf,
 679                     int d32,
 680                     RelocationHolder const& rspec,
 681                     int format)
 682 {
 683 #ifdef ASSERT
 684   if (rspec.reloc()->type() == relocInfo::oop_type &&
 685       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 686     assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
 687   }
 688 #endif
 689   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 690 
 691   *((int* )(cbuf.code_end())) = d32;
 692   cbuf.set_code_end(cbuf.code_end() + 4);
 693 }
 694 
 695 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 696   address next_ip = cbuf.code_end() + 4;
 697   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 698                  external_word_Relocation::spec(addr),
 699                  RELOC_DISP32);
 700 }
 701 
 702 
 703 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 704 void emit_d64_reloc(CodeBuffer& cbuf,
 705                     int64_t d64,
 706                     relocInfo::relocType reloc,
 707                     int format)
 708 {
 709   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 710 
 711   *((int64_t*) (cbuf.code_end())) = d64;
 712   cbuf.set_code_end(cbuf.code_end() + 8);
 713 }
 714 
 715 // emit 64 bit value and construct relocation entry from RelocationHolder
 716 void emit_d64_reloc(CodeBuffer& cbuf,
 717                     int64_t d64,
 718                     RelocationHolder const& rspec,
 719                     int format)
 720 {
 721 #ifdef ASSERT
 722   if (rspec.reloc()->type() == relocInfo::oop_type &&
 723       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 724     assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
 725            "cannot embed non-perm oops in code");
 726   }
 727 #endif
 728   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 729 
 730   *((int64_t*) (cbuf.code_end())) = d64;
 731   cbuf.set_code_end(cbuf.code_end() + 8);
 732 }
 733 
 734 // Access stack slot for load or store
 735 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 736 {
 737   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 738   if (-0x80 <= disp && disp < 0x80) {
 739     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 740     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 741     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 742   } else {
 743     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 744     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 745     emit_d32(cbuf, disp);     // Displacement // R/M byte
 746   }
 747 }
 748 
 749    // rRegI ereg, memory mem) %{    // emit_reg_mem
 750 void encode_RegMem(CodeBuffer &cbuf,
 751                    int reg,
 752                    int base, int index, int scale, int disp, bool disp_is_oop)
 753 {
 754   assert(!disp_is_oop, "cannot have disp");
 755   int regenc = reg & 7;
 756   int baseenc = base & 7;
 757   int indexenc = index & 7;
 758 
 759   // There is no index & no scale, use form without SIB byte
 760   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 761     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 762     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 763       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 764     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 765       // If 8-bit displacement, mode 0x1
 766       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 767       emit_d8(cbuf, disp);
 768     } else {
 769       // If 32-bit displacement
 770       if (base == -1) { // Special flag for absolute address
 771         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 772         if (disp_is_oop) {
 773           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 774         } else {
 775           emit_d32(cbuf, disp);
 776         }
 777       } else {
 778         // Normal base + offset
 779         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 780         if (disp_is_oop) {
 781           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 782         } else {
 783           emit_d32(cbuf, disp);
 784         }
 785       }
 786     }
 787   } else {
 788     // Else, encode with the SIB byte
 789     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 790     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 791       // If no displacement
 792       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 793       emit_rm(cbuf, scale, indexenc, baseenc);
 794     } else {
 795       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 796         // If 8-bit displacement, mode 0x1
 797         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 798         emit_rm(cbuf, scale, indexenc, baseenc);
 799         emit_d8(cbuf, disp);
 800       } else {
 801         // If 32-bit displacement
 802         if (base == 0x04 ) {
 803           emit_rm(cbuf, 0x2, regenc, 0x4);
 804           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 805         } else {
 806           emit_rm(cbuf, 0x2, regenc, 0x4);
 807           emit_rm(cbuf, scale, indexenc, baseenc); // *
 808         }
 809         if (disp_is_oop) {
 810           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 811         } else {
 812           emit_d32(cbuf, disp);
 813         }
 814       }
 815     }
 816   }
 817 }
 818 
 819 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 820 {
 821   if (dstenc != srcenc) {
 822     if (dstenc < 8) {
 823       if (srcenc >= 8) {
 824         emit_opcode(cbuf, Assembler::REX_B);
 825         srcenc -= 8;
 826       }
 827     } else {
 828       if (srcenc < 8) {
 829         emit_opcode(cbuf, Assembler::REX_R);
 830       } else {
 831         emit_opcode(cbuf, Assembler::REX_RB);
 832         srcenc -= 8;
 833       }
 834       dstenc -= 8;
 835     }
 836 
 837     emit_opcode(cbuf, 0x8B);
 838     emit_rm(cbuf, 0x3, dstenc, srcenc);
 839   }
 840 }
 841 
 842 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 843   if( dst_encoding == src_encoding ) {
 844     // reg-reg copy, use an empty encoding
 845   } else {
 846     MacroAssembler _masm(&cbuf);
 847 
 848     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 849   }
 850 }
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq\trbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                   "# Safepoint: poll for GC");
1002     st->print("\t");
1003   }
1004 }
1005 #endif
1006 
1007 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008 {
1009   Compile* C = ra_->C;
1010   int framesize = C->frame_slots() << LogBytesPerInt;
1011   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012   // Remove word for return adr already pushed
1013   // and RBP
1014   framesize -= 2*wordSize;
1015 
1016   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017 
1018   if (framesize) {
1019     emit_opcode(cbuf, Assembler::REX_W);
1020     if (framesize < 0x80) {
1021       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023       emit_d8(cbuf, framesize);
1024     } else {
1025       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027       emit_d32(cbuf, framesize);
1028     }
1029   }
1030 
1031   // popq rbp
1032   emit_opcode(cbuf, 0x58 | RBP_enc);
1033 
1034   if (do_polling() && C->is_method_compilation()) {
1035     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036     // XXX reg_mem doesn't support RIP-relative addressing yet
1037     cbuf.set_inst_mark();
1038     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039     emit_opcode(cbuf, 0x85); // testl
1040     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041     // cbuf.inst_mark() is beginning of instruction
1042     emit_d32_reloc(cbuf, os::get_polling_page());
1043 //                    relocInfo::poll_return_type,
1044   }
1045 }
1046 
1047 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048 {
1049   Compile* C = ra_->C;
1050   int framesize = C->frame_slots() << LogBytesPerInt;
1051   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052   // Remove word for return adr already pushed
1053   // and RBP
1054   framesize -= 2*wordSize;
1055 
1056   uint size = 0;
1057 
1058   if (do_polling() && C->is_method_compilation()) {
1059     size += 6;
1060   }
1061 
1062   // count popq rbp
1063   size++;
1064 
1065   if (framesize) {
1066     if (framesize < 0x80) {
1067       size += 4;
1068     } else if (framesize) {
1069       size += 7;
1070     }
1071   }
1072 
1073   return size;
1074 }
1075 
1076 int MachEpilogNode::reloc() const
1077 {
1078   return 2; // a large enough number
1079 }
1080 
1081 const Pipeline* MachEpilogNode::pipeline() const
1082 {
1083   return MachNode::pipeline_class();
1084 }
1085 
1086 int MachEpilogNode::safepoint_offset() const
1087 {
1088   return 0;
1089 }
1090 
1091 //=============================================================================
1092 
// Location class of a register-allocator name, as assigned by rc_class().
enum RC {
  rc_bad   = 0,  // not a valid OptoReg name
  rc_int   = 1,  // VMReg for which is_Register() holds
  rc_float = 2,  // VMReg for which is_XMMRegister() holds
  rc_stack = 3   // OptoReg name that is a stack location
};
1099 
1100 static enum RC rc_class(OptoReg::Name reg)
1101 {
1102   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103 
1104   if (OptoReg::is_stack(reg)) return rc_stack;
1105 
1106   VMReg r = OptoReg::as_VMReg(reg);
1107 
1108   if (r->is_Register()) return rc_int;
1109 
1110   assert(r->is_XMMRegister(), "must be");
1111   return rc_float;
1112 }
1113 
1114 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                        PhaseRegAlloc* ra_,
1116                                        bool do_size,
1117                                        outputStream* st) const
1118 {
1119 
1120   // Get registers to move
1121   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123   OptoReg::Name dst_second = ra_->get_reg_second(this);
1124   OptoReg::Name dst_first = ra_->get_reg_first(this);
1125 
1126   enum RC src_second_rc = rc_class(src_second);
1127   enum RC src_first_rc = rc_class(src_first);
1128   enum RC dst_second_rc = rc_class(dst_second);
1129   enum RC dst_first_rc = rc_class(dst_first);
1130 
1131   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132          "must move at least 1 register" );
1133 
1134   if (src_first == dst_first && src_second == dst_second) {
1135     // Self copy, no move
1136     return 0;
1137   } else if (src_first_rc == rc_stack) {
1138     // mem ->
1139     if (dst_first_rc == rc_stack) {
1140       // mem -> mem
1141       assert(src_second != dst_first, "overlap");
1142       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144         // 64-bit
1145         int src_offset = ra_->reg2offset(src_first);
1146         int dst_offset = ra_->reg2offset(dst_first);
1147         if (cbuf) {
1148           emit_opcode(*cbuf, 0xFF);
1149           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150 
1151           emit_opcode(*cbuf, 0x8F);
1152           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153 
1154 #ifndef PRODUCT
1155         } else if (!do_size) {
1156           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                      "popq    [rsp + #%d]",
1158                      src_offset,
1159                      dst_offset);
1160 #endif
1161         }
1162         return
1163           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165       } else {
1166         // 32-bit
1167         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169         // No pushl/popl, so:
1170         int src_offset = ra_->reg2offset(src_first);
1171         int dst_offset = ra_->reg2offset(dst_first);
1172         if (cbuf) {
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x89);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179           emit_opcode(*cbuf, 0x8B);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, src_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, 0x89);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, dst_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, Assembler::REX_W);
1192           emit_opcode(*cbuf, 0x8B);
1193           emit_opcode(*cbuf, 0x44);
1194           emit_opcode(*cbuf, 0x24);
1195           emit_opcode(*cbuf, 0xF8);
1196 
1197 #ifndef PRODUCT
1198         } else if (!do_size) {
1199           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                      "movl    rax, [rsp + #%d]\n\t"
1201                      "movl    [rsp + #%d], rax\n\t"
1202                      "movq    rax, [rsp - #8]",
1203                      src_offset,
1204                      dst_offset);
1205 #endif
1206         }
1207         return
1208           5 + // movq
1209           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211           5; // movq
1212       }
1213     } else if (dst_first_rc == rc_int) {
1214       // mem -> gpr
1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217         // 64-bit
1218         int offset = ra_->reg2offset(src_first);
1219         if (cbuf) {
1220           if (Matcher::_regEncode[dst_first] < 8) {
1221             emit_opcode(*cbuf, Assembler::REX_W);
1222           } else {
1223             emit_opcode(*cbuf, Assembler::REX_WR);
1224           }
1225           emit_opcode(*cbuf, 0x8B);
1226           encode_RegMem(*cbuf,
1227                         Matcher::_regEncode[dst_first],
1228                         RSP_enc, 0x4, 0, offset,
1229                         false);
1230 #ifndef PRODUCT
1231         } else if (!do_size) {
1232           st->print("movq    %s, [rsp + #%d]\t# spill",
1233                      Matcher::regName[dst_first],
1234                      offset);
1235 #endif
1236         }
1237         return
1238           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239       } else {
1240         // 32-bit
1241         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243         int offset = ra_->reg2offset(src_first);
1244         if (cbuf) {
1245           if (Matcher::_regEncode[dst_first] >= 8) {
1246             emit_opcode(*cbuf, Assembler::REX_R);
1247           }
1248           emit_opcode(*cbuf, 0x8B);
1249           encode_RegMem(*cbuf,
1250                         Matcher::_regEncode[dst_first],
1251                         RSP_enc, 0x4, 0, offset,
1252                         false);
1253 #ifndef PRODUCT
1254         } else if (!do_size) {
1255           st->print("movl    %s, [rsp + #%d]\t# spill",
1256                      Matcher::regName[dst_first],
1257                      offset);
1258 #endif
1259         }
1260         return
1261           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262           ((Matcher::_regEncode[dst_first] < 8)
1263            ? 3
1264            : 4); // REX
1265       }
1266     } else if (dst_first_rc == rc_float) {
1267       // mem-> xmm
1268       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270         // 64-bit
1271         int offset = ra_->reg2offset(src_first);
1272         if (cbuf) {
1273           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274           if (Matcher::_regEncode[dst_first] >= 8) {
1275             emit_opcode(*cbuf, Assembler::REX_R);
1276           }
1277           emit_opcode(*cbuf, 0x0F);
1278           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279           encode_RegMem(*cbuf,
1280                         Matcher::_regEncode[dst_first],
1281                         RSP_enc, 0x4, 0, offset,
1282                         false);
1283 #ifndef PRODUCT
1284         } else if (!do_size) {
1285           st->print("%s  %s, [rsp + #%d]\t# spill",
1286                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                      Matcher::regName[dst_first],
1288                      offset);
1289 #endif
1290         }
1291         return
1292           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293           ((Matcher::_regEncode[dst_first] < 8)
1294            ? 5
1295            : 6); // REX
1296       } else {
1297         // 32-bit
1298         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300         int offset = ra_->reg2offset(src_first);
1301         if (cbuf) {
1302           emit_opcode(*cbuf, 0xF3);
1303           if (Matcher::_regEncode[dst_first] >= 8) {
1304             emit_opcode(*cbuf, Assembler::REX_R);
1305           }
1306           emit_opcode(*cbuf, 0x0F);
1307           emit_opcode(*cbuf, 0x10);
1308           encode_RegMem(*cbuf,
1309                         Matcher::_regEncode[dst_first],
1310                         RSP_enc, 0x4, 0, offset,
1311                         false);
1312 #ifndef PRODUCT
1313         } else if (!do_size) {
1314           st->print("movss   %s, [rsp + #%d]\t# spill",
1315                      Matcher::regName[dst_first],
1316                      offset);
1317 #endif
1318         }
1319         return
1320           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321           ((Matcher::_regEncode[dst_first] < 8)
1322            ? 5
1323            : 6); // REX
1324       }
1325     }
1326   } else if (src_first_rc == rc_int) {
1327     // gpr ->
1328     if (dst_first_rc == rc_stack) {
1329       // gpr -> mem
1330       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332         // 64-bit
1333         int offset = ra_->reg2offset(dst_first);
1334         if (cbuf) {
1335           if (Matcher::_regEncode[src_first] < 8) {
1336             emit_opcode(*cbuf, Assembler::REX_W);
1337           } else {
1338             emit_opcode(*cbuf, Assembler::REX_WR);
1339           }
1340           emit_opcode(*cbuf, 0x89);
1341           encode_RegMem(*cbuf,
1342                         Matcher::_regEncode[src_first],
1343                         RSP_enc, 0x4, 0, offset,
1344                         false);
1345 #ifndef PRODUCT
1346         } else if (!do_size) {
1347           st->print("movq    [rsp + #%d], %s\t# spill",
1348                      offset,
1349                      Matcher::regName[src_first]);
1350 #endif
1351         }
1352         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353       } else {
1354         // 32-bit
1355         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357         int offset = ra_->reg2offset(dst_first);
1358         if (cbuf) {
1359           if (Matcher::_regEncode[src_first] >= 8) {
1360             emit_opcode(*cbuf, Assembler::REX_R);
1361           }
1362           emit_opcode(*cbuf, 0x89);
1363           encode_RegMem(*cbuf,
1364                         Matcher::_regEncode[src_first],
1365                         RSP_enc, 0x4, 0, offset,
1366                         false);
1367 #ifndef PRODUCT
1368         } else if (!do_size) {
1369           st->print("movl    [rsp + #%d], %s\t# spill",
1370                      offset,
1371                      Matcher::regName[src_first]);
1372 #endif
1373         }
1374         return
1375           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376           ((Matcher::_regEncode[src_first] < 8)
1377            ? 3
1378            : 4); // REX
1379       }
1380     } else if (dst_first_rc == rc_int) {
1381       // gpr -> gpr
1382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384         // 64-bit
1385         if (cbuf) {
1386           if (Matcher::_regEncode[dst_first] < 8) {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_W);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WB);
1391             }
1392           } else {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_WR);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WRB);
1397             }
1398           }
1399           emit_opcode(*cbuf, 0x8B);
1400           emit_rm(*cbuf, 0x3,
1401                   Matcher::_regEncode[dst_first] & 7,
1402                   Matcher::_regEncode[src_first] & 7);
1403 #ifndef PRODUCT
1404         } else if (!do_size) {
1405           st->print("movq    %s, %s\t# spill",
1406                      Matcher::regName[dst_first],
1407                      Matcher::regName[src_first]);
1408 #endif
1409         }
1410         return 3; // REX
1411       } else {
1412         // 32-bit
1413         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415         if (cbuf) {
1416           if (Matcher::_regEncode[dst_first] < 8) {
1417             if (Matcher::_regEncode[src_first] >= 8) {
1418               emit_opcode(*cbuf, Assembler::REX_B);
1419             }
1420           } else {
1421             if (Matcher::_regEncode[src_first] < 8) {
1422               emit_opcode(*cbuf, Assembler::REX_R);
1423             } else {
1424               emit_opcode(*cbuf, Assembler::REX_RB);
1425             }
1426           }
1427           emit_opcode(*cbuf, 0x8B);
1428           emit_rm(*cbuf, 0x3,
1429                   Matcher::_regEncode[dst_first] & 7,
1430                   Matcher::_regEncode[src_first] & 7);
1431 #ifndef PRODUCT
1432         } else if (!do_size) {
1433           st->print("movl    %s, %s\t# spill",
1434                      Matcher::regName[dst_first],
1435                      Matcher::regName[src_first]);
1436 #endif
1437         }
1438         return
1439           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440           ? 2
1441           : 3; // REX
1442       }
1443     } else if (dst_first_rc == rc_float) {
1444       // gpr -> xmm
1445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447         // 64-bit
1448         if (cbuf) {
1449           emit_opcode(*cbuf, 0x66);
1450           if (Matcher::_regEncode[dst_first] < 8) {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_W);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WB);
1455             }
1456           } else {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_WR);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WRB);
1461             }
1462           }
1463           emit_opcode(*cbuf, 0x0F);
1464           emit_opcode(*cbuf, 0x6E);
1465           emit_rm(*cbuf, 0x3,
1466                   Matcher::_regEncode[dst_first] & 7,
1467                   Matcher::_regEncode[src_first] & 7);
1468 #ifndef PRODUCT
1469         } else if (!do_size) {
1470           st->print("movdq   %s, %s\t# spill",
1471                      Matcher::regName[dst_first],
1472                      Matcher::regName[src_first]);
1473 #endif
1474         }
1475         return 5; // REX
1476       } else {
1477         // 32-bit
1478         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480         if (cbuf) {
1481           emit_opcode(*cbuf, 0x66);
1482           if (Matcher::_regEncode[dst_first] < 8) {
1483             if (Matcher::_regEncode[src_first] >= 8) {
1484               emit_opcode(*cbuf, Assembler::REX_B);
1485             }
1486           } else {
1487             if (Matcher::_regEncode[src_first] < 8) {
1488               emit_opcode(*cbuf, Assembler::REX_R);
1489             } else {
1490               emit_opcode(*cbuf, Assembler::REX_RB);
1491             }
1492           }
1493           emit_opcode(*cbuf, 0x0F);
1494           emit_opcode(*cbuf, 0x6E);
1495           emit_rm(*cbuf, 0x3,
1496                   Matcher::_regEncode[dst_first] & 7,
1497                   Matcher::_regEncode[src_first] & 7);
1498 #ifndef PRODUCT
1499         } else if (!do_size) {
1500           st->print("movdl   %s, %s\t# spill",
1501                      Matcher::regName[dst_first],
1502                      Matcher::regName[src_first]);
1503 #endif
1504         }
1505         return
1506           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507           ? 4
1508           : 5; // REX
1509       }
1510     }
1511   } else if (src_first_rc == rc_float) {
1512     // xmm ->
1513     if (dst_first_rc == rc_stack) {
1514       // xmm -> mem
1515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517         // 64-bit
1518         int offset = ra_->reg2offset(dst_first);
1519         if (cbuf) {
1520           emit_opcode(*cbuf, 0xF2);
1521           if (Matcher::_regEncode[src_first] >= 8) {
1522               emit_opcode(*cbuf, Assembler::REX_R);
1523           }
1524           emit_opcode(*cbuf, 0x0F);
1525           emit_opcode(*cbuf, 0x11);
1526           encode_RegMem(*cbuf,
1527                         Matcher::_regEncode[src_first],
1528                         RSP_enc, 0x4, 0, offset,
1529                         false);
1530 #ifndef PRODUCT
1531         } else if (!do_size) {
1532           st->print("movsd   [rsp + #%d], %s\t# spill",
1533                      offset,
1534                      Matcher::regName[src_first]);
1535 #endif
1536         }
1537         return
1538           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539           ((Matcher::_regEncode[src_first] < 8)
1540            ? 5
1541            : 6); // REX
1542       } else {
1543         // 32-bit
1544         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546         int offset = ra_->reg2offset(dst_first);
1547         if (cbuf) {
1548           emit_opcode(*cbuf, 0xF3);
1549           if (Matcher::_regEncode[src_first] >= 8) {
1550               emit_opcode(*cbuf, Assembler::REX_R);
1551           }
1552           emit_opcode(*cbuf, 0x0F);
1553           emit_opcode(*cbuf, 0x11);
1554           encode_RegMem(*cbuf,
1555                         Matcher::_regEncode[src_first],
1556                         RSP_enc, 0x4, 0, offset,
1557                         false);
1558 #ifndef PRODUCT
1559         } else if (!do_size) {
1560           st->print("movss   [rsp + #%d], %s\t# spill",
1561                      offset,
1562                      Matcher::regName[src_first]);
1563 #endif
1564         }
1565         return
1566           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567           ((Matcher::_regEncode[src_first] < 8)
1568            ? 5
1569            : 6); // REX
1570       }
1571     } else if (dst_first_rc == rc_int) {
1572       // xmm -> gpr
1573       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575         // 64-bit
1576         if (cbuf) {
1577           emit_opcode(*cbuf, 0x66);
1578           if (Matcher::_regEncode[dst_first] < 8) {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_W);
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583             }
1584           } else {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WRB);
1589             }
1590           }
1591           emit_opcode(*cbuf, 0x0F);
1592           emit_opcode(*cbuf, 0x7E);
1593           emit_rm(*cbuf, 0x3,
1594                   Matcher::_regEncode[dst_first] & 7,
1595                   Matcher::_regEncode[src_first] & 7);
1596 #ifndef PRODUCT
1597         } else if (!do_size) {
1598           st->print("movdq   %s, %s\t# spill",
1599                      Matcher::regName[dst_first],
1600                      Matcher::regName[src_first]);
1601 #endif
1602         }
1603         return 5; // REX
1604       } else {
1605         // 32-bit
1606         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608         if (cbuf) {
1609           emit_opcode(*cbuf, 0x66);
1610           if (Matcher::_regEncode[dst_first] < 8) {
1611             if (Matcher::_regEncode[src_first] >= 8) {
1612               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613             }
1614           } else {
1615             if (Matcher::_regEncode[src_first] < 8) {
1616               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617             } else {
1618               emit_opcode(*cbuf, Assembler::REX_RB);
1619             }
1620           }
1621           emit_opcode(*cbuf, 0x0F);
1622           emit_opcode(*cbuf, 0x7E);
1623           emit_rm(*cbuf, 0x3,
1624                   Matcher::_regEncode[dst_first] & 7,
1625                   Matcher::_regEncode[src_first] & 7);
1626 #ifndef PRODUCT
1627         } else if (!do_size) {
1628           st->print("movdl   %s, %s\t# spill",
1629                      Matcher::regName[dst_first],
1630                      Matcher::regName[src_first]);
1631 #endif
1632         }
1633         return
1634           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635           ? 4
1636           : 5; // REX
1637       }
1638     } else if (dst_first_rc == rc_float) {
1639       // xmm -> xmm
1640       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642         // 64-bit
1643         if (cbuf) {
1644           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645           if (Matcher::_regEncode[dst_first] < 8) {
1646             if (Matcher::_regEncode[src_first] >= 8) {
1647               emit_opcode(*cbuf, Assembler::REX_B);
1648             }
1649           } else {
1650             if (Matcher::_regEncode[src_first] < 8) {
1651               emit_opcode(*cbuf, Assembler::REX_R);
1652             } else {
1653               emit_opcode(*cbuf, Assembler::REX_RB);
1654             }
1655           }
1656           emit_opcode(*cbuf, 0x0F);
1657           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658           emit_rm(*cbuf, 0x3,
1659                   Matcher::_regEncode[dst_first] & 7,
1660                   Matcher::_regEncode[src_first] & 7);
1661 #ifndef PRODUCT
1662         } else if (!do_size) {
1663           st->print("%s  %s, %s\t# spill",
1664                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                      Matcher::regName[dst_first],
1666                      Matcher::regName[src_first]);
1667 #endif
1668         }
1669         return
1670           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671           ? 4
1672           : 5; // REX
1673       } else {
1674         // 32-bit
1675         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677         if (cbuf) {
1678           if (!UseXmmRegToRegMoveAll)
1679             emit_opcode(*cbuf, 0xF3);
1680           if (Matcher::_regEncode[dst_first] < 8) {
1681             if (Matcher::_regEncode[src_first] >= 8) {
1682               emit_opcode(*cbuf, Assembler::REX_B);
1683             }
1684           } else {
1685             if (Matcher::_regEncode[src_first] < 8) {
1686               emit_opcode(*cbuf, Assembler::REX_R);
1687             } else {
1688               emit_opcode(*cbuf, Assembler::REX_RB);
1689             }
1690           }
1691           emit_opcode(*cbuf, 0x0F);
1692           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693           emit_rm(*cbuf, 0x3,
1694                   Matcher::_regEncode[dst_first] & 7,
1695                   Matcher::_regEncode[src_first] & 7);
1696 #ifndef PRODUCT
1697         } else if (!do_size) {
1698           st->print("%s  %s, %s\t# spill",
1699                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                      Matcher::regName[dst_first],
1701                      Matcher::regName[src_first]);
1702 #endif
1703         }
1704         return
1705           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708       }
1709     }
1710   }
1711 
1712   assert(0," foo ");
1713   Unimplemented();
1714 
1715   return 0;
1716 }
1717 
1718 #ifndef PRODUCT
     // Debug-only printer: calls implementation() with no code buffer and
     // do_size == false, so the spill copy is printed to st rather than emitted.
1719 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720 {
1721   implementation(NULL, ra_, false, st);
1722 }
1723 #endif
1724 
1725 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726 {
       // Emission mode: a non-NULL code buffer makes implementation() write
       // the instruction bytes into cbuf; no output stream is passed.
1727   implementation(&cbuf, ra_, false, NULL);
1728 }
1729 
1730 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731 {
       // Sizing mode (do_size == true, no buffer, no stream): the value
       // returned by implementation() is used as the size of the copy.
1732   return implementation(NULL, ra_, true, NULL);
1733 }
1734 
1735 //=============================================================================
1736 #ifndef PRODUCT
     // Debug-only printer: shows the padding as one "nop" line annotated with
     // the pad length taken from _count.
1737 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738 {
1739   st->print("nop \t# %d bytes pad for loops and calls", _count);
1740 }
1741 #endif
1742 
1743 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744 {
       // Ask a MacroAssembler bound to cbuf for _count worth of nop padding.
1745   MacroAssembler _masm(&cbuf);
1746   __ nop(_count);
1747 }
1748 
1749 uint MachNopNode::size(PhaseRegAlloc*) const
1750 {
       // The pad length is _count, the same value emit() hands to nop().
1751   return _count;
1752 }
1753 
1754 
1755 //=============================================================================
1756 #ifndef PRODUCT
1757 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758 {
1759   // Debug-only: print the leaq that materializes the box's rsp-relative address.
1760   const int box_disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1761   const int dst = ra_->get_reg_first(this);
1762   st->print("leaq    %s, [rsp + #%d]\t# box lock", Matcher::regName[dst], box_disp);
1763 }
1764 #endif
1765 
1766 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767 {
1768   // LEA reg,[SP+offset].  The two encodings differ only in the ModRM mod
1769   // field and in the width of the trailing displacement.
1770   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1771   const int dst_enc = ra_->get_encode(this);
1772   const bool needs_disp32 = (disp >= 0x80);
1773 
1774   emit_opcode(cbuf, dst_enc < 8 ? Assembler::REX_W : Assembler::REX_WR);
1775   emit_opcode(cbuf, 0x8D);
1776   emit_rm(cbuf, needs_disp32 ? 0x2 : 0x1, dst_enc & 7, 0x04);
1777   emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1778   if (needs_disp32) {
1779     emit_d32(cbuf, disp);
1780   } else {
1781     emit_d8(cbuf, disp);
1782   }
1783 }
1784 
1785 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786 {
1787   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788   return (disp >= 0x80) ? 8 : 5; // REX; 32-bit vs. 8-bit displacement form
1789 }
1790 
1791 //=============================================================================
1792 
1793 // Emit the call stub used to go from compiled Java code to the interpreter.
     // The stub is generated between start_a_stub() and end_a_stub(); when
     // start_a_stub() returns NULL the function returns without emitting.
1794 void emit_java_to_interp(CodeBuffer& cbuf)
1795 {
1796   // Stub is fixed up when the corresponding call is converted from
1797   // calling compiled code to calling interpreted code.
1798   // movq rbx, 0
1799   // jmp -5 # to self
1800 
1801   address mark = cbuf.inst_mark();  // get mark within main instrs section
1802 
1803   // Note that the code buffer's inst_mark is always relative to insts.
1804   // That's why we must use the macroassembler to generate a stub.
1805   MacroAssembler _masm(&cbuf);
1806 
1807   address base =
1808   __ start_a_stub(Compile::MAX_stubs_size);
1809   if (base == NULL)  return;  // CodeBuffer::expand failed
1810   // static stub relocation stores the instruction address of the call
1811   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812   // static stub relocation also tags the methodOop in the code-stream.
1813   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814   // This is recognized as unresolved by relocs/nativeinst/ic code
       // (the jump target is the current pc, i.e. the jump itself).
1815   __ jump(RuntimeAddress(__ pc()));
1816 
1817   // Update current stubs pointer and restore code_end.
1818   __ end_a_stub();
1819 }
1820 
1821 // Size in bytes of the call stub, compiled java to interpreter
     // (the stub produced by emit_java_to_interp()).
1822 uint size_java_to_interp()
1823 {
1824   return 15;  // movq (1+1+8); jmp (1+4)
1825 }
1826 
1827 // Number of relocation entries for the call stub, compiled java to
     // interpreter.
1828 uint reloc_java_to_interp()
1829 {
1830   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831 }
1832 
1833 //=============================================================================
1834 #ifndef PRODUCT
     // Debug-only printer for the unverified entry point: prints the inline
     // cache check sequence that emit() generates.
1835 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836 {
1837   if (UseCompressedOops) {
         // emit() loads the klass into rscratch1 before comparing, so the
         // decode step must name rscratch1 as its index register as well.
         // Continuation lines start with a tab, like the jne/nop lines below.
1838     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839     if (Universe::narrow_oop_shift() != 0) {
1840       st->print_cr("\tleaq    rscratch1, [r12_heapbase, rscratch1, Address::times_8, 0]");
1841     }
1842     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1843   } else {
1844     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846   }
1847   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848   st->print_cr("\tnop");
       // emit() adds one more nop when OptoBreakpoint is off.
1849   if (!OptoBreakpoint) {
1850     st->print_cr("\tnop");
1851   }
1852 }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 {
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   const uint size_before = cbuf.code_size();
1860 #endif
1861 
1862   // Inline cache check: compare rax with the klass word of the object
1863   // in j_rarg0.
1864   if (!UseCompressedOops) {
1865     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   } else {
1867     // Load the klass into rscratch1 first, then compare registers.
1868     masm.load_klass(rscratch1, j_rarg0);
1869     masm.cmpptr(rax, rscratch1);
1870   }
1871 
1872   // A mismatch branches to the shared inline-cache miss stub.
1873   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1874 
1875   // WARNING these NOPs are critical so that verified entry point is properly
1876   // aligned for patching by NativeJump::patch_verified_entry().
1877   // One nop always; one more to leave space for int3 when OptoBreakpoint
1878   // is off; one more when compressed oops are in use.
1879   const int pad_nops = 1 + (OptoBreakpoint ? 0 : 1) + (UseCompressedOops ? 1 : 0);
1880   masm.nop(pad_nops);
1881 
1882   // The emitted length must agree with what size() reports.
1883   assert(cbuf.code_size() - size_before == size(ra_),
1884          "checking code size of inline cache node");
1885 }
1886 
1887 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888 {
1889   // Start from the length of the sequence when OptoBreakpoint is set.
1890   uint bytes = 11;
1891   if (UseCompressedOops) {
1892     // Shifted narrow oops take 4 more bytes than unshifted ones.
1893     bytes = (Universe::narrow_oop_shift() == 0) ? 15 : 19;
1894   }
1895   // emit() pads with one additional nop when OptoBreakpoint is off.
1896   if (!OptoBreakpoint) bytes += 1;
1897   return bytes;
1898 }
1899 
1900 
1901 //=============================================================================
1902 uint size_exception_handler()
1903 {
       // Space reserved for the exception handler stub: emit_exception_handler()
       // passes this value to start_a_stub() and asserts it is not exceeded.
1904   // NativeCall instruction size is the same as NativeJump.
1905   // Note that this value is also credited (in output.cpp) to
1906   // the size of the code section.
1907   return NativeJump::instruction_size;
1908 }
1909 
1910 // Emit exception handler code.
     // Returns the assembler offset at which the handler starts, or 0 when
     // start_a_stub() returned NULL and nothing was emitted.
1911 int emit_exception_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's inst_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_exception_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
       // The whole handler is one jump to the start of the exception blob's
       // instructions.
1921   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1923   __ end_a_stub();
1924   return offset;
1925 }
1926 
1927 uint size_deopt_handler()
1928 {
       // Space reserved for the deopt handler stub; emit_deopt_handler() emits
       // a call, a subptr and a jump and asserts they fit in this size.
1929   // three 5 byte instructions
1930   return 15;
1931 }
1932 
1933 // Emit deopt handler code.
     // Returns the assembler offset at which the handler starts, or 0 when
     // start_a_stub() returned NULL and nothing was emitted.
1934 int emit_deopt_handler(CodeBuffer& cbuf)
1935 {
1936 
1937   // Note that the code buffer's inst_mark is always relative to insts.
1938   // That's why we must use the macroassembler to generate a handler.
1939   MacroAssembler _masm(&cbuf);
1940   address base =
1941   __ start_a_stub(size_deopt_handler());
1942   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943   int offset = __ offset();
       // NOTE(review): the_pc is only mentioned in comments below; no later
       // statement in this function reads it.
1944   address the_pc = (address) __ pc();
1945   Label next;
1946   // push a "the_pc" on the stack without destroying any registers
1947   // as they all may be live.
1948 
1949   // push address of "next"
1950   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951   __ bind(next);
1952   // adjust it so it matches "the_pc"
       // (subtract the number of bytes emitted since the handler start from
       // the return address the call just pushed at [rsp])
1953   __ subptr(Address(rsp, 0), __ offset() - offset);
1954   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1956   __ end_a_stub();
1957   return offset;
1958 }
1959 
1960 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1961   // Save the insts mark so it can be restored after the assembler call.
1962   const int saved_mark = cbuf.insts()->mark_off();
1963   MacroAssembler _masm(&cbuf);
1964   address const_addr = __ double_constant(x);
1965   cbuf.insts()->set_mark_off(saved_mark);  // preserve mark across masm shift
1966   // Displacement is taken from the end of the 4-byte field about to be emitted.
1967   const int disp = (int) (const_addr - cbuf.code_end() - 4);
1968   emit_d32_reloc(cbuf, disp, internal_word_Relocation::spec(const_addr), RELOC_DISP32);
1969 }
1970 
1971 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1972   // Save the insts mark so it can be restored after the assembler call.
1973   const int saved_mark = cbuf.insts()->mark_off();
1974   MacroAssembler _masm(&cbuf);
1975   address const_addr = __ float_constant(x);
1976   cbuf.insts()->set_mark_off(saved_mark);  // preserve mark across masm shift
1977   // Displacement is taken from the end of the 4-byte field about to be emitted.
1978   const int disp = (int) (const_addr - cbuf.code_end() - 4);
1979   emit_d32_reloc(cbuf, disp, internal_word_Relocation::spec(const_addr), RELOC_DISP32);
1980 }
1981 
1982 
1983 const bool Matcher::match_rule_supported(int opcode) {
1984   // Per default match rules are supported: an opcode is accepted exactly
1985   // when a match rule exists for it; no further restrictions apply here.
1986   const bool supported = has_match_rule(opcode);
1987   return supported;
1988 }
1989 
1990 int Matcher::regnum_to_fpu_offset(int regnum)
1991 {
       // Rebase a register number so that numbering starts at the FP registers.
       // NOTE(review): the constant 32 presumably is the number of entries in
       // the first register chunk -- confirm against the register definitions.
1992   return regnum - 32; // The FP registers are in the second chunk
1993 }
1994 
1995 // This is UltraSparc specific, true just means we have fast l2f conversion
     // (so this port always answers true).
1996 const bool Matcher::convL2FSupported(void) {
1997   return true;
1998 }
1999 
2000 // Vector width in bytes
     // (fixed at 8 in this port).
2001 const uint Matcher::vector_width_in_bytes(void) {
2002   return 8;
2003 }
2004 
2005 // Vector ideal reg
     // (vectors use the double register type, Op_RegD, matching the 8-byte
     // width reported by vector_width_in_bytes()).
2006 const uint Matcher::vector_ideal_reg(void) {
2007   return Op_RegD;
2008 }
2009 
2010 // Decide whether a branch offset is small enough for the short branch form.
2011 //
2012 // NOTE: If the platform does not provide any short branch variants, then
2013 //       this method should return false for offset 0.
2014 bool Matcher::is_short_branch_offset(int rule, int offset) {
2015   // The short version of jmpConUCF2 contains multiple branches, which
2016   // trims the usable range by two at each end.
2017   const int margin = (rule == jmpConUCF2_rule) ? 2 : 0;
2018   const int lowest = -128 + margin, highest = 127 - margin;
2019   return lowest <= offset && offset <= highest;
2020 }
2021 
2022 const bool Matcher::isSimpleConstant64(jlong value) {
       // The value argument is not inspected; every 64-bit constant is
       // reported as simple.
2023   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2024   //return value == (int) value;  // Cf. storeImmL and immL32.
2025 
2026   // Probably always true, even if a temp register is required.
2027   return true;
2028 }
2029 
2030 // The ecx parameter to rep stosq for the ClearArray node is in words,
     // so the count is not expressed in bytes.
2031 const bool Matcher::init_array_count_is_in_bytes = false;
2032 
2033 // Threshold size for ClearArray: eight longs' worth of bytes.
2034 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2035 
2036 // Should the Matcher clone shifts on addressing modes, expecting them
2037 // to be subsumed into complex addressing expressions or compute them
2038 // into registers?  True for Intel but false for most RISCs
2039 const bool Matcher::clone_shift_expressions = true;
2040 
2041 // Is it better to copy float constants, or load them directly from
2042 // memory?  Intel can load a float constant from a direct address,
2043 // requiring no extra registers.  Most RISCs will have to materialize
2044 // an address into a register first, so they would do better to copy
2045 // the constant from stack.
2046 const bool Matcher::rematerialize_float_constants = true; // XXX
2047 
2048 // If CPU can load and store mis-aligned doubles directly then no
2049 // fixup is needed.  Else we split the double into 2 integer pieces
2050 // and move it piece-by-piece.  Only happens when passing doubles into
2051 // C code as the Java calling convention forces doubles to be aligned.
2052 const bool Matcher::misaligned_doubles_ok = true;
2053 
2054 // No-op on amd64: the body is intentionally empty and both arguments
     // are ignored.
2055 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2056 
2057 // Advertise here if the CPU requires explicit rounding operations to
2058 // implement the UseStrictFP mode.
2059 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2060 
2061 // Do floats take an entire double register or just half?
     // (true: an entire double register)
2062 const bool Matcher::float_in_double = true;
2063 // Do ints take an entire long register or just half?
     // (true: an entire long register)
2064 const bool Matcher::int_in_long = true;
2065 
2066 // Return whether or not this register is ever used as an argument.
2067 // This function is used on startup to build the trampoline stubs in
2068 // generateOptoStub.  Registers not mentioned will be killed by the VM
2069 // call in the trampoline, and arguments in those registers will not be
2070 // available to the callee.
     // Both halves (the _num and _H_num entries) of each listed register are
     // accepted.
2071 bool Matcher::can_be_java_arg(int reg)
2072 {
2073   return
2074     reg ==  RDI_num || reg ==  RDI_H_num ||
2075     reg ==  RSI_num || reg ==  RSI_H_num ||
2076     reg ==  RDX_num || reg ==  RDX_H_num ||
2077     reg ==  RCX_num || reg ==  RCX_H_num ||
2078     reg ==   R8_num || reg ==   R8_H_num ||
2079     reg ==   R9_num || reg ==   R9_H_num ||
2080     reg ==  R12_num || reg ==  R12_H_num ||
2081     reg == XMM0_num || reg == XMM0_H_num ||
2082     reg == XMM1_num || reg == XMM1_H_num ||
2083     reg == XMM2_num || reg == XMM2_H_num ||
2084     reg == XMM3_num || reg == XMM3_H_num ||
2085     reg == XMM4_num || reg == XMM4_H_num ||
2086     reg == XMM5_num || reg == XMM5_H_num ||
2087     reg == XMM6_num || reg == XMM6_H_num ||
2088     reg == XMM7_num || reg == XMM7_H_num;
2089 }
2090 
2091 bool Matcher::is_spillable_arg(int reg)
2092 {
       // On this port the spillable argument registers are exactly the
       // registers accepted by can_be_java_arg().
2093   return can_be_java_arg(reg);
2094 }
2095 
2096 // Register for DIVI projection of divmodI
     // (the quotient; returns the int rax register mask).
2097 RegMask Matcher::divI_proj_mask() {
2098   return INT_RAX_REG_mask;
2099 }
2100 
2101 // Register for MODI projection of divmodI
     // (the remainder; returns the int rdx register mask).
2102 RegMask Matcher::modI_proj_mask() {
2103   return INT_RDX_REG_mask;
2104 }
2105 
2106 // Register for DIVL projection of divmodL
     // (the quotient; returns the long rax register mask).
2107 RegMask Matcher::divL_proj_mask() {
2108   return LONG_RAX_REG_mask;
2109 }
2110 
2111 // Register for MODL projection of divmodL
     // (the remainder; returns the long rdx register mask).
2112 RegMask Matcher::modL_proj_mask() {
2113   return LONG_RDX_REG_mask;
2114 }
2115 
2116 static Address build_address(int b, int i, int s, int d) {
2117   // Build an Address from base encoding b, index encoding i, scale s and
2118   // displacement d.  An index that decodes to rsp means "no index".
2119   Register idx_reg = as_Register(i);
2120   Address::ScaleFactor factor = (Address::ScaleFactor)s;
2121   if (idx_reg == rsp) {
2122     idx_reg = noreg; factor = Address::no_scale;
2123   }
2124   return Address(as_Register(b), idx_reg, factor, d);
2125 }
2126 
2127 %}
2128 
2129 //----------ENCODING BLOCK-----------------------------------------------------
2130 // This block specifies the encoding classes used by the compiler to
2131 // output byte streams.  Encoding classes are parameterized macros
2132 // used by Machine Instruction Nodes in order to generate the bit
2133 // encoding of the instruction.  Operands specify their base encoding
2134 // interface with the interface keyword.  Four interfaces are
2135 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2136 // COND_INTER.  REG_INTER causes an operand to generate a function
2137 // which returns its register number when queried.  CONST_INTER causes
2138 // an operand to generate a function which returns the value of the
2139 // constant when queried.  MEMORY_INTER causes an operand to generate
2140 // four functions which return the Base Register, the Index Register,
2141 // the Scale Value, and the Offset Value of the operand when queried.
2142 // COND_INTER causes an operand to generate six functions which return
2143 // the encoding code (ie - encoding bits for the instruction)
2144 // associated with each basic boolean condition for a conditional
2145 // instruction.
2146 //
2147 // Instructions specify two basic values for encoding.  Again, a
2148 // function is available to check if the constant displacement is an
2149 // oop. They use the ins_encode keyword to specify their encoding
2150 // classes (which must be a sequence of enc_class names, and their
2151 // parameters, specified in the encoding block), and they use the
2152 // opcode keyword to specify, in order, their primary, secondary, and
2153 // tertiary opcode.  Only the opcode sections which a particular
2154 // instruction needs for encoding need to be specified.
2155 encode %{
2156   // Build emit functions for each basic byte or larger field in the
2157   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2158   // from C++ code in the enc_class source block.  Emit functions will
2159   // live in the main source block for now.  In future, we can
2160   // generalize this by adding a syntax that specifies the sizes of
2161   // fields in an order, so that the adlc can build the emit functions
2162   // automagically
2163 
2164   // Emit primary opcode (the first value of the instruction's opcode
       // specification) as a single opcode byte.
2165   enc_class OpcP
2166   %{
2167     emit_opcode(cbuf, $primary);
2168   %}
2169 
2170   // Emit secondary opcode (the second value of the instruction's opcode
       // specification) as a single opcode byte.
2171   enc_class OpcS
2172   %{
2173     emit_opcode(cbuf, $secondary);
2174   %}
2175 
2176   // Emit tertiary opcode (the third value of the instruction's opcode
       // specification) as a single opcode byte.
2177   enc_class OpcT
2178   %{
2179     emit_opcode(cbuf, $tertiary);
2180   %}
2181 
2182   // Emit opcode directly: the byte comes from the constant operand d8
       // rather than from the instruction's opcode specification.
2183   enc_class Opcode(immI d8)
2184   %{
2185     emit_opcode(cbuf, $d8$$constant);
2186   %}
2187 
2188   // Emit size prefix (the byte 0x66).
2189   enc_class SizePrefix
2190   %{
2191     emit_opcode(cbuf, 0x66);
2192   %}
2193 
2194   enc_class reg(rRegI reg)
2195   %{
         // Register-direct ModRM byte (mod 0x3) with a zero middle field; only
         // the low three bits of the register encoding are used, so a REX
         // prefix for encodings >= 8 has to come from elsewhere.
2196     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2197   %}
2198 
2199   enc_class reg_reg(rRegI dst, rRegI src)
2200   %{
         // Register-direct ModRM byte (mod 0x3) built from the low three bits
         // of the dst and src encodings; no REX prefix is emitted here.
2201     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2202   %}
2203 
2204   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2205   %{
         // Opcode byte taken from the constant operand, followed by the same
         // register-direct ModRM byte that reg_reg emits.
2206     emit_opcode(cbuf, $opcode$$constant);
2207     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2208   %}
2209 
2210   enc_class cmpfp_fixup()
2211   %{
         // Flag fixup: when the parity flag is set, push the flags, AND the
         // saved copy with 0xffffff2b and pop them back; when it is clear,
         // jump over that 10-byte sequence to the trailing nop.
2212     // jnp,s exit
2213     emit_opcode(cbuf, 0x7B);
2214     emit_d8(cbuf, 0x0A);
2215 
2216     // pushfq
2217     emit_opcode(cbuf, 0x9C);
2218 
2219     // andq $0xffffff2b, (%rsp)
2220     emit_opcode(cbuf, Assembler::REX_W);
2221     emit_opcode(cbuf, 0x81);
2222     emit_opcode(cbuf, 0x24);
2223     emit_opcode(cbuf, 0x24);
2224     emit_d32(cbuf, 0xffffff2b);
2225 
2226     // popfq
2227     emit_opcode(cbuf, 0x9D);
2228 
2229     // nop (target for branch to avoid branch to branch)
2230     emit_opcode(cbuf, 0x90);
2231   %}
2232 
2233   enc_class cmpfp3(rRegI dst)
2234   %{
         // Turn the current flags into an integer in dst: dst starts as -1 and
         // stays -1 when the parity or the below condition holds; otherwise it
         // becomes the zero-extended result of setne (0 or 1).
2235     int dstenc = $dst$$reg;
2236 
2237     // movl $dst, -1
2238     if (dstenc >= 8) {
2239       emit_opcode(cbuf, Assembler::REX_B);
2240     }
2241     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2242     emit_d32(cbuf, -1);
2243 
2244     // jp,s done
         // (the skip distance grows by 2 when dstenc >= 4, because setne and
         // movzbl below then each carry a one-byte prefix)
2245     emit_opcode(cbuf, 0x7A);
2246     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2247 
2248     // jb,s done
2249     emit_opcode(cbuf, 0x72);
2250     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2251 
2252     // setne $dst
2253     if (dstenc >= 4) {
2254       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2255     }
2256     emit_opcode(cbuf, 0x0F);
2257     emit_opcode(cbuf, 0x95);
2258     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2259 
2260     // movzbl $dst, $dst
2261     if (dstenc >= 4) {
2262       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2263     }
2264     emit_opcode(cbuf, 0x0F);
2265     emit_opcode(cbuf, 0xB6);
2266     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2267   %}
2268 
2269   enc_class cdql_enc(no_rax_rdx_RegI div)
2270   %{
2271     // Full implementation of Java idiv and irem; checks for
2272     // special case as described in JVM spec., p.243 & p.271.
2273     //
2274     //         normal case                           special case
2275     //
2276     // input : rax: dividend                         min_int
2277     //         reg: divisor                          -1
2278     //
2279     // output: rax: quotient  (= rax idiv reg)       min_int
2280     //         rdx: remainder (= rax irem reg)       0
2281     //
2282     //  Code sequence:
2283     //
2284     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2285     //    5:   75 07/08                jne    e <normal>
2286     //    7:   33 d2                   xor    %edx,%edx
2287     //  [div >= 8 -> offset + 1]
2288     //  [REX_B]
2289     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2290     //    c:   74 03/04                je     11 <done>
2291     // 000000000000000e <normal>:
2292     //    e:   99                      cltd
2293     //  [div >= 8 -> offset + 1]
2294     //  [REX_B]
2295     //    f:   f7 f9                   idiv   $div
2296     // 0000000000000011 <done>:
2297 
2298     // cmp    $0x80000000,%eax
2299     emit_opcode(cbuf, 0x3d);
2300     emit_d8(cbuf, 0x00);
2301     emit_d8(cbuf, 0x00);
2302     emit_d8(cbuf, 0x00);
2303     emit_d8(cbuf, 0x80);
2304 
2305     // jne    e <normal>
         // (one byte further when the compare below needs a REX_B prefix)
2306     emit_opcode(cbuf, 0x75);
2307     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2308 
2309     // xor    %edx,%edx
2310     emit_opcode(cbuf, 0x33);
2311     emit_d8(cbuf, 0xD2);
2312 
2313     // cmp    $0xffffffffffffffff,$div
2314     if ($div$$reg >= 8) {
2315       emit_opcode(cbuf, Assembler::REX_B);
2316     }
2317     emit_opcode(cbuf, 0x83);
2318     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2319     emit_d8(cbuf, 0xFF);
2320 
2321     // je     11 <done>
         // (skips cltd plus the idivl that the user of this rule emits)
2322     emit_opcode(cbuf, 0x74);
2323     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2324 
2325     // <normal>
2326     // cltd
2327     emit_opcode(cbuf, 0x99);
2328 
2329     // idivl (note: must be emitted by the user of this rule)
2330     // <done>
2331   %}
2332 
2333   enc_class cdqq_enc(no_rax_rdx_RegL div)
2334   %{
2335     // Full implementation of Java ldiv and lrem; checks for
2336     // special case as described in JVM spec., p.243 & p.271.
2337     //
2338     //         normal case                           special case
2339     //
2340     // input : rax: dividend                         min_long
2341     //         reg: divisor                          -1
2342     //
2343     // output: rax: quotient  (= rax idiv reg)       min_long
2344     //         rdx: remainder (= rax irem reg)       0
2345     //
2346     //  Code sequence:
2347     //
2348     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2349     //    7:   00 00 80
2350     //    a:   48 39 d0                cmp    %rdx,%rax
2351     //    d:   75 08                   jne    17 <normal>
2352     //    f:   33 d2                   xor    %edx,%edx
2353     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2354     //   15:   74 05                   je     1c <done>
2355     // 0000000000000017 <normal>:
2356     //   17:   48 99                   cqto
2357     //   19:   48 f7 f9                idiv   $div
2358     // 000000000000001c <done>:
2359 
2360     // mov    $0x8000000000000000,%rdx
2361     emit_opcode(cbuf, Assembler::REX_W);
2362     emit_opcode(cbuf, 0xBA);
2363     emit_d8(cbuf, 0x00);
2364     emit_d8(cbuf, 0x00);
2365     emit_d8(cbuf, 0x00);
2366     emit_d8(cbuf, 0x00);
2367     emit_d8(cbuf, 0x00);
2368     emit_d8(cbuf, 0x00);
2369     emit_d8(cbuf, 0x00);
2370     emit_d8(cbuf, 0x80);
2371 
2372     // cmp    %rdx,%rax
2373     emit_opcode(cbuf, Assembler::REX_W);
2374     emit_opcode(cbuf, 0x39);
2375     emit_d8(cbuf, 0xD0);
2376 
2377     // jne    17 <normal>
         // (fixed distance: the compare below always carries a REX prefix)
2378     emit_opcode(cbuf, 0x75);
2379     emit_d8(cbuf, 0x08);
2380 
2381     // xor    %edx,%edx
2382     emit_opcode(cbuf, 0x33);
2383     emit_d8(cbuf, 0xD2);
2384 
2385     // cmp    $0xffffffffffffffff,$div
2386     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2387     emit_opcode(cbuf, 0x83);
2388     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2389     emit_d8(cbuf, 0xFF);
2390 
2391     // je     1c <done>
         // (skips cqto plus the idivq that the user of this rule emits)
2392     emit_opcode(cbuf, 0x74);
2393     emit_d8(cbuf, 0x05);
2394 
2395     // <normal>
2396     // cqto
2397     emit_opcode(cbuf, Assembler::REX_W);
2398     emit_opcode(cbuf, 0x99);
2399 
2400     // idivq (note: must be emitted by the user of this rule)
2401     // <done>
2402   %}
2403 
2404   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2405   enc_class OpcSE(immI imm)
2406   %{
2407     // Emit primary opcode and set sign-extend bit
2408     // Check for 8-bit immediate, and set sign extend bit in opcode
         // (the immediate fits in 8 bits when it lies in [-0x80, 0x80))
2409     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2410       emit_opcode(cbuf, $primary | 0x02);
2411     } else {
2412       // 32-bit immediate
2413       emit_opcode(cbuf, $primary);
2414     }
2415   %}
2416 
2417   enc_class OpcSErm(rRegI dst, immI imm)
2418   %{
2419     // OpcSEr/m
         // Optional REX_B prefix, then the opcode (sign-extend bit set for an
         // 8-bit immediate), then a register-direct ModRM byte whose middle
         // field is the secondary opcode.  The immediate is not emitted here.
2420     int dstenc = $dst$$reg;
2421     if (dstenc >= 8) {
2422       emit_opcode(cbuf, Assembler::REX_B);
2423       dstenc -= 8;
2424     }
2425     // Emit primary opcode and set sign-extend bit
2426     // Check for 8-bit immediate, and set sign extend bit in opcode
2427     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2428       emit_opcode(cbuf, $primary | 0x02);
2429     } else {
2430       // 32-bit immediate
2431       emit_opcode(cbuf, $primary);
2432     }
2433     // Emit r/m byte with secondary opcode, after primary opcode.
2434     emit_rm(cbuf, 0x3, $secondary, dstenc);
2435   %}
2436 
2437   enc_class OpcSErm_wide(rRegL dst, immI imm)
2438   %{
2439     // OpcSEr/m
         // Same layout as OpcSErm, except that a REX prefix with the W bit is
         // always emitted (REX_W, or REX_WB for encodings >= 8).
2440     int dstenc = $dst$$reg;
2441     if (dstenc < 8) {
2442       emit_opcode(cbuf, Assembler::REX_W);
2443     } else {
2444       emit_opcode(cbuf, Assembler::REX_WB);
2445       dstenc -= 8;
2446     }
2447     // Emit primary opcode and set sign-extend bit
2448     // Check for 8-bit immediate, and set sign extend bit in opcode
2449     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2450       emit_opcode(cbuf, $primary | 0x02);
2451     } else {
2452       // 32-bit immediate
2453       emit_opcode(cbuf, $primary);
2454     }
2455     // Emit r/m byte with secondary opcode, after primary opcode.
2456     emit_rm(cbuf, 0x3, $secondary, dstenc);
2457   %}
2458 
2459   enc_class Con8or32(immI imm)
2460   %{
         // Emit the immediate itself, using the same 8-bit range test as the
         // OpcSE* classes so the width matches the opcode they selected.
2461     // Check for 8-bit immediate; emit it as a single byte if it fits
2462     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2463       $$$emit8$imm$$constant;
2464     } else {
2465       // 32-bit immediate
2466       $$$emit32$imm$$constant;
2467     }
2468   %}
2469 
2470   enc_class Lbl(label labl)
2471   %{
2472     // JMP, CALL
         // 32-bit displacement from the end of this 4-byte field to the label
         // position; 0 is emitted when no label is available.
2473     Label* l = $labl$$label;
2474     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2475   %}
2476 
2477   enc_class LblShort(label labl)
2478   %{
2479     // JMP, CALL
         // 8-bit displacement from the end of this 1-byte field to the label
         // position (0 when no label is available); asserted to fit a byte.
2480     Label* l = $labl$$label;
2481     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2482     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2483     emit_d8(cbuf, disp);
2484   %}
2485 
2486   enc_class opc2_reg(rRegI dst)
2487   %{
2488     // BSWAP
         // Passes the secondary opcode and the dst register encoding to
         // emit_cc.  NOTE(review): presumably emit_cc merges the two into one
         // byte -- confirm against its definition.
2489     emit_cc(cbuf, $secondary, $dst$$reg);
2490   %}
2491 
2492   enc_class opc3_reg(rRegI dst)
2493   %{
2494     // BSWAP
         // Same as opc2_reg, but uses the tertiary opcode.
2495     emit_cc(cbuf, $tertiary, $dst$$reg);
2496   %}
2497 
2498   enc_class reg_opc(rRegI div)
2499   %{
2500     // INC, DEC, IDIV, IMOD, JMP indirect, ...
         // Register-direct ModRM byte whose middle field carries the secondary
         // opcode; only the low three bits of the register encoding are used.
2501     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2502   %}
2503 
2504   enc_class Jcc(cmpOp cop, label labl)
2505   %{
2506     // JCC
         // Primary opcode byte, then the secondary opcode combined with the
         // condition code, then a 32-bit displacement measured from the end of
         // the displacement field (0 when no label is available).
2507     Label* l = $labl$$label;
2508     $$$emit8$primary;
2509     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2510     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2511   %}
2512 
2513   enc_class JccShort (cmpOp cop, label labl)
2514   %{
2515   // JCC
         // Short form: the primary opcode combined with the condition code,
         // then an 8-bit displacement measured from the end of that byte
         // (0 when no label is available); asserted to fit a byte.
2516     Label *l = $labl$$label;
2517     emit_cc(cbuf, $primary, $cop$$cmpcode);
2518     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2519     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2520     emit_d8(cbuf, disp);
2521   %}
2522 
2523   enc_class enc_cmov(cmpOp cop)
2524   %{
2525     // CMOV
         // Primary opcode byte followed by the secondary opcode combined with
         // the condition code; operand bytes are emitted by other classes.
2526     $$$emit8$primary;
2527     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2528   %}
2529 
2530   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2531   %{
         // Conditional float move done as a short branch around a register
         // move: the branch displacement equals the length of the move that
         // follows (3 to 5 bytes, depending on prefix bytes).
2532     // Invert sense of branch from sense of cmov
2533     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2534     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2535                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2536                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2537     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2538     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
         // A REX prefix is needed only when dst or src has an encoding >= 8.
2539     if ($dst$$reg < 8) {
2540       if ($src$$reg >= 8) {
2541         emit_opcode(cbuf, Assembler::REX_B);
2542       }
2543     } else {
2544       if ($src$$reg < 8) {
2545         emit_opcode(cbuf, Assembler::REX_R);
2546       } else {
2547         emit_opcode(cbuf, Assembler::REX_RB);
2548       }
2549     }
2550     emit_opcode(cbuf, 0x0F);
2551     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2552     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2553   %}
2554 
2555   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2556   %{
2557     // Invert sense of branch from sense of cmov
2558     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2559     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2560 
2561     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2562     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2563     if ($dst$$reg < 8) {
2564       if ($src$$reg >= 8) {
2565         emit_opcode(cbuf, Assembler::REX_B);
2566       }
2567     } else {
2568       if ($src$$reg < 8) {
2569         emit_opcode(cbuf, Assembler::REX_R);
2570       } else {
2571         emit_opcode(cbuf, Assembler::REX_RB);
2572       }
2573     }
2574     emit_opcode(cbuf, 0x0F);
2575     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2576     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2577   %}
2578 
2579   enc_class enc_PartialSubtypeCheck()
2580   %{
2581     Register Rrdi = as_Register(RDI_enc); // result register
2582     Register Rrax = as_Register(RAX_enc); // super class
2583     Register Rrcx = as_Register(RCX_enc); // killed
2584     Register Rrsi = as_Register(RSI_enc); // sub class
2585     Label miss;
2586     const bool set_cond_codes = true;
2587 
2588     MacroAssembler _masm(&cbuf);
2589     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2590                                      NULL, &miss,
2591                                      /*set_cond_codes:*/ true);
2592     if ($primary) {
2593       __ xorptr(Rrdi, Rrdi);
2594     }
2595     __ bind(miss);
2596   %}
2597 
2598   enc_class Java_To_Interpreter(method meth)
2599   %{
2600     // CALL Java_To_Interpreter
2601     // This is the instruction starting address for relocation info.
2602     cbuf.set_inst_mark();
2603     $$$emit8$primary;
2604     // CALL directly to the runtime
2605     emit_d32_reloc(cbuf,
2606                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2607                    runtime_call_Relocation::spec(),
2608                    RELOC_DISP32);
2609   %}
2610 
2611   enc_class Java_Static_Call(method meth)
2612   %{
2613     // JAVA STATIC CALL
2614     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2615     // determine who we intended to call.
2616     cbuf.set_inst_mark();
2617     $$$emit8$primary;
2618 
2619     if (!_method) {
2620       emit_d32_reloc(cbuf,
2621                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2622                      runtime_call_Relocation::spec(),
2623                      RELOC_DISP32);
2624     } else if (_optimized_virtual) {
2625       emit_d32_reloc(cbuf,
2626                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2627                      opt_virtual_call_Relocation::spec(),
2628                      RELOC_DISP32);
2629     } else {
2630       emit_d32_reloc(cbuf,
2631                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2632                      static_call_Relocation::spec(),
2633                      RELOC_DISP32);
2634     }
2635     if (_method) {
2636       // Emit stub for static call
2637       emit_java_to_interp(cbuf);
2638     }
2639   %}
2640 
2641   enc_class Java_Dynamic_Call(method meth)
2642   %{
2643     // JAVA DYNAMIC CALL
2644     // !!!!!
2645     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2646     // emit_call_dynamic_prologue( cbuf );
2647     cbuf.set_inst_mark();
2648 
2649     // movq rax, -1
2650     emit_opcode(cbuf, Assembler::REX_W);
2651     emit_opcode(cbuf, 0xB8 | RAX_enc);
2652     emit_d64_reloc(cbuf,
2653                    (int64_t) Universe::non_oop_word(),
2654                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2655     address virtual_call_oop_addr = cbuf.inst_mark();
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2657     // who we intended to call.
2658     cbuf.set_inst_mark();
2659     $$$emit8$primary;
2660     emit_d32_reloc(cbuf,
2661                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2662                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2663                    RELOC_DISP32);
2664   %}
2665 
2666   enc_class Java_Compiled_Call(method meth)
2667   %{
2668     // JAVA COMPILED CALL
2669     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2670 
2671     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2672     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2673 
2674     // callq *disp(%rax)
2675     cbuf.set_inst_mark();
2676     $$$emit8$primary;
2677     if (disp < 0x80) {
2678       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2679       emit_d8(cbuf, disp); // Displacement
2680     } else {
2681       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2682       emit_d32(cbuf, disp); // Displacement
2683     }
2684   %}
2685 
2686   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2687   %{
2688     // SAL, SAR, SHR
2689     int dstenc = $dst$$reg;
2690     if (dstenc >= 8) {
2691       emit_opcode(cbuf, Assembler::REX_B);
2692       dstenc -= 8;
2693     }
2694     $$$emit8$primary;
2695     emit_rm(cbuf, 0x3, $secondary, dstenc);
2696     $$$emit8$shift$$constant;
2697   %}
2698 
2699   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2700   %{
2701     // SAL, SAR, SHR
2702     int dstenc = $dst$$reg;
2703     if (dstenc < 8) {
2704       emit_opcode(cbuf, Assembler::REX_W);
2705     } else {
2706       emit_opcode(cbuf, Assembler::REX_WB);
2707       dstenc -= 8;
2708     }
2709     $$$emit8$primary;
2710     emit_rm(cbuf, 0x3, $secondary, dstenc);
2711     $$$emit8$shift$$constant;
2712   %}
2713 
2714   enc_class load_immI(rRegI dst, immI src)
2715   %{
2716     int dstenc = $dst$$reg;
2717     if (dstenc >= 8) {
2718       emit_opcode(cbuf, Assembler::REX_B);
2719       dstenc -= 8;
2720     }
2721     emit_opcode(cbuf, 0xB8 | dstenc);
2722     $$$emit32$src$$constant;
2723   %}
2724 
2725   enc_class load_immL(rRegL dst, immL src)
2726   %{
2727     int dstenc = $dst$$reg;
2728     if (dstenc < 8) {
2729       emit_opcode(cbuf, Assembler::REX_W);
2730     } else {
2731       emit_opcode(cbuf, Assembler::REX_WB);
2732       dstenc -= 8;
2733     }
2734     emit_opcode(cbuf, 0xB8 | dstenc);
2735     emit_d64(cbuf, $src$$constant);
2736   %}
2737 
2738   enc_class load_immUL32(rRegL dst, immUL32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     int dstenc = $dst$$reg;
2742     if (dstenc >= 8) {
2743       emit_opcode(cbuf, Assembler::REX_B);
2744       dstenc -= 8;
2745     }
2746     emit_opcode(cbuf, 0xB8 | dstenc);
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immL32(rRegL dst, immL32 src)
2751   %{
2752     int dstenc = $dst$$reg;
2753     if (dstenc < 8) {
2754       emit_opcode(cbuf, Assembler::REX_W);
2755     } else {
2756       emit_opcode(cbuf, Assembler::REX_WB);
2757       dstenc -= 8;
2758     }
2759     emit_opcode(cbuf, 0xC7);
2760     emit_rm(cbuf, 0x03, 0x00, dstenc);
2761     $$$emit32$src$$constant;
2762   %}
2763 
2764   enc_class load_immP31(rRegP dst, immP32 src)
2765   %{
2766     // same as load_immI, but this time we care about zeroes in the high word
2767     int dstenc = $dst$$reg;
2768     if (dstenc >= 8) {
2769       emit_opcode(cbuf, Assembler::REX_B);
2770       dstenc -= 8;
2771     }
2772     emit_opcode(cbuf, 0xB8 | dstenc);
2773     $$$emit32$src$$constant;
2774   %}
2775 
2776   enc_class load_immP(rRegP dst, immP src)
2777   %{
2778     int dstenc = $dst$$reg;
2779     if (dstenc < 8) {
2780       emit_opcode(cbuf, Assembler::REX_W);
2781     } else {
2782       emit_opcode(cbuf, Assembler::REX_WB);
2783       dstenc -= 8;
2784     }
2785     emit_opcode(cbuf, 0xB8 | dstenc);
2786     // This next line should be generated from ADLC
2787     if ($src->constant_is_oop()) {
2788       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2789     } else {
2790       emit_d64(cbuf, $src$$constant);
2791     }
2792   %}
2793 
2794   enc_class load_immF(regF dst, immF con)
2795   %{
2796     // XXX reg_mem doesn't support RIP-relative addressing yet
2797     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798     emit_float_constant(cbuf, $con$$constant);
2799   %}
2800 
2801   enc_class load_immD(regD dst, immD con)
2802   %{
2803     // XXX reg_mem doesn't support RIP-relative addressing yet
2804     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2805     emit_double_constant(cbuf, $con$$constant);
2806   %}
2807 
2808   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2809     emit_opcode(cbuf, 0xF3);
2810     if ($dst$$reg >= 8) {
2811       emit_opcode(cbuf, Assembler::REX_R);
2812     }
2813     emit_opcode(cbuf, 0x0F);
2814     emit_opcode(cbuf, 0x10);
2815     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2816     emit_float_constant(cbuf, $con$$constant);
2817   %}
2818 
2819   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2820     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2821     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2822     if ($dst$$reg >= 8) {
2823       emit_opcode(cbuf, Assembler::REX_R);
2824     }
2825     emit_opcode(cbuf, 0x0F);
2826     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2827     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2828     emit_double_constant(cbuf, $con$$constant);
2829   %}
2830 
2831   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2832   enc_class enc_copy(rRegI dst, rRegI src)
2833   %{
2834     encode_copy(cbuf, $dst$$reg, $src$$reg);
2835   %}
2836 
2837   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2838   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2839     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2840   %}
2841 
2842   enc_class enc_copy_always(rRegI dst, rRegI src)
2843   %{
2844     int srcenc = $src$$reg;
2845     int dstenc = $dst$$reg;
2846 
2847     if (dstenc < 8) {
2848       if (srcenc >= 8) {
2849         emit_opcode(cbuf, Assembler::REX_B);
2850         srcenc -= 8;
2851       }
2852     } else {
2853       if (srcenc < 8) {
2854         emit_opcode(cbuf, Assembler::REX_R);
2855       } else {
2856         emit_opcode(cbuf, Assembler::REX_RB);
2857         srcenc -= 8;
2858       }
2859       dstenc -= 8;
2860     }
2861 
2862     emit_opcode(cbuf, 0x8B);
2863     emit_rm(cbuf, 0x3, dstenc, srcenc);
2864   %}
2865 
2866   enc_class enc_copy_wide(rRegL dst, rRegL src)
2867   %{
2868     int srcenc = $src$$reg;
2869     int dstenc = $dst$$reg;
2870 
2871     if (dstenc != srcenc) {
2872       if (dstenc < 8) {
2873         if (srcenc < 8) {
2874           emit_opcode(cbuf, Assembler::REX_W);
2875         } else {
2876           emit_opcode(cbuf, Assembler::REX_WB);
2877           srcenc -= 8;
2878         }
2879       } else {
2880         if (srcenc < 8) {
2881           emit_opcode(cbuf, Assembler::REX_WR);
2882         } else {
2883           emit_opcode(cbuf, Assembler::REX_WRB);
2884           srcenc -= 8;
2885         }
2886         dstenc -= 8;
2887       }
2888       emit_opcode(cbuf, 0x8B);
2889       emit_rm(cbuf, 0x3, dstenc, srcenc);
2890     }
2891   %}
2892 
2893   enc_class Con32(immI src)
2894   %{
2895     // Output immediate
2896     $$$emit32$src$$constant;
2897   %}
2898 
2899   enc_class Con64(immL src)
2900   %{
2901     // Output immediate
2902     emit_d64($src$$constant);
2903   %}
2904 
2905   enc_class Con32F_as_bits(immF src)
2906   %{
2907     // Output Float immediate bits
2908     jfloat jf = $src$$constant;
2909     jint jf_as_bits = jint_cast(jf);
2910     emit_d32(cbuf, jf_as_bits);
2911   %}
2912 
2913   enc_class Con16(immI src)
2914   %{
2915     // Output immediate
2916     $$$emit16$src$$constant;
2917   %}
2918 
2919   // How is this different from Con32??? XXX
2920   enc_class Con_d32(immI src)
2921   %{
2922     emit_d32(cbuf,$src$$constant);
2923   %}
2924 
2925   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2926     // Output immediate memory reference
2927     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2928     emit_d32(cbuf, 0x00);
2929   %}
2930 
2931   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2932     MacroAssembler masm(&cbuf);
2933 
2934     Register switch_reg = as_Register($switch_val$$reg);
2935     Register dest_reg   = as_Register($dest$$reg);
2936     address table_base  = masm.address_table_constant(_index2label);
2937 
2938     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2939     // to do that and the compiler is using that register as one it can allocate.
2940     // So we build it all by hand.
2941     // Address index(noreg, switch_reg, Address::times_1);
2942     // ArrayAddress dispatch(table, index);
2943 
2944     Address dispatch(dest_reg, switch_reg, Address::times_1);
2945 
2946     masm.lea(dest_reg, InternalAddress(table_base));
2947     masm.jmp(dispatch);
2948   %}
2949 
2950   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2951     MacroAssembler masm(&cbuf);
2952 
2953     Register switch_reg = as_Register($switch_val$$reg);
2954     Register dest_reg   = as_Register($dest$$reg);
2955     address table_base  = masm.address_table_constant(_index2label);
2956 
2957     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2958     // to do that and the compiler is using that register as one it can allocate.
2959     // So we build it all by hand.
2960     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2961     // ArrayAddress dispatch(table, index);
2962 
2963     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2964 
2965     masm.lea(dest_reg, InternalAddress(table_base));
2966     masm.jmp(dispatch);
2967   %}
2968 
2969   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2970     MacroAssembler masm(&cbuf);
2971 
2972     Register switch_reg = as_Register($switch_val$$reg);
2973     Register dest_reg   = as_Register($dest$$reg);
2974     address table_base  = masm.address_table_constant(_index2label);
2975 
2976     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2977     // to do that and the compiler is using that register as one it can allocate.
2978     // So we build it all by hand.
2979     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2980     // ArrayAddress dispatch(table, index);
2981 
2982     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2983     masm.lea(dest_reg, InternalAddress(table_base));
2984     masm.jmp(dispatch);
2985 
2986   %}
2987 
2988   enc_class lock_prefix()
2989   %{
2990     if (os::is_MP()) {
2991       emit_opcode(cbuf, 0xF0); // lock
2992     }
2993   %}
2994 
2995   enc_class REX_mem(memory mem)
2996   %{
2997     if ($mem$$base >= 8) {
2998       if ($mem$$index < 8) {
2999         emit_opcode(cbuf, Assembler::REX_B);
3000       } else {
3001         emit_opcode(cbuf, Assembler::REX_XB);
3002       }
3003     } else {
3004       if ($mem$$index >= 8) {
3005         emit_opcode(cbuf, Assembler::REX_X);
3006       }
3007     }
3008   %}
3009 
3010   enc_class REX_mem_wide(memory mem)
3011   %{
3012     if ($mem$$base >= 8) {
3013       if ($mem$$index < 8) {
3014         emit_opcode(cbuf, Assembler::REX_WB);
3015       } else {
3016         emit_opcode(cbuf, Assembler::REX_WXB);
3017       }
3018     } else {
3019       if ($mem$$index < 8) {
3020         emit_opcode(cbuf, Assembler::REX_W);
3021       } else {
3022         emit_opcode(cbuf, Assembler::REX_WX);
3023       }
3024     }
3025   %}
3026 
3027   // for byte regs
3028   enc_class REX_breg(rRegI reg)
3029   %{
3030     if ($reg$$reg >= 4) {
3031       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3032     }
3033   %}
3034 
3035   // for byte regs
3036   enc_class REX_reg_breg(rRegI dst, rRegI src)
3037   %{
3038     if ($dst$$reg < 8) {
3039       if ($src$$reg >= 4) {
3040         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3041       }
3042     } else {
3043       if ($src$$reg < 8) {
3044         emit_opcode(cbuf, Assembler::REX_R);
3045       } else {
3046         emit_opcode(cbuf, Assembler::REX_RB);
3047       }
3048     }
3049   %}
3050 
3051   // for byte regs
3052   enc_class REX_breg_mem(rRegI reg, memory mem)
3053   %{
3054     if ($reg$$reg < 8) {
3055       if ($mem$$base < 8) {
3056         if ($mem$$index >= 8) {
3057           emit_opcode(cbuf, Assembler::REX_X);
3058         } else if ($reg$$reg >= 4) {
3059           emit_opcode(cbuf, Assembler::REX);
3060         }
3061       } else {
3062         if ($mem$$index < 8) {
3063           emit_opcode(cbuf, Assembler::REX_B);
3064         } else {
3065           emit_opcode(cbuf, Assembler::REX_XB);
3066         }
3067       }
3068     } else {
3069       if ($mem$$base < 8) {
3070         if ($mem$$index < 8) {
3071           emit_opcode(cbuf, Assembler::REX_R);
3072         } else {
3073           emit_opcode(cbuf, Assembler::REX_RX);
3074         }
3075       } else {
3076         if ($mem$$index < 8) {
3077           emit_opcode(cbuf, Assembler::REX_RB);
3078         } else {
3079           emit_opcode(cbuf, Assembler::REX_RXB);
3080         }
3081       }
3082     }
3083   %}
3084 
3085   enc_class REX_reg(rRegI reg)
3086   %{
3087     if ($reg$$reg >= 8) {
3088       emit_opcode(cbuf, Assembler::REX_B);
3089     }
3090   %}
3091 
3092   enc_class REX_reg_wide(rRegI reg)
3093   %{
3094     if ($reg$$reg < 8) {
3095       emit_opcode(cbuf, Assembler::REX_W);
3096     } else {
3097       emit_opcode(cbuf, Assembler::REX_WB);
3098     }
3099   %}
3100 
3101   enc_class REX_reg_reg(rRegI dst, rRegI src)
3102   %{
3103     if ($dst$$reg < 8) {
3104       if ($src$$reg >= 8) {
3105         emit_opcode(cbuf, Assembler::REX_B);
3106       }
3107     } else {
3108       if ($src$$reg < 8) {
3109         emit_opcode(cbuf, Assembler::REX_R);
3110       } else {
3111         emit_opcode(cbuf, Assembler::REX_RB);
3112       }
3113     }
3114   %}
3115 
3116   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3117   %{
3118     if ($dst$$reg < 8) {
3119       if ($src$$reg < 8) {
3120         emit_opcode(cbuf, Assembler::REX_W);
3121       } else {
3122         emit_opcode(cbuf, Assembler::REX_WB);
3123       }
3124     } else {
3125       if ($src$$reg < 8) {
3126         emit_opcode(cbuf, Assembler::REX_WR);
3127       } else {
3128         emit_opcode(cbuf, Assembler::REX_WRB);
3129       }
3130     }
3131   %}
3132 
3133   enc_class REX_reg_mem(rRegI reg, memory mem)
3134   %{
3135     if ($reg$$reg < 8) {
3136       if ($mem$$base < 8) {
3137         if ($mem$$index >= 8) {
3138           emit_opcode(cbuf, Assembler::REX_X);
3139         }
3140       } else {
3141         if ($mem$$index < 8) {
3142           emit_opcode(cbuf, Assembler::REX_B);
3143         } else {
3144           emit_opcode(cbuf, Assembler::REX_XB);
3145         }
3146       }
3147     } else {
3148       if ($mem$$base < 8) {
3149         if ($mem$$index < 8) {
3150           emit_opcode(cbuf, Assembler::REX_R);
3151         } else {
3152           emit_opcode(cbuf, Assembler::REX_RX);
3153         }
3154       } else {
3155         if ($mem$$index < 8) {
3156           emit_opcode(cbuf, Assembler::REX_RB);
3157         } else {
3158           emit_opcode(cbuf, Assembler::REX_RXB);
3159         }
3160       }
3161     }
3162   %}
3163 
3164   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3165   %{
3166     if ($reg$$reg < 8) {
3167       if ($mem$$base < 8) {
3168         if ($mem$$index < 8) {
3169           emit_opcode(cbuf, Assembler::REX_W);
3170         } else {
3171           emit_opcode(cbuf, Assembler::REX_WX);
3172         }
3173       } else {
3174         if ($mem$$index < 8) {
3175           emit_opcode(cbuf, Assembler::REX_WB);
3176         } else {
3177           emit_opcode(cbuf, Assembler::REX_WXB);
3178         }
3179       }
3180     } else {
3181       if ($mem$$base < 8) {
3182         if ($mem$$index < 8) {
3183           emit_opcode(cbuf, Assembler::REX_WR);
3184         } else {
3185           emit_opcode(cbuf, Assembler::REX_WRX);
3186         }
3187       } else {
3188         if ($mem$$index < 8) {
3189           emit_opcode(cbuf, Assembler::REX_WRB);
3190         } else {
3191           emit_opcode(cbuf, Assembler::REX_WRXB);
3192         }
3193       }
3194     }
3195   %}
3196 
3197   enc_class reg_mem(rRegI ereg, memory mem)
3198   %{
3199     // High registers handle in encode_RegMem
3200     int reg = $ereg$$reg;
3201     int base = $mem$$base;
3202     int index = $mem$$index;
3203     int scale = $mem$$scale;
3204     int disp = $mem$$disp;
3205     bool disp_is_oop = $mem->disp_is_oop();
3206 
3207     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3208   %}
3209 
3210   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3211   %{
3212     int rm_byte_opcode = $rm_opcode$$constant;
3213 
3214     // High registers handle in encode_RegMem
3215     int base = $mem$$base;
3216     int index = $mem$$index;
3217     int scale = $mem$$scale;
3218     int displace = $mem$$disp;
3219 
3220     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3221                                             // working with static
3222                                             // globals
3223     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3224                   disp_is_oop);
3225   %}
3226 
3227   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3228   %{
3229     int reg_encoding = $dst$$reg;
3230     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3231     int index        = 0x04;            // 0x04 indicates no index
3232     int scale        = 0x00;            // 0x00 indicates no scale
3233     int displace     = $src1$$constant; // 0x00 indicates no displacement
3234     bool disp_is_oop = false;
3235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3236                   disp_is_oop);
3237   %}
3238 
3239   enc_class neg_reg(rRegI dst)
3240   %{
3241     int dstenc = $dst$$reg;
3242     if (dstenc >= 8) {
3243       emit_opcode(cbuf, Assembler::REX_B);
3244       dstenc -= 8;
3245     }
3246     // NEG $dst
3247     emit_opcode(cbuf, 0xF7);
3248     emit_rm(cbuf, 0x3, 0x03, dstenc);
3249   %}
3250 
3251   enc_class neg_reg_wide(rRegI dst)
3252   %{
3253     int dstenc = $dst$$reg;
3254     if (dstenc < 8) {
3255       emit_opcode(cbuf, Assembler::REX_W);
3256     } else {
3257       emit_opcode(cbuf, Assembler::REX_WB);
3258       dstenc -= 8;
3259     }
3260     // NEG $dst
3261     emit_opcode(cbuf, 0xF7);
3262     emit_rm(cbuf, 0x3, 0x03, dstenc);
3263   %}
3264 
3265   enc_class setLT_reg(rRegI dst)
3266   %{
3267     int dstenc = $dst$$reg;
3268     if (dstenc >= 8) {
3269       emit_opcode(cbuf, Assembler::REX_B);
3270       dstenc -= 8;
3271     } else if (dstenc >= 4) {
3272       emit_opcode(cbuf, Assembler::REX);
3273     }
3274     // SETLT $dst
3275     emit_opcode(cbuf, 0x0F);
3276     emit_opcode(cbuf, 0x9C);
3277     emit_rm(cbuf, 0x3, 0x0, dstenc);
3278   %}
3279 
3280   enc_class setNZ_reg(rRegI dst)
3281   %{
3282     int dstenc = $dst$$reg;
3283     if (dstenc >= 8) {
3284       emit_opcode(cbuf, Assembler::REX_B);
3285       dstenc -= 8;
3286     } else if (dstenc >= 4) {
3287       emit_opcode(cbuf, Assembler::REX);
3288     }
3289     // SETNZ $dst
3290     emit_opcode(cbuf, 0x0F);
3291     emit_opcode(cbuf, 0x95);
3292     emit_rm(cbuf, 0x3, 0x0, dstenc);
3293   %}
3294 
3295   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3296                        rcx_RegI tmp)
3297   %{
3298     // cadd_cmpLT
3299 
3300     int tmpReg = $tmp$$reg;
3301 
3302     int penc = $p$$reg;
3303     int qenc = $q$$reg;
3304     int yenc = $y$$reg;
3305 
3306     // subl $p,$q
3307     if (penc < 8) {
3308       if (qenc >= 8) {
3309         emit_opcode(cbuf, Assembler::REX_B);
3310       }
3311     } else {
3312       if (qenc < 8) {
3313         emit_opcode(cbuf, Assembler::REX_R);
3314       } else {
3315         emit_opcode(cbuf, Assembler::REX_RB);
3316       }
3317     }
3318     emit_opcode(cbuf, 0x2B);
3319     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3320 
3321     // sbbl $tmp, $tmp
3322     emit_opcode(cbuf, 0x1B);
3323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3324 
3325     // andl $tmp, $y
3326     if (yenc >= 8) {
3327       emit_opcode(cbuf, Assembler::REX_B);
3328     }
3329     emit_opcode(cbuf, 0x23);
3330     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3331 
3332     // addl $p,$tmp
3333     if (penc >= 8) {
3334         emit_opcode(cbuf, Assembler::REX_R);
3335     }
3336     emit_opcode(cbuf, 0x03);
3337     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3338   %}
3339 
3340   // Compare the lonogs and set -1, 0, or 1 into dst
3341   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3342   %{
3343     int src1enc = $src1$$reg;
3344     int src2enc = $src2$$reg;
3345     int dstenc = $dst$$reg;
3346 
3347     // cmpq $src1, $src2
3348     if (src1enc < 8) {
3349       if (src2enc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_W);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_WB);
3353       }
3354     } else {
3355       if (src2enc < 8) {
3356         emit_opcode(cbuf, Assembler::REX_WR);
3357       } else {
3358         emit_opcode(cbuf, Assembler::REX_WRB);
3359       }
3360     }
3361     emit_opcode(cbuf, 0x3B);
3362     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3363 
3364     // movl $dst, -1
3365     if (dstenc >= 8) {
3366       emit_opcode(cbuf, Assembler::REX_B);
3367     }
3368     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3369     emit_d32(cbuf, -1);
3370 
3371     // jl,s done
3372     emit_opcode(cbuf, 0x7C);
3373     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3374 
3375     // setne $dst
3376     if (dstenc >= 4) {
3377       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3378     }
3379     emit_opcode(cbuf, 0x0F);
3380     emit_opcode(cbuf, 0x95);
3381     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3382 
3383     // movzbl $dst, $dst
3384     if (dstenc >= 4) {
3385       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3386     }
3387     emit_opcode(cbuf, 0x0F);
3388     emit_opcode(cbuf, 0xB6);
3389     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3390   %}
3391 
3392   enc_class Push_ResultXD(regD dst) %{
3393     int dstenc = $dst$$reg;
3394 
3395     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3396 
3397     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3398     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3399     if (dstenc >= 8) {
3400       emit_opcode(cbuf, Assembler::REX_R);
3401     }
3402     emit_opcode  (cbuf, 0x0F );
3403     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3404     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3405 
3406     // add rsp,8
3407     emit_opcode(cbuf, Assembler::REX_W);
3408     emit_opcode(cbuf,0x83);
3409     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3410     emit_d8(cbuf,0x08);
3411   %}
3412 
3413   enc_class Push_SrcXD(regD src) %{
3414     int srcenc = $src$$reg;
3415 
3416     // subq rsp,#8
3417     emit_opcode(cbuf, Assembler::REX_W);
3418     emit_opcode(cbuf, 0x83);
3419     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3420     emit_d8(cbuf, 0x8);
3421 
3422     // movsd [rsp],src
3423     emit_opcode(cbuf, 0xF2);
3424     if (srcenc >= 8) {
3425       emit_opcode(cbuf, Assembler::REX_R);
3426     }
3427     emit_opcode(cbuf, 0x0F);
3428     emit_opcode(cbuf, 0x11);
3429     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3430 
3431     // fldd [rsp]
3432     emit_opcode(cbuf, 0x66);
3433     emit_opcode(cbuf, 0xDD);
3434     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3435   %}
3436 
3437 
3438   enc_class movq_ld(regD dst, memory mem) %{
3439     MacroAssembler _masm(&cbuf);
3440     __ movq($dst$$XMMRegister, $mem$$Address);
3441   %}
3442 
3443   enc_class movq_st(memory mem, regD src) %{
3444     MacroAssembler _masm(&cbuf);
3445     __ movq($mem$$Address, $src$$XMMRegister);
3446   %}
3447 
3448   enc_class pshufd_8x8(regF dst, regF src) %{
3449     MacroAssembler _masm(&cbuf);
3450 
3451     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3452     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3453     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3454   %}
3455 
3456   enc_class pshufd_4x16(regF dst, regF src) %{
3457     MacroAssembler _masm(&cbuf);
3458 
3459     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3460   %}
3461 
3462   enc_class pshufd(regD dst, regD src, int mode) %{
3463     MacroAssembler _masm(&cbuf);
3464 
3465     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3466   %}
3467 
3468   enc_class pxor(regD dst, regD src) %{
3469     MacroAssembler _masm(&cbuf);
3470 
3471     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3472   %}
3473 
3474   enc_class mov_i2x(regD dst, rRegI src) %{
3475     MacroAssembler _masm(&cbuf);
3476 
3477     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3478   %}
3479 
3480   // obj: object to lock
3481   // box: box address (header location) -- killed
3482   // tmp: rax -- killed
3483   // scr: rbx -- killed
3484   //
3485   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3486   // from i486.ad.  See that file for comments.
3487   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3488   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3489 
3490 
3491   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3492   %{
         // Emits the inline monitor-enter fast path for obj, using box as the
         // lock record.  EmitSync selects one of three code shapes:
         //   EmitSync & 1 : store unused_mark into the box and compare rsp with
         //                  NULL_WORD; no locking attempt is emitted.
         //   EmitSync & 2 : optional biased-locking entry, then a CAS-based
         //                  stack lock with a recursive-lock check.
         //   otherwise    : as above, plus an inline path for inflated monitors.
         // Every path ends with the outcome left in the condition flags
         // (presumably ZF=1 means "locked"; the consumer is outside this block).
         // When _counters is non-NULL, entry statistics are updated as well.
3493     Register objReg = as_Register((int)$obj$$reg);
3494     Register boxReg = as_Register((int)$box$$reg);
3495     Register tmpReg = as_Register($tmp$$reg);
3496     Register scrReg = as_Register($scr$$reg);
3497     MacroAssembler masm(&cbuf);
3498 
3499     // Verify that all four registers are pairwise distinct.  boxReg is
         // checked too: tmpReg is rax (see the operand class), the implicit
         // comparand of cmpxchgptr, and scrReg is handed to biased_locking_enter
         // together with boxReg, so aliasing either one would corrupt the box.
3500     assert (objReg != boxReg && objReg != tmpReg && objReg != scrReg &&
3501             boxReg != tmpReg && boxReg != scrReg && tmpReg != scrReg, "invariant") ;
3502 
3503     if (_counters != NULL) {
3504       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3505     }
3506     if (EmitSync & 1) {
3507         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3508         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3509         masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
3510     } else
3511     if (EmitSync & 2) {
3512         Label DONE_LABEL;
3513         if (UseBiasedLocking) {
3514            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3515           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3516         }
3517         // QQQ was movl...
             // tmpReg = (mark word | 1); stored into the box as the displaced
             // header, then used as the expected value of the CAS below.
3518         masm.movptr(tmpReg, 0x1);
3519         masm.orptr(tmpReg, Address(objReg, 0));
3520         masm.movptr(Address(boxReg, 0), tmpReg);
3521         if (os::is_MP()) {
3522           masm.lock();
3523         }
3524         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3525         masm.jcc(Assembler::equal, DONE_LABEL);
3526 
3527         // Recursive locking
             // tmpReg holds the mark word seen by the failed CAS.  After
             // subtracting rsp and masking with (7 - page size) the result is
             // zero only when the low three bits are clear and the distance
             // from rsp is below one page (assuming a power-of-two page size).
             // That result becomes the displaced header and sets ZF.
3528         masm.subptr(tmpReg, rsp);
3529         masm.andptr(tmpReg, 7 - os::vm_page_size());
3530         masm.movptr(Address(boxReg, 0), tmpReg);
3531 
3532         masm.bind(DONE_LABEL);
3533         masm.nop(); // avoid branch to branch
3534     } else {
3535         Label DONE_LABEL, IsInflated, Egress;
3536 
3537         masm.movptr(tmpReg, Address(objReg, 0)) ;
3538         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3539         masm.jcc   (Assembler::notZero, IsInflated) ;
3540 
3541         // it's stack-locked, biased or neutral
3542         // TODO: optimize markword triage order to reduce the number of
3543         // conditional branches in the most common cases.
3544         // Beware -- there's a subtle invariant that fetch of the markword
3545         // at [FETCH], below, will never observe a biased encoding (*101b).
3546         // If this invariant is not held we'll suffer exclusion (safety) failure.
3547 
3548         if (UseBiasedLocking && !UseOptoBiasInlining) {
3549           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3550           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3551         }
3552 
3553         // was q will it destroy high?
3554         masm.orl   (tmpReg, 1) ;
3555         masm.movptr(Address(boxReg, 0), tmpReg) ;
3556         if (os::is_MP()) { masm.lock(); }
3557         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3558         if (_counters != NULL) {
3559            masm.cond_inc32(Assembler::equal,
3560                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3561         }
3562         masm.jcc   (Assembler::equal, DONE_LABEL);
3563 
3564         // Recursive locking
             // Same stack-distance test as in the EmitSync & 2 variant: a zero
             // result is stored as the displaced header and leaves ZF set.
3565         masm.subptr(tmpReg, rsp);
3566         masm.andptr(tmpReg, 7 - os::vm_page_size());
3567         masm.movptr(Address(boxReg, 0), tmpReg);
3568         if (_counters != NULL) {
3569            masm.cond_inc32(Assembler::equal,
3570                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3571         }
3572         masm.jmp   (DONE_LABEL) ;
3573 
3574         masm.bind  (IsInflated) ;
3575         // It's inflated
3576 
3577         // TODO: someday avoid the ST-before-CAS penalty by
3578         // relocating (deferring) the following ST.
3579         // We should also think about trying a CAS without having
3580         // fetched _owner.  If the CAS is successful we may
3581         // avoid an RTO->RTS upgrade on the $line.
3582         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3583         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
3584 
             // boxReg takes over the mark word value, which still has the 0x02
             // bit set; the "-2" in the displacements below presumably cancels
             // that tag.  The owner field is loaded into tmpReg, and a non-zero
             // owner exits with ZF clear.
3585         masm.mov    (boxReg, tmpReg) ;
3586         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3587         masm.testptr(tmpReg, tmpReg) ;
3588         masm.jcc    (Assembler::notZero, DONE_LABEL) ;
3589 
3590         // It's inflated and appears unlocked
             // tmpReg (rax) is zero on this path, so the CAS installs r15_thread
             // only if the owner field is still zero.
3591         if (os::is_MP()) { masm.lock(); }
3592         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3593         // Intentional fall-through into DONE_LABEL ...
3594 
3595         masm.bind  (DONE_LABEL) ;
3596         masm.nop   () ;                 // avoid jmp to jmp
3597     }
3598   %}
3599 
3600   // obj: object to unlock
3601   // box: box address (displaced header location), killed
3602   // RBX: killed tmp; cannot be obj nor box
3603   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3604   %{
         // Emits the inline monitor-exit fast path for obj; box (rax per the
         // operand class) addresses the lock record.  EmitSync selects the shape:
         //   EmitSync & 4 : only a compare of rsp with 0 is emitted.
         //   EmitSync & 8 : optional biased-locking exit, recursive check, then
         //                  a CAS that restores the displaced header.
         //   otherwise    : as above, plus an inline path for inflated monitors.
         // The outcome is left in ZF: ZF=1 for success, ZF=0 for failure (see
         // the LSuccess / LGoSlowPath sequences below).
3605 
3606     Register objReg = as_Register($obj$$reg);
3607     Register boxReg = as_Register($box$$reg);
3608     Register tmpReg = as_Register($tmp$$reg);
3609     MacroAssembler masm(&cbuf);
3610 
3611     if (EmitSync & 4) {
3612        masm.cmpptr(rsp, 0) ;
3613     } else
3614     if (EmitSync & 8) {
3615        Label DONE_LABEL;
3616        if (UseBiasedLocking) {
3617          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3618        }
3619 
3620        // Check whether the displaced header is 0
3621        //(=> recursive unlock)
3622        masm.movptr(tmpReg, Address(boxReg, 0));
3623        masm.testptr(tmpReg, tmpReg);
3624        masm.jcc(Assembler::zero, DONE_LABEL);
3625 
3626        // If not recursive lock, reset the header to displaced header
3627        if (os::is_MP()) {
3628          masm.lock();
3629        }
3630        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3631        masm.bind(DONE_LABEL);
3632        masm.nop(); // avoid branch to branch
3633     } else {
3634        Label DONE_LABEL, Stacked, CheckSucc ;
3635 
3636        if (UseBiasedLocking && !UseOptoBiasInlining) {
3637          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3638        }
3639 
            // tmpReg = mark word.  A zero displaced header in the box exits
            // immediately with ZF set; otherwise bit 0x02 of the mark word
            // separates the inflated case from the stack-locked one.
3640        masm.movptr(tmpReg, Address(objReg, 0)) ;
3641        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
3642        masm.jcc   (Assembler::zero, DONE_LABEL) ;
3643        masm.testl (tmpReg, 0x02) ;
3644        masm.jcc   (Assembler::zero, Stacked) ;
3645 
3646        // It's inflated
            // tmpReg still has the 0x02 bit set, hence the "-2" displacements
            // (presumably cancelling that tag).  boxReg = (owner ^ r15_thread)
            // | recursions; non-zero exits with ZF clear.  If cxq | EntryList
            // is non-zero, go to CheckSucc; otherwise store NULL_WORD into the
            // owner field and exit.
3647        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3648        masm.xorptr(boxReg, r15_thread) ;
3649        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3650        masm.jcc   (Assembler::notZero, DONE_LABEL) ;
3651        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3652        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3653        masm.jcc   (Assembler::notZero, CheckSucc) ;
3654        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3655        masm.jmp   (DONE_LABEL) ;
3656 
            // With EmitSync bit 65536 clear, CheckSucc gets the successor-check
            // sequence below; with it set, CheckSucc is bound just before
            // DONE_LABEL at the end of this block.
3657        if ((EmitSync & 65536) == 0) {
3658          Label LSuccess, LGoSlowPath ;
3659          masm.bind  (CheckSucc) ;
3660          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3662 
3663          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3664          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3665          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3666          // are all faster when the write buffer is populated.
3667          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3668          if (os::is_MP()) {
3669             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3670          }
3671          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3672          masm.jcc   (Assembler::notZero, LSuccess) ;
3673 
              // The succ field read zero after the owner field was cleared:
              // try to CAS the owner field from zero (rax) back to r15_thread.
              // If the CAS fails the exit counts as successful; if it succeeds,
              // fall into the slow path.
3674          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3675          if (os::is_MP()) { masm.lock(); }
3676          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3677          masm.jcc   (Assembler::notEqual, LSuccess) ;
3678          // Intentional fall-through into slow-path
3679 
3680          masm.bind  (LGoSlowPath) ;
3681          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3682          masm.jmp   (DONE_LABEL) ;
3683 
3684          masm.bind  (LSuccess) ;
3685          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3686          masm.jmp   (DONE_LABEL) ;
3687        }
3688 
            // Stack-locked: CAS the displaced header from the box back into the
            // object's mark word, expecting the mark word to equal box (rax).
3689        masm.bind  (Stacked) ;
3690        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3691        if (os::is_MP()) { masm.lock(); }
3692        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3693 
3694        if (EmitSync & 65536) {
3695           masm.bind (CheckSucc) ;
3696        }
3697        masm.bind(DONE_LABEL);
3698        if (EmitSync & 32768) {
3699           masm.nop();                      // avoid branch to branch
3700        }
3701     }
3702   %}
3703 
3704   enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, rbx_RegI cnt1, rax_RegI cnt2,
3705                                regD tmp1, regD tmp2, rcx_RegI result) %{
         // Emits an inline lexicographic comparison of two char sequences.
         // Fixed registers per the operand classes: str1=rdi, str2=rsi,
         // cnt1=rbx, cnt2=rax, result=rcx.  On exit rcx holds either the
         // difference of the first mismatching chars (str1 char minus str2
         // char) or, when the sequences agree over the shorter length,
         // cnt1 - cnt2.
         // The length difference stays on the stack throughout, so every exit
         // either pops it (LENGTH_DIFF_LABEL) or discards it (POP_LABEL).
         // rax, rbx, rsi and rdi are modified; tmp1/tmp2 are used only when
         // UseSSE42Intrinsics is set.
         // NOTE(review): RCX_GOOD_LABEL is bound but no jump in this block
         // targets it.
3706     Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3707           POP_LABEL, DONE_LABEL, CONT_LABEL,
3708           WHILE_HEAD_LABEL;
3709     MacroAssembler masm(&cbuf);
3710 
3711     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3712     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3713 
3714     // Compute the minimum of the string lengths (rax) and the
3715     // difference of the string lengths (stack)
3716 
3717     // do the conditional move stuff
         // rcx = cnt1; rbx = cnt1 - cnt2 (pushed); if that difference is <= 0
         // then rax = cnt1, so rax ends up as min(cnt1, cnt2).
3718     masm.movl(rcx, rbx);
3719     masm.subl(rbx, rax);
3720     masm.push(rbx);
3721     masm.cmov(Assembler::lessEqual, rax, rcx);
3722 
3723     // Is the minimum length zero?
3724     masm.bind(RCX_GOOD_LABEL);
3725     masm.testl(rax, rax);
3726     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3727 
3728     // Load first characters
3729     masm.load_unsigned_short(rcx, Address(rdi, 0));
3730     masm.load_unsigned_short(rbx, Address(rsi, 0));
3731 
3732     // Compare first characters
3733     masm.subl(rcx, rbx);
3734     masm.jcc(Assembler::notZero,  POP_LABEL);
3735     masm.decrementl(rax);
3736     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3737 
3738     {
3739       // Check after comparing first character to see if strings are equivalent
3740       Label LSkip2;
3741       // Check if the strings start at same location
3742       masm.cmpptr(rdi, rsi);
3743       masm.jccb(Assembler::notEqual, LSkip2);
3744 
3745       // Check if the length difference is zero (from stack)
3746       masm.cmpl(Address(rsp, 0), 0x0);
3747       masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3748 
3749       // Strings might not be equivalent
3750       masm.bind(LSkip2);
3751     }
3752 
3753     // Advance to next character
3754     masm.addptr(rsi, 2);
3755     masm.addptr(rdi, 2);
3756 
3757     if (UseSSE42Intrinsics) {
3758       // With SSE4.2, use double quad vector compare
3759       Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3760       // Setup to compare 16-byte vectors
3761       masm.movl(rbx, rax);
3762       masm.andl(rax, 0xfffffff8); // rax holds the vector count
3763       masm.andl(rbx, 0x00000007); // rbx holds the tail count
3764       masm.testl(rax, rax);
3765       masm.jccb(Assembler::zero, COMPARE_TAIL);
3766 
           // Point rsi/rdi past the vectorised part and walk a negative
           // char index in rax up towards zero.
3767       masm.lea(rsi, Address(rsi, rax, Address::times_2));
3768       masm.lea(rdi, Address(rdi, rax, Address::times_2));
3769       masm.negptr(rax);
3770 
3771       masm.bind(COMPARE_VECTORS);
3772       masm.movdqu(tmp1Reg, Address(rsi, rax, Address::times_2));
3773       masm.movdqu(tmp2Reg, Address(rdi, rax, Address::times_2));
3774       masm.pxor(tmp1Reg, tmp2Reg);
3775       masm.ptest(tmp1Reg, tmp1Reg);
3776       masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3777       masm.addptr(rax, 8);
3778       masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3779       masm.jmpb(COMPARE_TAIL);
3780 
3781       // Mismatched characters in the vectors
3782       masm.bind(VECTOR_NOT_EQUAL);
3783       masm.lea(rsi, Address(rsi, rax, Address::times_2));
3784       masm.lea(rdi, Address(rdi, rax, Address::times_2));
3785       masm.movl(rbx, 8);
3786 
3787       // Compare tail (< 8 chars), or rescan last vectors to
3788       // find 1st mismatched characters
3789       masm.bind(COMPARE_TAIL);
3790       masm.testl(rbx, rbx);
3791       masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3792       masm.movl(rax, rbx);
3793       // Fallthru to tail compare
3794     }
3795 
3796     // Shift rsi and rdi to the end of the arrays, negate min
3797     masm.lea(rsi, Address(rsi, rax, Address::times_2, 0));
3798     masm.lea(rdi, Address(rdi, rax, Address::times_2, 0));
3799     masm.negptr(rax);
3800 
3801     // Compare the rest of the characters
3802     masm.bind(WHILE_HEAD_LABEL);
3803     masm.load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
3804     masm.load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
3805     masm.subl(rcx, rbx);
3806     masm.jccb(Assembler::notZero, POP_LABEL);
3807     masm.increment(rax);
3808     masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3809 
3810     // Strings are equal up to min length.  Return the length difference.
3811     masm.bind(LENGTH_DIFF_LABEL);
3812     masm.pop(rcx);
3813     masm.jmpb(DONE_LABEL);
3814 
3815     // Discard the stored length difference
3816     masm.bind(POP_LABEL);
3817     masm.addptr(rsp, 8);
3818 
3819     // That's it
3820     masm.bind(DONE_LABEL);
3821   %}
3822 
3823  enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, rdx_RegI cnt1, rax_RegI cnt2,
3824                               regD tmp1, rcx_RegI tmp2, rbx_RegI result) %{
         // Emits the pcmpestri-based inline search for the substring str2
         // inside the string str1.  Fixed registers per the operand classes:
         // str1=rsi, str2=rdi, cnt1=rdx, cnt2=rax, tmp2=rcx, result=rbx.
         // On exit rbx holds the char index of the match (byte distance from
         // the saved string start, shifted right by one) or -1.
         // Three values are pushed on entry (string, substring, substring
         // count) and released by the 24-byte rsp adjustment at CLEANUP.
         // rsi, rdi, rdx, rax, rcx and tmp1 are modified.
         // NOTE(review): RET_NEG_ONE is bound below but no jump in this block
         // targets it.
3825     // SSE4.2 version
3826     Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3827           SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3828     MacroAssembler masm(&cbuf);
3829 
3830     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3831 
3832     // Start the indexOf operation
3833     // Get start addr of string
3834     masm.push(rsi);
3835 
3836     // Get start addr of substr
3837     masm.push(rdi);
3838     masm.push(rax);
3839     masm.jmpb(PREP_FOR_SCAN);
3840 
3841     // Substr count saved at sp
3842     // Substr saved at sp+8
3843     // String saved at sp+16
3844 
3845     // Prep to load substr for scan
         // Re-entered from SCAN_SUBSTR when a candidate fails: restores the
         // substring address and count from the stack.
3846     masm.bind(LOAD_SUBSTR);
3847     masm.movptr(rdi, Address(rsp, 8));
3848     masm.movl(rax, Address(rsp, 0));
3849 
3850     // Load substr
         // The +8 / -16 adjustments here are undone by the first pass through
         // SCAN_TO_SUBSTR, which steps 8 chars (16 bytes) per iteration.
3851     masm.bind(PREP_FOR_SCAN);
3852     masm.movdqu(tmp1Reg, Address(rdi, 0));
3853     masm.addq(rdx, 8);    // prime the loop
3854     masm.subptr(rsi, 16);
3855 
3856     // Scan string for substr in 16-byte vectors
3857     masm.bind(SCAN_TO_SUBSTR);
3858     masm.subq(rdx, 8);
3859     masm.addptr(rsi, 16);
3860     // pcmpestri
3861     //   inputs:
3862     //     xmm - substring
3863     //     rax - substring length (elements count)
3864     //     mem - scanned string
3865     //     rdx - string length (elements count)
3866     //   outputs:
3867     //     rcx - matched index in string
3868     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3869     masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
3870     masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
3871 
3872     // Fallthru: found a potential substr
3873 
3874     // Make sure string is still long enough
3875     masm.subl(rdx, rcx);
3876     masm.cmpl(rdx, rax);
3877     masm.jccb(Assembler::negative, RET_NOT_FOUND);
3878     // Compute start addr of substr
         // rbx remembers the candidate's address for the final index
         // computation.
3879     masm.lea(rsi, Address(rsi, rcx, Address::times_2));
3880     masm.movptr(rbx, rsi);
3881 
3882     // Compare potential substr
3883     masm.addq(rdx, 8);        // prime the loop
3884     masm.addq(rax, 8);
3885     masm.subptr(rsi, 16);
3886     masm.subptr(rdi, 16);
3887 
3888     // Scan 16-byte vectors of string and substr
3889     masm.bind(SCAN_SUBSTR);
3890     masm.subq(rax, 8);
3891     masm.subq(rdx, 8);
3892     masm.addptr(rsi, 16);
3893     masm.addptr(rdi, 16);
3894     masm.movdqu(tmp1Reg, Address(rdi, 0));
3895     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3896     masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
3897     masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
3898 
3899     // Compute substr offset
3900     masm.movptr(rsi, Address(rsp, 16));
3901     masm.subptr(rbx, rsi);
3902     masm.shrl(rbx, 1);
3903     masm.jmpb(CLEANUP);
3904 
3905     masm.bind(RET_NEG_ONE);
3906     masm.movl(rbx, -1);
3907     masm.jmpb(DONE);
3908 
3909     masm.bind(RET_NOT_FOUND);
3910     masm.movl(rbx, -1);
3911 
         // Drop the three saved quadwords pushed at entry.
3912     masm.bind(CLEANUP);
3913     masm.addptr(rsp, 24);
3914 
3915     masm.bind(DONE);
3916   %}
3917 
3918   enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, regD tmp1,
3919                               regD tmp2, rbx_RegI tmp3, rax_RegI result) %{
         // Emits an inline equality test of two char sequences that both hold
         // cnt chars.  Fixed registers per the operand classes: str1=rdi,
         // str2=rsi, cnt=rcx, tmp3=rbx, result=rax.  rax is set to 1 when the
         // sequences match and to 0 otherwise.
         // Identical addresses or a zero count short-circuit to true.  The
         // count is converted to bytes and compared 16 bytes at a time (only
         // when UseSSE42Intrinsics is set), then 4 bytes at a time, then one
         // trailing char.  rdi, rsi, rcx and rbx are modified.
3920     Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3921     MacroAssembler masm(&cbuf);
3922 
3923     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3924     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3925 
3926     // does source == target?
3927     masm.cmpptr(rdi, rsi);
3928     masm.jcc(Assembler::equal, RET_TRUE);
3929 
3930     // count == 0
3931     masm.testl(rcx, rcx);
3932     masm.jcc(Assembler::zero, RET_TRUE);
3933 
3934     // Set byte count
3935     masm.shll(rcx, 1);
3936     masm.movl(rax, rcx);
3937 
3938     if (UseSSE42Intrinsics) {
3939       // With SSE4.2, use double quad vector compare
3940       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3941       // Compare 16-byte vectors
3942       masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
3943       masm.andl(rax, 0x0000000e);  // tail count (in bytes)
3944       masm.testl(rcx, rcx);
3945       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point both pointers past the vectorised part and walk a negative
           // byte index in rcx up towards zero.
3946       masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3947       masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3948       masm.negptr(rcx);
3949 
3950       masm.bind(COMPARE_WIDE_VECTORS);
3951       masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3952       masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
3953       masm.pxor(tmp1Reg, tmp2Reg);
3954       masm.ptest(tmp1Reg, tmp1Reg);
3955       masm.jccb(Assembler::notZero, RET_FALSE);
3956       masm.addptr(rcx, 16);
3957       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3958       masm.bind(COMPARE_TAIL);
3959       masm.movl(rcx, rax);
3960       // Fallthru to tail compare
3961     }
3962 
3963     // Compare 4-byte vectors
3964     masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
3965     masm.andl(rax, 0x00000002);  // tail char (in bytes)
3966     masm.testl(rcx, rcx);
3967     masm.jccb(Assembler::zero, COMPARE_CHAR);
3968     masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3969     masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3970     masm.negptr(rcx);
3971 
3972     masm.bind(COMPARE_VECTORS);
3973     masm.movl(rbx, Address(rdi, rcx, Address::times_1));
3974     masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
3975     masm.jccb(Assembler::notEqual, RET_FALSE);
3976     masm.addptr(rcx, 4);
3977     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3978 
3979     // Compare trailing char (final 2 bytes), if any
3980     masm.bind(COMPARE_CHAR);
3981     masm.testl(rax, rax);
3982     masm.jccb(Assembler::zero, RET_TRUE);
3983     masm.load_unsigned_short(rbx, Address(rdi, 0));
3984     masm.load_unsigned_short(rcx, Address(rsi, 0));
3985     masm.cmpl(rbx, rcx);
3986     masm.jccb(Assembler::notEqual, RET_FALSE);
3987 
3988     masm.bind(RET_TRUE);
3989     masm.movl(rax, 1);   // return true
3990     masm.jmpb(DONE);
3991 
3992     masm.bind(RET_FALSE);
3993     masm.xorl(rax, rax); // return false
3994 
3995     masm.bind(DONE);
3996   %}
3997 
3998   enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
3999                              rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline equality test of two arrays whose element data
         // starts at the T_CHAR base offset.  resultReg is set to 1 when the
         // arrays are equal and to 0 otherwise.
         // Order of checks: same reference -> true; either reference zero ->
         // false; different lengths -> false; zero length -> true; otherwise
         // the contents are compared 16 bytes at a time (only when
         // UseSSE42Intrinsics is set), then 4 bytes at a time, then one
         // trailing char.  ary1Reg, ary2Reg, tmp3Reg and tmp4Reg are modified.
4000     Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4001     MacroAssembler masm(&cbuf);
4002 
4003     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
4004     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
4005     Register ary1Reg      = as_Register($ary1$$reg);
4006     Register ary2Reg      = as_Register($ary2$$reg);
4007     Register tmp3Reg      = as_Register($tmp3$$reg);
4008     Register tmp4Reg      = as_Register($tmp4$$reg);
4009     Register resultReg    = as_Register($result$$reg);
4010 
4011     int length_offset  = arrayOopDesc::length_offset_in_bytes();
4012     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4013 
4014     // Check the input args
4015     masm.cmpq(ary1Reg, ary2Reg);
4016     masm.jcc(Assembler::equal, TRUE_LABEL);
4017     masm.testq(ary1Reg, ary1Reg);
4018     masm.jcc(Assembler::zero, FALSE_LABEL);
4019     masm.testq(ary2Reg, ary2Reg);
4020     masm.jcc(Assembler::zero, FALSE_LABEL);
4021 
4022     // Check the lengths
4023     masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
4024     masm.movl(resultReg, Address(ary2Reg, length_offset));
4025     masm.cmpl(tmp4Reg, resultReg);
4026     masm.jcc(Assembler::notEqual, FALSE_LABEL);
4027     masm.testl(resultReg, resultReg);
4028     masm.jcc(Assembler::zero, TRUE_LABEL);
4029 
4030     // Load the address of the first element of each array
4031     masm.lea(ary1Reg, Address(ary1Reg, base_offset));
4032     masm.lea(ary2Reg, Address(ary2Reg, base_offset));
4033 
4034     // Set byte count (element count * 2) in both tmp4Reg and resultReg
4035     masm.shll(tmp4Reg, 1);
4036     masm.movl(resultReg,tmp4Reg);
4037 
4038     if (UseSSE42Intrinsics){
4039       // With SSE4.2, use double quad vector compare
4040       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4041       // Compare 16-byte vectors
4042       masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
4043       masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
4044       masm.testl(tmp4Reg, tmp4Reg);
4045       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point both pointers past the vectorised part and walk a negative
           // byte index in tmp4Reg up towards zero.
4046       masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4047       masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4048       masm.negptr(tmp4Reg);
4049 
4050       masm.bind(COMPARE_WIDE_VECTORS);
4051       masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4052       masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4053       masm.pxor(tmp1Reg, tmp2Reg);
4054       masm.ptest(tmp1Reg, tmp1Reg);
4055 
4056       masm.jccb(Assembler::notZero, FALSE_LABEL);
4057       masm.addptr(tmp4Reg, 16);
4058       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4059       masm.bind(COMPARE_TAIL);
4060       masm.movl(tmp4Reg, resultReg);
4061       // Fallthru to tail compare
4062     }
4063 
4064    // Compare 4-byte vectors
4065     masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
4066     masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
4067     masm.testl(tmp4Reg, tmp4Reg); // if tmp4Reg == 0, only compare the trailing char
4068     masm.jccb(Assembler::zero, COMPARE_CHAR);
4069     masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4070     masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4071     masm.negptr(tmp4Reg);
4072 
4073     masm.bind(COMPARE_VECTORS);
4074     masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4075     masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4076     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4077     masm.addptr(tmp4Reg, 4);
4078     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4079 
4080     // Compare trailing char (final 2 bytes), if any
4081     masm.bind(COMPARE_CHAR);
4082     masm.testl(resultReg, resultReg);
4083     masm.jccb(Assembler::zero, TRUE_LABEL);
4084     masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
4085     masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
4086     masm.cmpl(tmp3Reg, tmp4Reg);
4087     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4088 
4089     masm.bind(TRUE_LABEL);
4090     masm.movl(resultReg, 1);   // return true
4091     masm.jmpb(DONE);
4092 
4093     masm.bind(FALSE_LABEL);
4094     masm.xorl(resultReg, resultReg); // return false
4095 
4096     // That's it
4097     masm.bind(DONE);
4098   %}
4099 
4100   enc_class enc_rethrow()
4101   %{
4102     // Emit a jump (opcode E9 + 32-bit displacement) to the rethrow stub.
4103     cbuf.set_inst_mark();
4104     emit_opcode(cbuf, 0xE9);
4105     // Evaluated after the opcode byte has been emitted, as in the inline form;
4106     // the extra -4 accounts for the displacement field itself.
4107     const int stub_disp = (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4);
4108     emit_d32_reloc(cbuf, stub_disp, runtime_call_Relocation::spec(), RELOC_DISP32); %}
4109 
4110   enc_class absF_encoding(regF dst)
4111   %{
4112     // Hand-encodes [REX.R] 0F 54 /r with a mod=00, r/m=101 memory operand that
4113     // refers to the float sign mask (reg_mem has no RIP-relative support yet).
4114     const address mask_addr = (address) StubRoutines::x86::float_sign_mask();
4115     const int xmm_enc = $dst$$reg;
4116     const bool needs_rex = (xmm_enc >= 8);
4117     cbuf.set_inst_mark();
4118     if (needs_rex) {
4119       emit_opcode(cbuf, Assembler::REX_R);
4120     }
4121     emit_opcode(cbuf, 0x0F);
4122     emit_opcode(cbuf, 0x54);
4123     emit_rm(cbuf, 0x0, needs_rex ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
4124     emit_d32_reloc(cbuf, mask_addr);
4125   %}
4126 
4127   enc_class absD_encoding(regD dst)
4128   %{
4129     // Hand-encodes 66 [REX.R] 0F 54 /r with a mod=00, r/m=101 memory operand that
4130     // refers to the double sign mask (reg_mem has no RIP-relative support yet).
4131     const address mask_addr = (address) StubRoutines::x86::double_sign_mask();
4132     const int xmm_enc = $dst$$reg;
4133     const bool needs_rex = (xmm_enc >= 8);
4134     cbuf.set_inst_mark();
4135     emit_opcode(cbuf, 0x66);               // operand-size prefix goes before any REX
4136     if (needs_rex) {
4137       emit_opcode(cbuf, Assembler::REX_R);
4138     }
4139     emit_opcode(cbuf, 0x0F);
4140     emit_opcode(cbuf, 0x54);
4141     emit_rm(cbuf, 0x0, needs_rex ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
4142     emit_d32_reloc(cbuf, mask_addr);
4143   %}
4144 
4145   enc_class negF_encoding(regF dst)
4146   %{
4147     // Hand-encodes [REX.R] 0F 57 /r with a mod=00, r/m=101 memory operand that
4148     // refers to the float sign-flip constant (reg_mem has no RIP-relative support yet).
4149     const address flip_addr = (address) StubRoutines::x86::float_sign_flip();
4150     const int xmm_enc = $dst$$reg;
4151     const bool needs_rex = (xmm_enc >= 8);
4152     cbuf.set_inst_mark();
4153     if (needs_rex) {
4154       emit_opcode(cbuf, Assembler::REX_R);
4155     }
4156     emit_opcode(cbuf, 0x0F);
4157     emit_opcode(cbuf, 0x57);
4158     emit_rm(cbuf, 0x0, needs_rex ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
4159     emit_d32_reloc(cbuf, flip_addr);
4160   %}
4161 
4162   enc_class negD_encoding(regD dst)
4163   %{
4164     // Hand-encodes 66 [REX.R] 0F 57 /r with a mod=00, r/m=101 memory operand that
4165     // refers to the double sign-flip constant (reg_mem has no RIP-relative support yet).
4166     const address flip_addr = (address) StubRoutines::x86::double_sign_flip();
4167     const int xmm_enc = $dst$$reg;
4168     const bool needs_rex = (xmm_enc >= 8);
4169     cbuf.set_inst_mark();
4170     emit_opcode(cbuf, 0x66);               // operand-size prefix goes before any REX
4171     if (needs_rex) {
4172       emit_opcode(cbuf, Assembler::REX_R);
4173     }
4174     emit_opcode(cbuf, 0x0F);
4175     emit_opcode(cbuf, 0x57);
4176     emit_rm(cbuf, 0x0, needs_rex ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
4177     emit_d32_reloc(cbuf, flip_addr);
4178   %}
4179 
4180   enc_class f2i_fixup(rRegI dst, regF src)
4181   %{
         // Hand-encoded fix-up sequence for a float-to-int conversion result.
         // If dst differs from 0x80000000 nothing else executes.  Otherwise src
         // is spilled to a fresh 8-byte stack slot, the f2i_fixup stub is
         // called, and the slot is popped into dst.
         // (0x80000000 is presumably the value the preceding conversion yields
         // when it cannot represent the input; that instruction is outside
         // this block.)
4182     int dstenc = $dst$$reg;
4183     int srcenc = $src$$reg;
4184 
4185     // cmpl $dst, #0x80000000
4186     if (dstenc >= 8) {
4187       emit_opcode(cbuf, Assembler::REX_B);
4188     }
4189     emit_opcode(cbuf, 0x81);
4190     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4191     emit_d32(cbuf, 0x80000000);
4192 
4193     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted
         // below: subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or
         // 2 with REX) = 0xF, 0x10 or 0x11.
4194     emit_opcode(cbuf, 0x75);
4195     if (srcenc < 8 && dstenc < 8) {
4196       emit_d8(cbuf, 0xF);
4197     } else if (srcenc >= 8 && dstenc >= 8) {
4198       emit_d8(cbuf, 0x11);
4199     } else {
4200       emit_d8(cbuf, 0x10);
4201     }
4202 
4203     // subq rsp, #8
4204     emit_opcode(cbuf, Assembler::REX_W);
4205     emit_opcode(cbuf, 0x83);
4206     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4207     emit_d8(cbuf, 8);
4208 
4209     // movss [rsp], $src
4210     emit_opcode(cbuf, 0xF3);
4211     if (srcenc >= 8) {
4212       emit_opcode(cbuf, Assembler::REX_R);
4213     }
4214     emit_opcode(cbuf, 0x0F);
4215     emit_opcode(cbuf, 0x11);
4216     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4217 
4218     // call f2i_fixup
         // The displacement is taken relative to the end of the 4-byte field,
         // hence the extra -4 after code_end().
4219     cbuf.set_inst_mark();
4220     emit_opcode(cbuf, 0xE8);
4221     emit_d32_reloc(cbuf,
4222                    (int)
4223                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
4224                    runtime_call_Relocation::spec(),
4225                    RELOC_DISP32);
4226 
4227     // popq $dst
4228     if (dstenc >= 8) {
4229       emit_opcode(cbuf, Assembler::REX_B);
4230     }
4231     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4232 
4233     // done:
4234   %}
4235 
4236   enc_class f2l_fixup(rRegL dst, regF src)
4237   %{
         // Hand-encoded fix-up sequence for a float-to-long conversion result.
         // dst is compared with the 64-bit constant stored at the address
         // returned by double_sign_flip().  If they differ nothing else
         // executes.  Otherwise src is spilled to a fresh 8-byte stack slot,
         // the f2l_fixup stub is called, and the slot is popped into dst.
4238     int dstenc = $dst$$reg;
4239     int srcenc = $src$$reg;
4240     address const_address = (address) StubRoutines::x86::double_sign_flip();
4241 
4242     // cmpq $dst, [0x8000000000000000]
         // dst sits in the ModRM reg field here, so a high register needs
         // REX_WR rather than REX_W.
4243     cbuf.set_inst_mark();
4244     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4245     emit_opcode(cbuf, 0x39);
4246     // XXX reg_mem doesn't support RIP-relative addressing yet
4247     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4248     emit_d32_reloc(cbuf, const_address);
4249 
4250 
4251     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted
         // below: subq (4) + movss (5, or 6 with REX) + call (5) + popq (1, or
         // 2 with REX) = 0xF, 0x10 or 0x11.
4252     emit_opcode(cbuf, 0x75);
4253     if (srcenc < 8 && dstenc < 8) {
4254       emit_d8(cbuf, 0xF);
4255     } else if (srcenc >= 8 && dstenc >= 8) {
4256       emit_d8(cbuf, 0x11);
4257     } else {
4258       emit_d8(cbuf, 0x10);
4259     }
4260 
4261     // subq rsp, #8
4262     emit_opcode(cbuf, Assembler::REX_W);
4263     emit_opcode(cbuf, 0x83);
4264     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4265     emit_d8(cbuf, 8);
4266 
4267     // movss [rsp], $src
4268     emit_opcode(cbuf, 0xF3);
4269     if (srcenc >= 8) {
4270       emit_opcode(cbuf, Assembler::REX_R);
4271     }
4272     emit_opcode(cbuf, 0x0F);
4273     emit_opcode(cbuf, 0x11);
4274     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4275 
4276     // call f2l_fixup
         // The displacement is taken relative to the end of the 4-byte field,
         // hence the extra -4 after code_end().
4277     cbuf.set_inst_mark();
4278     emit_opcode(cbuf, 0xE8);
4279     emit_d32_reloc(cbuf,
4280                    (int)
4281                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
4282                    runtime_call_Relocation::spec(),
4283                    RELOC_DISP32);
4284 
4285     // popq $dst
4286     if (dstenc >= 8) {
4287       emit_opcode(cbuf, Assembler::REX_B);
4288     }
4289     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4290 
4291     // done:
4292   %}
4293 
4294   enc_class d2i_fixup(rRegI dst, regD src)
4295   %{
         // Hand-encoded fix-up sequence for a double-to-int conversion result.
         // If dst differs from 0x80000000 nothing else executes.  Otherwise src
         // is spilled to a fresh 8-byte stack slot, the d2i_fixup stub is
         // called, and the slot is popped into dst.
         // (0x80000000 is presumably the value the preceding conversion yields
         // when it cannot represent the input; that instruction is outside
         // this block.)
4296     int dstenc = $dst$$reg;
4297     int srcenc = $src$$reg;
4298 
4299     // cmpl $dst, #0x80000000
4300     if (dstenc >= 8) {
4301       emit_opcode(cbuf, Assembler::REX_B);
4302     }
4303     emit_opcode(cbuf, 0x81);
4304     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4305     emit_d32(cbuf, 0x80000000);
4306 
4307     // jne,s done
         // The 8-bit displacement is the byte count of everything emitted
         // below: subq (4) + movsd (5, or 6 with REX) + call (5) + popq (1, or
         // 2 with REX) = 0xF, 0x10 or 0x11.
4308     emit_opcode(cbuf, 0x75);
4309     if (srcenc < 8 && dstenc < 8) {
4310       emit_d8(cbuf, 0xF);
4311     } else if (srcenc >= 8 && dstenc >= 8) {
4312       emit_d8(cbuf, 0x11);
4313     } else {
4314       emit_d8(cbuf, 0x10);
4315     }
4316 
4317     // subq rsp, #8
4318     emit_opcode(cbuf, Assembler::REX_W);
4319     emit_opcode(cbuf, 0x83);
4320     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4321     emit_d8(cbuf, 8);
4322 
4323     // movsd [rsp], $src
4324     emit_opcode(cbuf, 0xF2);
4325     if (srcenc >= 8) {
4326       emit_opcode(cbuf, Assembler::REX_R);
4327     }
4328     emit_opcode(cbuf, 0x0F);
4329     emit_opcode(cbuf, 0x11);
4330     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4331 
4332     // call d2i_fixup
         // The displacement is taken relative to the end of the 4-byte field,
         // hence the extra -4 after code_end().
4333     cbuf.set_inst_mark();
4334     emit_opcode(cbuf, 0xE8);
4335     emit_d32_reloc(cbuf,
4336                    (int)
4337                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
4338                    runtime_call_Relocation::spec(),
4339                    RELOC_DISP32);
4340 
4341     // popq $dst
4342     if (dstenc >= 8) {
4343       emit_opcode(cbuf, Assembler::REX_B);
4344     }
4345     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4346 
4347     // done:
4348   %}
4349 
4350   enc_class d2l_fixup(rRegL dst, regD src)
4351   %{
4352     int dstenc = $dst$$reg;
4353     int srcenc = $src$$reg;
4354     address const_address = (address) StubRoutines::x86::double_sign_flip();
4355     // Emits: if $dst equals the 64-bit value at const_address, spill $src, call the d2l_fixup stub, pop the slot into $dst.
4356     // cmpq $dst, [0x8000000000000000]  ($dst goes in the ModRM reg field, hence REX_WR when dst >= 8)
4357     cbuf.set_inst_mark();
4358     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4359     emit_opcode(cbuf, 0x39);
4360     // XXX reg_mem doesn't support RIP-relative addressing yet
4361     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4362     emit_d32_reloc(cbuf, const_address);
4363 
4364 
4365     // jne,s done  (disp8 = bytes emitted below: 4 + 5 + 5 + 1 = 15, plus 1 per REX prefix when src/dst >= 8)
4366     emit_opcode(cbuf, 0x75);
4367     if (srcenc < 8 && dstenc < 8) {
4368       emit_d8(cbuf, 0xF);
4369     } else if (srcenc >= 8 && dstenc >= 8) {
4370       emit_d8(cbuf, 0x11);
4371     } else {
4372       emit_d8(cbuf, 0x10);
4373     }
4374 
4375     // subq rsp, #8  (4 bytes: REX_W, opcode, ModRM, imm8)
4376     emit_opcode(cbuf, Assembler::REX_W);
4377     emit_opcode(cbuf, 0x83);
4378     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4379     emit_d8(cbuf, 8);
4380 
4381     // movsd [rsp], $src  (5 bytes, or 6 when REX_R is needed for src >= 8)
4382     emit_opcode(cbuf, 0xF2);
4383     if (srcenc >= 8) {
4384       emit_opcode(cbuf, Assembler::REX_R);
4385     }
4386     emit_opcode(cbuf, 0x0F);
4387     emit_opcode(cbuf, 0x11);
4388     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4389 
4390     // call d2l_fixup  (5 bytes: opcode + 32-bit displacement)
4391     cbuf.set_inst_mark();
4392     emit_opcode(cbuf, 0xE8);
4393     emit_d32_reloc(cbuf,
4394                    (int) // displacement is measured from the end of the 4-byte field about to be emitted
4395                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4396                    runtime_call_Relocation::spec(),
4397                    RELOC_DISP32);
4398 
4399     // popq $dst  (1 byte, or 2 with REX_B); presumably the stub left the fixed-up result in the slot -- confirm
4400     if (dstenc >= 8) {
4401       emit_opcode(cbuf, Assembler::REX_B);
4402     }
4403     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4404 
4405     // done:
4406   %}
4407 
4408   // Safepoint Poll.  This polls the safepoint page, and causes an
4409   // exception if it is not readable. Unfortunately, it kills
4410   // RFLAGS in the process (the poll is a testl, which writes flags).
4411   enc_class enc_safepoint_poll
4412   %{
4413     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4414     // XXX reg_mem doesn't support RIP-relative addressing yet
4415     cbuf.set_inst_mark();
4416     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX poll_type relocation recorded at the instruction start
4417     emit_opcode(cbuf, 0x85); // testl
4418     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4419     // cbuf.inst_mark() is beginning of instruction
4420     emit_d32_reloc(cbuf, os::get_polling_page());
4421     // (no relocInfo::poll_type argument here: it is registered by the cbuf.relocate call above)
4422   %}
4423 %}
4424 
4425 
4426 
4427 //----------FRAME--------------------------------------------------------------
4428 // Definition of frame structure and management information.
4429 //
4430 //  S T A C K   L A Y O U T    Allocators stack-slot number
4431 //                             |   (to get allocators register number
4432 //  G  Owned by    |        |  v    add OptoReg::stack0())
4433 //  r   CALLER     |        |
4434 //  o     |        +--------+      pad to even-align allocators stack-slot
4435 //  w     V        |  pad0  |        numbers; owned by CALLER
4436 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4437 //  h     ^        |   in   |  5
4438 //        |        |  args  |  4   Holes in incoming args owned by SELF
4439 //  |     |        |        |  3
4440 //  |     |        +--------+
4441 //  V     |        | old out|      Empty on Intel, window on Sparc
4442 //        |    old |preserve|      Must be even aligned.
4443 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4444 //        |        |   in   |  3   area for Intel ret address
4445 //     Owned by    |preserve|      Empty on Sparc.
4446 //       SELF      +--------+
4447 //        |        |  pad2  |  2   pad to align old SP
4448 //        |        +--------+  1
4449 //        |        | locks  |  0
4450 //        |        +--------+----> OptoReg::stack0(), even aligned
4451 //        |        |  pad1  | 11   pad to align new SP
4452 //        |        +--------+
4453 //        |        |        | 10
4454 //        |        | spills |  9   spills
4455 //        V        |        |  8   (pad0 slot for callee)
4456 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4457 //        ^        |  out   |  7
4458 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4459 //     Owned by    +--------+
4460 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4461 //        |    new |preserve|      Must be even-aligned.
4462 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4463 //        |        |        |
4464 //
4465 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4466 //         known from SELF's arguments and the Java calling convention.
4467 //         Region 6-7 is determined per call site.
4468 // Note 2: If the calling convention leaves holes in the incoming argument
4469 //         area, those holes are owned by SELF.  Holes in the outgoing area
4470 //         are owned by the CALLEE.  Holes should not be necessary in the
4471 //         incoming area, as the Java calling convention is completely under
4472 //         the control of the AD file.  Doubles can be sorted and packed to
4473 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4474 //         varargs C calling conventions.
4475 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4476 //         even aligned with pad0 as needed.
4477 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4478 //         region 6-11 is even aligned; it may be padded out more so that
4479 //         the region from SP to FP meets the minimum stack alignment.
4480 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4481 //         alignment.  Region 11, pad1, may be dynamically extended so that
4482 //         SP meets the minimum alignment.
4483 
4484 frame
4485 %{
4486   // What direction does stack grow in (assumed to be same for C & Java)
4487   stack_direction(TOWARDS_LOW);
4488 
4489   // These registers define part of the calling convention
4490   // between compiled code and the interpreter.
4491   inline_cache_reg(RAX);                // Inline Cache Register
4492   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4493                                         // calling interpreter
4494 
4495   // Optional: name the operand used by cisc-spilling to access
4496   // [stack_pointer + offset]
4497   cisc_spilling_operand_name(indOffset32);
4498 
4499   // Number of stack slots consumed by locking an object
4500   sync_stack_slots(2);
4501 
4502   // Compiled code's Frame Pointer
4503   frame_pointer(RSP);
4504 
4505   // Interpreter stores its frame pointer in a register which is
4506   // stored to the stack by I2CAdaptors.
4507   // I2CAdaptors convert from interpreted java to compiled java.
4508   interpreter_frame_pointer(RBP);
4509 
4510   // Stack alignment requirement
4511   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4512 
4513   // Number of stack slots between incoming argument block and the start of
4514   // a new frame.  The PROLOG must add this many slots to the stack.  The
4515   // EPILOG must remove this many slots.  amd64 needs two slots for
4516   // return address.  NOTE(review): the base value below is 4 slots, not 2 -- confirm what the other two hold.
4517   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4518 
4519   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4520   // for calls to C.  Supports the var-args backing area for register parms.
4521   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4522 
4523   // The after-PROLOG location of the return address.  Location of
4524   // return address specifies a type (REG or STACK) and a number
4525   // representing the register number (i.e. - use a register name) or
4526   // stack slot.
4527   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4528   // Otherwise, it is above the locks and verification slot and alignment word
4529   return_addr(STACK - 2 +
4530               round_to(2 + 2 * VerifyStackAtCalls +
4531                        Compile::current()->fixed_slots(),
4532                        WordsPerLong * 2));
4533 
4534   // Body of function which returns an integer array locating
4535   // arguments either in registers or in stack slots.  Passed an array
4536   // of ideal registers called "sig" and a "length" count.  Stack-slot
4537   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4538   // arguments for a CALLEE.  Incoming stack arguments are
4539   // automatically biased by the preserve_stack_slots field above.
4540 
4541   calling_convention
4542   %{
4543     // No difference between ingoing/outgoing just pass false
4544     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4545   %}
4546 
4547   c_calling_convention
4548   %{
4549     // This is obviously always outgoing; the return value is deliberately discarded
4550     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4551   %}
4552 
4553   // Location of compiled Java return values.  Same as C for now.
4554   return_value
4555   %{
4556     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4557            "only return normal values");
4558     // lo[] and hi[] are indexed by ideal_reg; together they form the OptoRegPair returned below.
4559     static const int lo[Op_RegL + 1] = {
4560       0,
4561       0,
4562       RAX_num,  // Op_RegN
4563       RAX_num,  // Op_RegI
4564       RAX_num,  // Op_RegP
4565       XMM0_num, // Op_RegF
4566       XMM0_num, // Op_RegD
4567       RAX_num   // Op_RegL
4568     };
4569     static const int hi[Op_RegL + 1] = {
4570       0,
4571       0,
4572       OptoReg::Bad, // Op_RegN
4573       OptoReg::Bad, // Op_RegI
4574       RAX_H_num,    // Op_RegP
4575       OptoReg::Bad, // Op_RegF
4576       XMM0_H_num,   // Op_RegD
4577       RAX_H_num     // Op_RegL
4578     };
4579     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4580     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4581   %}
4582 %}
4583 
4584 //----------ATTRIBUTES---------------------------------------------------------
4585 //----------Operand Attributes-------------------------------------------------
4586 op_attrib op_cost(0);        // Required cost attribute for operands
4587 
4588 //----------Instruction Attributes---------------------------------------------
4589 ins_attrib ins_cost(100);       // Required cost attribute
4590 ins_attrib ins_size(8);         // Required size attribute (in bits)
4591 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4592 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4593                                 // a non-matching short branch variant
4594                                 // of some long branch?
4595 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4596                                 // be a power of 2); specifies the
4597                                 // alignment that some part of the
4598                                 // instruction (not necessarily the
4599                                 // start) requires.  If > 1, a
4600                                 // compute_padding() function must be
4601                                 // provided for the instruction.
4602 
4603 //----------OPERANDS-----------------------------------------------------------
4604 // Operand definitions must precede instruction definitions for correct parsing
4605 // in the ADLC because operands constitute user defined types which are used in
4606 // instruction definitions.
4607 
4608 //----------Simple Operands----------------------------------------------------
4609 // Immediate Operands
4610 // Integer Immediate: matches any ConI node
4611 operand immI()
4612 %{
4613   match(ConI);
4614 
4615   op_cost(10);
4616   format %{ %}
4617   interface(CONST_INTER);
4618 %}
4619 
4620 // Integer constant 0, for tests against zero
4621 operand immI0()
4622 %{
4623   predicate(n->get_int() == 0);
4624   match(ConI);
4625 
4626   op_cost(0);
4627   format %{ %}
4628   interface(CONST_INTER);
4629 %}
4630 
4631 // Integer constant 1, for increment
4632 operand immI1()
4633 %{
4634   predicate(n->get_int() == 1);
4635   match(ConI);
4636 
4637   op_cost(0);
4638   format %{ %}
4639   interface(CONST_INTER);
4640 %}
4641 
4642 // Integer constant -1, for decrement
4643 operand immI_M1()
4644 %{
4645   predicate(n->get_int() == -1);
4646   match(ConI);
4647 
4648   op_cost(0);
4649   format %{ %}
4650   interface(CONST_INTER);
4651 %}
4652 
4653 // Integer constant in [0, 3]: the valid scale values for addressing modes
4654 operand immI2()
4655 %{
4656   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4657   match(ConI);
4658 
4659   format %{ %}
4660   interface(CONST_INTER);
4661 %}
4662 
4663 operand immI8()
4664 %{
4665   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80)); // int constant in the signed 8-bit range
4666   match(ConI);
4667 
4668   op_cost(5);
4669   format %{ %}
4670   interface(CONST_INTER);
4671 %}
4672 
4673 operand immI16()
4674 %{
4675   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); // int constant in the signed 16-bit range
4676   match(ConI);
4677 
4678   op_cost(10);
4679   format %{ %}
4680   interface(CONST_INTER);
4681 %}
4682 
4683 // Integer constant 32, for long shifts
4684 operand immI_32()
4685 %{
4686   predicate( n->get_int() == 32 );
4687   match(ConI);
4688 
4689   op_cost(0);
4690   format %{ %}
4691   interface(CONST_INTER);
4692 %}
4693 
4694 // Integer constant 64, for long shifts
4695 operand immI_64()
4696 %{
4697   predicate( n->get_int() == 64 );
4698   match(ConI);
4699 
4700   op_cost(0);
4701   format %{ %}
4702   interface(CONST_INTER);
4703 %}
4704 
4705 // Pointer Immediate: matches any ConP node
4706 operand immP()
4707 %{
4708   match(ConP);
4709 
4710   op_cost(10);
4711   format %{ %}
4712   interface(CONST_INTER);
4713 %}
4714 
4715 // NULL Pointer Immediate: pointer constant whose value is 0
4716 operand immP0()
4717 %{
4718   predicate(n->get_ptr() == 0);
4719   match(ConP);
4720 
4721   op_cost(5);
4722   format %{ %}
4723   interface(CONST_INTER);
4724 %}
4725 
4726 // Narrow Pointer Immediate: matches any ConN node
4727 operand immN() %{
4728   match(ConN);
4729 
4730   op_cost(10);
4731   format %{ %}
4732   interface(CONST_INTER);
4733 %}
4734 
4735 // NULL Narrow Pointer Immediate: narrow constant whose value is 0
4736 operand immN0() %{
4737   predicate(n->get_narrowcon() == 0);
4738   match(ConN);
4739 
4740   op_cost(5);
4741   format %{ %}
4742   interface(CONST_INTER);
4743 %}
4744 
4745 operand immP31()
4746 %{
4747   predicate(!n->as_Type()->type()->isa_oopptr()
4748             && (n->get_ptr() >> 31) == 0);
4749   match(ConP);
4750   // Pointer constant that is not an oop and whose value fits in 31 bits (value >> 31 == 0).
4751   op_cost(5);
4752   format %{ %}
4753   interface(CONST_INTER);
4754 %}
4755 
4756 
4757 // Long Immediate: matches any ConL node
4758 operand immL()
4759 %{
4760   match(ConL);
4761 
4762   op_cost(20);
4763   format %{ %}
4764   interface(CONST_INTER);
4765 %}
4766 
4767 // Long Immediate that fits in a signed 8-bit value
4768 operand immL8()
4769 %{
4770   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4771   match(ConL);
4772 
4773   op_cost(5);
4774   format %{ %}
4775   interface(CONST_INTER);
4776 %}
4777 
4778 // Long Immediate that survives a round trip through unsigned 32-bit
4779 operand immUL32()
4780 %{
4781   predicate(n->get_long() == (unsigned int) (n->get_long()));
4782   match(ConL);
4783 
4784   op_cost(10);
4785   format %{ %}
4786   interface(CONST_INTER);
4787 %}
4788 
4789 // Long Immediate that survives a round trip through signed 32-bit
4790 operand immL32()
4791 %{
4792   predicate(n->get_long() == (int) (n->get_long()));
4793   match(ConL);
4794 
4795   op_cost(15);
4796   format %{ %}
4797   interface(CONST_INTER);
4798 %}
4799 
4800 // Long Immediate: the value 0
4801 operand immL0()
4802 %{
4803   predicate(n->get_long() == 0L);
4804   match(ConL);
4805 
4806   op_cost(10);
4807   format %{ %}
4808   interface(CONST_INTER);
4809 %}
4810 
4811 // Long constant 1, for increment (no explicit op_cost is given)
4812 operand immL1()
4813 %{
4814   predicate(n->get_long() == 1);
4815   match(ConL);
4816 
4817   format %{ %}
4818   interface(CONST_INTER);
4819 %}
4820 
4821 // Long constant -1, for decrement (no explicit op_cost is given)
4822 operand immL_M1()
4823 %{
4824   predicate(n->get_long() == -1);
4825   match(ConL);
4826 
4827   format %{ %}
4828   interface(CONST_INTER);
4829 %}
4830 
4831 // Long Immediate: the value 10 (no explicit op_cost is given)
4832 operand immL10()
4833 %{
4834   predicate(n->get_long() == 10);
4835   match(ConL);
4836 
4837   format %{ %}
4838   interface(CONST_INTER);
4839 %}
4840 
4841 // Long immediate in the range [0, 127].
4842 // Used for a shorter form of long mul by 10.
4843 operand immL_127()
4844 %{
4845   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4846   match(ConL);
4847 
4848   op_cost(10);
4849   format %{ %}
4850   interface(CONST_INTER);
4851 %}
4852 
4853 // Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
4854 operand immL_32bits()
4855 %{
4856   predicate(n->get_long() == 0xFFFFFFFFL);
4857   match(ConL);
4858   op_cost(20);
4859 
4860   format %{ %}
4861   interface(CONST_INTER);
4862 %}
4863 
4864 // Float Immediate zero (tested via jint_cast; presumably a bit-pattern compare that excludes -0.0f -- confirm)
4865 operand immF0()
4866 %{
4867   predicate(jint_cast(n->getf()) == 0);
4868   match(ConF);
4869 
4870   op_cost(5);
4871   format %{ %}
4872   interface(CONST_INTER);
4873 %}
4874 
4875 // Float Immediate: matches any ConF node
4876 operand immF()
4877 %{
4878   match(ConF);
4879 
4880   op_cost(15);
4881   format %{ %}
4882   interface(CONST_INTER);
4883 %}
4884 
4885 // Double Immediate zero (tested via jlong_cast; presumably a bit-pattern compare that excludes -0.0 -- confirm)
4886 operand immD0()
4887 %{
4888   predicate(jlong_cast(n->getd()) == 0);
4889   match(ConD);
4890 
4891   op_cost(5);
4892   format %{ %}
4893   interface(CONST_INTER);
4894 %}
4895 
4896 // Double Immediate: matches any ConD node
4897 operand immD()
4898 %{
4899   match(ConD);
4900 
4901   op_cost(15);
4902   format %{ %}
4903   interface(CONST_INTER);
4904 %}
4905 
4906 // Immediates for special shifts (sign extend)
4907 
4908 // Integer constant 16 (a shift count; see the sign-extend note above)
4909 operand immI_16()
4910 %{
4911   predicate(n->get_int() == 16);
4912   match(ConI);
4913 
4914   format %{ %}
4915   interface(CONST_INTER);
4916 %}
4917 
4918 operand immI_24()
4919 %{
4920   predicate(n->get_int() == 24); // matches only the int constant 24
4921   match(ConI);
4922 
4923   format %{ %}
4924   interface(CONST_INTER);
4925 %}
4926 
4927 // Integer constant 255 (0xFF), for byte-wide masking
4928 operand immI_255()
4929 %{
4930   predicate(n->get_int() == 255);
4931   match(ConI);
4932 
4933   format %{ %}
4934   interface(CONST_INTER);
4935 %}
4936 
4937 // Integer constant 65535 (0xFFFF), for short-wide masking
4938 operand immI_65535()
4939 %{
4940   predicate(n->get_int() == 65535);
4941   match(ConI);
4942 
4943   format %{ %}
4944   interface(CONST_INTER);
4945 %}
4946 
4947 // Long constant 255 (0xFF), for byte-wide masking
4948 operand immL_255()
4949 %{
4950   predicate(n->get_long() == 255);
4951   match(ConL);
4952 
4953   format %{ %}
4954   interface(CONST_INTER);
4955 %}
4956 
4957 // Long constant 65535 (0xFFFF), for short-wide masking
4958 operand immL_65535()
4959 %{
4960   predicate(n->get_long() == 65535);
4961   match(ConL);
4962 
4963   format %{ %}
4964   interface(CONST_INTER);
4965 %}
4966 
4967 // Register Operands
4968 // Integer Register: allocated in class int_reg
4969 operand rRegI()
4970 %{
4971   constraint(ALLOC_IN_RC(int_reg));
4972   match(RegI);
4973   // Also accept the register-specific int operands as inputs.
4974   match(rax_RegI);
4975   match(rbx_RegI);
4976   match(rcx_RegI);
4977   match(rdx_RegI);
4978   match(rdi_RegI);
4979 
4980   format %{ %}
4981   interface(REG_INTER);
4982 %}
4983 
4984 // Special Registers: int operand allocated in class int_rax_reg, printed as RAX
4985 operand rax_RegI()
4986 %{
4987   constraint(ALLOC_IN_RC(int_rax_reg));
4988   match(RegI);
4989   match(rRegI);
4990 
4991   format %{ "RAX" %}
4992   interface(REG_INTER);
4993 %}
4994 
4995 // Special Registers: int operand allocated in class int_rbx_reg, printed as RBX
4996 operand rbx_RegI()
4997 %{
4998   constraint(ALLOC_IN_RC(int_rbx_reg));
4999   match(RegI);
5000   match(rRegI);
5001 
5002   format %{ "RBX" %}
5003   interface(REG_INTER);
5004 %}
5005 
5006 operand rcx_RegI()
5007 %{
5008   constraint(ALLOC_IN_RC(int_rcx_reg)); // int operand restricted to class int_rcx_reg
5009   match(RegI);
5010   match(rRegI);
5011 
5012   format %{ "RCX" %}
5013   interface(REG_INTER);
5014 %}
5015 
5016 operand rdx_RegI()
5017 %{
5018   constraint(ALLOC_IN_RC(int_rdx_reg)); // int operand restricted to class int_rdx_reg
5019   match(RegI);
5020   match(rRegI);
5021 
5022   format %{ "RDX" %}
5023   interface(REG_INTER);
5024 %}
5025 
5026 operand rdi_RegI()
5027 %{
5028   constraint(ALLOC_IN_RC(int_rdi_reg)); // int operand restricted to class int_rdi_reg
5029   match(RegI);
5030   match(rRegI);
5031 
5032   format %{ "RDI" %}
5033   interface(REG_INTER);
5034 %}
5035 
5036 operand no_rcx_RegI()
5037 %{
5038   constraint(ALLOC_IN_RC(int_no_rcx_reg)); // int operand in class int_no_rcx_reg
5039   match(RegI);
5040   match(rax_RegI); // every fixed int operand except rcx_RegI is accepted
5041   match(rbx_RegI);
5042   match(rdx_RegI);
5043   match(rdi_RegI);
5044 
5045   format %{ %}
5046   interface(REG_INTER);
5047 %}
5048 
5049 operand no_rax_rdx_RegI()
5050 %{
5051   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg)); // int operand in class int_no_rax_rdx_reg
5052   match(RegI);
5053   match(rbx_RegI); // rax_RegI and rdx_RegI are deliberately absent from this list
5054   match(rcx_RegI);
5055   match(rdi_RegI);
5056 
5057   format %{ %}
5058   interface(REG_INTER);
5059 %}
5060 
5061 // Pointer Register: allocated in class any_reg; accepts rRegP and the fixed pointer operands
5062 operand any_RegP()
5063 %{
5064   constraint(ALLOC_IN_RC(any_reg));
5065   match(RegP);
5066   match(rax_RegP);
5067   match(rbx_RegP);
5068   match(rdi_RegP);
5069   match(rsi_RegP);
5070   match(rbp_RegP);
5071   match(r15_RegP);
5072   match(rRegP);
5073 
5074   format %{ %}
5075   interface(REG_INTER);
5076 %}
5077 
5078 operand rRegP()
5079 %{
5080   constraint(ALLOC_IN_RC(ptr_reg)); // general pointer operand, allocated in class ptr_reg
5081   match(RegP);
5082   match(rax_RegP);
5083   match(rbx_RegP);
5084   match(rdi_RegP);
5085   match(rsi_RegP);
5086   match(rbp_RegP);
5087   match(r15_RegP);  // See Q&A below about r15_RegP.
5088 
5089   format %{ %}
5090   interface(REG_INTER);
5091 %}
5092 
5093 operand rRegN() %{
5094   constraint(ALLOC_IN_RC(int_reg)); // narrow (RegN) values are allocated in the same class as ints
5095   match(RegN);
5096 
5097   format %{ %}
5098   interface(REG_INTER);
5099 %}
5100 
5101 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5102 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5103 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
5104 // The output of an instruction is controlled by the allocator, which respects
5105 // register class masks, not match rules.  Unless an instruction mentions
5106 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5107 // by the allocator as an input.
5108 
5109 operand no_rax_RegP()
5110 %{
5111   constraint(ALLOC_IN_RC(ptr_no_rax_reg)); // pointer operand in class ptr_no_rax_reg
5112   match(RegP);
5113   match(rbx_RegP);
5114   match(rsi_RegP);
5115   match(rdi_RegP);
5116 
5117   format %{ %}
5118   interface(REG_INTER);
5119 %}
5120 
5121 operand no_rbp_RegP()
5122 %{
5123   constraint(ALLOC_IN_RC(ptr_no_rbp_reg)); // pointer operand in class ptr_no_rbp_reg
5124   match(RegP);
5125   match(rbx_RegP);
5126   match(rsi_RegP);
5127   match(rdi_RegP);
5128 
5129   format %{ %}
5130   interface(REG_INTER);
5131 %}
5132 
5133 operand no_rax_rbx_RegP()
5134 %{
5135   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg)); // pointer operand in class ptr_no_rax_rbx_reg
5136   match(RegP);
5137   match(rsi_RegP);
5138   match(rdi_RegP);
5139 
5140   format %{ %}
5141   interface(REG_INTER);
5142 %}
5143 
5144 // Special Registers
5145 // Pointer operand in class ptr_rax_reg; used to return a pointer value
5146 operand rax_RegP()
5147 %{
5148   constraint(ALLOC_IN_RC(ptr_rax_reg));
5149   match(RegP);
5150   match(rRegP);
5151 
5152   format %{ %}
5153   interface(REG_INTER);
5154 %}
5155 
5156 // Special Registers
5157 // Narrow operand in class int_rax_reg; used to return a compressed pointer value
5158 operand rax_RegN()
5159 %{
5160   constraint(ALLOC_IN_RC(int_rax_reg));
5161   match(RegN);
5162   match(rRegN);
5163 
5164   format %{ %}
5165   interface(REG_INTER);
5166 %}
5167 
5168 // Pointer operand in class ptr_rbx_reg.  Used in AtomicAdd
5169 operand rbx_RegP()
5170 %{
5171   constraint(ALLOC_IN_RC(ptr_rbx_reg));
5172   match(RegP);
5173   match(rRegP);
5174 
5175   format %{ %}
5176   interface(REG_INTER);
5177 %}
5178 
5179 operand rsi_RegP()
5180 %{
5181   constraint(ALLOC_IN_RC(ptr_rsi_reg)); // pointer operand restricted to class ptr_rsi_reg
5182   match(RegP);
5183   match(rRegP);
5184 
5185   format %{ %}
5186   interface(REG_INTER);
5187 %}
5188 
5189 // Pointer operand in class ptr_rdi_reg.  Used in rep stosq
5190 operand rdi_RegP()
5191 %{
5192   constraint(ALLOC_IN_RC(ptr_rdi_reg));
5193   match(RegP);
5194   match(rRegP);
5195 
5196   format %{ %}
5197   interface(REG_INTER);
5198 %}
5199 
5200 operand rbp_RegP()
5201 %{
5202   constraint(ALLOC_IN_RC(ptr_rbp_reg)); // pointer operand restricted to class ptr_rbp_reg
5203   match(RegP);
5204   match(rRegP);
5205 
5206   format %{ %}
5207   interface(REG_INTER);
5208 %}
5209 
5210 operand r15_RegP()
5211 %{
5212   constraint(ALLOC_IN_RC(ptr_r15_reg)); // r15 is the read-only TLS register (see the Q&A comment above)
5213   match(RegP);
5214   match(rRegP);
5215 
5216   format %{ %}
5217   interface(REG_INTER);
5218 %}
5219 
5220 operand rRegL()
5221 %{
5222   constraint(ALLOC_IN_RC(long_reg)); // general long operand, allocated in class long_reg
5223   match(RegL);
5224   match(rax_RegL);
5225   match(rdx_RegL);
5226 
5227   format %{ %}
5228   interface(REG_INTER);
5229 %}
5230 
5231 // Special Registers: long operand allocated in class long_no_rax_rdx_reg
5232 operand no_rax_rdx_RegL()
5233 %{
5234   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5235   match(RegL);
5236   match(rRegL);
5237 
5238   format %{ %}
5239   interface(REG_INTER);
5240 %}
5241 
5242 operand no_rax_RegL()
5243 %{
5244   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg)); // NOTE(review): same class as no_rax_rdx_RegL, yet rdx_RegL is matched below -- confirm intended
5245   match(RegL);
5246   match(rRegL);
5247   match(rdx_RegL);
5248 
5249   format %{ %}
5250   interface(REG_INTER);
5251 %}
5252 
5253 operand no_rcx_RegL()
5254 %{
5255   constraint(ALLOC_IN_RC(long_no_rcx_reg)); // long operand in class long_no_rcx_reg
5256   match(RegL);
5257   match(rRegL);
5258 
5259   format %{ %}
5260   interface(REG_INTER);
5261 %}
5262 
5263 operand rax_RegL()
5264 %{
5265   constraint(ALLOC_IN_RC(long_rax_reg)); // long operand restricted to class long_rax_reg
5266   match(RegL);
5267   match(rRegL);
5268 
5269   format %{ "RAX" %}
5270   interface(REG_INTER);
5271 %}
5272 
5273 operand rcx_RegL()
5274 %{
5275   constraint(ALLOC_IN_RC(long_rcx_reg)); // long operand restricted to class long_rcx_reg
5276   match(RegL);
5277   match(rRegL);
5278 
5279   format %{ %}
5280   interface(REG_INTER);
5281 %}
5282 
5283 operand rdx_RegL()
5284 %{
5285   constraint(ALLOC_IN_RC(long_rdx_reg)); // long operand restricted to class long_rdx_reg
5286   match(RegL);
5287   match(rRegL);
5288 
5289   format %{ %}
5290   interface(REG_INTER);
5291 %}
5292 
5293 // Flags register (class int_flags), used as output of compare instructions
5294 operand rFlagsReg()
5295 %{
5296   constraint(ALLOC_IN_RC(int_flags));
5297   match(RegFlags);
5298 
5299   format %{ "RFLAGS" %}
5300   interface(REG_INTER);
5301 %}
5302 
5303 // Flags register (class int_flags), used as output of FLOATING POINT compare instructions
5304 operand rFlagsRegU()
5305 %{
5306   constraint(ALLOC_IN_RC(int_flags));
5307   match(RegFlags);
5308 
5309   format %{ "RFLAGS_U" %}
5310   interface(REG_INTER);
5311 %}
5312 
5313 operand rFlagsRegUCF() %{
5314   constraint(ALLOC_IN_RC(int_flags));
5315   match(RegFlags);
5316   predicate(false); // NOTE(review): presumably keeps the matcher from selecting this operand on its own -- confirm
5317 
5318   format %{ "RFLAGS_U_CF" %}
5319   interface(REG_INTER);
5320 %}
5321 
5322 // Float register operand: allocated in class float_reg
5323 operand regF()
5324 %{
5325   constraint(ALLOC_IN_RC(float_reg));
5326   match(RegF);
5327 
5328   format %{ %}
5329   interface(REG_INTER);
5330 %}
5331 
5332 // Double register operand: allocated in class double_reg
5333 operand regD() 
5334 %{
5335   constraint(ALLOC_IN_RC(double_reg));
5336   match(RegD);
5337 
5338   format %{ %}
5339   interface(REG_INTER);
5340 %}
5341 
5342 
5343 //----------Memory Operands----------------------------------------------------
5344 // Direct Memory Operand
5345 // operand direct(immP addr)
5346 // %{
5347 //   match(addr);
5348 
5349 //   format %{ "[$addr]" %}
5350 //   interface(MEMORY_INTER) %{
5351 //     base(0xFFFFFFFF);
5352 //     index(0x4);
5353 //     scale(0x0);
5354 //     disp($addr);
5355 //   %}
5356 // %}
5357 
5358 // Indirect Memory Operand: the address is the base register alone
5359 operand indirect(any_RegP reg)
5360 %{
5361   constraint(ALLOC_IN_RC(ptr_reg));
5362   match(reg);
5363 
5364   format %{ "[$reg]" %}
5365   interface(MEMORY_INTER) %{
5366     base($reg);
5367     index(0x4); // no index register is used by this form
5368     scale(0x0);
5369     disp(0x0);
5370   %}
5371 %}
5372 
5373 // Indirect Memory Plus Short Offset Operand: base register + 8-bit (immL8) displacement
5374 operand indOffset8(any_RegP reg, immL8 off)
5375 %{
5376   constraint(ALLOC_IN_RC(ptr_reg));
5377   match(AddP reg off);
5378 
5379   format %{ "[$reg + $off (8-bit)]" %}
5380   interface(MEMORY_INTER) %{
5381     base($reg);
5382     index(0x4); // no index register is used by this form
5383     scale(0x0);
5384     disp($off);
5385   %}
5386 %}
5387 
5388 // Indirect Memory Plus Long Offset Operand: base register + 32-bit (immL32) displacement
5389 operand indOffset32(any_RegP reg, immL32 off)
5390 %{
5391   constraint(ALLOC_IN_RC(ptr_reg));
5392   match(AddP reg off);
5393 
5394   format %{ "[$reg + $off (32-bit)]" %}
5395   interface(MEMORY_INTER) %{
5396     base($reg);
5397     index(0x4); // no index register is used by this form
5398     scale(0x0);
5399     disp($off);
5400   %}
5401 %}
5402 
5403 // Indirect Memory Plus Index Register Plus Offset Operand (index is unscaled)
5404 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5405 %{
5406   constraint(ALLOC_IN_RC(ptr_reg));
5407   match(AddP (AddP reg lreg) off);
5408 
5409   op_cost(10);
5410   format %{"[$reg + $off + $lreg]" %}
5411   interface(MEMORY_INTER) %{
5412     base($reg);
5413     index($lreg);
5414     scale(0x0);
5415     disp($off);
5416   %}
5417 %}
5418 
5419 // Indirect Memory Plus Index Register Operand (no offset, index is unscaled)
5420 operand indIndex(any_RegP reg, rRegL lreg)
5421 %{
5422   constraint(ALLOC_IN_RC(ptr_reg));
5423   match(AddP reg lreg);
5424 
5425   op_cost(10);
5426   format %{"[$reg + $lreg]" %}
5427   interface(MEMORY_INTER) %{
5428     base($reg);
5429     index($lreg);
5430     scale(0x0);
5431     disp(0x0);
5432   %}
5433 %}
5434 
5435 // Indirect Memory Plus Scaled Index Register Operand: base + (index << scale)
5436 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5437 %{
5438   constraint(ALLOC_IN_RC(ptr_reg));
5439   match(AddP reg (LShiftL lreg scale));
5440 
5441   op_cost(10);
5442   format %{"[$reg + $lreg << $scale]" %}
5443   interface(MEMORY_INTER) %{
5444     base($reg);
5445     index($lreg);
5446     scale($scale);
5447     disp(0x0);
5448   %}
5449 %}
5450 
5451 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: base + (index << scale) + off
5452 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5453 %{
5454   constraint(ALLOC_IN_RC(ptr_reg));
5455   match(AddP (AddP reg (LShiftL lreg scale)) off);
5456 
5457   op_cost(10);
5458   format %{"[$reg + $off + $lreg << $scale]" %}
5459   interface(MEMORY_INTER) %{
5460     base($reg);
5461     index($lreg);
5462     scale($scale);
5463     disp($off);
5464   %}
5465 %}
5466 
5467 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5468 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5469 %{
5470   constraint(ALLOC_IN_RC(ptr_reg));
5471   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // presumably walks to the ConvI2L node: its long type must have a lower bound >= 0
5472   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5473 
5474   op_cost(10);
5475   format %{"[$reg + $off + $idx << $scale]" %}
5476   interface(MEMORY_INTER) %{
5477     base($reg);
5478     index($idx); // the int register itself is the index; the ConvI2L is folded into the operand
5479     scale($scale);
5480     disp($off);
5481   %}
5482 %}
5483 
5484 // Indirect Narrow Oop Plus Offset Operand: folds the DecodeN into the address as R12 + (narrow << 3) + off
5485 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5486 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
5487 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5488   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); // NOTE(review): only requires shift != 0, while scale below is fixed at 3 -- confirm
5489   constraint(ALLOC_IN_RC(ptr_reg));
5490   match(AddP (DecodeN reg) off);
5491 
5492   op_cost(10);
5493   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5494   interface(MEMORY_INTER) %{
5495     base(0xc); // R12
5496     index($reg);
5497     scale(0x3);
5498     disp($off);
5499   %}
5500 %}
5501 
5502 // Indirect Memory Operand on a narrow oop: the narrow register itself is the base
5503 operand indirectNarrow(rRegN reg)
5504 %{
5505   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops are unshifted
5506   constraint(ALLOC_IN_RC(ptr_reg));
5507   match(DecodeN reg);
5508 
5509   format %{ "[$reg]" %}
5510   interface(MEMORY_INTER) %{
5511     base($reg);
5512     index(0x4); // no index register is used by this form
5513     scale(0x0);
5514     disp(0x0);
5515   %}
5516 %}
5517 
5518 // Indirect Memory Plus Short Offset Operand on a narrow oop: narrow register + 8-bit displacement
5519 operand indOffset8Narrow(rRegN reg, immL8 off)
5520 %{
5521   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops are unshifted
5522   constraint(ALLOC_IN_RC(ptr_reg));
5523   match(AddP (DecodeN reg) off);
5524 
5525   format %{ "[$reg + $off (8-bit)]" %}
5526   interface(MEMORY_INTER) %{
5527     base($reg);
5528     index(0x4); // no index register is used by this form
5529     scale(0x0);
5530     disp($off);
5531   %}
5532 %}
5533 
5534 // Indirect Narrow Oop Plus Long Offset Operand: [reg + immL32], when the narrow oop shift is 0
5535 operand indOffset32Narrow(rRegN reg, immL32 off)
5536 %{
5537   predicate(Universe::narrow_oop_shift() == 0);
5538   constraint(ALLOC_IN_RC(ptr_reg));
5539   match(AddP (DecodeN reg) off); // same tree shape as indOffset8Narrow; only the immediate operand type differs
5540 
5541   format %{ "[$reg + $off (32-bit)]" %}
5542   interface(MEMORY_INTER) %{
5543     base($reg);
5544     index(0x4);  // No Index
5545     scale(0x0);  // No Scale
5546     disp($off);
5547   %}
5548 %}
5549 
5550 // Indirect Narrow Oop Plus Long Index Register Plus Offset Operand, when the narrow oop shift is 0
5551 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5552 %{
5553   predicate(Universe::narrow_oop_shift() == 0);
5554   constraint(ALLOC_IN_RC(ptr_reg));
5555   match(AddP (AddP (DecodeN reg) lreg) off);
5556 
5557   op_cost(10);
5558   format %{"[$reg + $off + $lreg]" %}
5559   interface(MEMORY_INTER) %{
5560     base($reg);
5561     index($lreg);
5562     scale(0x0);  // No Scale
5563     disp($off);
5564   %}
5565 %}
5566 
5567 // Indirect Narrow Oop Plus Long Index Register Operand (no offset), when the narrow oop shift is 0
5568 operand indIndexNarrow(rRegN reg, rRegL lreg)
5569 %{
5570   predicate(Universe::narrow_oop_shift() == 0);
5571   constraint(ALLOC_IN_RC(ptr_reg));
5572   match(AddP (DecodeN reg) lreg);
5573 
5574   op_cost(10);
5575   format %{"[$reg + $lreg]" %}
5576   interface(MEMORY_INTER) %{
5577     base($reg);
5578     index($lreg);
5579     scale(0x0);  // No Scale
5580     disp(0x0);   // No Displacement
5581   %}
5582 %}
5583 
5584 // Indirect Narrow Oop Plus Scaled Long Index Register Operand, when the narrow oop shift is 0
5585 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5586 %{
5587   predicate(Universe::narrow_oop_shift() == 0);
5588   constraint(ALLOC_IN_RC(ptr_reg));
5589   match(AddP (DecodeN reg) (LShiftL lreg scale));
5590 
5591   op_cost(10);
5592   format %{"[$reg + $lreg << $scale]" %}
5593   interface(MEMORY_INTER) %{
5594     base($reg);
5595     index($lreg);
5596     scale($scale); // the matched LShiftL amount becomes the addressing scale
5597     disp(0x0);     // No Displacement
5598   %}
5599 %}
5600 
5601 // Indirect Narrow Oop Plus Scaled Long Index Register Plus Offset Operand, when the narrow oop shift is 0
5602 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5603 %{
5604   predicate(Universe::narrow_oop_shift() == 0);
5605   constraint(ALLOC_IN_RC(ptr_reg));
5606   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5607 
5608   op_cost(10);
5609   format %{"[$reg + $off + $lreg << $scale]" %}
5610   interface(MEMORY_INTER) %{
5611     base($reg);
5612     index($lreg);
5613     scale($scale); // the matched LShiftL amount becomes the addressing scale
5614     disp($off);
5615   %}
5616 %}
5617 
5618 // Indirect Narrow Oop Plus Scaled Non-Negative Int Index Register Plus Offset Operand
5619 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5620 %{
5621   constraint(ALLOC_IN_RC(ptr_reg));
5622   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // zero shift, and the long type at in(2)->in(3)->in(1) (presumably the ConvI2L) has _lo >= 0
5623   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5624 
5625   op_cost(10);
5626   format %{"[$reg + $off + $idx << $scale]" %}
5627   interface(MEMORY_INTER) %{
5628     base($reg);
5629     index($idx);    // the int register itself is the index; the ConvI2L is folded into the operand
5630     scale($scale);
5631     disp($off);
5632   %}
5633 %}
5634 
5635 
5636 //----------Special Memory Operands--------------------------------------------
5637 // Stack Slot Operand - This operand is used for loading and storing temporary
5638 //                      values on the stack where a match requires a value to
5639 //                      flow through memory.
5640 operand stackSlotP(sRegP reg)
5641 %{
5642   constraint(ALLOC_IN_RC(stack_slots)); // sRegP stack slot, allocated from the stack_slots register class
5643   // No match rule: this operand is never matched from a tree, it is only generated during matching
5644 
5645   format %{ "[$reg]" %}
5646   interface(MEMORY_INTER) %{
5647     base(0x4);   // RSP
5648     index(0x4);  // No Index
5649     scale(0x0);  // No Scale
5650     disp($reg);  // Stack Offset
5651   %}
5652 %}
5653 
5654 operand stackSlotI(sRegI reg)
5655 %{
5656   constraint(ALLOC_IN_RC(stack_slots)); // sRegI stack slot, allocated from the stack_slots register class
5657   // No match rule: this operand is never matched from a tree, it is only generated during matching
5658 
5659   format %{ "[$reg]" %}
5660   interface(MEMORY_INTER) %{
5661     base(0x4);   // RSP
5662     index(0x4);  // No Index
5663     scale(0x0);  // No Scale
5664     disp($reg);  // Stack Offset
5665   %}
5666 %}
5667 
5668 operand stackSlotF(sRegF reg)
5669 %{
5670   constraint(ALLOC_IN_RC(stack_slots)); // sRegF stack slot, allocated from the stack_slots register class
5671   // No match rule: this operand is never matched from a tree, it is only generated during matching
5672 
5673   format %{ "[$reg]" %}
5674   interface(MEMORY_INTER) %{
5675     base(0x4);   // RSP
5676     index(0x4);  // No Index
5677     scale(0x0);  // No Scale
5678     disp($reg);  // Stack Offset
5679   %}
5680 %}
5681 
5682 operand stackSlotD(sRegD reg)
5683 %{
5684   constraint(ALLOC_IN_RC(stack_slots)); // sRegD stack slot, allocated from the stack_slots register class
5685   // No match rule: this operand is never matched from a tree, it is only generated during matching
5686 
5687   format %{ "[$reg]" %}
5688   interface(MEMORY_INTER) %{
5689     base(0x4);   // RSP
5690     index(0x4);  // No Index
5691     scale(0x0);  // No Scale
5692     disp($reg);  // Stack Offset
5693   %}
5694 %}
5695 operand stackSlotL(sRegL reg)
5696 %{
5697   constraint(ALLOC_IN_RC(stack_slots)); // sRegL stack slot, allocated from the stack_slots register class
5698   // No match rule: this operand is never matched from a tree, it is only generated during matching
5699 
5700   format %{ "[$reg]" %}
5701   interface(MEMORY_INTER) %{
5702     base(0x4);   // RSP
5703     index(0x4);  // No Index
5704     scale(0x0);  // No Scale
5705     disp($reg);  // Stack Offset
5706   %}
5707 %}
5708 
5709 //----------Conditional Branch Operands----------------------------------------
5710 // Comparison Op  - This is the operation of the comparison, and is limited to
5711 //                  the following set of codes:
5712 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5713 //
5714 // Other attributes of the comparison, such as unsignedness, are specified
5715 // by the comparison instruction that sets a condition code flags register.
5716 // That result is represented by a flags operand whose subtype is appropriate
5717 // to the unsignedness (etc.) of the comparison.
5718 //
5719 // Later, the instruction which matches both the Comparison Op (a Bool) and
5720 // the flags (produced by the Cmp) specifies the coding of the comparison op
5721 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5722 
5723 // Comparison Code: matches any Bool and gives each test a condition encoding and mnemonic
5724 operand cmpOp()
5725 %{
5726   match(Bool);
5727 
5728   format %{ "" %}
5729   interface(COND_INTER) %{
5730     equal(0x4, "e");
5731     not_equal(0x5, "ne");
5732     less(0xC, "l");           // l/ge/le/g mnemonics here; cmpOpU uses b/nb/be/nbe instead
5733     greater_equal(0xD, "ge");
5734     less_equal(0xE, "le");
5735     greater(0xF, "g");
5736   %}
5737 %}
5738 
5739 // Comparison Code, unsigned compare.  Also used by FP compares, with
5740 // C2 (unordered) already turned into GT or LT.  The other bits,
5741 // C0 and C3, are turned into the Carry & Zero flags.
5742 operand cmpOpU()
5743 %{
5744   match(Bool);
5745 
5746   format %{ "" %}
5747   interface(COND_INTER) %{
5748     equal(0x4, "e");
5749     not_equal(0x5, "ne");
5750     less(0x2, "b");            // below
5751     greater_equal(0x3, "nb");  // not below
5752     less_equal(0x6, "be");     // below or equal
5753     greater(0x7, "nbe");       // not below or equal
5754   %}
5755 %}
5756 
5757 
5758 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
5759 operand cmpOpUCF() %{
5760   match(Bool);
5761   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5762             n->as_Bool()->_test._test == BoolTest::ge ||
5763             n->as_Bool()->_test._test == BoolTest::le ||
5764             n->as_Bool()->_test._test == BoolTest::gt); // eq and ne are excluded here; cmpOpUCF2 accepts them
5765   format %{ "" %}
5766   interface(COND_INTER) %{
5767     equal(0x4, "e");
5768     not_equal(0x5, "ne");
5769     less(0x2, "b");            // same encoding table as cmpOpU
5770     greater_equal(0x3, "nb");
5771     less_equal(0x6, "be");
5772     greater(0x7, "nbe");
5773   %}
5774 %}
5775 
5776 
5777 // Floating comparisons that can be fixed up with extra conditional jumps (ne, eq only)
5778 operand cmpOpUCF2() %{
5779   match(Bool);
5780   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5781             n->as_Bool()->_test._test == BoolTest::eq); // the remaining tests are accepted by cmpOpUCF
5782   format %{ "" %}
5783   interface(COND_INTER) %{
5784     equal(0x4, "e");
5785     not_equal(0x5, "ne");
5786     less(0x2, "b");            // same encoding table as cmpOpU
5787     greater_equal(0x3, "nb");
5788     less_equal(0x6, "be");
5789     greater(0x7, "nbe");
5790   %}
5791 %}
5792 
5793 
5794 //----------OPERAND CLASSES----------------------------------------------------
5795 // Operand Classes are groups of operands that are used to simplify
5796 // instruction definitions by not requiring the AD writer to specify separate
5797 // instructions for every form of operand when the instruction accepts
5798 // multiple operand types with the same basic encoding and format.  The classic
5799 // case of this is memory operands.
5800 
5801 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5802                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5803                indCompressedOopOffset,
5804                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5805                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5806                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5807 
5808 //----------PIPELINE-----------------------------------------------------------
5809 // Rules which define the behavior of the target architecture's pipeline.
5810 pipeline %{
5811 
5812 //----------ATTRIBUTES---------------------------------------------------------
5813 attributes %{
5814   variable_size_instructions;        // Variable size instructions
5815   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5816   instruction_unit_size = 1;         // Instruction sizes are in units of 1 byte
5817   instruction_fetch_unit_size = 16;  // The processor fetches one line
5818   instruction_fetch_units = 1;       // of 16 bytes
5819 
5820   // List of nop instructions
5821   nops( MachNop );
5822 %}
5823 
5824 //----------RESOURCES----------------------------------------------------------
5825 // Resources are the functional units available to the machine
5826 
5827 // Generic P2/P3 pipeline
5828 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5829 // 3 instructions decoded per cycle.
5830 // 2 load/store ops per cycle (NOTE(review): three units MS0-MS2 are declared below), 1 branch, 1 FPU,
5831 // 3 ALU op, only ALU0 handles mul instructions.
5832 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5833            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5834            BR, FPU,
5835            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5836 
5837 //----------PIPELINE DESCRIPTION-----------------------------------------------
5838 // Pipeline Description specifies the stages in the machine's pipeline
5839 
5840 // Generic P2/P3 pipeline: six stages, S0 through S5
5841 pipe_desc(S0, S1, S2, S3, S4, S5);
5842 
5843 //----------PIPELINE CLASSES---------------------------------------------------
5844 // Pipeline Classes describe the stages in which input and output are
5845 // referenced by the hardware pipeline.
5846 
5847 // Naming convention: ialu or fpu
5848 // Then: _reg
5849 // Then: _reg if there is a 2nd register
5850 // Then: _long if it's a pair of instructions implementing a long
5851 // Then: _fat if it requires the big decoder
5852 //   Or: _mem if it requires the big decoder and a memory unit.
5853 
5854 // Integer ALU reg operation
5855 pipe_class ialu_reg(rRegI dst)
5856 %{
5857     single_instruction;
5858     dst    : S4(write);
5859     dst    : S3(read);
5860     DECODE : S0;        // any decoder
5861     ALU    : S3;        // any alu
5862 %}
5863 
5864 // Long ALU reg operation
5865 pipe_class ialu_reg_long(rRegL dst)
5866 %{
5867     instruction_count(2);
5868     dst    : S4(write);
5869     dst    : S3(read);
5870     DECODE : S0(2);     // any 2 decoders
5871     ALU    : S3(2);     // any alu, count 2
5872 %}
5873 
5874 // Integer ALU reg operation using big decoder
5875 pipe_class ialu_reg_fat(rRegI dst)
5876 %{
5877     single_instruction;
5878     dst    : S4(write);
5879     dst    : S3(read);
5880     D0     : S0;        // big decoder only
5881     ALU    : S3;        // any alu
5882 %}
5883 
5884 // Long ALU reg operation using big decoder
5885 pipe_class ialu_reg_long_fat(rRegL dst)
5886 %{
5887     instruction_count(2);
5888     dst    : S4(write);
5889     dst    : S3(read);
5890     D0     : S0(2);     // big decoder only; twice
5891     ALU    : S3(2);     // any 2 alus
5892 %}
5893 
5894 // Integer ALU reg-reg operation
5895 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5896 %{
5897     single_instruction;
5898     dst    : S4(write);
5899     src    : S3(read);
5900     DECODE : S0;        // any decoder
5901     ALU    : S3;        // any alu
5902 %}
5903 
5904 // Long ALU reg-reg operation
5905 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5906 %{
5907     instruction_count(2);
5908     dst    : S4(write);
5909     src    : S3(read);
5910     DECODE : S0(2);     // any 2 decoders
5911     ALU    : S3(2);     // any alu, count 2
5912 %}
5913 
5914 // Integer ALU reg-reg operation using big decoder (src is declared as a memory operand)
5915 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5916 %{
5917     single_instruction;
5918     dst    : S4(write);
5919     src    : S3(read);
5920     D0     : S0;        // big decoder only
5921     ALU    : S3;        // any alu
5922 %}
5923 
5924 // Long ALU reg-reg operation using big decoder
5925 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5926 %{
5927     instruction_count(2);
5928     dst    : S4(write);
5929     src    : S3(read);
5930     D0     : S0(2);     // big decoder only; twice
5931     ALU    : S3(2);     // any alu, count 2
5932 %}
5933 
5934 // Integer ALU reg-mem operation
5935 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5936 %{
5937     single_instruction;
5938     dst    : S5(write);
5939     mem    : S3(read);
5940     D0     : S0;        // big decoder only
5941     ALU    : S4;        // any alu
5942     MEM    : S3;        // any mem
5943 %}
5944 
5945 // Integer mem operation (prefetch)
5946 pipe_class ialu_mem(memory mem)
5947 %{
5948     single_instruction;
5949     mem    : S3(read);
5950     D0     : S0;        // big decoder only
5951     MEM    : S3;        // any mem
5952 %}
5953 
5954 // Integer Store to Memory (register source)
5955 pipe_class ialu_mem_reg(memory mem, rRegI src)
5956 %{
5957     single_instruction;
5958     mem    : S3(read);
5959     src    : S5(read);
5960     D0     : S0;        // big decoder only
5961     ALU    : S4;        // any alu
5962     MEM    : S3;
5963 %}
5964 
5965 // // Long Store to Memory
5966 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5967 // %{
5968 //     instruction_count(2);
5969 //     mem    : S3(read);
5970 //     src    : S5(read);
5971 //     D0     : S0(2);          // big decoder only; twice
5972 //     ALU    : S4(2);     // any 2 alus
5973 //     MEM    : S3(2);  // Both mems
5974 // %}
5975 
5976 // Integer Store to Memory (no register source; presumably an immediate, per the _imm name)
5977 pipe_class ialu_mem_imm(memory mem)
5978 %{
5979     single_instruction;
5980     mem    : S3(read);
5981     D0     : S0;        // big decoder only
5982     ALU    : S4;        // any alu
5983     MEM    : S3;
5984 %}
5985 
5986 // Integer ALU0 reg-reg operation
5987 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5988 %{
5989     single_instruction;
5990     dst    : S4(write);
5991     src    : S3(read);
5992     D0     : S0;        // Big decoder only
5993     ALU0   : S3;        // only alu0
5994 %}
5995 
5996 // Integer ALU0 reg-mem operation
5997 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5998 %{
5999     single_instruction;
6000     dst    : S5(write);
6001     mem    : S3(read);
6002     D0     : S0;        // big decoder only
6003     ALU0   : S4;        // ALU0 only
6004     MEM    : S3;        // any mem
6005 %}
6006 
6007 // Integer ALU reg-reg operation that writes the flags register
6008 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6009 %{
6010     single_instruction;
6011     cr     : S4(write);
6012     src1   : S3(read);
6013     src2   : S3(read);
6014     DECODE : S0;        // any decoder
6015     ALU    : S3;        // any alu
6016 %}
6017 
6018 // Integer ALU reg-imm operation that writes the flags register
6019 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6020 %{
6021     single_instruction;
6022     cr     : S4(write);
6023     src1   : S3(read);
6024     DECODE : S0;        // any decoder
6025     ALU    : S3;        // any alu
6026 %}
6027 
6028 // Integer ALU reg-mem operation that writes the flags register
6029 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6030 %{
6031     single_instruction;
6032     cr     : S4(write);
6033     src1   : S3(read);
6034     src2   : S3(read);
6035     D0     : S0;        // big decoder only
6036     ALU    : S4;        // any alu
6037     MEM    : S3;
6038 %}
6039 
6040 // Four-instruction idiom reading p, q and y (presumably compare-less-than, per the name; no write is declared)
6041 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6042 %{
6043     instruction_count(4);
6044     y      : S4(read);
6045     q      : S3(read);
6046     p      : S3(read);
6047     DECODE : S0(4);     // any decoder
6048 %}
6049 
6050 // Conditional move reg-reg
6051 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6052 %{
6053     single_instruction;
6054     dst    : S4(write);
6055     src    : S3(read);
6056     cr     : S3(read);
6057     DECODE : S0;        // any decoder
6058 %}
6059 
6060 // Conditional move reg-mem
6061 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6062 %{
6063     single_instruction;
6064     dst    : S4(write);
6065     src    : S3(read);
6066     cr     : S3(read);
6067     DECODE : S0;        // any decoder
6068     MEM    : S3;
6069 %}
6070 
6071 // Conditional move reg-reg long
6072 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6073 %{
6074     single_instruction; // NOTE(review): declared as a single instruction although DECODE below has a count of 2
6075     dst    : S4(write);
6076     src    : S3(read);
6077     cr     : S3(read);
6078     DECODE : S0(2);     // any 2 decoders
6079 %}
6080 
6081 // XXX
6082 // // Conditional move double reg-reg
6083 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
6084 // %{
6085 //     single_instruction;
6086 //     dst    : S4(write);
6087 //     src    : S3(read);
6088 //     cr     : S3(read);
6089 //     DECODE : S0;     // any decoder
6090 // %}
6091 
6092 // Float reg operation
6093 pipe_class fpu_reg(regD dst)
6094 %{
6095     instruction_count(2);
6096     dst    : S3(read);
6097     DECODE : S0(2);     // any 2 decoders
6098     FPU    : S3;
6099 %}
6100 
6101 // Float reg-reg operation
6102 pipe_class fpu_reg_reg(regD dst, regD src)
6103 %{
6104     instruction_count(2);
6105     dst    : S4(write);
6106     src    : S3(read);
6107     DECODE : S0(2);     // any 2 decoders
6108     FPU    : S3;
6109 %}
6110 
6111 // Float reg-reg-reg operation
6112 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6113 %{
6114     instruction_count(3);
6115     dst    : S4(write);
6116     src1   : S3(read);
6117     src2   : S3(read);
6118     DECODE : S0(3);     // any 3 decoders
6119     FPU    : S3(2);
6120 %}
6121 
6122 // Float reg-reg-reg-reg operation
6123 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6124 %{
6125     instruction_count(4);
6126     dst    : S4(write);
6127     src1   : S3(read);
6128     src2   : S3(read);
6129     src3   : S3(read);
6130     DECODE : S0(4);     // any decoder, count 4
6131     FPU    : S3(2);
6132 %}
6133 
6134 // Float reg-mem-reg-reg operation
6135 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6136 %{
6137     instruction_count(4);
6138     dst    : S4(write);
6139     src1   : S3(read);
6140     src2   : S3(read);
6141     src3   : S3(read);
6142     DECODE : S1(3);     // any 3 decoders
6143     D0     : S0;        // Big decoder only
6144     FPU    : S3(2);
6145     MEM    : S3;
6146 %}
6147 
6148 // Float reg-mem operation
6149 pipe_class fpu_reg_mem(regD dst, memory mem)
6150 %{
6151     instruction_count(2);
6152     dst    : S5(write);
6153     mem    : S3(read);
6154     D0     : S0;        // big decoder only
6155     DECODE : S1;        // any decoder for FPU POP
6156     FPU    : S4;
6157     MEM    : S3;        // any mem
6158 %}
6159 
6160 // Float reg-reg-mem operation
6161 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6162 %{
6163     instruction_count(3);
6164     dst    : S5(write);
6165     src1   : S3(read);
6166     mem    : S3(read);
6167     D0     : S0;        // big decoder only
6168     DECODE : S1(2);     // any decoder for FPU POP
6169     FPU    : S4;
6170     MEM    : S3;        // any mem
6171 %}
6172 
6173 // Float mem-reg operation
6174 pipe_class fpu_mem_reg(memory mem, regD src)
6175 %{
6176     instruction_count(2);
6177     src    : S5(read);
6178     mem    : S3(read);
6179     DECODE : S0;        // any decoder for FPU PUSH
6180     D0     : S1;        // big decoder only
6181     FPU    : S4;
6182     MEM    : S3;        // any mem
6183 %}
6184 
6185 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6186 %{
6187     instruction_count(3); // Float mem-reg-reg operation
6188     src1   : S3(read);
6189     src2   : S3(read);
6190     mem    : S3(read);
6191     DECODE : S0(2);     // any decoder for FPU PUSH
6192     D0     : S1;        // big decoder only
6193     FPU    : S4;
6194     MEM    : S3;        // any mem
6195 %}
6196 
6197 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6198 %{
6199     instruction_count(3); // Float mem-reg-mem operation
6200     src1   : S3(read);
6201     src2   : S3(read);
6202     mem    : S4(read);
6203     DECODE : S0;        // any decoder for FPU PUSH
6204     D0     : S0(2);     // big decoder only
6205     FPU    : S4;
6206     MEM    : S3(2);     // any mem
6207 %}
6208 
6209 pipe_class fpu_mem_mem(memory dst, memory src1)
6210 %{
6211     instruction_count(2); // Float mem-mem operation; no FPU resource is declared
6212     src1   : S3(read);
6213     dst    : S4(read);
6214     D0     : S0(2);     // big decoder only
6215     MEM    : S3(2);     // any mem
6216 %}
6217 
6218 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6219 %{
6220     instruction_count(3); // Float mem-mem-mem operation
6221     src1   : S3(read);
6222     src2   : S3(read);
6223     dst    : S4(read);
6224     D0     : S0(3);     // big decoder only
6225     FPU    : S4;
6226     MEM    : S3(3);     // any mem
6227 %}
6228 
6229 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6230 %{
6231     instruction_count(3); // Float mem-reg operation (presumably with a constant, per the _con name)
6232     src1   : S4(read);
6233     mem    : S4(read);
6234     DECODE : S0;        // any decoder for FPU PUSH
6235     D0     : S0(2);     // big decoder only
6236     FPU    : S4;
6237     MEM    : S3(2);     // any mem
6238 %}
6239 
6240 // Float load constant
6241 pipe_class fpu_reg_con(regD dst)
6242 %{
6243     instruction_count(2);
6244     dst    : S5(write);
6245     D0     : S0;        // big decoder only for the load
6246     DECODE : S1;        // any decoder for FPU POP
6247     FPU    : S4;
6248     MEM    : S3;        // any mem
6249 %}
6250 
6251 // Float reg-reg operation with a constant load
6252 pipe_class fpu_reg_reg_con(regD dst, regD src)
6253 %{
6254     instruction_count(3);
6255     dst    : S5(write);
6256     src    : S3(read);
6257     D0     : S0;        // big decoder only for the load
6258     DECODE : S1(2);     // any decoder for FPU POP
6259     FPU    : S4;
6260     MEM    : S3;        // any mem
6261 %}
6262 
6263 // Unconditional branch
6264 pipe_class pipe_jmp(label labl)
6265 %{
6266     single_instruction;
6267     BR   : S3;
6268 %}
6269 
6270 // Conditional branch
6271 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6272 %{
6273     single_instruction;
6274     cr    : S1(read);
6275     BR    : S3;
6276 %}
6277 
6278 // Allocation idiom (serializing, fixed latency of 6)
6279 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6280 %{
6281     instruction_count(1); force_serialization;
6282     fixed_latency(6);
6283     heap_ptr : S3(read);
6284     DECODE   : S0(3);
6285     D0       : S2;
6286     MEM      : S3;
6287     ALU      : S3(2);
6288     dst      : S5(write);
6289     BR       : S5;
6290 %}
6291 
6292 // Generic big/slow expanded idiom (10 instructions, multiple bundles, fixed latency of 100)
6293 pipe_class pipe_slow()
6294 %{
6295     instruction_count(10); multiple_bundles; force_serialization;
6296     fixed_latency(100);
6297     D0  : S0(2);
6298     MEM : S3(2);
6299 %}
6300 
6301 // The real do-nothing guy: zero instructions, no resources
6302 pipe_class empty()
6303 %{
6304     instruction_count(0);
6305 %}
6306 
6307 // Define the class for the Nop node
6308 define
6309 %{
6310    MachNop = empty;
6311 %}
6312 
6313 %}
6314 
6315 //----------INSTRUCTIONS-------------------------------------------------------
6316 //
6317 // match      -- States which machine-independent subtree may be replaced
6318 //               by this instruction.
6319 // ins_cost   -- The estimated cost of this instruction is used by instruction
6320 //               selection to identify a minimum cost tree of machine
6321 //               instructions that matches a tree of machine-independent
6322 //               instructions.
6323 // format     -- A string providing the disassembly for this instruction.
6324 //               The value of an instruction's operand may be inserted
6325 //               by referring to it with a '$' prefix.
6326 // opcode     -- Three instruction opcodes may be provided.  These are referred
6327 //               to within an encode class as $primary, $secondary, and $tertiary
6328 //               respectively.  The primary opcode is commonly used to
6329 //               indicate the type of machine instruction, while secondary
6330 //               and tertiary are often used for prefix options or addressing
6331 //               modes.
6332 // ins_encode -- A list of encode classes with parameters. The encode class
6333 //               name must have been defined in an 'enc_class' specification
6334 //               in the encode section of the architecture description.
6335 
6336 
6337 //----------Load/Store/Move Instructions---------------------------------------
6338 //----------Load Instructions--------------------------------------------------
6339 
6340 // Load Byte (8 bit signed): matches LoadB into an int register and emits movsbl
6341 instruct loadB(rRegI dst, memory mem)
6342 %{
6343   match(Set dst (LoadB mem));
6344 
6345   ins_cost(125);
6346   format %{ "movsbl  $dst, $mem\t# byte" %}
6347 
6348   ins_encode %{
6349     __ movsbl($dst$$Register, $mem$$Address);
6350   %}
6351 
6352   ins_pipe(ialu_reg_mem);
6353 %}
6354 
6355 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a single movsbq
6356 instruct loadB2L(rRegL dst, memory mem)
6357 %{
6358   match(Set dst (ConvI2L (LoadB mem)));
6359 
6360   ins_cost(125);
6361   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6362 
6363   ins_encode %{
6364     __ movsbq($dst$$Register, $mem$$Address);
6365   %}
6366 
6367   ins_pipe(ialu_reg_mem);
6368 %}
6369 
6370 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB into an int register and emits movzbl
6371 instruct loadUB(rRegI dst, memory mem)
6372 %{
6373   match(Set dst (LoadUB mem));
6374 
6375   ins_cost(125);
6376   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6377 
6378   ins_encode %{
6379     __ movzbl($dst$$Register, $mem$$Address);
6380   %}
6381 
6382   ins_pipe(ialu_reg_mem);
6383 %}
6384 
6385 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into a single movzbq
6386 instruct loadUB2L(rRegL dst, memory mem)
6387 %{
6388   match(Set dst (ConvI2L (LoadUB mem)));
6389 
6390   ins_cost(125);
6391   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6392 
6393   ins_encode %{
6394     __ movzbq($dst$$Register, $mem$$Address);
6395   %}
6396 
6397   ins_pipe(ialu_reg_mem);
6398 %}
6399 
6400 // Load Unsigned Byte (8 bit UNsigned) with an 8-bit mask into Long Register: movzbq followed by andl
6401 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6402   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6403   effect(KILL cr); // the flags register is declared killed; the encoding emits an andl after the load
6404 
6405   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6406             "andl    $dst, $mask" %}
6407   ins_encode %{
6408     Register Rdst = $dst$$Register;
6409     __ movzbq(Rdst, $mem$$Address);
6410     __ andl(Rdst, $mask$$constant);
6411   %}
6412   ins_pipe(ialu_reg_mem);
6413 %}
6414 
6415 // Load Short (16 bit signed): matches LoadS into an int register and emits movswl
6416 instruct loadS(rRegI dst, memory mem)
6417 %{
6418   match(Set dst (LoadS mem));
6419 
6420   ins_cost(125);
6421   format %{ "movswl $dst, $mem\t# short" %}
6422 
6423   ins_encode %{
6424     __ movswl($dst$$Register, $mem$$Address);
6425   %}
6426 
6427   ins_pipe(ialu_reg_mem);
6428 %}
6429 
6430 // Load Short (16 bit signed) to Byte (8 bit signed): (LoadS << 24) >> 24 becomes a single movsbl from memory
6431 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6432   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6433 
6434   ins_cost(125);
6435   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6436   ins_encode %{
6437     __ movsbl($dst$$Register, $mem$$Address);
6438   %}
6439   ins_pipe(ialu_reg_mem);
6440 %}
6441 
6442 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a single movswq
6443 instruct loadS2L(rRegL dst, memory mem)
6444 %{
6445   match(Set dst (ConvI2L (LoadS mem)));
6446 
6447   ins_cost(125);
6448   format %{ "movswq $dst, $mem\t# short -> long" %}
6449 
6450   ins_encode %{
6451     __ movswq($dst$$Register, $mem$$Address);
6452   %}
6453 
6454   ins_pipe(ialu_reg_mem);
6455 %}
6456 
6457 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS into an int register and emits movzwl
6458 instruct loadUS(rRegI dst, memory mem)
6459 %{
6460   match(Set dst (LoadUS mem));
6461 
6462   ins_cost(125);
6463   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6464 
6465   ins_encode %{
6466     __ movzwl($dst$$Register, $mem$$Address);
6467   %}
6468 
6469   ins_pipe(ialu_reg_mem);
6470 %}
6471 
6472 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): (LoadUS << 24) >> 24 becomes a single movsbl
6473 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6474   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6475 
6476   ins_cost(125);
6477   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6478   ins_encode %{
6479     __ movsbl($dst$$Register, $mem$$Address);
6480   %}
6481   ins_pipe(ialu_reg_mem);
6482 %}
6483 
6484 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into a single movzwq
6485 instruct loadUS2L(rRegL dst, memory mem)
6486 %{
6487   match(Set dst (ConvI2L (LoadUS mem)));
6488 
6489   ins_cost(125);
6490   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6491 
6492   ins_encode %{
6493     __ movzwq($dst$$Register, $mem$$Address);
6494   %}
6495 
6496   ins_pipe(ialu_reg_mem);
6497 %}
6498 
6499 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: load and mask become one movzbq
6500 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6501   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6502 
6503   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6504   ins_encode %{
6505     __ movzbq($dst$$Register, $mem$$Address);
6506   %}
6507   ins_pipe(ialu_reg_mem);
6508 %}
6509 
6510 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register: movzwq followed by andl
6511 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6512   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6513   effect(KILL cr); // the flags register is declared killed; the encoding emits an andl after the load
6514 
6515   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6516             "andl    $dst, $mask" %}
6517   ins_encode %{
6518     Register Rdst = $dst$$Register;
6519     __ movzwq(Rdst, $mem$$Address);
6520     __ andl(Rdst, $mask$$constant);
6521   %}
6522   ins_pipe(ialu_reg_mem);
6523 %}
6524 
6525 // Load Integer: matches LoadI into an int register and emits movl
6526 instruct loadI(rRegI dst, memory mem)
6527 %{
6528   match(Set dst (LoadI mem));
6529 
6530   ins_cost(125);
6531   format %{ "movl    $dst, $mem\t# int" %}
6532 
6533   ins_encode %{
6534     __ movl($dst$$Register, $mem$$Address);
6535   %}
6536 
6537   ins_pipe(ialu_reg_mem);
6538 %}
6539 
6540 // Load Integer (32 bit signed) to Byte (8 bit signed): (LoadI << 24) >> 24 becomes a single movsbl from memory
6541 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6542   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6543 
6544   ins_cost(125);
6545   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6546   ins_encode %{
6547     __ movsbl($dst$$Register, $mem$$Address);
6548   %}
6549   ins_pipe(ialu_reg_mem);
6550 %}
6551 
6552 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matcher rule: AndI(LoadI mem, mask) with an immI_255 mask becomes one zero-extending
// byte load (movzbl); the mask operand is used for matching only.
6553 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6554   match(Set dst (AndI (LoadI mem) mask));
6555 
6556   ins_cost(125);
6557   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6558   ins_encode %{
6559     __ movzbl($dst$$Register, $mem$$Address);
6560   %}
6561   ins_pipe(ialu_reg_mem);
6562 %}
6563 
6564 // Load Integer (32 bit signed) to Short (16 bit signed)
// Matcher rule: recognizes the shift-left/shift-right-by-16 idiom applied to a LoadI
// (an int narrowed to a signed short) and replaces it with one sign-extending movswl load.
6565 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6566   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6567 
6568   ins_cost(125);
6569   format %{ "movswl  $dst, $mem\t# int -> short" %}
6570   ins_encode %{
    // The shift-count operand is only used for matching; no shift instructions are emitted.
6571     __ movswl($dst$$Register, $mem$$Address);
6572   %}
6573   ins_pipe(ialu_reg_mem);
6574 %}
6575 
6576 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matcher rule: AndI(LoadI mem, mask) with an immI_65535 mask becomes one zero-extending
// 16-bit load (movzwl); the mask operand is used for matching only.
6577 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6578   match(Set dst (AndI (LoadI mem) mask));
6579 
6580   ins_cost(125);
6581   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6582   ins_encode %{
6583     __ movzwl($dst$$Register, $mem$$Address);
6584   %}
6585   ins_pipe(ialu_reg_mem);
6586 %}
6587 
6588 // Load Integer into Long Register
// Matcher rule: ConvI2L(LoadI mem) is fused into one sign-extending movslq load
// into a long register.
6589 instruct loadI2L(rRegL dst, memory mem)
6590 %{
6591   match(Set dst (ConvI2L (LoadI mem)));
6592 
6593   ins_cost(125);
6594   format %{ "movslq  $dst, $mem\t# int -> long" %}
6595 
6596   ins_encode %{
6597     __ movslq($dst$$Register, $mem$$Address);
6598   %}
6599 
6600   ins_pipe(ialu_reg_mem);
6601 %}
6602 
6603 // Load Integer with mask 0xFF into Long Register
// Matcher rule: ConvI2L(AndI(LoadI mem, mask)) with an immI_255 mask becomes one
// zero-extending byte load (movzbq) into a long register.
6604 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6605   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6606 
6607   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6608   ins_encode %{
    // The mask operand is used for matching only; the byte-wide load implements it.
6609     __ movzbq($dst$$Register, $mem$$Address);
6610   %}
6611   ins_pipe(ialu_reg_mem);
6612 %}
6613 
6614 // Load Integer with mask 0xFFFF into Long Register
// Matcher rule: ConvI2L(AndI(LoadI mem, mask)) with an immI_65535 mask becomes one
// zero-extending 16-bit load (movzwq) into a long register.
6615 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6616   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6617 
6618   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6619   ins_encode %{
    // The mask operand is used for matching only; the 16-bit load implements it.
6620     __ movzwq($dst$$Register, $mem$$Address);
6621   %}
6622   ins_pipe(ialu_reg_mem);
6623 %}
6624 
6625 // Load Integer with a 32-bit mask into Long Register
// Matcher rule: ConvI2L(AndI(LoadI mem, mask)) for a general int mask, emitted as a
// 32-bit load followed by a 32-bit AND with the mask constant.
//
// Both movl and andl leave the upper 32 bits of the destination zero, i.e. the result
// is ZERO-extended, whereas ConvI2L requires SIGN extension. The two agree only when
// the AND result cannot be negative, which is guaranteed exactly when the mask has
// bit 31 clear. The predicate therefore restricts this rule to non-negative masks;
// for negative masks the matcher falls back to the separate and/convert rules.
// In the predicate, n is the ConvI2L node, n->in(1) the AndI and n->in(1)->in(2) the
// constant mask node.
// The AND writes the condition codes, hence the KILL effect on cr.
6626 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6627   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6628   effect(KILL cr);
6629   predicate(n->in(1)->in(2)->get_int() >= 0); // zero-extension == sign-extension only for masks with bit 31 clear
6630   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6631             "andl    $dst, $mask" %}
6632   ins_encode %{
6633     Register Rdst = $dst$$Register;
6634     __ movl(Rdst, $mem$$Address);
6635     __ andl(Rdst, $mask$$constant);
6636   %}
6637   ins_pipe(ialu_reg_mem);
6638 %}
6639 
6640 // Load Unsigned Integer into Long Register
// Matcher rule: LoadUI2L (unsigned int widened to long, per the format string) is
// implemented by a plain 32-bit movl into a long register.
6641 instruct loadUI2L(rRegL dst, memory mem)
6642 %{
6643   match(Set dst (LoadUI2L mem));
6644 
6645   ins_cost(125);
6646   format %{ "movl    $dst, $mem\t# uint -> long" %}
6647 
6648   ins_encode %{
6649     __ movl($dst$$Register, $mem$$Address);
6650   %}
6651 
6652   ins_pipe(ialu_reg_mem);
6653 %}
6654 
6655 // Load Long
// Matcher rule: a plain LoadL is implemented by one 64-bit movq from $mem into $dst.
6656 instruct loadL(rRegL dst, memory mem)
6657 %{
6658   match(Set dst (LoadL mem));
6659 
6660   ins_cost(125);
6661   format %{ "movq    $dst, $mem\t# long" %}
6662 
6663   ins_encode %{
6664     __ movq($dst$$Register, $mem$$Address);
6665   %}
6666 
6667   ins_pipe(ialu_reg_mem); // XXX
6668 %}
6669 
6670 // Load Range
// Matcher rule: LoadRange is implemented by a 32-bit movl from memory.
// Hand-encoded: REX_reg_mem prefix enc_class, primary opcode 0x8B (OpcP), then the
// reg_mem operand bytes (these enc_classes are defined elsewhere in this file).
6671 instruct loadRange(rRegI dst, memory mem)
6672 %{
6673   match(Set dst (LoadRange mem));
6674 
6675   ins_cost(125); // XXX
6676   format %{ "movl    $dst, $mem\t# range" %}
6677   opcode(0x8B);
6678   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6679   ins_pipe(ialu_reg_mem);
6680 %}
6681 
6682 // Load Pointer
// Matcher rule: LoadP is implemented by a 64-bit movq from memory into a pointer register.
// Hand-encoded: REX_reg_mem_wide prefix enc_class, primary opcode 0x8B (OpcP), then the
// reg_mem operand bytes (these enc_classes are defined elsewhere in this file).
6683 instruct loadP(rRegP dst, memory mem)
6684 %{
6685   match(Set dst (LoadP mem));
6686 
6687   ins_cost(125); // XXX
6688   format %{ "movq    $dst, $mem\t# ptr" %}
6689   opcode(0x8B);
6690   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6691   ins_pipe(ialu_reg_mem); // XXX
6692 %}
6693 
6694 // Load Compressed Pointer
// Matcher rule: LoadN (compressed pointer) is implemented by a 32-bit movl from memory
// into a narrow-oop register; no decoding is done here.
6695 instruct loadN(rRegN dst, memory mem)
6696 %{
6697    match(Set dst (LoadN mem));
6698 
6699    ins_cost(125); // XXX
6700    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6701    ins_encode %{
6702      __ movl($dst$$Register, $mem$$Address);
6703    %}
6704    ins_pipe(ialu_reg_mem); // XXX
6705 %}
6706 
6707 
6708 // Load Klass Pointer
// Matcher rule: LoadKlass is implemented by a 64-bit movq from memory into a pointer register.
// Uses the same hand-written encoding as loadP (REX_reg_mem_wide, opcode 0x8B, reg_mem).
6709 instruct loadKlass(rRegP dst, memory mem)
6710 %{
6711   match(Set dst (LoadKlass mem));
6712 
6713   ins_cost(125); // XXX
6714   format %{ "movq    $dst, $mem\t# class" %}
6715   opcode(0x8B);
6716   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6717   ins_pipe(ialu_reg_mem); // XXX
6718 %}
6719 
6720 // Load narrow Klass Pointer
// Matcher rule: LoadNKlass (compressed klass pointer) is implemented by a 32-bit movl
// from memory into a narrow register; no decoding is done here.
6721 instruct loadNKlass(rRegN dst, memory mem)
6722 %{
6723   match(Set dst (LoadNKlass mem));
6724 
6725   ins_cost(125); // XXX
6726   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6727   ins_encode %{
6728     __ movl($dst$$Register, $mem$$Address);
6729   %}
6730   ins_pipe(ialu_reg_mem); // XXX
6731 %}
6732 
6733 // Load Float
// Matcher rule: LoadF is implemented by movss from memory into a float register.
// Hand-encoded as the three opcode bytes F3 0F 10 (OpcP, OpcS, OpcT) with the REX
// prefix enc_class placed between the first and second byte.
6734 instruct loadF(regF dst, memory mem)
6735 %{
6736   match(Set dst (LoadF mem));
6737 
6738   ins_cost(145); // XXX
6739   format %{ "movss   $dst, $mem\t# float" %}
6740   opcode(0xF3, 0x0F, 0x10);
6741   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6742   ins_pipe(pipe_slow); // XXX
6743 %}
6744 
6745 // Load Double
// Matcher rule: LoadD implemented with movlpd (opcode bytes 66 0F 12).
// Selected only when UseXmmLoadAndClearUpper is off; loadD covers the other setting.
6746 instruct loadD_partial(regD dst, memory mem)
6747 %{
6748   predicate(!UseXmmLoadAndClearUpper);
6749   match(Set dst (LoadD mem));
6750 
6751   ins_cost(145); // XXX
6752   format %{ "movlpd  $dst, $mem\t# double" %}
6753   opcode(0x66, 0x0F, 0x12);
6754   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6755   ins_pipe(pipe_slow); // XXX
6756 %}
6757 
// Matcher rule: LoadD implemented with movsd (opcode bytes F2 0F 10).
// Selected only when UseXmmLoadAndClearUpper is on; loadD_partial covers the other setting.
6758 instruct loadD(regD dst, memory mem)
6759 %{
6760   predicate(UseXmmLoadAndClearUpper);
6761   match(Set dst (LoadD mem));
6762 
6763   ins_cost(145); // XXX
6764   format %{ "movsd   $dst, $mem\t# double" %}
6765   opcode(0xF2, 0x0F, 0x10);
6766   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6767   ins_pipe(pipe_slow); // XXX
6768 %}
6769 
6770 // Load Aligned Packed Byte to XMM register
// Matcher rule: Load8B (eight packed bytes) is loaded into an XMM (regD) register
// through the movq_ld enc_class, which is defined elsewhere in this file.
6771 instruct loadA8B(regD dst, memory mem) %{
6772   match(Set dst (Load8B mem));
6773   ins_cost(125);
6774   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6775   ins_encode( movq_ld(dst, mem));
6776   ins_pipe( pipe_slow );
6777 %}
6778 
6779 // Load Aligned Packed Short to XMM register
// Matcher rule: Load4S (four packed shorts) is loaded into an XMM (regD) register
// through the movq_ld enc_class, which is defined elsewhere in this file.
6780 instruct loadA4S(regD dst, memory mem) %{
6781   match(Set dst (Load4S mem));
6782   ins_cost(125);
6783   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6784   ins_encode( movq_ld(dst, mem));
6785   ins_pipe( pipe_slow );
6786 %}
6787 
6788 // Load Aligned Packed Char to XMM register
// Matcher rule: Load4C (four packed chars) is loaded into an XMM (regD) register
// through the movq_ld enc_class, which is defined elsewhere in this file.
6789 instruct loadA4C(regD dst, memory mem) %{
6790   match(Set dst (Load4C mem));
6791   ins_cost(125);
6792   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6793   ins_encode( movq_ld(dst, mem));
6794   ins_pipe( pipe_slow );
6795 %}
6796 
6797 // Load Aligned Packed Integer to XMM register
// Matcher rule: Load2I (two packed ints) is loaded into an XMM (regD) register
// through the movq_ld enc_class, which is defined elsewhere in this file.
// NOTE(review): the name does not follow the loadA* pattern of the sibling packed loads;
// it is kept because other code may refer to it.
6798 instruct load2IU(regD dst, memory mem) %{
6799   match(Set dst (Load2I mem));
6800   ins_cost(125);
6801   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6802   ins_encode( movq_ld(dst, mem));
6803   ins_pipe( pipe_slow );
6804 %}
6805 
6806 // Load Aligned Packed Single to XMM
// Matcher rule: Load2F (two packed floats) is loaded into an XMM (regD) register
// through the movq_ld enc_class; costed at 145 versus 125 for the integer packed loads.
6807 instruct loadA2F(regD dst, memory mem) %{
6808   match(Set dst (Load2F mem));
6809   ins_cost(145);
6810   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6811   ins_encode( movq_ld(dst, mem));
6812   ins_pipe( pipe_slow );
6813 %}
6814 
6815 // Load Effective Address
// Matcher rule: materializes the address described by an indOffset8 operand into a
// pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6816 instruct leaP8(rRegP dst, indOffset8 mem)
6817 %{
6818   match(Set dst mem);
6819 
6820   ins_cost(110); // XXX
6821   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6822   opcode(0x8D);
6823   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6824   ins_pipe(ialu_reg_reg_fat);
6825 %}
6826 
// Matcher rule: materializes the address described by an indOffset32 operand into a
// pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6827 instruct leaP32(rRegP dst, indOffset32 mem)
6828 %{
6829   match(Set dst mem);
6830 
6831   ins_cost(110);
6832   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6833   opcode(0x8D);
6834   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6835   ins_pipe(ialu_reg_reg_fat);
6836 %}
6837 
6838 // instruct leaPIdx(rRegP dst, indIndex mem)
6839 // %{
6840 //   match(Set dst mem);
6841 
6842 //   ins_cost(110);
6843 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6844 //   opcode(0x8D);
6845 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6846 //   ins_pipe(ialu_reg_reg_fat);
6847 // %}
6848 
// Matcher rule: materializes the address described by an indIndexOffset operand into a
// pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6849 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6850 %{
6851   match(Set dst mem);
6852 
6853   ins_cost(110);
6854   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6855   opcode(0x8D);
6856   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6857   ins_pipe(ialu_reg_reg_fat);
6858 %}
6859 
// Matcher rule: materializes the address described by an indIndexScale operand into a
// pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6860 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6861 %{
6862   match(Set dst mem);
6863 
6864   ins_cost(110);
6865   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6866   opcode(0x8D);
6867   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6868   ins_pipe(ialu_reg_reg_fat);
6869 %}
6870 
// Matcher rule: materializes the address described by an indIndexScaleOffset operand into
// a pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6871 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6872 %{
6873   match(Set dst mem);
6874 
6875   ins_cost(110);
6876   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6877   opcode(0x8D);
6878   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6879   ins_pipe(ialu_reg_reg_fat);
6880 %}
6881 
// Matcher rule: materializes the address described by an indPosIndexScaleOffset operand
// into a pointer register with leaq (opcode 0x8D, REX_reg_mem_wide prefix enc_class).
6882 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6883 %{
6884   match(Set dst mem);
6885 
6886   ins_cost(110);
6887   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6888   opcode(0x8D);
6889   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6890   ins_pipe(ialu_reg_reg_fat);
6891 %}
6892 
6893 // Load Effective Address which uses Narrow (32-bits) oop
// Matcher rule: materializes the address described by an indCompressedOopOffset operand
// with leaq. Enabled only with compressed oops and a non-zero narrow-oop shift;
// the *Narrow lea rules in this file cover the zero-shift case.
6894 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6895 %{
6896   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6897   match(Set dst mem);
6898 
6899   ins_cost(110);
6900   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6901   opcode(0x8D);
6902   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6903   ins_pipe(ialu_reg_reg_fat);
6904 %}
6905 
// Matcher rule: materializes the address described by an indOffset8Narrow operand with
// leaq. Enabled only when the narrow-oop shift is zero.
6906 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6907 %{
6908   predicate(Universe::narrow_oop_shift() == 0);
6909   match(Set dst mem);
6910 
6911   ins_cost(110); // XXX
6912   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6913   opcode(0x8D);
6914   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6915   ins_pipe(ialu_reg_reg_fat);
6916 %}
6917 
// Matcher rule: materializes the address described by an indOffset32Narrow operand with
// leaq. Enabled only when the narrow-oop shift is zero.
6918 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6919 %{
6920   predicate(Universe::narrow_oop_shift() == 0);
6921   match(Set dst mem);
6922 
6923   ins_cost(110);
6924   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6925   opcode(0x8D);
6926   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6927   ins_pipe(ialu_reg_reg_fat);
6928 %}
6929 
// Matcher rule: materializes the address described by an indIndexOffsetNarrow operand
// with leaq. Enabled only when the narrow-oop shift is zero.
6930 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6931 %{
6932   predicate(Universe::narrow_oop_shift() == 0);
6933   match(Set dst mem);
6934 
6935   ins_cost(110);
6936   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6937   opcode(0x8D);
6938   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6939   ins_pipe(ialu_reg_reg_fat);
6940 %}
6941 
// Matcher rule: materializes the address described by an indIndexScaleNarrow operand
// with leaq. Enabled only when the narrow-oop shift is zero.
6942 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6943 %{
6944   predicate(Universe::narrow_oop_shift() == 0);
6945   match(Set dst mem);
6946 
6947   ins_cost(110);
6948   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6949   opcode(0x8D);
6950   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6951   ins_pipe(ialu_reg_reg_fat);
6952 %}
6953 
// Matcher rule: materializes the address described by an indIndexScaleOffsetNarrow
// operand with leaq. Enabled only when the narrow-oop shift is zero.
6954 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6955 %{
6956   predicate(Universe::narrow_oop_shift() == 0);
6957   match(Set dst mem);
6958 
6959   ins_cost(110);
6960   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6961   opcode(0x8D);
6962   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6963   ins_pipe(ialu_reg_reg_fat);
6964 %}
6965 
// Matcher rule: materializes the address described by an indPosIndexScaleOffsetNarrow
// operand with leaq. Enabled only when the narrow-oop shift is zero.
6966 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6967 %{
6968   predicate(Universe::narrow_oop_shift() == 0);
6969   match(Set dst mem);
6970 
6971   ins_cost(110);
6972   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6973   opcode(0x8D);
6974   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6975   ins_pipe(ialu_reg_reg_fat);
6976 %}
6977 
// Matcher rule: loads an arbitrary int constant (immI) into a register via the
// load_immI enc_class (defined elsewhere in this file). No explicit ins_cost is given.
6978 instruct loadConI(rRegI dst, immI src)
6979 %{
6980   match(Set dst src);
6981 
6982   format %{ "movl    $dst, $src\t# int" %}
6983   ins_encode(load_immI(dst, src));
6984   ins_pipe(ialu_reg_fat); // XXX
6985 %}
6986 
// Matcher rule: loads the int constant zero (immI0) by xor-ing the register with itself
// (opcode 0x33). The xor writes the condition codes, hence the KILL effect on cr.
6987 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6988 %{
6989   match(Set dst src);
6990   effect(KILL cr);
6991 
6992   ins_cost(50);
6993   format %{ "xorl    $dst, $dst\t# int" %}
6994   opcode(0x33); /* + rd */
6995   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6996   ins_pipe(ialu_reg);
6997 %}
6998 
// Matcher rule: loads an arbitrary long constant (immL) via the load_immL enc_class.
// Costed at 150, above the narrower long-constant rules in this file (50, 60 and 70).
6999 instruct loadConL(rRegL dst, immL src)
7000 %{
7001   match(Set dst src);
7002 
7003   ins_cost(150);
7004   format %{ "movq    $dst, $src\t# long" %}
7005   ins_encode(load_immL(dst, src));
7006   ins_pipe(ialu_reg);
7007 %}
7008 
// Matcher rule: loads the long constant zero (immL0) with a 32-bit xorl of the register
// with itself (opcode 0x33, non-wide REX_reg_reg); a 32-bit write also clears the upper
// half of the 64-bit register. The xor writes the condition codes, hence KILL cr.
7009 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7010 %{
7011   match(Set dst src);
7012   effect(KILL cr);
7013 
7014   ins_cost(50);
7015   format %{ "xorl    $dst, $dst\t# long" %}
7016   opcode(0x33); /* + rd */
7017   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
7018   ins_pipe(ialu_reg); // XXX
7019 %}
7020 
// Matcher rule: loads a long constant that fits an unsigned 32-bit value (immUL32) with a
// 32-bit movl, via the load_immUL32 enc_class (defined elsewhere in this file).
7021 instruct loadConUL32(rRegL dst, immUL32 src)
7022 %{
7023   match(Set dst src);
7024 
7025   ins_cost(60);
7026   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
7027   ins_encode(load_immUL32(dst, src));
7028   ins_pipe(ialu_reg);
7029 %}
7030 
// Matcher rule: loads a long constant representable as a 32-bit immediate (immL32) with
// movq, via the load_immL32 enc_class (defined elsewhere in this file).
7031 instruct loadConL32(rRegL dst, immL32 src)
7032 %{
7033   match(Set dst src);
7034 
7035   ins_cost(70);
7036   format %{ "movq    $dst, $src\t# long (32-bit)" %}
7037   ins_encode(load_immL32(dst, src));
7038   ins_pipe(ialu_reg);
7039 %}
7040 
// Matcher rule: loads an arbitrary pointer constant (immP) via the load_immP enc_class
// (defined elsewhere in this file). No explicit ins_cost is given.
7041 instruct loadConP(rRegP dst, immP src)
7042 %{
7043   match(Set dst src);
7044 
7045   format %{ "movq    $dst, $src\t# ptr" %}
7046   ins_encode(load_immP(dst, src));
7047   ins_pipe(ialu_reg_fat); // XXX
7048 %}
7049 
// Matcher rule: loads the NULL pointer constant (immP0) with a 32-bit xorl of the register
// with itself (opcode 0x33). The xor writes the condition codes, hence KILL cr.
7050 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7051 %{
7052   match(Set dst src);
7053   effect(KILL cr);
7054 
7055   ins_cost(50);
7056   format %{ "xorl    $dst, $dst\t# ptr" %}
7057   opcode(0x33); /* + rd */
7058   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
7059   ins_pipe(ialu_reg);
7060 %}
7061 
// Matcher rule: loads a pointer constant that is a positive 32-bit value (immP31, per the
// format string) with a 32-bit movl, via the load_immP31 enc_class.
// NOTE(review): cr is declared KILLed although the format shows only a movl; whether
// load_immP31 can touch the flags is not visible here - confirm before removing the effect.
7062 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7063 %{
7064   match(Set dst src);
7065   effect(KILL cr);
7066 
7067   ins_cost(60);
7068   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
7069   ins_encode(load_immP31(dst, src));
7070   ins_pipe(ialu_reg);
7071 %}
7072 
// Matcher rule: loads a float constant (immF) into a float register via the load_conF
// enc_class; the format string shows it as a movss from a memory location holding the constant.
7073 instruct loadConF(regF dst, immF src)
7074 %{
7075   match(Set dst src);
7076   ins_cost(125);
7077 
7078   format %{ "movss   $dst, [$src]" %}
7079   ins_encode(load_conF(dst, src));
7080   ins_pipe(pipe_slow);
7081 %}
7082 
// Matcher rule: loads the compressed NULL pointer constant (immN0) by xor-ing the
// destination register with itself. The xor writes the condition codes, hence KILL cr.
// The format string names $dst twice so that the printed disassembly matches the
// instruction that is actually emitted (the constant operand is never encoded).
7083 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7084   match(Set dst src);
7085   effect(KILL cr);
7086   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
7087   ins_encode %{
7088     __ xorq($dst$$Register, $dst$$Register);
7089   %}
7090   ins_pipe(ialu_reg);
7091 %}
7092 
// Matcher rule: loads a non-NULL compressed pointer constant (immN) into a narrow register
// using the assembler's set_narrow_oop. A NULL constant is not expected to reach this rule
// and trips ShouldNotReachHere().
7093 instruct loadConN(rRegN dst, immN src) %{
7094   match(Set dst src);
7095 
7096   ins_cost(125);
7097   format %{ "movl    $dst, $src\t# compressed ptr" %}
7098   ins_encode %{
7099     address oop_addr = (address)$src$$constant;
7100     if (oop_addr != NULL) {
7101       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7102     } else {
7103       ShouldNotReachHere();
7104     }
7105   %}
7106   ins_pipe(ialu_reg_fat); // XXX
7107 %}
7108 
// Matcher rule: loads float 0.0 (immF0) by xor-ing the XMM register with itself using
// xorps (opcode bytes 0F 57); costed below the general loadConF rule (100 vs 125).
7109 instruct loadConF0(regF dst, immF0 src)
7110 %{
7111   match(Set dst src);
7112   ins_cost(100);
7113 
7114   format %{ "xorps   $dst, $dst\t# float 0.0" %}
7115   opcode(0x0F, 0x57);
7116   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
7117   ins_pipe(pipe_slow);
7118 %}
7119 
7120 // Use the same format since predicate() can not be used here.
// Matcher rule: loads a double constant (immD) into a double register via the load_conD
// enc_class; the format string shows it as a movsd from a memory location holding the constant.
7121 instruct loadConD(regD dst, immD src)
7122 %{
7123   match(Set dst src);
7124   ins_cost(125);
7125 
7126   format %{ "movsd   $dst, [$src]" %}
7127   ins_encode(load_conD(dst, src));
7128   ins_pipe(pipe_slow);
7129 %}
7130 
// Matcher rule: loads double 0.0 (immD0) by xor-ing the XMM register with itself using
// xorpd (opcode bytes 66 0F 57); costed below the general loadConD rule (100 vs 125).
7131 instruct loadConD0(regD dst, immD0 src)
7132 %{
7133   match(Set dst src);
7134   ins_cost(100);
7135 
7136   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
7137   opcode(0x66, 0x0F, 0x57);
7138   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
7139   ins_pipe(pipe_slow);
7140 %}
7141 
// Matcher rule: moves an int from a stack slot (stackSlotI) into a register with a
// 32-bit movl (opcode 0x8B, REX_reg_mem prefix enc_class).
7142 instruct loadSSI(rRegI dst, stackSlotI src)
7143 %{
7144   match(Set dst src);
7145 
7146   ins_cost(125);
7147   format %{ "movl    $dst, $src\t# int stk" %}
7148   opcode(0x8B);
7149   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7150   ins_pipe(ialu_reg_mem);
7151 %}
7152 
// Matcher rule: moves a long from a stack slot (stackSlotL) into a register with a
// 64-bit movq (opcode 0x8B, REX_reg_mem_wide prefix enc_class).
7153 instruct loadSSL(rRegL dst, stackSlotL src)
7154 %{
7155   match(Set dst src);
7156 
7157   ins_cost(125);
7158   format %{ "movq    $dst, $src\t# long stk" %}
7159   opcode(0x8B);
7160   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7161   ins_pipe(ialu_reg_mem);
7162 %}
7163 
// Matcher rule: moves a pointer from a stack slot (stackSlotP) into a register with a
// 64-bit movq (opcode 0x8B, REX_reg_mem_wide prefix enc_class).
7164 instruct loadSSP(rRegP dst, stackSlotP src)
7165 %{
7166   match(Set dst src);
7167 
7168   ins_cost(125);
7169   format %{ "movq    $dst, $src\t# ptr stk" %}
7170   opcode(0x8B);
7171   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7172   ins_pipe(ialu_reg_mem);
7173 %}
7174 
// Matcher rule: moves a float from a stack slot (stackSlotF) into a float register with
// movss (opcode bytes F3 0F 10, REX prefix enc_class after the first byte).
7175 instruct loadSSF(regF dst, stackSlotF src)
7176 %{
7177   match(Set dst src);
7178 
7179   ins_cost(125);
7180   format %{ "movss   $dst, $src\t# float stk" %}
7181   opcode(0xF3, 0x0F, 0x10);
7182   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
7183   ins_pipe(pipe_slow); // XXX
7184 %}
7185 
7186 // Use the same format since predicate() can not be used here.
// Matcher rule: moves a double from a stack slot (stackSlotD) into a double register.
// The encoding calls the assembler's movdbl on an rsp-relative address built from the
// slot displacement. The format string always prints "movsd" (see the comment above
// this rule) - presumably movdbl selects the actual load instruction itself; confirm in
// the assembler.
7187 instruct loadSSD(regD dst, stackSlotD src)
7188 %{
7189   match(Set dst src);
7190 
7191   ins_cost(125);
7192   format %{ "movsd   $dst, $src\t# double stk" %}
7193   ins_encode  %{
7194     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7195   %}
7196   ins_pipe(pipe_slow); // XXX
7197 %}
7198 
7199 // Prefetch instructions.
7200 // Must be safe to execute with invalid address (cannot fault).
7201 
// Matcher rule: PrefetchRead implemented with opcode 0F 0D /0.
// Selected when ReadPrefetchInstr == 3; the other values (0, 1, 2) select the sibling
// prefetchrNTA / prefetchrT0 / prefetchrT2 rules.
7202 instruct prefetchr( memory mem ) %{
7203   predicate(ReadPrefetchInstr==3);
7204   match(PrefetchRead mem);
7205   ins_cost(125);
7206 
7207   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
7208   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
7209   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7210   ins_pipe(ialu_mem);
7211 %}
7212 
// Matcher rule: PrefetchRead implemented with PREFETCHNTA (opcode 0F 18 /0).
// Selected when ReadPrefetchInstr == 0.
7213 instruct prefetchrNTA( memory mem ) %{
7214   predicate(ReadPrefetchInstr==0);
7215   match(PrefetchRead mem);
7216   ins_cost(125);
7217 
7218   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
7219   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7220   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7221   ins_pipe(ialu_mem);
7222 %}
7223 
// Matcher rule: PrefetchRead implemented with PREFETCHT0 (opcode 0F 18 /1).
// Selected when ReadPrefetchInstr == 1.
7224 instruct prefetchrT0( memory mem ) %{
7225   predicate(ReadPrefetchInstr==1);
7226   match(PrefetchRead mem);
7227   ins_cost(125);
7228 
7229   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
7230   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7231   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7232   ins_pipe(ialu_mem);
7233 %}
7234 
// Matcher rule: PrefetchRead implemented with PREFETCHT2 (opcode 0F 18 /3).
// Selected when ReadPrefetchInstr == 2.
7235 instruct prefetchrT2( memory mem ) %{
7236   predicate(ReadPrefetchInstr==2);
7237   match(PrefetchRead mem);
7238   ins_cost(125);
7239 
7240   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
7241   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7242   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7243   ins_pipe(ialu_mem);
7244 %}
7245 
// Matcher rule: PrefetchWrite implemented with PREFETCHW (opcode 0F 0D /1).
// Selected when AllocatePrefetchInstr == 3; the other values (0, 1, 2) select the sibling
// prefetchwNTA / prefetchwT0 / prefetchwT2 rules.
7246 instruct prefetchw( memory mem ) %{
7247   predicate(AllocatePrefetchInstr==3);
7248   match(PrefetchWrite mem);
7249   ins_cost(125);
7250 
7251   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
7252   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
7253   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7254   ins_pipe(ialu_mem);
7255 %}
7256 
// Matcher rule: PrefetchWrite implemented with PREFETCHNTA (opcode 0F 18 /0).
// Selected when AllocatePrefetchInstr == 0.
7257 instruct prefetchwNTA( memory mem ) %{
7258   predicate(AllocatePrefetchInstr==0);
7259   match(PrefetchWrite mem);
7260   ins_cost(125);
7261 
7262   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
7263   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7264   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7265   ins_pipe(ialu_mem);
7266 %}
7267 
// Matcher rule: PrefetchWrite implemented with PREFETCHT0 (opcode 0F 18 /1).
// Selected when AllocatePrefetchInstr == 1.
7268 instruct prefetchwT0( memory mem ) %{
7269   predicate(AllocatePrefetchInstr==1);
7270   match(PrefetchWrite mem);
7271   ins_cost(125);
7272 
7273   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
7274   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
7275   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7276   ins_pipe(ialu_mem);
7277 %}
7278 
// Matcher rule: PrefetchWrite implemented with PREFETCHT2 (opcode 0F 18 /3).
// Selected when AllocatePrefetchInstr == 2.
7279 instruct prefetchwT2( memory mem ) %{
7280   predicate(AllocatePrefetchInstr==2);
7281   match(PrefetchWrite mem);
7282   ins_cost(125);
7283 
7284   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
7285   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
7286   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7287   ins_pipe(ialu_mem);
7288 %}
7289 
7290 //----------Store Instructions-------------------------------------------------
7291 
7292 // Store Byte
// Matcher rule: StoreB from an int register is implemented by movb (opcode 0x88).
// Uses the byte-register prefix enc_class REX_breg_mem rather than REX_reg_mem.
7293 instruct storeB(memory mem, rRegI src)
7294 %{
7295   match(Set mem (StoreB mem src));
7296 
7297   ins_cost(125); // XXX
7298   format %{ "movb    $mem, $src\t# byte" %}
7299   opcode(0x88);
7300   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
7301   ins_pipe(ialu_mem_reg);
7302 %}
7303 
7304 // Store Char/Short
// Matcher rule: StoreC (char/short) from an int register is implemented by movw:
// the SizePrefix enc_class followed by the same opcode 0x89 used for the 32-bit store.
7305 instruct storeC(memory mem, rRegI src)
7306 %{
7307   match(Set mem (StoreC mem src));
7308 
7309   ins_cost(125); // XXX
7310   format %{ "movw    $mem, $src\t# char/short" %}
7311   opcode(0x89);
7312   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7313   ins_pipe(ialu_mem_reg);
7314 %}
7315 
7316 // Store Integer
// Matcher rule: StoreI from an int register is implemented by a 32-bit movl
// (opcode 0x89, REX_reg_mem prefix enc_class).
7317 instruct storeI(memory mem, rRegI src)
7318 %{
7319   match(Set mem (StoreI mem src));
7320 
7321   ins_cost(125); // XXX
7322   format %{ "movl    $mem, $src\t# int" %}
7323   opcode(0x89);
7324   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7325   ins_pipe(ialu_mem_reg);
7326 %}
7327 
7328 // Store Long
// Matcher rule: StoreL from a long register is implemented by a 64-bit movq
// (opcode 0x89, REX_reg_mem_wide prefix enc_class).
7329 instruct storeL(memory mem, rRegL src)
7330 %{
7331   match(Set mem (StoreL mem src));
7332 
7333   ins_cost(125); // XXX
7334   format %{ "movq    $mem, $src\t# long" %}
7335   opcode(0x89);
7336   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7337   ins_pipe(ialu_mem_reg); // XXX
7338 %}
7339 
7340 // Store Pointer
// Matcher rule: StoreP from a pointer register is implemented by a 64-bit movq
// (opcode 0x89, REX_reg_mem_wide prefix enc_class). The source uses the any_RegP
// operand class rather than rRegP.
7341 instruct storeP(memory mem, any_RegP src)
7342 %{
7343   match(Set mem (StoreP mem src));
7344 
7345   ins_cost(125); // XXX
7346   format %{ "movq    $mem, $src\t# ptr" %}
7347   opcode(0x89);
7348   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7349   ins_pipe(ialu_mem_reg);
7350 %}
7351 
// Matcher rule: stores a NULL pointer by writing R12 with a 64-bit movq.
// Enabled only with compressed oops and a NULL narrow-oop base, in which case R12
// (the heap-base register, per the format string) holds zero.
7352 instruct storeImmP0(memory mem, immP0 zero)
7353 %{
7354   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7355   match(Set mem (StoreP mem zero));
7356 
7357   ins_cost(125); // XXX
7358   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
7359   ins_encode %{
7360     __ movq($mem$$Address, r12);
7361   %}
7362   ins_pipe(ialu_mem_reg);
7363 %}
7364 
7365 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matcher rule: stores an immP31 pointer constant directly to memory with a 64-bit
// store-immediate (opcode C7 /0 with a 32-bit constant, REX_mem_wide prefix enc_class).
7366 instruct storeImmP(memory mem, immP31 src)
7367 %{
7368   match(Set mem (StoreP mem src));
7369 
7370   ins_cost(150); // XXX
7371   format %{ "movq    $mem, $src\t# ptr" %}
7372   opcode(0xC7); /* C7 /0 */
7373   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7374   ins_pipe(ialu_mem_imm);
7375 %}
7376 
7377 // Store Compressed Pointer
// Matcher rule: StoreN (compressed pointer) from a narrow register is implemented by a
// 32-bit movl to memory.
7378 instruct storeN(memory mem, rRegN src)
7379 %{
7380   match(Set mem (StoreN mem src));
7381 
7382   ins_cost(125); // XXX
7383   format %{ "movl    $mem, $src\t# compressed ptr" %}
7384   ins_encode %{
7385     __ movl($mem$$Address, $src$$Register);
7386   %}
7387   ins_pipe(ialu_mem_reg);
7388 %}
7389 
// Matcher rule: stores a compressed NULL by writing R12 with a 32-bit movl.
// Enabled only when the narrow-oop base is NULL, in which case R12 (the heap-base
// register, per the format string) holds zero. Unlike the sibling *0 store rules this
// predicate does not test UseCompressedOops explicitly.
7390 instruct storeImmN0(memory mem, immN0 zero)
7391 %{
7392   predicate(Universe::narrow_oop_base() == NULL);
7393   match(Set mem (StoreN mem zero));
7394 
7395   ins_cost(125); // XXX
7396   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7397   ins_encode %{
7398     __ movl($mem$$Address, r12);
7399   %}
7400   ins_pipe(ialu_mem_reg);
7401 %}
7402 
// Matcher rule: stores a compressed pointer constant (immN) directly to memory.
// A non-NULL constant goes through the assembler's set_narrow_oop; a NULL constant is
// written as a plain 32-bit zero immediate.
7403 instruct storeImmN(memory mem, immN src)
7404 %{
7405   match(Set mem (StoreN mem src));
7406 
7407   ins_cost(150); // XXX
7408   format %{ "movl    $mem, $src\t# compressed ptr" %}
7409   ins_encode %{
7410     address oop_addr = (address)$src$$constant;
7411     if (oop_addr != NULL) {
7412       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7413     } else {
7414       __ movl($mem$$Address, (int32_t)0);
7415     }
7416   %}
7417   ins_pipe(ialu_mem_imm);
7418 %}
7419 
7420 // Store Integer Immediate
// Matcher rule: stores int zero by writing R12 with a 32-bit movl.
// Enabled only with compressed oops and a NULL narrow-oop base, in which case R12
// (the heap-base register, per the format string) holds zero. Costed below the
// immediate form storeImmI (125 vs 150).
7421 instruct storeImmI0(memory mem, immI0 zero)
7422 %{
7423   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7424   match(Set mem (StoreI mem zero));
7425 
7426   ins_cost(125); // XXX
7427   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7428   ins_encode %{
7429     __ movl($mem$$Address, r12);
7430   %}
7431   ins_pipe(ialu_mem_reg);
7432 %}
7433 
// Matcher rule: stores an int constant (immI) directly to memory with a 32-bit
// store-immediate (opcode C7 /0 followed by the 32-bit constant).
7434 instruct storeImmI(memory mem, immI src)
7435 %{
7436   match(Set mem (StoreI mem src));
7437 
7438   ins_cost(150);
7439   format %{ "movl    $mem, $src\t# int" %}
7440   opcode(0xC7); /* C7 /0 */
7441   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7442   ins_pipe(ialu_mem_imm);
7443 %}
7444 
7445 // Store Long Immediate
// Matcher rule: stores long zero by writing R12 with a 64-bit movq.
// Enabled only with compressed oops and a NULL narrow-oop base, in which case R12
// (the heap-base register, per the format string) holds zero. Costed below the
// immediate form storeImmL (125 vs 150).
7446 instruct storeImmL0(memory mem, immL0 zero)
7447 %{
7448   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7449   match(Set mem (StoreL mem zero));
7450 
7451   ins_cost(125); // XXX
7452   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7453   ins_encode %{
7454     __ movq($mem$$Address, r12);
7455   %}
7456   ins_pipe(ialu_mem_reg);
7457 %}
7458 
// Matcher rule: stores a long constant representable as a 32-bit immediate (immL32)
// directly to memory with a 64-bit store-immediate (opcode C7 /0, REX_mem_wide prefix
// enc_class, 32-bit constant).
7459 instruct storeImmL(memory mem, immL32 src)
7460 %{
7461   match(Set mem (StoreL mem src));
7462 
7463   ins_cost(150);
7464   format %{ "movq    $mem, $src\t# long" %}
7465   opcode(0xC7); /* C7 /0 */
7466   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7467   ins_pipe(ialu_mem_imm);
7468 %}
7469 
7470 // Store Short/Char Immediate
// Matcher rule: stores short/char zero by writing R12 with a 16-bit movw.
// Enabled only with compressed oops and a NULL narrow-oop base, in which case R12
// (the heap-base register, per the format string) holds zero.
7471 instruct storeImmC0(memory mem, immI0 zero)
7472 %{
7473   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7474   match(Set mem (StoreC mem zero));
7475 
7476   ins_cost(125); // XXX
7477   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7478   ins_encode %{
7479     __ movw($mem$$Address, r12);
7480   %}
7481   ins_pipe(ialu_mem_reg);
7482 %}
7483 
// Matcher rule: stores a 16-bit constant (immI16) directly to memory with movw:
// SizePrefix, then the same C7 /0 opcode as the 32-bit store-immediate, then a 16-bit
// constant. Gated by the UseStoreImmI16 flag.
7484 instruct storeImmI16(memory mem, immI16 src)
7485 %{
7486   predicate(UseStoreImmI16);
7487   match(Set mem (StoreC mem src));
7488 
7489   ins_cost(150);
7490   format %{ "movw    $mem, $src\t# short/char" %}
7491   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7492   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7493   ins_pipe(ialu_mem_imm);
7494 %}
7495 
7496 // Store Byte Immediate
// Matcher rule: stores byte zero by writing R12 with an 8-bit movb.
// Enabled only with compressed oops and a NULL narrow-oop base, in which case R12
// (the heap-base register, per the format string) holds zero.
// The format annotation says "byte" because this rule matches StoreB; the previous
// "short/char" text was copied from the StoreC variant and mislabeled the disassembly.
7497 instruct storeImmB0(memory mem, immI0 zero)
7498 %{
7499   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7500   match(Set mem (StoreB mem zero));
7501 
7502   ins_cost(125); // XXX
7503   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7504   ins_encode %{
7505     __ movb($mem$$Address, r12);
7506   %}
7507   ins_pipe(ialu_mem_reg);
7508 %}
7509 
// Matcher rule: stores an 8-bit constant (immI8) directly to memory with movb
// (opcode C6 /0); the constant is emitted through the Con8or32 enc_class.
7510 instruct storeImmB(memory mem, immI8 src)
7511 %{
7512   match(Set mem (StoreB mem src));
7513 
7514   ins_cost(150); // XXX
7515   format %{ "movb    $mem, $src\t# byte" %}
7516   opcode(0xC6); /* C6 /0 */
7517   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7518   ins_pipe(ialu_mem_imm);
7519 %}
7520 
7521 // Store Aligned Packed Byte XMM register to memory
// Matcher rule: Store8B (eight packed bytes) from an XMM (regD) register to memory
// through the movq_st enc_class, which is defined elsewhere in this file.
7522 instruct storeA8B(memory mem, regD src) %{
7523   match(Set mem (Store8B mem src));
7524   ins_cost(145);
7525   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7526   ins_encode( movq_st(mem, src));
7527   ins_pipe( pipe_slow );
7528 %}
7529 
7530 // Store Aligned Packed Char/Short XMM register to memory
// Matcher rule: Store4C (four packed chars/shorts) from an XMM (regD) register to memory
// through the movq_st enc_class, which is defined elsewhere in this file.
7531 instruct storeA4C(memory mem, regD src) %{
7532   match(Set mem (Store4C mem src));
7533   ins_cost(145);
7534   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7535   ins_encode( movq_st(mem, src));
7536   ins_pipe( pipe_slow );
7537 %}
7538 
7539 // Store Aligned Packed Integer XMM register to memory
// Matcher rule: Store2I (two packed ints) from an XMM (regD) register to memory
// through the movq_st enc_class, which is defined elsewhere in this file.
7540 instruct storeA2I(memory mem, regD src) %{
7541   match(Set mem (Store2I mem src));
7542   ins_cost(145);
7543   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7544   ins_encode( movq_st(mem, src));
7545   ins_pipe( pipe_slow );
7546 %}
7547 
7548 // Store CMS card-mark Immediate
// Matcher rule: StoreCM of zero (card-mark byte, per the format string) implemented by
// writing R12 with an 8-bit movb. Enabled only with compressed oops and a NULL
// narrow-oop base, in which case R12 holds zero. Costed below the immediate form
// storeImmCM0 (125 vs 150).
7549 instruct storeImmCM0_reg(memory mem, immI0 zero)
7550 %{
7551   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7552   match(Set mem (StoreCM mem zero));
7553 
7554   ins_cost(125); // XXX
7555   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7556   ins_encode %{
7557     __ movb($mem$$Address, r12);
7558   %}
7559   ins_pipe(ialu_mem_reg);
7560 %}
7561 
// Matcher rule: StoreCM of zero (card-mark byte, per the format string) implemented as
// a movb store-immediate (opcode C6 /0). It has no predicate, so it is available
// regardless of the compressed-oops configuration.
7562 instruct storeImmCM0(memory mem, immI0 src)
7563 %{
7564   match(Set mem (StoreCM mem src));
7565 
7566   ins_cost(150); // XXX
7567   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7568   opcode(0xC6); /* C6 /0 */
7569   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7570   ins_pipe(ialu_mem_imm);
7571 %}
7572 
7573 // Store Aligned Packed Single Float XMM register to memory
// Matcher rule: Store2F (two packed floats) from an XMM (regD) register to memory
// through the movq_st enc_class, which is defined elsewhere in this file.
7574 instruct storeA2F(memory mem, regD src) %{
7575   match(Set mem (Store2F mem src));
7576   ins_cost(145);
7577   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7578   ins_encode( movq_st(mem, src));
7579   ins_pipe( pipe_slow );
7580 %}
7581 
7582 // Store Float
// Matcher rule: StoreF from a float register is implemented by movss to memory
// (opcode bytes F3 0F 11, REX prefix enc_class after the first byte).
7583 instruct storeF(memory mem, regF src)
7584 %{
7585   match(Set mem (StoreF mem src));
7586 
7587   ins_cost(95); // XXX
7588   format %{ "movss   $mem, $src\t# float" %}
7589   opcode(0xF3, 0x0F, 0x11);
7590   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7591   ins_pipe(pipe_slow); // XXX
7592 %}
7593 
7594 // Store immediate Float value (it is faster than store from XMM register)
// Matcher rule: stores float 0.0 by writing R12 with a 32-bit movl instead of going
// through an XMM register. Enabled only with compressed oops and a NULL narrow-oop
// base, in which case R12 (the heap-base register, per the format string) holds zero.
// Cheapest of the float-store rules (cost 25).
7595 instruct storeF0(memory mem, immF0 zero)
7596 %{
7597   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7598   match(Set mem (StoreF mem zero));
7599 
7600   ins_cost(25); // XXX
7601   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7602   ins_encode %{
7603     __ movl($mem$$Address, r12);
7604   %}
7605   ins_pipe(ialu_mem_reg);
7606 %}
7607 
// Matcher rule: stores a float constant (immF) with an integer 32-bit store-immediate
// (opcode C7 /0); the Con32F_as_bits enc_class supplies the constant as its raw
// 32-bit pattern.
7608 instruct storeF_imm(memory mem, immF src)
7609 %{
7610   match(Set mem (StoreF mem src));
7611 
7612   ins_cost(50);
7613   format %{ "movl    $mem, $src\t# float" %}
7614   opcode(0xC7); /* C7 /0 */
7615   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7616   ins_pipe(ialu_mem_imm);
7617 %}
7618 
7619 // Store Double
7620 instruct storeD(memory mem, regD src)
7621 %{
       // Matches StoreD from a regD operand.  Opcode bytes are F2 0F 11,
       // printed as movsd by the format string.
7622   match(Set mem (StoreD mem src));
7623 
7624   ins_cost(95); // XXX
7625   format %{ "movsd   $mem, $src\t# double" %}
7626   opcode(0xF2, 0x0F, 0x11);
7627   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7628   ins_pipe(pipe_slow); // XXX
7629 %}
7630 
7631 // Store immediate double 0.0 (it is faster than store from XMM register)
7632 instruct storeD0_imm(memory mem, immD0 src)
7633 %{
       // Selected when compressed oops are off or the narrow-oop base is
       // non-NULL, i.e. the exact complement of storeD0's predicate.
       // Encoded as REX_mem_wide + opcode 0xC7 /0 + immediate; printed as movq.
7634   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7635   match(Set mem (StoreD mem src));
7636 
7637   ins_cost(50);
7638   format %{ "movq    $mem, $src\t# double 0." %}
7639   opcode(0xC7); /* C7 /0 */
7640   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7641   ins_pipe(ialu_mem_imm);
7642 %}
7643 
7644 instruct storeD0(memory mem, immD0 zero)
7645 %{
       // Double 0.0 store done as a 64-bit store of r12.  Only selected when
       // UseCompressedOops is on and the narrow-oop base is NULL; the format
       // string notes R12_heapbase==0 in that mode.
7646   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7647   match(Set mem (StoreD mem zero));
7648 
7649   ins_cost(25); // XXX
7650   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7651   ins_encode %{
7652     __ movq($mem$$Address, r12);
7653   %}
7654   ins_pipe(ialu_mem_reg);
7655 %}
7656 
7657 instruct storeSSI(stackSlotI dst, rRegI src)
7658 %{
       // Copies an rRegI into a stackSlotI operand; opcode 0x89, printed as
       // movl.
7659   match(Set dst src);
7660 
7661   ins_cost(100);
7662   format %{ "movl    $dst, $src\t# int stk" %}
7663   opcode(0x89);
7664   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7665   ins_pipe( ialu_mem_reg );
7666 %}
7667 
7668 instruct storeSSL(stackSlotL dst, rRegL src)
7669 %{
       // Copies an rRegL into a stackSlotL operand; opcode 0x89 with the
       // wide REX encoding, printed as movq.
7670   match(Set dst src);
7671 
7672   ins_cost(100);
7673   format %{ "movq    $dst, $src\t# long stk" %}
7674   opcode(0x89);
7675   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7676   ins_pipe(ialu_mem_reg);
7677 %}
7678 
7679 instruct storeSSP(stackSlotP dst, rRegP src)
7680 %{
       // Copies an rRegP into a stackSlotP operand; same encoding as the
       // long form (opcode 0x89, wide REX), printed as movq.
7681   match(Set dst src);
7682 
7683   ins_cost(100);
7684   format %{ "movq    $dst, $src\t# ptr stk" %}
7685   opcode(0x89);
7686   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7687   ins_pipe(ialu_mem_reg);
7688 %}
7689 
7690 instruct storeSSF(stackSlotF dst, regF src)
7691 %{
       // Copies a regF into a stackSlotF operand; opcode bytes F3 0F 11,
       // printed as movss.
7692   match(Set dst src);
7693 
7694   ins_cost(95); // XXX
7695   format %{ "movss   $dst, $src\t# float stk" %}
7696   opcode(0xF3, 0x0F, 0x11);
7697   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7698   ins_pipe(pipe_slow); // XXX
7699 %}
7700 
7701 instruct storeSSD(stackSlotD dst, regD src)
7702 %{
       // Copies a regD into a stackSlotD operand; opcode bytes F2 0F 11,
       // printed as movsd.
7703   match(Set dst src);
7704 
7705   ins_cost(95); // XXX
7706   format %{ "movsd   $dst, $src\t# double stk" %}
7707   opcode(0xF2, 0x0F, 0x11);
7708   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7709   ins_pipe(pipe_slow); // XXX
7710 %}
7711 
7712 //----------BSWAP Instructions-------------------------------------------------
7713 instruct bytes_reverse_int(rRegI dst) %{
       // Two-address ReverseBytesI: dst is both input and result.  Emits
       // opcode bytes 0F C8 through opc2_reg(dst); printed as bswapl.
7714   match(Set dst (ReverseBytesI dst));
7715 
7716   format %{ "bswapl  $dst" %}
7717   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7718   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7719   ins_pipe( ialu_reg );
7720 %}
7721 
7722 instruct bytes_reverse_long(rRegL dst) %{
       // Two-address ReverseBytesL: dst is both input and result.  Same
       // opcode bytes as the int form but with the wide REX prefix; printed
       // as bswapq.
7723   match(Set dst (ReverseBytesL dst));
7724 
7725   format %{ "bswapq  $dst" %}
7726 
7727   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7728   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7729   ins_pipe( ialu_reg);
7730 %}
7731 
7732 instruct loadI_reversed(rRegI dst, memory src) %{
       // Fuses LoadI with ReverseBytesI.  The encoding list emits a load
       // into dst (opcode 0x8B) and then 0F C8 on dst, so only the result
       // register is rewritten.
7733   match(Set dst (ReverseBytesI (LoadI src)));
7734 
7735   format %{ "bswap_movl $dst, $src" %}
7736   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7737   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7738   ins_pipe( ialu_reg_mem );
7739 %}
7740 
7741 instruct loadL_reversed(rRegL dst, memory src) %{
       // Fuses LoadL with ReverseBytesL.  Same sequence as loadI_reversed
       // (0x8B load into dst, then 0F C8 on dst) using the wide REX
       // encodings.
7742   match(Set dst (ReverseBytesL (LoadL src)));
7743 
7744   format %{ "bswap_movq $dst, $src" %}
7745   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7746   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7747   ins_pipe( ialu_reg_mem );
7748 %}
7749 
7750 instruct storeI_reversed(memory dst, rRegI src) %{
       // Fused ReverseBytesI + StoreI.  The encoding list emits 0F C8 on
       // $src (a bswap) and then stores $src with opcode 0x89.
       //
       // DISABLED: the bswap rewrites $src in place, but $src is a plain
       // input operand with no kill effect declared, so a value that is
       // still live after the store would be left byte-swapped.  With the
       // rule switched off (same predicate(false) idiom as jumpXtnd_offset)
       // the matcher can cover ReverseBytesI with bytes_reverse_int, whose
       // two-address form tells the allocator the register is overwritten,
       // and the store with an ordinary StoreI rule.
       predicate(false);
7751   match(Set dst (StoreI dst (ReverseBytesI  src)));
7752 
7753   format %{ "movl_bswap $dst, $src" %}
7754   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7755   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7756   ins_pipe( ialu_mem_reg );
7757 %}
7758 
7759 instruct storeL_reversed(memory dst, rRegL src) %{
       // Fused ReverseBytesL + StoreL.  The encoding list emits a wide
       // 0F C8 on $src (a bswap) and then stores $src with opcode 0x89.
       //
       // DISABLED: the bswap rewrites $src in place, but $src is a plain
       // input operand with no kill effect declared, so a value that is
       // still live after the store would be left byte-swapped.  With the
       // rule switched off (same predicate(false) idiom as jumpXtnd_offset)
       // the matcher can cover ReverseBytesL with bytes_reverse_long, whose
       // two-address form tells the allocator the register is overwritten,
       // and the store with an ordinary StoreL rule.
       predicate(false);
7760   match(Set dst (StoreL dst (ReverseBytesL  src)));
7761 
7762   format %{ "movq_bswap $dst, $src" %}
7763   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7764   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7765   ins_pipe( ialu_mem_reg );
7766 %}
7767 
7768 
7769 //---------- Zeros Count Instructions ------------------------------------------
7770 
7771 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI as a single lzcntl; used only when
       // UseCountLeadingZerosInstruction is set.  Flags are declared killed.
7772   predicate(UseCountLeadingZerosInstruction);
7773   match(Set dst (CountLeadingZerosI src));
7774   effect(KILL cr);
7775 
7776   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7777   ins_encode %{
7778     __ lzcntl($dst$$Register, $src$$Register);
7779   %}
7780   ins_pipe(ialu_reg);
7781 %}
7782 
7783 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosI when
       // UseCountLeadingZerosInstruction is off: bsrl yields the index of
       // the highest set bit, and the tail computes (BitsPerInt - 1) - index.
7784   predicate(!UseCountLeadingZerosInstruction);
7785   match(Set dst (CountLeadingZerosI src));
7786   effect(KILL cr);
7787 
7788   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7789             "jnz     skip\n\t"
7790             "movl    $dst, -1\n"
7791       "skip:\n\t"
7792             "negl    $dst\n\t"
7793             "addl    $dst, 31" %}
7794   ins_encode %{
7795     Label have_index;
7796     Register result = $dst$$Register;
7797     Register value  = $src$$Register;
7798     __ bsrl(result, value);
         // Zero input: substitute index -1 so the tail below produces
         // BitsPerInt.
7799     __ jccb(Assembler::notZero, have_index);
7800     __ movl(result, -1);
7801     __ bind(have_index);
         // result = (BitsPerInt - 1) - index
7802     __ negl(result);
7803     __ addl(result, BitsPerInt - 1);
7804   %}
7805   ins_pipe(ialu_reg);
7806 %}
7807 
7808 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL as a single lzcntq; used only when
       // UseCountLeadingZerosInstruction is set.  The source is a long
       // register, the result an int register; flags are declared killed.
7809   predicate(UseCountLeadingZerosInstruction);
7810   match(Set dst (CountLeadingZerosL src));
7811   effect(KILL cr);
7812 
7813   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7814   ins_encode %{
7815     __ lzcntq($dst$$Register, $src$$Register);
7816   %}
7817   ins_pipe(ialu_reg);
7818 %}
7819 
7820 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosL when
       // UseCountLeadingZerosInstruction is off: bsrq yields the index of
       // the highest set bit, and the tail computes (BitsPerLong - 1) - index.
7821   predicate(!UseCountLeadingZerosInstruction);
7822   match(Set dst (CountLeadingZerosL src));
7823   effect(KILL cr);
7824 
7825   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7826             "jnz     skip\n\t"
7827             "movl    $dst, -1\n"
7828       "skip:\n\t"
7829             "negl    $dst\n\t"
7830             "addl    $dst, 63" %}
7831   ins_encode %{
7832     Label have_index;
7833     Register result = $dst$$Register;
7834     Register value  = $src$$Register;
7835     __ bsrq(result, value);
         // Zero input: substitute index -1 so the tail below produces
         // BitsPerLong.
7836     __ jccb(Assembler::notZero, have_index);
7837     __ movl(result, -1);
7838     __ bind(have_index);
         // result = (BitsPerLong - 1) - index
7839     __ negl(result);
7840     __ addl(result, BitsPerLong - 1);
7841   %}
7842   ins_pipe(ialu_reg);
7843 %}
7844 
7845 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI via bsfl, with an explicit BitsPerInt result
       // for the case where the branch on notZero is not taken.
7846   match(Set dst (CountTrailingZerosI src));
7847   effect(KILL cr);
7848 
7849   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7850             "jnz     done\n\t"
7851             "movl    $dst, 32\n"
7852       "done:" %}
7853   ins_encode %{
7854     Label found_bit;
7855     Register result = $dst$$Register;
7856     __ bsfl(result, $src$$Register);
7857     __ jccb(Assembler::notZero, found_bit);
         // No set bit was found: the answer is the full width.
7858     __ movl(result, BitsPerInt);
7859     __ bind(found_bit);
7860   %}
7861   ins_pipe(ialu_reg);
7862 %}
7863 
7864 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL via bsfq, with an explicit BitsPerLong result
       // for the case where the branch on notZero is not taken.
7865   match(Set dst (CountTrailingZerosL src));
7866   effect(KILL cr);
7867 
7868   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7869             "jnz     done\n\t"
7870             "movl    $dst, 64\n"
7871       "done:" %}
7872   ins_encode %{
7873     Label found_bit;
7874     Register result = $dst$$Register;
7875     __ bsfq(result, $src$$Register);
7876     __ jccb(Assembler::notZero, found_bit);
         // No set bit was found: the answer is the full width.
7877     __ movl(result, BitsPerLong);
7878     __ bind(found_bit);
7879   %}
7880   ins_pipe(ialu_reg);
7881 %}
7882 
7883 
7884 //---------- Population Count Instructions -------------------------------------
7885 
7886 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI of a register, emitted as popcntl; used only when
       // UsePopCountInstruction is set.
       // POPCNT writes the flags register (ZF is set from the source, the
       // other arithmetic flags are cleared), so the flags are declared
       // killed here, as the zero-count rules in this file already do.
7887   predicate(UsePopCountInstruction);
7888   match(Set dst (PopCountI src));
       effect(KILL cr);
7889 
7890   format %{ "popcnt  $dst, $src" %}
7891   ins_encode %{
7892     __ popcntl($dst$$Register, $src$$Register);
7893   %}
7894   ins_pipe(ialu_reg);
7895 %}
7896 
7897 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI fused with its LoadI: popcntl with a memory source; used
       // only when UsePopCountInstruction is set.
       // POPCNT writes the flags register (ZF is set from the source, the
       // other arithmetic flags are cleared), so the flags are declared
       // killed here, as the zero-count rules in this file already do.
7898   predicate(UsePopCountInstruction);
7899   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7900 
7901   format %{ "popcnt  $dst, $mem" %}
7902   ins_encode %{
7903     __ popcntl($dst$$Register, $mem$$Address);
7904   %}
7905   ins_pipe(ialu_reg);
7906 %}
7907 
7908 // Note: Long.bitCount(long) returns an int.
7909 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL of a long register into an int register, emitted as
       // popcntq; used only when UsePopCountInstruction is set.
       // POPCNT writes the flags register (ZF is set from the source, the
       // other arithmetic flags are cleared), so the flags are declared
       // killed here, as the zero-count rules in this file already do.
7910   predicate(UsePopCountInstruction);
7911   match(Set dst (PopCountL src));
       effect(KILL cr);
7912 
7913   format %{ "popcnt  $dst, $src" %}
7914   ins_encode %{
7915     __ popcntq($dst$$Register, $src$$Register);
7916   %}
7917   ins_pipe(ialu_reg);
7918 %}
7919 
7920 // Note: Long.bitCount(long) returns an int.
7921 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL fused with its LoadL: popcntq with a memory source; used
       // only when UsePopCountInstruction is set.
       // POPCNT writes the flags register (ZF is set from the source, the
       // other arithmetic flags are cleared), so the flags are declared
       // killed here, as the zero-count rules in this file already do.
7922   predicate(UsePopCountInstruction);
7923   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7924 
7925   format %{ "popcnt  $dst, $mem" %}
7926   ins_encode %{
7927     __ popcntq($dst$$Register, $mem$$Address);
7928   %}
7929   ins_pipe(ialu_reg);
7930 %}
7931 
7932 
7933 //----------MemBar Instructions-----------------------------------------------
7934 // Memory barrier flavors
7935 
7936 instruct membar_acquire()
7937 %{
       // MemBarAcquire with nothing emitted: empty encoding, size 0, cost 0.
7938   match(MemBarAcquire);
7939   ins_cost(0);
7940 
7941   size(0);
7942   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7943   ins_encode();
7944   ins_pipe(empty);
7945 %}
7946 
7947 instruct membar_acquire_lock()
7948 %{
       // MemBarAcquire variant selected when Matcher::prior_fast_lock(n)
       // holds.  Also an empty encoding; the format string records the
       // reason (a prior CMPXCHG in FastLock).
7949   match(MemBarAcquire);
7950   predicate(Matcher::prior_fast_lock(n));
7951   ins_cost(0);
7952 
7953   size(0);
7954   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7955   ins_encode();
7956   ins_pipe(empty);
7957 %}
7958 
7959 instruct membar_release()
7960 %{
       // MemBarRelease with nothing emitted: empty encoding, size 0, cost 0.
7961   match(MemBarRelease);
7962   ins_cost(0);
7963 
7964   size(0);
7965   format %{ "MEMBAR-release ! (empty encoding)" %}
7966   ins_encode();
7967   ins_pipe(empty);
7968 %}
7969 
7970 instruct membar_release_lock()
7971 %{
       // MemBarRelease variant selected when Matcher::post_fast_unlock(n)
       // holds.  Also an empty encoding; the format string records the
       // reason (a FastUnlock follows).
7972   match(MemBarRelease);
7973   predicate(Matcher::post_fast_unlock(n));
7974   ins_cost(0);
7975 
7976   size(0);
7977   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7978   ins_encode();
7979   ins_pipe(empty);
7980 %}
7981 
7982 instruct membar_volatile(rFlagsReg cr) %{
       // MemBarVolatile: emits membar(Assembler::StoreLoad) and declares
       // the flags killed.  The format template prints a locked addl on
       // [rsp] when os::is_MP() and an "empty encoding" note otherwise.
       // Cost 400, versus 0 for unnecessary_membar_volatile.
7983   match(MemBarVolatile);
7984   effect(KILL cr);
7985   ins_cost(400);
7986 
7987   format %{ 
7988     $$template
7989     if (os::is_MP()) {
7990       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7991     } else {
7992       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7993     }
7994   %}
7995   ins_encode %{
7996     __ membar(Assembler::StoreLoad);
7997   %}
7998   ins_pipe(pipe_slow);
7999 %}
8000 
8001 instruct unnecessary_membar_volatile()
8002 %{
       // MemBarVolatile variant selected when
       // Matcher::post_store_load_barrier(n) holds; nothing is emitted
       // (empty encoding, size 0, cost 0).
8003   match(MemBarVolatile);
8004   predicate(Matcher::post_store_load_barrier(n));
8005   ins_cost(0);
8006 
8007   size(0);
8008   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8009   ins_encode();
8010   ins_pipe(empty);
8011 %}
8012 
8013 //----------Move Instructions--------------------------------------------------
8014 
8015 instruct castX2P(rRegP dst, rRegL src)
8016 %{
       // CastX2P: long register to pointer register, encoded with
       // enc_copy_wide and printed as movq.
8017   match(Set dst (CastX2P src));
8018 
8019   format %{ "movq    $dst, $src\t# long->ptr" %}
8020   ins_encode(enc_copy_wide(dst, src));
8021   ins_pipe(ialu_reg_reg); // XXX
8022 %}
8023 
8024 instruct castP2X(rRegL dst, rRegP src)
8025 %{
       // CastP2X: pointer register to long register, encoded with
       // enc_copy_wide and printed as movq.
8026   match(Set dst (CastP2X src));
8027 
8028   format %{ "movq    $dst, $src\t# ptr -> long" %}
8029   ins_encode(enc_copy_wide(dst, src));
8030   ins_pipe(ialu_reg_reg); // XXX
8031 %}
8032 
8033 
8034 // Convert oop pointer into compressed form
8035 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values whose type is not known to be NotNull (the
       // NotNull case is handled by encodeHeapOop_not_null).  Flags are
       // declared killed.
8036   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8037   match(Set dst (EncodeP src));
8038   effect(KILL cr);
8039   format %{ "encode_heap_oop $dst,$src" %}
8040   ins_encode %{
8041     Register dst_reg = $dst$$Register;
8042     Register src_reg = $src$$Register;
         // encode_heap_oop works in place, so move the value over first
         // when the allocator picked different registers.
8043     if (dst_reg != src_reg) {
8044       __ movq(dst_reg, src_reg);
8045     }
8046     __ encode_heap_oop(dst_reg);
8047   %}
8048   ins_pipe(ialu_reg_long);
8049 %}
8050 
8051 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // EncodeP for values typed NotNull; uses the two-register
       // encode_heap_oop_not_null helper, so no separate copy is emitted.
       // Flags are declared killed.
8052   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8053   match(Set dst (EncodeP src));
8054   effect(KILL cr);
8055   format %{ "encode_heap_oop_not_null $dst,$src" %}
8056   ins_encode %{
8057     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8058   %}
8059   ins_pipe(ialu_reg_long);
8060 %}
8061 
8062 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // DecodeN for results typed neither NotNull nor Constant (those two
       // cases go to decodeHeapOop_not_null).  Flags are declared killed.
8063   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
8064             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
8065   match(Set dst (DecodeN src));
8066   effect(KILL cr);
8067   format %{ "decode_heap_oop $dst,$src" %}
8068   ins_encode %{
8069     Register dst_reg = $dst$$Register;
8070     Register src_reg = $src$$Register;
         // decode_heap_oop works in place, so move the value over first
         // when the allocator picked different registers.
8071     if (dst_reg != src_reg) {
8072       __ movq(dst_reg, src_reg);
8073     }
8074     __ decode_heap_oop(dst_reg);
8075   %}
8076   ins_pipe(ialu_reg_long);
8077 %}
8078 
8079 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
       // DecodeN for results typed NotNull or Constant.
       // NOTE(review): unlike decodeHeapOop, no flags kill is declared
       // here; confirm that neither decode_heap_oop_not_null overload
       // writes the flags in any compressed-oops mode.
8080   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
8081             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
8082   match(Set dst (DecodeN src));
8083   format %{ "decode_heap_oop_not_null $dst,$src" %}
8084   ins_encode %{
8085     Register dst_reg = $dst$$Register;
8086     Register src_reg = $src$$Register;
         // Same register: in-place overload; otherwise the two-register one.
8087     if (dst_reg == src_reg) {
8088       __ decode_heap_oop_not_null(dst_reg);
8089     } else {
8090       __ decode_heap_oop_not_null(dst_reg, src_reg);
8091     }
8092   %}
8093   ins_pipe(ialu_reg_long);
8094 %}
8095 
8096 
8097 //----------Conditional Move---------------------------------------------------
8098 // Jump
8099 // dummy instruction for generating temp registers
8100 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
       // Jump on (switch_val << shift).  predicate(false) means this rule
       // is never selected by the matcher.  dest is a TEMP register that
       // the format string shows holding table_base.
8101   match(Jump (LShiftL switch_val shift));
8102   ins_cost(350);
8103   predicate(false);
8104   effect(TEMP dest);
8105 
8106   format %{ "leaq    $dest, table_base\n\t"
8107             "jmp     [$dest + $switch_val << $shift]\n\t" %}
8108   ins_encode(jump_enc_offset(switch_val, shift, dest));
8109   ins_pipe(pipe_jmp);
8110   ins_pc_relative(1);
8111 %}
8112 
8113 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
       // Jump on ((switch_val << shift) + offset), folding the shift and
       // the constant into the jump's address as shown by the format
       // string.  dest is a TEMP register for table_base.
8114   match(Jump (AddL (LShiftL switch_val shift) offset));
8115   ins_cost(350);
8116   effect(TEMP dest);
8117 
8118   format %{ "leaq    $dest, table_base\n\t"
8119             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
8120   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
8121   ins_pipe(pipe_jmp);
8122   ins_pc_relative(1);
8123 %}
8124 
8125 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
       // Plain Jump on switch_val with no folded shift or offset.  dest is
       // a TEMP register for table_base.
8126   match(Jump switch_val);
8127   ins_cost(350);
8128   effect(TEMP dest);
8129 
8130   format %{ "leaq    $dest, table_base\n\t"
8131             "jmp     [$dest + $switch_val]\n\t" %}
8132   ins_encode(jump_enc(switch_val, dest));
8133   ins_pipe(pipe_jmp);
8134   ins_pc_relative(1);
8135 %}
8136 
8137 // Conditional move
8138 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
8139 %{
       // CMoveI on a signed comparison (cmpOp/rFlagsReg), register source.
       // Two-address: dst is both the kept value and the result.
8140   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8141 
8142   ins_cost(200); // XXX
8143   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8144   opcode(0x0F, 0x40);
8145   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8146   ins_pipe(pipe_cmov_reg);
8147 %}
8148 
8149 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
       // CMoveI on an unsigned comparison (cmpOpU/rFlagsRegU), register
       // source; same encoding as cmovI_reg.
8150   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8151 
8152   ins_cost(200); // XXX
8153   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8154   opcode(0x0F, 0x40);
8155   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8156   ins_pipe(pipe_cmov_reg);
8157 %}
8158 
8159 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovI_regU.
8160   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8161   ins_cost(200);
8162   expand %{
8163     cmovI_regU(cop, cr, dst, src);
8164   %}
8165 %}
8166 
8167 // Conditional move
8168 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
       // CMoveI on a signed comparison with the LoadI of the source folded
       // into the instruction's memory operand.
8169   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8170 
8171   ins_cost(250); // XXX
8172   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8173   opcode(0x0F, 0x40);
8174   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
8175   ins_pipe(pipe_cmov_mem);
8176 %}
8177 
8178 // Conditional move
8179 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
8180 %{
       // CMoveI on an unsigned comparison with the LoadI of the source
       // folded into the instruction's memory operand.
8181   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8182 
8183   ins_cost(250); // XXX
8184   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8185   opcode(0x0F, 0x40);
8186   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
8187   ins_pipe(pipe_cmov_mem);
8188 %}
8189 
8190 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovI_memU.
8191   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8192   ins_cost(250);
8193   expand %{
8194     cmovI_memU(cop, cr, dst, src);
8195   %}
8196 %}
8197 
8198 // Conditional move
8199 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
8200 %{
       // CMoveN (compressed pointer) on a signed comparison; uses the same
       // non-wide cmovl encoding as the int form.
8201   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8202 
8203   ins_cost(200); // XXX
8204   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
8205   opcode(0x0F, 0x40);
8206   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8207   ins_pipe(pipe_cmov_reg);
8208 %}
8209 
8210 // Conditional move
8211 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
8212 %{
       // CMoveN (compressed pointer) on an unsigned comparison; same
       // encoding as cmovN_reg.
8213   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8214 
8215   ins_cost(200); // XXX
8216   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
8217   opcode(0x0F, 0x40);
8218   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
8219   ins_pipe(pipe_cmov_reg);
8220 %}
8221 
8222 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovN_regU.
8223   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8224   ins_cost(200);
8225   expand %{
8226     cmovN_regU(cop, cr, dst, src);
8227   %}
8228 %}
8229 
8230 // Conditional move
8231 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
8232 %{
       // CMoveP on a signed comparison; wide REX encoding, printed as
       // cmovq.
8233   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8234 
8235   ins_cost(200); // XXX
8236   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
8237   opcode(0x0F, 0x40);
8238   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8239   ins_pipe(pipe_cmov_reg);  // XXX
8240 %}
8241 
8242 // Conditional move
8243 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
8244 %{
       // CMoveP on an unsigned comparison; same wide encoding as
       // cmovP_reg.
8245   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8246 
8247   ins_cost(200); // XXX
8248   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
8249   opcode(0x0F, 0x40);
8250   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8251   ins_pipe(pipe_cmov_reg); // XXX
8252 %}
8253 
8254 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovP_regU.
8255   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8256   ins_cost(200);
8257   expand %{
8258     cmovP_regU(cop, cr, dst, src);
8259   %}
8260 %}
8261 
8262 // DISABLED: Requires the ADLC to emit a bottom_type call that
8263 // correctly meets the two pointer arguments; one is an incoming
8264 // register but the other is a memory operand.  ALSO appears to
8265 // be buggy with implicit null checks.
8266 //
8267 //// Conditional move
8268 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
8269 //%{
8270 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8271 //  ins_cost(250);
8272 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8273 //  opcode(0x0F,0x40);
8274 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8275 //  ins_pipe( pipe_cmov_mem );
8276 //%}
8277 //
8278 //// Conditional move
8279 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
8280 //%{
8281 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8282 //  ins_cost(250);
8283 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8284 //  opcode(0x0F,0x40);
8285 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8286 //  ins_pipe( pipe_cmov_mem );
8287 //%}
8288 
8289 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
8290 %{
       // CMoveL on a signed comparison, register source; wide REX
       // encoding, printed as cmovq.
8291   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8292 
8293   ins_cost(200); // XXX
8294   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8295   opcode(0x0F, 0x40);
8296   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8297   ins_pipe(pipe_cmov_reg);  // XXX
8298 %}
8299 
8300 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
8301 %{
       // CMoveL on a signed comparison with the LoadL of the source folded
       // into the memory operand.  Cost is 200 here, whereas the int
       // memory form cmovI_mem uses 250.
8302   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8303 
8304   ins_cost(200); // XXX
8305   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8306   opcode(0x0F, 0x40);
8307   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8308   ins_pipe(pipe_cmov_mem);  // XXX
8309 %}
8310 
8311 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
8312 %{
       // CMoveL on an unsigned comparison, register source; same wide
       // encoding as cmovL_reg.
8313   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8314 
8315   ins_cost(200); // XXX
8316   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8317   opcode(0x0F, 0x40);
8318   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
8319   ins_pipe(pipe_cmov_reg); // XXX
8320 %}
8321 
8322 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovL_regU.
8323   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8324   ins_cost(200);
8325   expand %{
8326     cmovL_regU(cop, cr, dst, src);
8327   %}
8328 %}
8329 
8330 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
8331 %{
       // CMoveL on an unsigned comparison with the LoadL of the source
       // folded into the memory operand.
8332   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8333 
8334   ins_cost(200); // XXX
8335   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8336   opcode(0x0F, 0x40);
8337   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8338   ins_pipe(pipe_cmov_mem); // XXX
8339 %}
8340 
8341 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovL_memU.
8342   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8343   ins_cost(200);
8344   expand %{
8345     cmovL_memU(cop, cr, dst, src);
8346   %}
8347 %}
8348 
8349 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
8350 %{
       // CMoveF on a signed comparison.  Encoded by enc_cmovf_branch; the
       // format string shows it as a branch on the negated condition
       // around a movss rather than a cmov instruction.
8351   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8352 
8353   ins_cost(200); // XXX
8354   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8355             "movss     $dst, $src\n"
8356     "skip:" %}
8357   ins_encode(enc_cmovf_branch(cop, dst, src));
8358   ins_pipe(pipe_slow);
8359 %}
8360 
8361 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8362 // %{
8363 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8364 
8365 //   ins_cost(200); // XXX
8366 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8367 //             "movss     $dst, $src\n"
8368 //     "skip:" %}
8369 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8370 //   ins_pipe(pipe_slow);
8371 // %}
8372 
8373 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
8374 %{
       // CMoveF on an unsigned comparison; same enc_cmovf_branch encoding
       // (branch around a movss) as cmovF_reg.
8375   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8376 
8377   ins_cost(200); // XXX
8378   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8379             "movss     $dst, $src\n"
8380     "skip:" %}
8381   ins_encode(enc_cmovf_branch(cop, dst, src));
8382   ins_pipe(pipe_slow);
8383 %}
8384 
8385 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovF_regU.
8386   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8387   ins_cost(200);
8388   expand %{
8389     cmovF_regU(cop, cr, dst, src);
8390   %}
8391 %}
8392 
8393 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
8394 %{
       // CMoveD on a signed comparison.  Encoded by enc_cmovd_branch; the
       // format string shows it as a branch on the negated condition
       // around a movsd rather than a cmov instruction.
8395   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8396 
8397   ins_cost(200); // XXX
8398   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8399             "movsd     $dst, $src\n"
8400     "skip:" %}
8401   ins_encode(enc_cmovd_branch(cop, dst, src));
8402   ins_pipe(pipe_slow);
8403 %}
8404 
8405 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
8406 %{
       // CMoveD on an unsigned comparison; same enc_cmovd_branch encoding
       // (branch around a movsd) as cmovD_reg.
8407   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8408 
8409   ins_cost(200); // XXX
8410   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8411             "movsd     $dst, $src\n"
8412     "skip:" %}
8413   ins_encode(enc_cmovd_branch(cop, dst, src));
8414   ins_pipe(pipe_slow);
8415 %}
8416 
8417 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
       // Same match rule for cmpOpUCF/rFlagsRegUCF operands; has no
       // encoding of its own and expands to cmovD_regU.
8418   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8419   ins_cost(200);
8420   expand %{
8421     cmovD_regU(cop, cr, dst, src);
8422   %}
8423 %}
8424 
8425 //----------Arithmetic Instructions--------------------------------------------
8426 //----------Addition Instructions----------------------------------------------
8427 
8428 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8429 %{
       // Two-address AddI of two int registers (dst = dst + src); opcode
       // 0x03, printed as addl.  Flags are declared killed.
8430   match(Set dst (AddI dst src));
8431   effect(KILL cr);
8432 
8433   format %{ "addl    $dst, $src\t# int" %}
8434   opcode(0x03);
8435   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8436   ins_pipe(ialu_reg_reg);
8437 %}
8438 
8439 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8440 %{
       // Two-address AddI of an int register and an immI constant; opcode
       // 0x81 /0 emitted through OpcSErm, then the immediate via Con8or32.
8441   match(Set dst (AddI dst src));
8442   effect(KILL cr);
8443 
8444   format %{ "addl    $dst, $src\t# int" %}
8445   opcode(0x81, 0x00); /* /0 id */
8446   ins_encode(OpcSErm(dst, src), Con8or32(src));
8447   ins_pipe( ialu_reg );
8448 %}
8449 
8450 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8451 %{
       // AddI with the LoadI of the second operand folded into a memory
       // source; opcode 0x03, printed as addl.
8452   match(Set dst (AddI dst (LoadI src)));
8453   effect(KILL cr);
8454 
8455   ins_cost(125); // XXX
8456   format %{ "addl    $dst, $src\t# int" %}
8457   opcode(0x03);
8458   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8459   ins_pipe(ialu_reg_mem);
8460 %}
8461 
8462 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8463 %{
       // Read-modify-write add: matches a StoreI back to dst of
       // (LoadI dst) + src, with all three uses naming the same memory
       // operand.  Opcode 0x01.
8464   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8465   effect(KILL cr);
8466 
8467   ins_cost(150); // XXX
8468   format %{ "addl    $dst, $src\t# int" %}
8469   opcode(0x01); /* Opcode 01 /r */
8470   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8471   ins_pipe(ialu_mem_reg);
8472 %}
8473 
8474 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
8475 %{
       // Read-modify-write add of an immI constant to an int in memory;
       // opcode 0x81 /0 emitted through OpcSE, then the immediate via
       // Con8or32.
8476   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8477   effect(KILL cr);
8478 
8479   ins_cost(125); // XXX
8480   format %{ "addl    $dst, $src\t# int" %}
8481   opcode(0x81); /* Opcode 81 /0 id */
8482   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8483   ins_pipe(ialu_mem_imm);
8484 %}
8485 
8486 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8487 %{
       // AddI with an immI1 operand, printed as incl (FF /0).  Only
       // available when UseIncDec is set.
8488   predicate(UseIncDec);
8489   match(Set dst (AddI dst src));
8490   effect(KILL cr);
8491 
8492   format %{ "incl    $dst\t# int" %}
8493   opcode(0xFF, 0x00); // FF /0
8494   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8495   ins_pipe(ialu_reg);
8496 %}
8497 
8498 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8499 %{
       // Read-modify-write form of incI_rReg: increments an int in memory
       // (FF /0).  Only available when UseIncDec is set.
8500   predicate(UseIncDec);
8501   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8502   effect(KILL cr);
8503 
8504   ins_cost(125); // XXX
8505   format %{ "incl    $dst\t# int" %}
8506   opcode(0xFF); /* Opcode FF /0 */
8507   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8508   ins_pipe(ialu_mem_imm);
8509 %}
8510 
8511 // XXX why does that use AddI
8512 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8513 %{
       // AddI with an immI_M1 operand, printed as decl (FF /1).  Only
       // available when UseIncDec is set.
8514   predicate(UseIncDec);
8515   match(Set dst (AddI dst src));
8516   effect(KILL cr);
8517 
8518   format %{ "decl    $dst\t# int" %}
8519   opcode(0xFF, 0x01); // FF /1
8520   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8521   ins_pipe(ialu_reg);
8522 %}
8523 
8524 // XXX why does that use AddI
8525 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8526 %{
       // Read-modify-write form of decI_rReg: decrements an int in memory
       // (FF /1).  Only available when UseIncDec is set.
8527   predicate(UseIncDec);
8528   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8529   effect(KILL cr);
8530 
8531   ins_cost(125); // XXX
8532   format %{ "decl    $dst\t# int" %}
8533   opcode(0xFF); /* Opcode FF /1 */
8534   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8535   ins_pipe(ialu_mem_imm);
8536 %}
8537 
8538 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8539 %{
       // Three-operand AddI (dst = src0 + src1) with no flags effect
       // declared.  Emits a 0x67 prefix followed by opcode 0x8D via
       // reg_lea; printed as "addr32 leal".  Cost 110.
8540   match(Set dst (AddI src0 src1));
8541 
8542   ins_cost(110);
8543   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8544   opcode(0x8D); /* 0x8D /r */
8545   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8546   ins_pipe(ialu_reg_reg);
8547 %}
8548 
8549 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8550 %{
       // Two-address AddL of two long registers; opcode 0x03 with the wide
       // REX encoding, printed as addq.  Flags are declared killed.
8551   match(Set dst (AddL dst src));
8552   effect(KILL cr);
8553 
8554   format %{ "addq    $dst, $src\t# long" %}
8555   opcode(0x03);
8556   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8557   ins_pipe(ialu_reg_reg);
8558 %}
8559 
8560 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8561 %{
       // Two-address AddL of a long register and an immL32 constant;
       // opcode 0x81 /0 emitted through OpcSErm_wide, then the immediate
       // via Con8or32.
8562   match(Set dst (AddL dst src));
8563   effect(KILL cr);
8564 
8565   format %{ "addq    $dst, $src\t# long" %}
8566   opcode(0x81, 0x00); /* /0 id */
8567   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8568   ins_pipe( ialu_reg );
8569 %}
8570 
8571 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8572 %{
       // AddL with the LoadL of the second operand folded into a memory
       // source; opcode 0x03 with the wide REX encoding.
8573   match(Set dst (AddL dst (LoadL src)));
8574   effect(KILL cr);
8575 
8576   ins_cost(125); // XXX
8577   format %{ "addq    $dst, $src\t# long" %}
8578   opcode(0x03);
8579   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8580   ins_pipe(ialu_reg_mem);
8581 %}
8582 
8583 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8584 %{
       // Read-modify-write add: matches a StoreL back to dst of
       // (LoadL dst) + src, with all three uses naming the same memory
       // operand.  Opcode 0x01 with the wide REX encoding.
8585   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8586   effect(KILL cr);
8587 
8588   ins_cost(150); // XXX
8589   format %{ "addq    $dst, $src\t# long" %}
8590   opcode(0x01); /* Opcode 01 /r */
8591   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8592   ins_pipe(ialu_mem_reg);
8593 %}
8594 
8595 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8596 %{
       // Read-modify-write add of an immL32 constant to a long in memory;
       // opcode 0x81 /0 emitted through OpcSE with the wide REX prefix,
       // then the immediate via Con8or32.
8597   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8598   effect(KILL cr);
8599 
8600   ins_cost(125); // XXX
8601   format %{ "addq    $dst, $src\t# long" %}
8602   opcode(0x81); /* Opcode 81 /0 id */
8603   ins_encode(REX_mem_wide(dst),
8604              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8605   ins_pipe(ialu_mem_imm);
8606 %}
8607 
// Long increment of a register; only selected when UseIncDec is set.
// Matches AddL of dst with an immL1 operand and formats as incq (FF /0).
// dst is the input and result of a long AddL, so it is declared with the long
// register class rRegL (matching decL_rReg) rather than the int class rRegI.
// Flags are declared killed.
8608 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8609 %{
8610   predicate(UseIncDec);
8611   match(Set dst (AddL dst src));
8612   effect(KILL cr);
8613 
8614   format %{ "incq    $dst\t# long" %}
8615   opcode(0xFF, 0x00); // FF /0
8616   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8617   ins_pipe(ialu_reg);
8618 %}
8619 
// Long increment performed directly on memory; only selected when UseIncDec
// is set. Matches load / AddL with an immL1 operand / store of the same memory
// operand and formats as incq. Flags are declared killed.
8620 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8621 %{
8622   predicate(UseIncDec);
8623   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8624   effect(KILL cr);
8625 
8626   ins_cost(125); // XXX
8627   format %{ "incq    $dst\t# long" %}
8628   opcode(0xFF); /* Opcode FF /0 */
8629   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8630   ins_pipe(ialu_mem_imm);
8631 %}
8632 
8633 // Long decrement of a register; only selected when UseIncDec is set.
// The rule matches AddL (not SubL) because the decrement is expressed as an
// add of an immL_M1 operand. NOTE(review): immL_M1 is presumably the constant
// -1; the operand definition is outside this chunk.
// Formats as decq (FF /1); flags are declared killed.
8634 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8635 %{
8636   predicate(UseIncDec);
8637   match(Set dst (AddL dst src));
8638   effect(KILL cr);
8639 
8640   format %{ "decq    $dst\t# long" %}
8641   opcode(0xFF, 0x01); // FF /1
8642   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8643   ins_pipe(ialu_reg);
8644 %}
8645 
8646 // Long decrement performed directly on memory; only selected when
// UseIncDec is set. The rule matches AddL (not SubL) because the decrement is
// expressed as an add of an immL_M1 operand. NOTE(review): immL_M1 is
// presumably the constant -1; the operand definition is outside this chunk.
// The ModRM opcode extension 0x01 is passed explicitly to RM_opc_mem.
8647 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8648 %{
8649   predicate(UseIncDec);
8650   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8651   effect(KILL cr);
8652 
8653   ins_cost(125); // XXX
8654   format %{ "decq    $dst\t# long" %}
8655   opcode(0xFF); /* Opcode FF /1 */
8656   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8657   ins_pipe(ialu_mem_imm);
8658 %}
8659 
// Long add of a register and an immL32 immediate into a separate destination.
// The match rule does not tie dst to an input and no flags effect is declared;
// formatted as leaq.
8660 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8661 %{
8662   match(Set dst (AddL src0 src1));
8663 
8664   ins_cost(110);
8665   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8666   opcode(0x8D); /* 0x8D /r */
8667   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8668   ins_pipe(ialu_reg_reg);
8669 %}
8670 
// Pointer add, pointer register += long register.
// dst is both an input and the result of the AddP; flags are declared killed.
8671 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8672 %{
8673   match(Set dst (AddP dst src));
8674   effect(KILL cr);
8675 
8676   format %{ "addq    $dst, $src\t# ptr" %}
8677   opcode(0x03);
8678   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8679   ins_pipe(ialu_reg_reg);
8680 %}
8681 
// Pointer add, pointer register += immL32 immediate (opcode 0x81 with /0).
// dst is both an input and the result of the AddP; flags are declared killed.
8682 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8683 %{
8684   match(Set dst (AddP dst src));
8685   effect(KILL cr);
8686 
8687   format %{ "addq    $dst, $src\t# ptr" %}
8688   opcode(0x81, 0x00); /* /0 id */
8689   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8690   ins_pipe( ialu_reg );
8691 %}
8692 
8693 // XXX addP mem ops ????
8694 
// Pointer add of a register and an immL32 immediate into a separate
// destination. The match rule does not tie dst to an input and no flags effect
// is declared; formatted as leaq.
8695 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8696 %{
8697   match(Set dst (AddP src0 src1));
8698 
8699   ins_cost(110);
8700   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8701   opcode(0x8D); /* 0x8D /r */
8702   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8703   ins_pipe(ialu_reg_reg);
8704 %}
8705 
// Zero-size rule for CheckCastPP: dst is both input and result, size(0) is
// declared and the encoding is empty, so no bytes are emitted.
8706 instruct checkCastPP(rRegP dst)
8707 %{
8708   match(Set dst (CheckCastPP dst));
8709 
8710   size(0);
8711   format %{ "# checkcastPP of $dst" %}
8712   ins_encode(/* empty encoding */);
8713   ins_pipe(empty);
8714 %}
8715 
// Zero-size rule for CastPP: dst is both input and result, size(0) is
// declared and the encoding is empty, so no bytes are emitted.
8716 instruct castPP(rRegP dst)
8717 %{
8718   match(Set dst (CastPP dst));
8719 
8720   size(0);
8721   format %{ "# castPP of $dst" %}
8722   ins_encode(/* empty encoding */);
8723   ins_pipe(empty);
8724 %}
8725 
// Zero-size rule for CastII: dst is both input and result, size(0) is
// declared and the encoding is empty, so no bytes are emitted. Unlike the
// pointer cast rules above, this one also declares an explicit ins_cost(0).
8726 instruct castII(rRegI dst)
8727 %{
8728   match(Set dst (CastII dst));
8729 
8730   size(0);
8731   format %{ "# castII of $dst" %}
8732   ins_encode(/* empty encoding */);
8733   ins_cost(0);
8734   ins_pipe(empty);
8735 %}
8736 
8737 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked and formats it as a plain movq from memory (opcode 0x8B);
// no lock prefix appears in the encoding.
8738 instruct loadPLocked(rRegP dst, memory mem)
8739 %{
8740   match(Set dst (LoadPLocked mem));
8741 
8742   ins_cost(125); // XXX
8743   format %{ "movq    $dst, $mem\t# ptr locked" %}
8744   opcode(0x8B);
8745   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8746   ins_pipe(ialu_reg_mem); // XXX
8747 %}
8748 
8749 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked and formats it as a plain movq from memory (opcode 0x8B);
// no lock prefix appears in the encoding.
8750 instruct loadLLocked(rRegL dst, memory mem)
8751 %{
8752   match(Set dst (LoadLLocked mem));
8753 
8754   ins_cost(125); // XXX
8755   format %{ "movq    $dst, $mem\t# long locked" %}
8756   opcode(0x8B);
8757   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8758   ins_pipe(ialu_reg_mem); // XXX
8759 %}
8760 
8761 // Conditional-store of the updated heap-top.
8762 // Used during allocation of the shared heap.
8763 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8764 
// The result of the rule is the flags register (Set cr ...). oldval is pinned
// to the rax_RegP class; the encoding is lock_prefix followed by 0F B1
// (cmpxchgq per the format string).
// NOTE(review): unlike storeIConditional and storeLConditional, this rule
// declares no effect(KILL oldval) — confirm that is intentional.
8765 instruct storePConditional(memory heap_top_ptr,
8766                            rax_RegP oldval, rRegP newval,
8767                            rFlagsReg cr)
8768 %{
8769   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8770  
8771   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8772             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8773   opcode(0x0F, 0xB1);
8774   ins_encode(lock_prefix,
8775              REX_reg_mem_wide(newval, heap_top_ptr),
8776              OpcP, OpcS,
8777              reg_mem(newval, heap_top_ptr));
8778   ins_pipe(pipe_cmpxchg);
8779 %}
8780 
8781 // Conditional-store of an int value.
8782 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register (Set cr ...). oldval is pinned
// to the rax_RegI class and declared killed; the encoding is lock_prefix
// followed by 0F B1 (cmpxchgl per the format string).
8783 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8784 %{
8785   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8786   effect(KILL oldval);
8787 
8788   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8789   opcode(0x0F, 0xB1);
8790   ins_encode(lock_prefix,
8791              REX_reg_mem(newval, mem),
8792              OpcP, OpcS,
8793              reg_mem(newval, mem));
8794   ins_pipe(pipe_cmpxchg);
8795 %}
8796 
8797 // Conditional-store of a long value.
8798 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register (Set cr ...). oldval is pinned
// to the rax_RegL class and declared killed; the encoding is lock_prefix
// followed by 0F B1 (cmpxchgq per the format string).
8799 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8800 %{
8801   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8802   effect(KILL oldval);
8803 
8804   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8805   opcode(0x0F, 0xB1);
8806   ins_encode(lock_prefix,
8807              REX_reg_mem_wide(newval, mem),
8808              OpcP, OpcS,
8809              reg_mem(newval, mem));
8810   ins_pipe(pipe_cmpxchg);
8811 %}
8812 
8813 
8814 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result in res.
// Per the format string the sequence is a lock-prefixed cmpxchgq followed by
// sete + movzbl on res. oldval is pinned to the rax_RegP class; both oldval
// and the flags register are declared killed.
8815 instruct compareAndSwapP(rRegI res,
8816                          memory mem_ptr,
8817                          rax_RegP oldval, rRegP newval,
8818                          rFlagsReg cr)
8819 %{
8820   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8821   effect(KILL cr, KILL oldval);
8822 
8823   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8824             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8825             "sete    $res\n\t"
8826             "movzbl  $res, $res" %}
8827   opcode(0x0F, 0xB1);
8828   ins_encode(lock_prefix,
8829              REX_reg_mem_wide(newval, mem_ptr),
8830              OpcP, OpcS,
8831              reg_mem(newval, mem_ptr),
8832              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8833              REX_reg_breg(res, res), // movzbl
8834              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8835   ins_pipe( pipe_cmpxchg );
8836 %}
8837 
// Long compare-and-swap producing an int result in res.
// Per the format string the sequence is a lock-prefixed cmpxchgq followed by
// sete + movzbl on res. oldval is pinned to the rax_RegL class; both oldval
// and the flags register are declared killed.
8838 instruct compareAndSwapL(rRegI res,
8839                          memory mem_ptr,
8840                          rax_RegL oldval, rRegL newval,
8841                          rFlagsReg cr)
8842 %{
8843   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8844   effect(KILL cr, KILL oldval);
8845 
8846   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8847             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8848             "sete    $res\n\t"
8849             "movzbl  $res, $res" %}
8850   opcode(0x0F, 0xB1);
8851   ins_encode(lock_prefix,
8852              REX_reg_mem_wide(newval, mem_ptr),
8853              OpcP, OpcS,
8854              reg_mem(newval, mem_ptr),
8855              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8856              REX_reg_breg(res, res), // movzbl
8857              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8858   ins_pipe( pipe_cmpxchg );
8859 %}
8860 
// Int compare-and-swap producing an int result in res.
// Per the format string the sequence is a lock-prefixed cmpxchgl followed by
// sete + movzbl on res. oldval is pinned to the rax_RegI class; both oldval
// and the flags register are declared killed.
8861 instruct compareAndSwapI(rRegI res,
8862                          memory mem_ptr,
8863                          rax_RegI oldval, rRegI newval,
8864                          rFlagsReg cr)
8865 %{
8866   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8867   effect(KILL cr, KILL oldval);
8868 
8869   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8870             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8871             "sete    $res\n\t"
8872             "movzbl  $res, $res" %}
8873   opcode(0x0F, 0xB1);
8874   ins_encode(lock_prefix,
8875              REX_reg_mem(newval, mem_ptr),
8876              OpcP, OpcS,
8877              reg_mem(newval, mem_ptr),
8878              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8879              REX_reg_breg(res, res), // movzbl
8880              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8881   ins_pipe( pipe_cmpxchg );
8882 %}
8883 
8884 
// Compare-and-swap on rRegN operands producing an int result in res.
// Per the format string the sequence is a lock-prefixed cmpxchgl (the
// non-wide REX_reg_mem encoding is used) followed by sete + movzbl on res.
// oldval is pinned to the rax_RegN class; both oldval and the flags register
// are declared killed.
8885 instruct compareAndSwapN(rRegI res,
8886                           memory mem_ptr,
8887                           rax_RegN oldval, rRegN newval,
8888                           rFlagsReg cr) %{
8889   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8890   effect(KILL cr, KILL oldval);
8891 
8892   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8893             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8894             "sete    $res\n\t"
8895             "movzbl  $res, $res" %}
8896   opcode(0x0F, 0xB1);
8897   ins_encode(lock_prefix,
8898              REX_reg_mem(newval, mem_ptr),
8899              OpcP, OpcS,
8900              reg_mem(newval, mem_ptr),
8901              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8902              REX_reg_breg(res, res), // movzbl
8903              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8904   ins_pipe( pipe_cmpxchg );
8905 %}
8906 
8907 //----------Subtraction Instructions-------------------------------------------
8908 
8909 // Integer Subtraction Instructions
// Int subtract, register -= register.
// dst is both an input and the result of the SubI; flags are declared killed.
8910 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8911 %{
8912   match(Set dst (SubI dst src));
8913   effect(KILL cr);
8914 
8915   format %{ "subl    $dst, $src\t# int" %}
8916   opcode(0x2B);
8917   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8918   ins_pipe(ialu_reg_reg);
8919 %}
8920 
// Int subtract, register -= immI immediate (opcode 0x81 with /5 extension).
// dst is both an input and the result; flags are declared killed.
8921 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8922 %{
8923   match(Set dst (SubI dst src));
8924   effect(KILL cr);
8925 
8926   format %{ "subl    $dst, $src\t# int" %}
8927   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8928   ins_encode(OpcSErm(dst, src), Con8or32(src));
8929   ins_pipe(ialu_reg);
8930 %}
8931 
// Int subtract, register -= memory operand.
// The match rule folds the LoadI of src into the subtract; flags are declared
// killed.
8932 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8933 %{
8934   match(Set dst (SubI dst (LoadI src)));
8935   effect(KILL cr);
8936 
8937   ins_cost(125);
8938   format %{ "subl    $dst, $src\t# int" %}
8939   opcode(0x2B);
8940   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8941   ins_pipe(ialu_reg_mem);
8942 %}
8943 
// Int subtract performed directly on memory, memory -= register.
// Matches a load / subtract / store of the same memory operand and formats it
// as a single subl with a memory destination; flags are declared killed.
8944 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8945 %{
8946   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8947   effect(KILL cr);
8948 
8949   ins_cost(150);
8950   format %{ "subl    $dst, $src\t# int" %}
8951   opcode(0x29); /* Opcode 29 /r */
8952   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8953   ins_pipe(ialu_mem_reg);
8954 %}
8955 
// Int subtract performed directly on memory, memory -= immI immediate.
// Matches a load / subtract / store of the same memory operand; the ModRM
// opcode extension 0x05 is passed explicitly to RM_opc_mem. Flags are
// declared killed.
8956 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8957 %{
8958   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8959   effect(KILL cr);
8960 
8961   ins_cost(125); // XXX
8962   format %{ "subl    $dst, $src\t# int" %}
8963   opcode(0x81); /* Opcode 81 /5 id */
8964   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8965   ins_pipe(ialu_mem_imm);
8966 %}
8967 
// Long subtract, register -= register.
// dst is both an input and the result of the SubL; flags are declared killed.
// This rule is also invoked by the divL_10 expansion.
8968 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8969 %{
8970   match(Set dst (SubL dst src));
8971   effect(KILL cr);
8972 
8973   format %{ "subq    $dst, $src\t# long" %}
8974   opcode(0x2B);
8975   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8976   ins_pipe(ialu_reg_reg);
8977 %}
8978 
// Long subtract, register -= immL32 immediate (opcode 0x81 with /5 extension).
// dst is the input and result of a long SubL, so it is declared with the long
// register class rRegL (matching subL_rReg) rather than the int class rRegI.
// Flags are declared killed.
8979 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8980 %{
8981   match(Set dst (SubL dst src));
8982   effect(KILL cr);
8983 
8984   format %{ "subq    $dst, $src\t# long" %}
8985   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8986   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8987   ins_pipe(ialu_reg);
8988 %}
8989 
// Long subtract, register -= memory operand.
// The match rule folds the LoadL of src into the subtract; flags are declared
// killed.
8990 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8991 %{
8992   match(Set dst (SubL dst (LoadL src)));
8993   effect(KILL cr);
8994 
8995   ins_cost(125);
8996   format %{ "subq    $dst, $src\t# long" %}
8997   opcode(0x2B);
8998   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8999   ins_pipe(ialu_reg_mem);
9000 %}
9001 
// Long subtract performed directly on memory, memory -= register.
// Matches a load / subtract / store of the same memory operand and formats it
// as a single subq with a memory destination; flags are declared killed.
9002 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9003 %{
9004   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
9005   effect(KILL cr);
9006 
9007   ins_cost(150);
9008   format %{ "subq    $dst, $src\t# long" %}
9009   opcode(0x29); /* Opcode 29 /r */
9010   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9011   ins_pipe(ialu_mem_reg);
9012 %}
9013 
// Long subtract performed directly on memory, memory -= immL32 immediate.
// Matches a load / subtract / store of the same memory operand; the ModRM
// opcode extension 0x05 is passed explicitly to RM_opc_mem. Flags are
// declared killed.
9014 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9015 %{
9016   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
9017   effect(KILL cr);
9018 
9019   ins_cost(125); // XXX
9020   format %{ "subq    $dst, $src\t# long" %}
9021   opcode(0x81); /* Opcode 81 /5 id */
9022   ins_encode(REX_mem_wide(dst),
9023              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
9024   ins_pipe(ialu_mem_imm);
9025 %}
9026 
9027 // Subtract from a pointer
// Matches AddP of dst with a negated int (SubI zero src) and formats it as a
// single subq; flags are declared killed.
9028 // XXX hmpf??? NOTE(review): src is an int register (rRegI) but the
// format and encoding (REX_reg_reg_wide) are a 64-bit subq — confirm how the
// upper half of src is expected to be defined for this rule.
9029 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
9030 %{
9031   match(Set dst (AddP dst (SubI zero src)));
9032   effect(KILL cr);
9033 
9034   format %{ "subq    $dst, $src\t# ptr - int" %}
9035   opcode(0x2B);
9036   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9037   ins_pipe(ialu_reg_reg);
9038 %}
9039 
// Int negate of a register: matches (SubI zero dst) with an immI0 operand
// and formats as negl (F7 /3). Flags are declared killed.
9040 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
9041 %{
9042   match(Set dst (SubI zero dst));
9043   effect(KILL cr);
9044 
9045   format %{ "negl    $dst\t# int" %}
9046   opcode(0xF7, 0x03);  // Opcode F7 /3
9047   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9048   ins_pipe(ialu_reg);
9049 %}
9050 
// Int negate performed directly on memory: matches load / (SubI zero value) /
// store of the same memory operand and formats as negl (F7 /3).
// Flags are declared killed.
// The destination is a memory operand, so the rule uses the ialu_mem_imm
// pipeline class like the other unary memory forms in this file (incL_mem,
// decL_mem, salI_mem_1) instead of the register-only ialu_reg class.
9051 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
9052 %{
9053   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
9054   effect(KILL cr);
9055 
9056   format %{ "negl    $dst\t# int" %}
9057   opcode(0xF7, 0x03);  // Opcode F7 /3
9058   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9059   ins_pipe(ialu_mem_imm);
9060 %}
9061 
// Long negate of a register: matches (SubL zero dst) with an immL0 operand
// and formats as negq (F7 /3). Flags are declared killed.
9062 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
9063 %{
9064   match(Set dst (SubL zero dst));
9065   effect(KILL cr);
9066 
9067   format %{ "negq    $dst\t# long" %}
9068   opcode(0xF7, 0x03);  // Opcode F7 /3
9069   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9070   ins_pipe(ialu_reg);
9071 %}
9072 
// Long negate performed directly on memory: matches load / (SubL zero value) /
// store of the same memory operand and formats as negq (F7 /3).
// Flags are declared killed.
// The destination is a memory operand, so the rule uses the ialu_mem_imm
// pipeline class like the other unary memory forms in this file (incL_mem,
// decL_mem) instead of the register-only ialu_reg class.
9073 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
9074 %{
9075   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
9076   effect(KILL cr);
9077 
9078   format %{ "negq    $dst\t# long" %}
9079   opcode(0xF7, 0x03);  // Opcode F7 /3
9080   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9081   ins_pipe(ialu_mem_imm);
9082 %}
9083 
9084 
9085 //----------Multiplication/Division Instructions-------------------------------
9086 // Integer Multiplication Instructions
9087 // Multiply Register
9088 
// Int multiply, register *= register (two-byte opcode 0F AF).
// dst is both an input and the result of the MulI; flags are declared killed.
9089 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9090 %{
9091   match(Set dst (MulI dst src));
9092   effect(KILL cr);
9093 
9094   ins_cost(300);
9095   format %{ "imull   $dst, $src\t# int" %}
9096   opcode(0x0F, 0xAF);
9097   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9098   ins_pipe(ialu_reg_reg_alu0);
9099 %}
9100 
// Int multiply, three-operand form: dst = src * immI immediate (opcode 0x69).
// dst is not tied to src in the match rule; flags are declared killed.
9101 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
9102 %{
9103   match(Set dst (MulI src imm));
9104   effect(KILL cr);
9105 
9106   ins_cost(300);
9107   format %{ "imull   $dst, $src, $imm\t# int" %}
9108   opcode(0x69); /* 69 /r id */
9109   ins_encode(REX_reg_reg(dst, src),
9110              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9111   ins_pipe(ialu_reg_reg_alu0);
9112 %}
9113 
// Int multiply, register *= memory operand (two-byte opcode 0F AF).
// The match rule folds the LoadI of src into the multiply; flags are declared
// killed.
9114 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
9115 %{
9116   match(Set dst (MulI dst (LoadI src)));
9117   effect(KILL cr);
9118 
9119   ins_cost(350);
9120   format %{ "imull   $dst, $src\t# int" %}
9121   opcode(0x0F, 0xAF);
9122   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
9123   ins_pipe(ialu_reg_mem_alu0);
9124 %}
9125 
// Int multiply, three-operand form: dst = memory operand * immI immediate
// (opcode 0x69). The LoadI of src is folded into the multiply; flags are
// declared killed.
9126 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
9127 %{
9128   match(Set dst (MulI (LoadI src) imm));
9129   effect(KILL cr);
9130 
9131   ins_cost(300);
9132   format %{ "imull   $dst, $src, $imm\t# int" %}
9133   opcode(0x69); /* 69 /r id */
9134   ins_encode(REX_reg_mem(dst, src),
9135              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9136   ins_pipe(ialu_reg_mem_alu0);
9137 %}
9138 
// Long multiply, register *= register (two-byte opcode 0F AF).
// dst is both an input and the result of the MulL; flags are declared killed.
9139 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9140 %{
9141   match(Set dst (MulL dst src));
9142   effect(KILL cr);
9143 
9144   ins_cost(300);
9145   format %{ "imulq   $dst, $src\t# long" %}
9146   opcode(0x0F, 0xAF);
9147   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
9148   ins_pipe(ialu_reg_reg_alu0);
9149 %}
9150 
// Long multiply, three-operand form: dst = src * immL32 immediate (opcode
// 0x69). dst is not tied to src in the match rule; flags are declared killed.
9151 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
9152 %{
9153   match(Set dst (MulL src imm));
9154   effect(KILL cr);
9155 
9156   ins_cost(300);
9157   format %{ "imulq   $dst, $src, $imm\t# long" %}
9158   opcode(0x69); /* 69 /r id */
9159   ins_encode(REX_reg_reg_wide(dst, src),
9160              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9161   ins_pipe(ialu_reg_reg_alu0);
9162 %}
9163 
// Long multiply, register *= memory operand (two-byte opcode 0F AF).
// The match rule folds the LoadL of src into the multiply; flags are declared
// killed.
9164 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
9165 %{
9166   match(Set dst (MulL dst (LoadL src)));
9167   effect(KILL cr);
9168 
9169   ins_cost(350);
9170   format %{ "imulq   $dst, $src\t# long" %}
9171   opcode(0x0F, 0xAF);
9172   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
9173   ins_pipe(ialu_reg_mem_alu0);
9174 %}
9175 
// Long multiply, three-operand form: dst = memory operand * immL32 immediate
// (opcode 0x69). The LoadL of src is folded into the multiply; flags are
// declared killed.
9176 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
9177 %{
9178   match(Set dst (MulL (LoadL src) imm));
9179   effect(KILL cr);
9180 
9181   ins_cost(300);
9182   format %{ "imulq   $dst, $src, $imm\t# long" %}
9183   opcode(0x69); /* 69 /r id */
9184   ins_encode(REX_reg_mem_wide(dst, src),
9185              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9186   ins_pipe(ialu_reg_mem_alu0);
9187 %}
9188 
// Matches MulHiL using the one-operand multiply F7 /5 (formatted as
// "imulq RDX:RAX, RAX, $src"). The result operand is in the rdx register
// class; rax is an input that is also killed (USE_KILL), and src is drawn
// from a class that excludes rax. Flags are declared killed.
9189 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9190 %{
9191   match(Set dst (MulHiL src rax));
9192   effect(USE_KILL rax, KILL cr);
9193 
9194   ins_cost(300);
9195   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
9196   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9197   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9198   ins_pipe(ialu_reg_reg_alu0);
9199 %}
9200 
// Int division. The dividend and the quotient share the rax operand; rdx and
// the flags register are declared killed, and the divisor comes from a class
// excluding rax and rdx. Per the format string, the emitted sequence first
// checks for 0x80000000 / -1 and skips the idivl in that case (rdx is zeroed);
// otherwise it runs cdql followed by idivl (F7 /7).
9201 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9202                    rFlagsReg cr)
9203 %{
9204   match(Set rax (DivI rax div));
9205   effect(KILL rdx, KILL cr);
9206 
9207   ins_cost(30*100+10*100); // XXX
9208   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9209             "jne,s   normal\n\t"
9210             "xorl    rdx, rdx\n\t"
9211             "cmpl    $div, -1\n\t"
9212             "je,s    done\n"
9213     "normal: cdql\n\t"
9214             "idivl   $div\n"
9215     "done:"        %}
9216   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9217   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9218   ins_pipe(ialu_reg_reg_alu0);
9219 %}
9220 
// Long division. The dividend and the quotient share the rax operand; rdx and
// the flags register are declared killed, and the divisor comes from a class
// excluding rax and rdx. Per the format string, the emitted sequence first
// checks for 0x8000000000000000 / -1 and skips the idivq in that case (rdx is
// zeroed); otherwise it runs cdqq followed by idivq (F7 /7).
9221 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9222                    rFlagsReg cr)
9223 %{
9224   match(Set rax (DivL rax div));
9225   effect(KILL rdx, KILL cr);
9226 
9227   ins_cost(30*100+10*100); // XXX
9228   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9229             "cmpq    rax, rdx\n\t"
9230             "jne,s   normal\n\t"
9231             "xorl    rdx, rdx\n\t"
9232             "cmpq    $div, -1\n\t"
9233             "je,s    done\n"
9234     "normal: cdqq\n\t"
9235             "idivq   $div\n"
9236     "done:"        %}
9237   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9238   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9239   ins_pipe(ialu_reg_reg_alu0);
9240 %}
9241 
9242 // Integer DIVMOD with Register, both quotient and mod results
// The match rule has no Set; only the flags register is declared killed, so
// neither rax nor rdx is treated as clobbered. The format and encoding are the
// same sequence as divI_rReg (special case for 0x80000000 / -1, then
// cdql + idivl). NOTE(review): presumably the quotient is taken from rax and
// the remainder from rdx; the result projections are not visible in this chunk.
9243 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9244                              rFlagsReg cr)
9245 %{
9246   match(DivModI rax div);
9247   effect(KILL cr);
9248 
9249   ins_cost(30*100+10*100); // XXX
9250   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9251             "jne,s   normal\n\t"
9252             "xorl    rdx, rdx\n\t"
9253             "cmpl    $div, -1\n\t"
9254             "je,s    done\n"
9255     "normal: cdql\n\t"
9256             "idivl   $div\n"
9257     "done:"        %}
9258   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9259   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9260   ins_pipe(pipe_slow);
9261 %}
9262 
9263 // Long DIVMOD with Register, both quotient and mod results
// The match rule has no Set; only the flags register is declared killed, so
// neither rax nor rdx is treated as clobbered. The format and encoding are the
// same sequence as divL_rReg (special case for 0x8000000000000000 / -1, then
// cdqq + idivq). NOTE(review): presumably the quotient is taken from rax and
// the remainder from rdx; the result projections are not visible in this chunk.
9264 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9265                              rFlagsReg cr)
9266 %{
9267   match(DivModL rax div);
9268   effect(KILL cr);
9269 
9270   ins_cost(30*100+10*100); // XXX
9271   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9272             "cmpq    rax, rdx\n\t"
9273             "jne,s   normal\n\t"
9274             "xorl    rdx, rdx\n\t"
9275             "cmpq    $div, -1\n\t"
9276             "je,s    done\n"
9277     "normal: cdqq\n\t"
9278             "idivq   $div\n"
9279     "done:"        %}
9280   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9281   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9282   ins_pipe(pipe_slow);
9283 %}
9284 
9285 //----------- DivL-By-Constant-Expansions--------------------------------------
9286 // DivI cases are handled by the compiler
9287 
9288 // Magic constant, reciprocal of 10
// Helper with no match rule: it only defines dst and is instantiated by the
// divL_10 expansion. The format string prints the same 16-digit constant that
// the encoding loads (0x6666666666666667); it previously dropped one digit.
9289 instruct loadConL_0x6666666666666667(rRegL dst)
9290 %{
9291   effect(DEF dst);
9292 
9293   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
9294   ins_encode(load_immL(dst, 0x6666666666666667));
9295   ins_pipe(ialu_reg);
9296 %}
9297 
// Helper with no match rule, instantiated by the divL_10 expansion.
// One-operand multiply F7 /5 (formatted as "imulq rdx:rax, rax, $src"):
// dst (rdx class) is defined, src is used, rax is used and killed, and the
// flags register is killed.
9298 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9299 %{
9300   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
9301 
9302   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
9303   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9304   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9305   ins_pipe(ialu_reg_reg_alu0);
9306 %}
9307 
// Helper with no match rule, instantiated by the divL_10 expansion.
// Arithmetic right shift of dst by the fixed count 0x3F (63), in place
// (USE_DEF dst); flags are declared killed.
9308 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
9309 %{
9310   effect(USE_DEF dst, KILL cr);
9311 
9312   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
9313   opcode(0xC1, 0x7); /* C1 /7 ib */
9314   ins_encode(reg_opc_imm_wide(dst, 0x3F));
9315   ins_pipe(ialu_reg);
9316 %}
9317 
// Helper with no match rule, instantiated by the divL_10 expansion.
// Arithmetic right shift of dst by the fixed count 2, in place (USE_DEF dst);
// flags are declared killed.
9318 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
9319 %{
9320   effect(USE_DEF dst, KILL cr);
9321 
9322   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
9323   opcode(0xC1, 0x7); /* C1 /7 ib */
9324   ins_encode(reg_opc_imm_wide(dst, 0x2));
9325   ins_pipe(ialu_reg);
9326 %}
9327 
// Long division by the constant matched by immL10, expanded into a
// multiply-high by the magic constant 0x6666666666666667 plus shifts and a
// subtract instead of an idivq. The result is produced in the rdx register
// class; rax and the flags register are temporaries of the expansion.
9328 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
9329 %{
9330   match(Set dst (DivL src div));
9331 
9332   ins_cost((5+8)*100);
9333   expand %{
9334     rax_RegL rax;                     // Killed temp
9335     rFlagsReg cr;                     // Killed
9336     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9337     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9338     sarL_rReg_63(src, cr);            // sarq  src, 63
9339     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9340     subL_rReg(dst, src, cr);          // subq  rdx, src
9341   %}
9342 %}
9343 
9344 //-----------------------------------------------------------------------------
9345 
// Int remainder. The dividend is in the rax operand and the result is the rdx
// operand; rax and the flags register are declared killed. The format and
// encoding are the same sequence as divI_rReg: a special case for
// 0x80000000 / -1 (rdx zeroed, idivl skipped), otherwise cdql + idivl.
9346 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9347                    rFlagsReg cr)
9348 %{
9349   match(Set rdx (ModI rax div));
9350   effect(KILL rax, KILL cr);
9351 
9352   ins_cost(300); // XXX
9353   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9354             "jne,s   normal\n\t"
9355             "xorl    rdx, rdx\n\t"
9356             "cmpl    $div, -1\n\t"
9357             "je,s    done\n"
9358     "normal: cdql\n\t"
9359             "idivl   $div\n"
9360     "done:"        %}
9361   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9362   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9363   ins_pipe(ialu_reg_reg_alu0);
9364 %}
9365 
// Long remainder. The dividend is in the rax operand and the result is the rdx
// operand; rax and the flags register are declared killed. The format and
// encoding are the same sequence as divL_rReg: a special case for
// 0x8000000000000000 / -1 (rdx zeroed, idivq skipped), otherwise cdqq + idivq.
9366 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9367                    rFlagsReg cr)
9368 %{
9369   match(Set rdx (ModL rax div));
9370   effect(KILL rax, KILL cr);
9371 
9372   ins_cost(300); // XXX
9373   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9374             "cmpq    rax, rdx\n\t"
9375             "jne,s   normal\n\t"
9376             "xorl    rdx, rdx\n\t"
9377             "cmpq    $div, -1\n\t"
9378             "je,s    done\n"
9379     "normal: cdqq\n\t"
9380             "idivq   $div\n"
9381     "done:"        %}
9382   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9383   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9384   ins_pipe(ialu_reg_reg_alu0);
9385 %}
9386 
9387 // Integer Shift Instructions
9388 // Shift Left by one
// Register form using the D1 /4 encoding, which carries no immediate byte;
// the immI1 operand only selects the rule. Flags are declared killed.
9389 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9390 %{
9391   match(Set dst (LShiftI dst shift));
9392   effect(KILL cr);
9393 
9394   format %{ "sall    $dst, $shift" %}
9395   opcode(0xD1, 0x4); /* D1 /4 */
9396   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9397   ins_pipe(ialu_reg);
9398 %}
9399 
9400 // Shift Left by one
// Memory form: matches load / shift / store of the same memory operand, using
// the D1 /4 encoding (no immediate byte). Flags are declared killed.
9401 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9402 %{
9403   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9404   effect(KILL cr);
9405 
9406   format %{ "sall    $dst, $shift\t" %}
9407   opcode(0xD1, 0x4); /* D1 /4 */
9408   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9409   ins_pipe(ialu_mem_imm);
9410 %}
9411 
9412 // Shift Left by 8-bit immediate
// Register form using the C1 /4 ib encoding; dst is both input and result and
// flags are declared killed.
9413 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9414 %{
9415   match(Set dst (LShiftI dst shift));
9416   effect(KILL cr);
9417 
9418   format %{ "sall    $dst, $shift" %}
9419   opcode(0xC1, 0x4); /* C1 /4 ib */
9420   ins_encode(reg_opc_imm(dst, shift));
9421   ins_pipe(ialu_reg);
9422 %}
9423 
9424 // Shift Left by 8-bit immediate
// Memory form: matches load / shift / store of the same memory operand, using
// the C1 /4 ib encoding with the shift count emitted via Con8or32. Flags are
// declared killed.
9425 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9426 %{
9427   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9428   effect(KILL cr);
9429 
9430   format %{ "sall    $dst, $shift" %}
9431   opcode(0xC1, 0x4); /* C1 /4 ib */
9432   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9433   ins_pipe(ialu_mem_imm);
9434 %}
9435 
9436 // Shift Left by variable
// Register form using the D3 /4 encoding; the shift count operand is pinned to
// the rcx_RegI class and does not appear in the encoding. Flags are declared
// killed.
9437 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9438 %{
9439   match(Set dst (LShiftI dst shift));
9440   effect(KILL cr);
9441 
9442   format %{ "sall    $dst, $shift" %}
9443   opcode(0xD3, 0x4); /* D3 /4 */
9444   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9445   ins_pipe(ialu_reg_reg);
9446 %}
9447 
9448 // Shift Left by variable
// Memory form: matches load / shift / store of the same memory operand, using
// the D3 /4 encoding with the count operand pinned to the rcx_RegI class.
// Flags are declared killed.
9449 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9450 %{
9451   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9452   effect(KILL cr);
9453 
9454   format %{ "sall    $dst, $shift" %}
9455   opcode(0xD3, 0x4); /* D3 /4 */
9456   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9457   ins_pipe(ialu_mem_reg);
9458 %}
9459 
9460 // Arithmetic shift right by one
// Register form using the D1 /7 encoding, which carries no immediate byte;
// the immI1 operand only selects the rule. Flags are declared killed.
9461 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9462 %{
9463   match(Set dst (RShiftI dst shift));
9464   effect(KILL cr);
9465 
9466   format %{ "sarl    $dst, $shift" %}
9467   opcode(0xD1, 0x7); /* D1 /7 */
9468   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9469   ins_pipe(ialu_reg);
9470 %}
9471 
9472 // Arithmetic shift right by one
// Memory form: matches load / shift / store of the same memory operand, using
// the D1 /7 encoding (no immediate byte). Flags are declared killed.
9473 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9474 %{
9475   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9476   effect(KILL cr);
9477 
9478   format %{ "sarl    $dst, $shift" %}
9479   opcode(0xD1, 0x7); /* D1 /7 */
9480   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9481   ins_pipe(ialu_mem_imm);
9482 %}
9483 
9484 // Arithmetic Shift Right by 8-bit immediate
// Register form using the C1 /7 ib encoding; dst is both input and result and
// flags are declared killed. The operand is a register, so the rule uses the
// ialu_reg pipeline class like salI_rReg_imm and shrI_rReg_imm rather than
// the memory class ialu_mem_imm.
9485 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9486 %{
9487   match(Set dst (RShiftI dst shift));
9488   effect(KILL cr);
9489 
9490   format %{ "sarl    $dst, $shift" %}
9491   opcode(0xC1, 0x7); /* C1 /7 ib */
9492   ins_encode(reg_opc_imm(dst, shift));
9493   ins_pipe(ialu_reg);
9494 %}
9495 
9496 // Arithmetic Shift Right by 8-bit immediate
// Memory form: matches load / shift / store of the same memory operand, using
// the C1 /7 ib encoding with the shift count emitted via Con8or32. Flags are
// declared killed.
9497 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9498 %{
9499   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9500   effect(KILL cr);
9501 
9502   format %{ "sarl    $dst, $shift" %}
9503   opcode(0xC1, 0x7); /* C1 /7 ib */
9504   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9505   ins_pipe(ialu_mem_imm);
9506 %}
9507 
9508 // Arithmetic Shift Right by variable
// Register form using the D3 /7 encoding; the shift count operand is pinned to
// the rcx_RegI class and does not appear in the encoding. Flags are declared
// killed.
9509 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9510 %{
9511   match(Set dst (RShiftI dst shift));
9512   effect(KILL cr);
9513 
9514   format %{ "sarl    $dst, $shift" %}
9515   opcode(0xD3, 0x7); /* D3 /7 */
9516   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9517   ins_pipe(ialu_reg_reg);
9518 %}
9519 
9520 // Arithmetic Shift Right by variable
// Memory form: matches load / shift / store of the same memory operand, using
// the D3 /7 encoding with the count operand pinned to the rcx_RegI class.
// Flags are declared killed.
9521 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9522 %{
9523   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9524   effect(KILL cr);
9525 
9526   format %{ "sarl    $dst, $shift" %}
9527   opcode(0xD3, 0x7); /* D3 /7 */
9528   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9529   ins_pipe(ialu_mem_reg);
9530 %}
9531 
9532 // Logical shift right by one
// Register form matching URShiftI, using the D1 /5 encoding (no immediate
// byte); the immI1 operand only selects the rule. Flags are declared killed.
9533 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9534 %{
9535   match(Set dst (URShiftI dst shift));
9536   effect(KILL cr);
9537 
9538   format %{ "shrl    $dst, $shift" %}
9539   opcode(0xD1, 0x5); /* D1 /5 */
9540   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9541   ins_pipe(ialu_reg);
9542 %}
9543 
9544 // Logical shift right by one
// Memory form: matches load / URShiftI / store of the same memory operand,
// using the D1 /5 encoding (no immediate byte). Flags are declared killed.
9545 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9546 %{
9547   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9548   effect(KILL cr);
9549 
9550   format %{ "shrl    $dst, $shift" %}
9551   opcode(0xD1, 0x5); /* D1 /5 */
9552   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9553   ins_pipe(ialu_mem_imm);
9554 %}
9555 
9556 // Logical Shift Right by 8-bit immediate
// Register form matching URShiftI, using the C1 /5 ib encoding; dst is both
// input and result and flags are declared killed.
9557 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9558 %{
9559   match(Set dst (URShiftI dst shift));
9560   effect(KILL cr);
9561 
9562   format %{ "shrl    $dst, $shift" %}
9563   opcode(0xC1, 0x5); /* C1 /5 ib */
9564   ins_encode(reg_opc_imm(dst, shift));
9565   ins_pipe(ialu_reg);
9566 %}
9567 
9568 // Logical shift right of an int in memory by an 8-bit immediate count; load, shift and store are matched as one instruction.
9569 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9570 %{
9571   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // read-modify-write of the same memory operand
9572   effect(KILL cr); // flags are declared clobbered
9573 
9574   format %{ "shrl    $dst, $shift" %}
9575   opcode(0xC1, 0x5); /* C1 /5 ib */
9576   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // count is appended after the memory operand
9577   ins_pipe(ialu_mem_imm);
9578 %}
9579 
9580 // Logical shift right of an int register by a variable count supplied in the rcx_RegI operand.
9581 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9582 %{
9583   match(Set dst (URShiftI dst shift));
9584   effect(KILL cr); // flags are declared clobbered
9585 
9586   format %{ "shrl    $dst, $shift" %}
9587   opcode(0xD3, 0x5); /* D3 /5 */
9588   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding
9589   ins_pipe(ialu_reg_reg);
9590 %}
9591 
9592 // Logical shift right of an int in memory by a variable count (rcx_RegI operand); load, shift and store are matched as one instruction.
9593 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9594 %{
9595   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // read-modify-write of the same memory operand
9596   effect(KILL cr); // flags are declared clobbered
9597 
9598   format %{ "shrl    $dst, $shift" %}
9599   opcode(0xD3, 0x5); /* D3 /5 */
9600   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9601   ins_pipe(ialu_mem_reg);
9602 %}
9603 
9604 // Long Shift Instructions
9605 // Shift left of a long register by one (immI1 count), using the D1 opcode form.
9606 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9607 %{
9608   match(Set dst (LShiftL dst shift));
9609   effect(KILL cr); // flags are declared clobbered
9610 
9611   format %{ "salq    $dst, $shift" %}
9612   opcode(0xD1, 0x4); /* D1 /4 */
9613   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // long form uses the _wide REX encoder
9614   ins_pipe(ialu_reg);
9615 %}
9616 
9617 // Shift left of a long in memory by one (immI1 count); load, shift and store are matched as one instruction.
9618 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9619 %{
9620   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9621   effect(KILL cr); // flags are declared clobbered
9622 
9623   format %{ "salq    $dst, $shift" %}
9624   opcode(0xD1, 0x4); /* D1 /4 */
9625   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9626   ins_pipe(ialu_mem_imm);
9627 %}
9628 
9629 // Shift left of a long register by an 8-bit immediate count.
9630 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9631 %{
9632   match(Set dst (LShiftL dst shift));
9633   effect(KILL cr); // flags are declared clobbered
9634 
9635   format %{ "salq    $dst, $shift" %}
9636   opcode(0xC1, 0x4); /* C1 /4 ib */
9637   ins_encode(reg_opc_imm_wide(dst, shift)); // single encoder receives both the register and the count
9638   ins_pipe(ialu_reg);
9639 %}
9640 
9641 // Shift left of a long in memory by an 8-bit immediate count; load, shift and store are matched as one instruction.
9642 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9643 %{
9644   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9645   effect(KILL cr); // flags are declared clobbered
9646 
9647   format %{ "salq    $dst, $shift" %}
9648   opcode(0xC1, 0x4); /* C1 /4 ib */
9649   ins_encode(REX_mem_wide(dst), OpcP,
9650              RM_opc_mem(secondary, dst), Con8or32(shift)); // count is appended after the memory operand
9651   ins_pipe(ialu_mem_imm);
9652 %}
9653 
9654 // Shift left of a long register by a variable count supplied in the rcx_RegI operand.
9655 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9656 %{
9657   match(Set dst (LShiftL dst shift));
9658   effect(KILL cr); // flags are declared clobbered
9659 
9660   format %{ "salq    $dst, $shift" %}
9661   opcode(0xD3, 0x4); /* D3 /4 */
9662   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding
9663   ins_pipe(ialu_reg_reg);
9664 %}
9665 
9666 // Shift left of a long in memory by a variable count (rcx_RegI operand); load, shift and store are matched as one instruction.
9667 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9668 %{
9669   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9670   effect(KILL cr); // flags are declared clobbered
9671 
9672   format %{ "salq    $dst, $shift" %}
9673   opcode(0xD3, 0x4); /* D3 /4 */
9674   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9675   ins_pipe(ialu_mem_reg);
9676 %}
9677 
9678 // Arithmetic shift right of a long register by one (immI1 count), using the D1 opcode form.
9679 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9680 %{
9681   match(Set dst (RShiftL dst shift));
9682   effect(KILL cr); // flags are declared clobbered
9683 
9684   format %{ "sarq    $dst, $shift" %}
9685   opcode(0xD1, 0x7); /* D1 /7 */
9686   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the count is not emitted as an immediate
9687   ins_pipe(ialu_reg);
9688 %}
9689 
9690 // Arithmetic shift right of a long in memory by one (immI1 count); load, shift and store are matched as one instruction.
9691 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9692 %{
9693   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9694   effect(KILL cr); // flags are declared clobbered
9695 
9696   format %{ "sarq    $dst, $shift" %}
9697   opcode(0xD1, 0x7); /* D1 /7 */
9698   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9699   ins_pipe(ialu_mem_imm);
9700 %}
9701 
9702 // Arithmetic shift right of a long register by an 8-bit immediate count.
9703 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9704 %{
9705   match(Set dst (RShiftL dst shift));
9706   effect(KILL cr); // flags are declared clobbered
9707 
9708   format %{ "sarq    $dst, $shift" %}
9709   opcode(0xC1, 0x7); /* C1 /7 ib */
9710   ins_encode(reg_opc_imm_wide(dst, shift)); // single encoder receives both the register and the count
9711   ins_pipe(ialu_reg); // register-only form: same pipe class as salL_rReg_imm and shrL_rReg_imm, not the memory class
9712 %}
9713 
9714 // Arithmetic shift right of a long in memory by an 8-bit immediate count; load, shift and store are matched as one instruction.
9715 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9716 %{
9717   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9718   effect(KILL cr); // flags are declared clobbered
9719 
9720   format %{ "sarq    $dst, $shift" %}
9721   opcode(0xC1, 0x7); /* C1 /7 ib */
9722   ins_encode(REX_mem_wide(dst), OpcP,
9723              RM_opc_mem(secondary, dst), Con8or32(shift)); // count is appended after the memory operand
9724   ins_pipe(ialu_mem_imm);
9725 %}
9726 
9727 // Arithmetic shift right of a long register by a variable count supplied in the rcx_RegI operand.
9728 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9729 %{
9730   match(Set dst (RShiftL dst shift));
9731   effect(KILL cr); // flags are declared clobbered
9732 
9733   format %{ "sarq    $dst, $shift" %}
9734   opcode(0xD3, 0x7); /* D3 /7 */
9735   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding
9736   ins_pipe(ialu_reg_reg);
9737 %}
9738 
9739 // Arithmetic shift right of a long in memory by a variable count (rcx_RegI operand); load, shift and store are matched as one instruction.
9740 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9741 %{
9742   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9743   effect(KILL cr); // flags are declared clobbered
9744 
9745   format %{ "sarq    $dst, $shift" %}
9746   opcode(0xD3, 0x7); /* D3 /7 */
9747   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9748   ins_pipe(ialu_mem_reg);
9749 %}
9750 
9751 // Logical shift right of a long register by one (immI1 count), using the D1 opcode form.
9752 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9753 %{
9754   match(Set dst (URShiftL dst shift));
9755   effect(KILL cr); // flags are declared clobbered
9756 
9757   format %{ "shrq    $dst, $shift" %}
9758   opcode(0xD1, 0x5); /* D1 /5 */
9759   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst )); // the count is not emitted as an immediate
9760   ins_pipe(ialu_reg);
9761 %}
9762 
9763 // Logical shift right of a long in memory by one (immI1 count); load, shift and store are matched as one instruction.
9764 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9765 %{
9766   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9767   effect(KILL cr); // flags are declared clobbered
9768 
9769   format %{ "shrq    $dst, $shift" %}
9770   opcode(0xD1, 0x5); /* D1 /5 */
9771   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9772   ins_pipe(ialu_mem_imm);
9773 %}
9774 
9775 // Logical shift right of a long register by an 8-bit immediate count.
9776 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9777 %{
9778   match(Set dst (URShiftL dst shift));
9779   effect(KILL cr); // flags are declared clobbered
9780 
9781   format %{ "shrq    $dst, $shift" %}
9782   opcode(0xC1, 0x5); /* C1 /5 ib */
9783   ins_encode(reg_opc_imm_wide(dst, shift)); // single encoder receives both the register and the count
9784   ins_pipe(ialu_reg);
9785 %}
9786 
9787 
9788 // Logical shift right of a long in memory by an 8-bit immediate count; load, shift and store are matched as one instruction.
9789 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9790 %{
9791   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9792   effect(KILL cr); // flags are declared clobbered
9793 
9794   format %{ "shrq    $dst, $shift" %}
9795   opcode(0xC1, 0x5); /* C1 /5 ib */
9796   ins_encode(REX_mem_wide(dst), OpcP,
9797              RM_opc_mem(secondary, dst), Con8or32(shift)); // count is appended after the memory operand
9798   ins_pipe(ialu_mem_imm);
9799 %}
9800 
9801 // Logical shift right of a long register by a variable count supplied in the rcx_RegI operand.
9802 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9803 %{
9804   match(Set dst (URShiftL dst shift));
9805   effect(KILL cr); // flags are declared clobbered
9806 
9807   format %{ "shrq    $dst, $shift" %}
9808   opcode(0xD3, 0x5); /* D3 /5 */
9809   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding
9810   ins_pipe(ialu_reg_reg);
9811 %}
9812 
9813 // Logical shift right of a long in memory by a variable count (rcx_RegI operand); load, shift and store are matched as one instruction.
9814 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9815 %{
9816   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // read-modify-write of the same memory operand
9817   effect(KILL cr); // flags are declared clobbered
9818 
9819   format %{ "shrq    $dst, $shift" %}
9820   opcode(0xD3, 0x5); /* D3 /5 */
9821   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9822   ins_pipe(ialu_mem_reg);
9823 %}
9824 
9825 // Shift Left by 24, followed by Arithmetic Shift Right by 24 (sign-extends the low byte).
9826 // This idiom is used by the compiler for the i2b bytecode; it is matched to a single movsbl.
9827 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9828 %{
9829   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // inner left shift, outer arithmetic right shift
9830 
9831   format %{ "movsbl  $dst, $src\t# i2b" %}
9832   opcode(0x0F, 0xBE);
9833   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // no rFlagsReg operand is declared for this form
9834   ins_pipe(ialu_reg_reg);
9835 %}
9836 
9837 // Shift Left by 16, followed by Arithmetic Shift Right by 16 (sign-extends the low 16 bits).
9838 // This idiom is used by the compiler for the i2s bytecode; it is matched to a single movswl.
9839 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9840 %{
9841   match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // inner left shift, outer arithmetic right shift
9842 
9843   format %{ "movswl  $dst, $src\t# i2s" %}
9844   opcode(0x0F, 0xBF);
9845   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // no rFlagsReg operand is declared for this form
9846   ins_pipe(ialu_reg_reg);
9847 %}
9848 
9849 // ROL/ROR instructions
9850 
9851 // ROL expand
9852 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9853   effect(KILL cr, USE_DEF dst); // no match rule: this int rotate-left-by-one is reached only via expand (see rolI_rReg_i1)
9854 
9855   format %{ "roll    $dst" %}
9856   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9857   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9858   ins_pipe(ialu_reg);
9859 %}
9860 
9861 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9862   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this int rotate-left-by-immediate is reached only via expand (see rolI_rReg_i8)
9863 
9864   format %{ "roll    $dst, $shift" %}
9865   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9866   ins_encode( reg_opc_imm(dst, shift) );
9867   ins_pipe(ialu_reg);
9868 %}
9869 
9870 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9871 %{
9872   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this int rotate-left-by-variable is reached only via expand (see rolI_rReg_Var_C0/C32)
9873 
9874   format %{ "roll    $dst, $shift" %}
9875   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9876   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding; dst uses the no_rcx_RegI class
9877   ins_pipe(ialu_reg_reg);
9878 %}
9879 // end of ROL expand
9880 
9881 // Rotate Left by one: recognizes (dst << lshift) | (dst >>> rshift) with an immI1 left count and an immI_M1 right count.
9882 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9883 %{
9884   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9885 
9886   expand %{
9887     rolI_rReg_imm1(dst, cr); // neither shift constant is forwarded to the expanded instruction
9888   %}
9889 %}
9890 
9891 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift) for two constant counts.
9892 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9893 %{
9894   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // the two counts must sum to a multiple of 32
9895   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9896 
9897   expand %{
9898     rolI_rReg_imm8(dst, lshift, cr); // the left-shift count becomes the rotate count
9899   %}
9900 %}
9901 
9902 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (zero - shift)) where zero is an immI0 constant.
9903 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9904 %{
9905   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9906 
9907   expand %{
9908     rolI_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
9909   %}
9910 %}
9911 
9912 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (c32 - shift)) where c32 is an immI_32 constant.
9913 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9914 %{
9915   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9916 
9917   expand %{
9918     rolI_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
9919   %}
9920 %}
9921 
9922 // ROR expand
9923 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9924 %{
9925   effect(USE_DEF dst, KILL cr); // no match rule: this int rotate-right-by-one is reached only via expand (see rorI_rReg_i1)
9926 
9927   format %{ "rorl    $dst" %}
9928   opcode(0xD1, 0x1); /* D1 /1 */
9929   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9930   ins_pipe(ialu_reg);
9931 %}
9932 
9933 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9934 %{
9935   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this int rotate-right-by-immediate is reached only via expand (see rorI_rReg_i8)
9936 
9937   format %{ "rorl    $dst, $shift" %}
9938   opcode(0xC1, 0x1); /* C1 /1 ib */
9939   ins_encode(reg_opc_imm(dst, shift));
9940   ins_pipe(ialu_reg);
9941 %}
9942 
9943 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9944 %{
9945   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this int rotate-right-by-variable is reached only via expand (see rorI_rReg_Var_C0/C32)
9946 
9947   format %{ "rorl    $dst, $shift" %}
9948   opcode(0xD3, 0x1); /* D3 /1 */
9949   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding; dst uses the no_rcx_RegI class
9950   ins_pipe(ialu_reg_reg);
9951 %}
9952 // end of ROR expand
9953 
9954 // Rotate Right by one: recognizes (dst >>> rshift) | (dst << lshift) with an immI1 right count and an immI_M1 left count.
9955 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9956 %{
9957   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9958 
9959   expand %{
9960     rorI_rReg_imm1(dst, cr); // neither shift constant is forwarded to the expanded instruction
9961   %}
9962 %}
9963 
9964 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift) for two constant counts.
9965 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9966 %{
9967   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // the two counts must sum to a multiple of 32
9968   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9969 
9970   expand %{
9971     rorI_rReg_imm8(dst, rshift, cr); // the right-shift count becomes the rotate count
9972   %}
9973 %}
9974 
9975 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (zero - shift)) where zero is an immI0 constant.
9976 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9977 %{
9978   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9979 
9980   expand %{
9981     rorI_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
9982   %}
9983 %}
9984 
9985 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (c32 - shift)) where c32 is an immI_32 constant.
9986 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9987 %{
9988   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9989 
9990   expand %{
9991     rorI_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
9992   %}
9993 %}
9994 
9995 // for long rotate
9996 // ROL expand
9997 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9998   effect(USE_DEF dst, KILL cr); // no match rule: this long rotate-left-by-one is reached only via expand (see rolL_rReg_i1)
9999 
10000   format %{ "rolq    $dst" %}
10001   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
10002   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
10003   ins_pipe(ialu_reg);
10004 %}
10005 
10006 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
10007   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this long rotate-left-by-immediate is reached only via expand (see rolL_rReg_i8)
10008 
10009   format %{ "rolq    $dst, $shift" %}
10010   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
10011   ins_encode( reg_opc_imm_wide(dst, shift) );
10012   ins_pipe(ialu_reg);
10013 %}
10014 
10015 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
10016 %{
10017   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this long rotate-left-by-variable is reached only via expand (see rolL_rReg_Var_C0/C64)
10018 
10019   format %{ "rolq    $dst, $shift" %}
10020   opcode(0xD3, 0x0); /* Opcode D3 /0 */
10021   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding; dst uses the no_rcx_RegL class
10022   ins_pipe(ialu_reg_reg);
10023 %}
10024 // end of ROL expand
10025 
10026 // Rotate Left by one (long): recognizes (dst << lshift) | (dst >>> rshift) with an immI1 left count and an immI_M1 right count.
10027 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
10028 %{
10029   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
10030 
10031   expand %{
10032     rolL_rReg_imm1(dst, cr); // neither shift constant is forwarded to the expanded instruction
10033   %}
10034 %}
10035 
10036 // Rotate Left by 8-bit immediate (long): recognizes (dst << lshift) | (dst >>> rshift) for two constant counts.
10037 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
10038 %{
10039   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // the two counts must sum to a multiple of 64
10040   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
10041 
10042   expand %{
10043     rolL_rReg_imm8(dst, lshift, cr); // the left-shift count becomes the rotate count
10044   %}
10045 %}
10046 
10047 // Rotate Left by variable (long): recognizes (dst << shift) | (dst >>> (zero - shift)) where zero is an immI0 constant.
10048 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10049 %{
10050   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
10051 
10052   expand %{
10053     rolL_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
10054   %}
10055 %}
10056 
10057 // Rotate Left by variable (long): recognizes (dst << shift) | (dst >>> (c64 - shift)) where c64 is an immI_64 constant.
10058 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
10059 %{
10060   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
10061 
10062   expand %{
10063     rolL_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
10064   %}
10065 %}
10066 
10067 // ROR expand
10068 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
10069 %{
10070   effect(USE_DEF dst, KILL cr); // no match rule: this long rotate-right-by-one is reached only via expand (see rorL_rReg_i1)
10071 
10072   format %{ "rorq    $dst" %}
10073   opcode(0xD1, 0x1); /* D1 /1 */
10074   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
10075   ins_pipe(ialu_reg);
10076 %}
10077 
10078 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
10079 %{
10080   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this long rotate-right-by-immediate is reached only via expand (see rorL_rReg_i8)
10081 
10082   format %{ "rorq    $dst, $shift" %}
10083   opcode(0xC1, 0x1); /* C1 /1 ib */
10084   ins_encode(reg_opc_imm_wide(dst, shift));
10085   ins_pipe(ialu_reg);
10086 %}
10087 
10088 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
10089 %{
10090   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: this long rotate-right-by-variable is reached only via expand (see rorL_rReg_Var_C0/C64)
10091 
10092   format %{ "rorq    $dst, $shift" %}
10093   opcode(0xD3, 0x1); /* D3 /1 */
10094   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift does not appear in the encoding; dst uses the no_rcx_RegL class
10095   ins_pipe(ialu_reg_reg);
10096 %}
10097 // end of ROR expand
10098 
10099 // Rotate Right by one (long): recognizes (dst >>> rshift) | (dst << lshift) with an immI1 right count and an immI_M1 left count.
10100 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
10101 %{
10102   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
10103 
10104   expand %{
10105     rorL_rReg_imm1(dst, cr); // neither shift constant is forwarded to the expanded instruction
10106   %}
10107 %}
10108 
10109 // Rotate Right by 8-bit immediate (long): recognizes (dst >>> rshift) | (dst << lshift) for two constant counts.
10110 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
10111 %{
10112   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // the two counts must sum to a multiple of 64
10113   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
10114 
10115   expand %{
10116     rorL_rReg_imm8(dst, rshift, cr); // the right-shift count becomes the rotate count
10117   %}
10118 %}
10119 
10120 // Rotate Right by variable (long): recognizes (dst >>> shift) | (dst << (zero - shift)) where zero is an immI0 constant.
10121 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10122 %{
10123   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
10124 
10125   expand %{
10126     rorL_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
10127   %}
10128 %}
10129 
10130 // Rotate Right by variable (long): recognizes (dst >>> shift) | (dst << (c64 - shift)) where c64 is an immI_64 constant.
10131 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
10132 %{
10133   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
10134 
10135   expand %{
10136     rorL_rReg_CL(dst, shift, cr); // the constant operand is dropped; only dst, shift and cr are forwarded
10137   %}
10138 %}
10139 
10140 // Logical Instructions
10141 
10142 // Integer Logical Instructions
10143 
10144 // And Instructions
10145 // And Register with Register (int); the result replaces dst.
10146 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10147 %{
10148   match(Set dst (AndI dst src));
10149   effect(KILL cr); // flags are declared clobbered
10150 
10151   format %{ "andl    $dst, $src\t# int" %}
10152   opcode(0x23);
10153   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10154   ins_pipe(ialu_reg_reg);
10155 %}
10156 
10157 // And Register with Immediate 255 (int): emitted as movzbl of dst onto itself instead of an and.
10158 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
10159 %{
10160   match(Set dst (AndI dst src)); // no rFlagsReg operand or KILL effect is declared for this form
10161 
10162   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
10163   opcode(0x0F, 0xB6);
10164   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the mask constant is not encoded
10165   ins_pipe(ialu_reg);
10166 %}
10167 
10168 // And Register with Immediate 255 and promote to long: the mask and the ConvI2L are matched together as one movzbl.
10169 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
10170 %{
10171   match(Set dst (ConvI2L (AndI src mask))); // no rFlagsReg operand or KILL effect is declared for this form
10172 
10173   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
10174   opcode(0x0F, 0xB6);
10175   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the mask constant is not encoded
10176   ins_pipe(ialu_reg);
10177 %}
10178 
10179 // And Register with Immediate 65535 (int): emitted as movzwl of dst onto itself instead of an and.
10180 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
10181 %{
10182   match(Set dst (AndI dst src)); // no rFlagsReg operand or KILL effect is declared for this form
10183 
10184   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
10185   opcode(0x0F, 0xB7);
10186   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the mask constant is not encoded
10187   ins_pipe(ialu_reg);
10188 %}
10189 
10190 // And Register with Immediate 65535 and promote to long: the mask and the ConvI2L are matched together as one movzwl.
10191 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
10192 %{
10193   match(Set dst (ConvI2L (AndI src mask))); // no rFlagsReg operand or KILL effect is declared for this form
10194 
10195   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
10196   opcode(0x0F, 0xB7);
10197   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the mask constant is not encoded
10198   ins_pipe(ialu_reg);
10199 %}
10200 
10201 // And Register with Immediate (int, general immI constant).
10202 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10203 %{
10204   match(Set dst (AndI dst src));
10205   effect(KILL cr); // flags are declared clobbered
10206 
10207   format %{ "andl    $dst, $src\t# int" %}
10208   opcode(0x81, 0x04); /* Opcode 81 /4 */
10209   ins_encode(OpcSErm(dst, src), Con8or32(src)); // constant is emitted by Con8or32 after the opcode/register bytes
10210   ins_pipe(ialu_reg);
10211 %}
10212 
10213 // And Register with Memory (int): the LoadI of src is matched into the and instruction.
10214 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10215 %{
10216   match(Set dst (AndI dst (LoadI src)));
10217   effect(KILL cr); // flags are declared clobbered
10218 
10219   ins_cost(125); // explicit cost; the register-register form declares none
10220   format %{ "andl    $dst, $src\t# int" %}
10221   opcode(0x23);
10222   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10223   ins_pipe(ialu_reg_mem);
10224 %}
10225 
10226 // And Memory with Register (int): load, and, and store of the same location are matched as one instruction.
10227 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10228 %{
10229   match(Set dst (StoreI dst (AndI (LoadI dst) src))); // read-modify-write of the same memory operand
10230   effect(KILL cr); // flags are declared clobbered
10231 
10232   ins_cost(150);
10233   format %{ "andl    $dst, $src\t# int" %}
10234   opcode(0x21); /* Opcode 21 /r */
10235   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
10236   ins_pipe(ialu_mem_reg);
10237 %}
10238 
10239 // And Memory with Immediate (int): load, and, and store of the same location are matched as one instruction.
10240 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
10241 %{
10242   match(Set dst (StoreI dst (AndI (LoadI dst) src))); // read-modify-write of the same memory operand
10243   effect(KILL cr); // flags are declared clobbered
10244 
10245   ins_cost(125);
10246   format %{ "andl    $dst, $src\t# int" %}
10247   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10248   ins_encode(REX_mem(dst), OpcSE(src),
10249              RM_opc_mem(secondary, dst), Con8or32(src)); // constant is emitted last, after the memory operand
10250   ins_pipe(ialu_mem_imm);
10251 %}
10252 
10253 // Or Instructions
10254 // Or Register with Register (int); the result replaces dst.
10255 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10256 %{
10257   match(Set dst (OrI dst src));
10258   effect(KILL cr); // flags are declared clobbered
10259 
10260   format %{ "orl     $dst, $src\t# int" %}
10261   opcode(0x0B);
10262   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10263   ins_pipe(ialu_reg_reg);
10264 %}
10265 
10266 // Or Register with Immediate (int, general immI constant).
10267 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10268 %{
10269   match(Set dst (OrI dst src));
10270   effect(KILL cr); // flags are declared clobbered
10271 
10272   format %{ "orl     $dst, $src\t# int" %}
10273   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10274   ins_encode(OpcSErm(dst, src), Con8or32(src)); // constant is emitted by Con8or32 after the opcode/register bytes
10275   ins_pipe(ialu_reg);
10276 %}
10277 
10278 // Or Register with Memory (int): the LoadI of src is matched into the or instruction.
10279 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10280 %{
10281   match(Set dst (OrI dst (LoadI src)));
10282   effect(KILL cr); // flags are declared clobbered
10283 
10284   ins_cost(125); // explicit cost; the register-register form declares none
10285   format %{ "orl     $dst, $src\t# int" %}
10286   opcode(0x0B);
10287   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10288   ins_pipe(ialu_reg_mem);
10289 %}
10290 
10291 // Or Memory with Register (int): load, or, and store of the same location are matched as one instruction.
10292 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10293 %{
10294   match(Set dst (StoreI dst (OrI (LoadI dst) src))); // read-modify-write of the same memory operand
10295   effect(KILL cr); // flags are declared clobbered
10296 
10297   ins_cost(150);
10298   format %{ "orl     $dst, $src\t# int" %}
10299   opcode(0x09); /* Opcode 09 /r */
10300   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
10301   ins_pipe(ialu_mem_reg);
10302 %}
10303 
10304 // Or Memory with Immediate (int): load, or, and store of the same location are matched as one instruction.
10305 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10306 %{
10307   match(Set dst (StoreI dst (OrI (LoadI dst) src))); // read-modify-write of the same memory operand
10308   effect(KILL cr); // flags are declared clobbered
10309 
10310   ins_cost(125);
10311   format %{ "orl     $dst, $src\t# int" %}
10312   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10313   ins_encode(REX_mem(dst), OpcSE(src),
10314              RM_opc_mem(secondary, dst), Con8or32(src)); // constant is emitted last, after the memory operand
10315   ins_pipe(ialu_mem_imm);
10316 %}
10317 
10318 // Xor Instructions
10319 // Xor Register with Register (int); the result replaces dst.
10320 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10321 %{
10322   match(Set dst (XorI dst src));
10323   effect(KILL cr); // flags are declared clobbered
10324 
10325   format %{ "xorl    $dst, $src\t# int" %}
10326   opcode(0x33);
10327   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10328   ins_pipe(ialu_reg_reg);
10329 %}
10330 
10331 // Xor Register with Immediate -1 (int): emitted as a one-operand notl; no flags operand is declared.
10332 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10333   match(Set dst (XorI dst imm));
10334 
10335   format %{ "notl   $dst" %}
10336   ins_encode %{
10337      __ notl($dst$$Register); // the printed mnemonic above now names the instruction emitted here
10338   %}
10339   ins_pipe(ialu_reg);
10340 %}
10341 
10342 // Xor Register with Immediate (int, general immI constant).
10343 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10344 %{
10345   match(Set dst (XorI dst src));
10346   effect(KILL cr); // flags are declared clobbered
10347 
10348   format %{ "xorl    $dst, $src\t# int" %}
10349   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10350   ins_encode(OpcSErm(dst, src), Con8or32(src)); // constant is emitted by Con8or32 after the opcode/register bytes
10351   ins_pipe(ialu_reg);
10352 %}
10353 
10354 // Xor Register with Memory (int): the LoadI of src is matched into the xor instruction.
10355 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10356 %{
10357   match(Set dst (XorI dst (LoadI src)));
10358   effect(KILL cr); // flags are declared clobbered
10359 
10360   ins_cost(125); // explicit cost; the register-register form declares none
10361   format %{ "xorl    $dst, $src\t# int" %}
10362   opcode(0x33);
10363   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10364   ins_pipe(ialu_reg_mem);
10365 %}
10366 
10367 // Xor Memory with Register (int): load, xor, and store of the same location are matched as one instruction.
10368 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10369 %{
10370   match(Set dst (StoreI dst (XorI (LoadI dst) src))); // read-modify-write of the same memory operand
10371   effect(KILL cr); // flags are declared clobbered
10372 
10373   ins_cost(150);
10374   format %{ "xorl    $dst, $src\t# int" %}
10375   opcode(0x31); /* Opcode 31 /r */
10376   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
10377   ins_pipe(ialu_mem_reg);
10378 %}
10379 
10380 // Xor Memory with Immediate (int): load, xor, and store of the same location are matched as one instruction.
10381 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10382 %{
10383   match(Set dst (StoreI dst (XorI (LoadI dst) src))); // read-modify-write of the same memory operand
10384   effect(KILL cr); // flags are declared clobbered
10385 
10386   ins_cost(125);
10387   format %{ "xorl    $dst, $src\t# int" %}
10388   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10389   ins_encode(REX_mem(dst), OpcSE(src),
10390              RM_opc_mem(secondary, dst), Con8or32(src)); // constant is emitted last, after the memory operand
10391   ins_pipe(ialu_mem_imm);
10392 %}
10393 
10394 
10395 // Long Logical Instructions
10396 
10397 // And Instructions
10398 // And Register with Register (long); the result replaces dst.
10399 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10400 %{
10401   match(Set dst (AndL dst src));
10402   effect(KILL cr); // flags are declared clobbered
10403 
10404   format %{ "andq    $dst, $src\t# long" %}
10405   opcode(0x23);
10406   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as the int form, with the _wide REX encoder
10407   ins_pipe(ialu_reg_reg);
10408 %}
10409 
10410 // And Register with Immediate 255 (long): emitted as movzbq of dst onto itself instead of an and.
10411 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10412 %{
10413   match(Set dst (AndL dst src)); // no rFlagsReg operand or KILL effect is declared for this form
10414 
10415   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10416   opcode(0x0F, 0xB6);
10417   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the mask constant is not encoded
10418   ins_pipe(ialu_reg);
10419 %}
10420 
10421 // And Register with Immediate 65535 (long): emitted as movzwq of dst onto itself instead of an and.
10422 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10423 %{
10424   match(Set dst (AndL dst src)); // no rFlagsReg operand or KILL effect is declared for this form
10425 
10426   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10427   opcode(0x0F, 0xB7);
10428   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the mask constant is not encoded
10429   ins_pipe(ialu_reg);
10430 %}
10431 
10432 // And Register with Immediate (long register, immL32 constant operand).
10433 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10434 %{
10435   match(Set dst (AndL dst src));
10436   effect(KILL cr); // flags are declared clobbered
10437 
10438   format %{ "andq    $dst, $src\t# long" %}
10439   opcode(0x81, 0x04); /* Opcode 81 /4 */
10440   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // constant is emitted by Con8or32 after the opcode/register bytes
10441   ins_pipe(ialu_reg);
10442 %}
10443 
10444 // And Register with Memory (long): the LoadL of src is matched into the and instruction.
10445 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10446 %{
10447   match(Set dst (AndL dst (LoadL src)));
10448   effect(KILL cr); // flags are declared clobbered
10449 
10450   ins_cost(125); // explicit cost; the register-register form declares none
10451   format %{ "andq    $dst, $src\t# long" %}
10452   opcode(0x23);
10453   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10454   ins_pipe(ialu_reg_mem);
10455 %}
10456 
10457 // And Memory with Register (long): load, and, and store of the same location are matched as one instruction.
10458 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10459 %{
10460   match(Set dst (StoreL dst (AndL (LoadL dst) src))); // read-modify-write of the same memory operand
10461   effect(KILL cr); // flags are declared clobbered
10462 
10463   ins_cost(150);
10464   format %{ "andq    $dst, $src\t# long" %}
10465   opcode(0x21); /* Opcode 21 /r */
10466   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
10467   ins_pipe(ialu_mem_reg);
10468 %}
10469 
10470 // And Memory with Immediate (long, immL32 constant): load, and, and store of the same location are matched as one instruction.
10471 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10472 %{
10473   match(Set dst (StoreL dst (AndL (LoadL dst) src))); // read-modify-write of the same memory operand
10474   effect(KILL cr); // flags are declared clobbered
10475 
10476   ins_cost(125);
10477   format %{ "andq    $dst, $src\t# long" %}
10478   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10479   ins_encode(REX_mem_wide(dst), OpcSE(src),
10480              RM_opc_mem(secondary, dst), Con8or32(src)); // constant is emitted last, after the memory operand
10481   ins_pipe(ialu_mem_imm);
10482 %}
10483 
10484 // Or Instructions
10485 // Or Register with Register (long); the result replaces dst.
10486 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10487 %{
10488   match(Set dst (OrL dst src));
10489   effect(KILL cr); // flags are declared clobbered
10490 
10491   format %{ "orq     $dst, $src\t# long" %}
10492   opcode(0x0B);
10493   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as the int form, with the _wide REX encoder
10494   ins_pipe(ialu_reg_reg);
10495 %}
10496 
10497 // Or a long register with a pointer viewed as an integer (CastP2X). Use any_RegP to match R15 (TLS register) without spilling.
10498 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10499   match(Set dst (OrL dst (CastP2X src))); // the cast is matched into the or; it has no encoding of its own
10500   effect(KILL cr); // flags are declared clobbered
10501 
10502   format %{ "orq     $dst, $src\t# long" %}
10503   opcode(0x0B);
10504   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // identical encoding to orL_rReg
10505   ins_pipe(ialu_reg_reg);
10506 %}
10507 
10508 
10509 // Or Register with Immediate (long register, immL32 constant operand).
10510 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10511 %{
10512   match(Set dst (OrL dst src));
10513   effect(KILL cr); // flags are declared clobbered
10514 
10515   format %{ "orq     $dst, $src\t# long" %}
10516   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10517   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // constant is emitted by Con8or32 after the opcode/register bytes
10518   ins_pipe(ialu_reg);
10519 %}
10520 
10521 // Or Register with Memory (long): the LoadL of src is matched into the or instruction.
10522 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10523 %{
10524   match(Set dst (OrL dst (LoadL src)));
10525   effect(KILL cr); // flags are declared clobbered
10526 
10527   ins_cost(125); // explicit cost; the register-register form declares none
10528   format %{ "orq     $dst, $src\t# long" %}
10529   opcode(0x0B);
10530   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10531   ins_pipe(ialu_reg_mem);
10532 %}
10533 
10534 // Or Memory with Register (long): load, or, and store of the same location are matched as one instruction.
10535 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10536 %{
10537   match(Set dst (StoreL dst (OrL (LoadL dst) src))); // read-modify-write of the same memory operand
10538   effect(KILL cr); // flags are declared clobbered
10539 
10540   ins_cost(150);
10541   format %{ "orq     $dst, $src\t# long" %}
10542   opcode(0x09); /* Opcode 09 /r */
10543   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // encoders take (register, memory), hence src before dst
10544   ins_pipe(ialu_mem_reg);
10545 %}
10546 
10547 // Or Memory with Immediate (long, immL32 constant): load, or, and store of the same location are matched as one instruction.
10548 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10549 %{
10550   match(Set dst (StoreL dst (OrL (LoadL dst) src))); // read-modify-write of the same memory operand
10551   effect(KILL cr); // flags are declared clobbered
10552 
10553   ins_cost(125);
10554   format %{ "orq     $dst, $src\t# long" %}
10555   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10556   ins_encode(REX_mem_wide(dst), OpcSE(src),
10557              RM_opc_mem(secondary, dst), Con8or32(src)); // constant is emitted last, after the memory operand
10558   ins_pipe(ialu_mem_imm);
10559 %}
10560 
10561 // Xor Instructions
10562 // Xor Register with Register (long); the result replaces dst.
10563 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10564 %{
10565   match(Set dst (XorL dst src));
10566   effect(KILL cr); // flags are declared clobbered
10567 
10568   format %{ "xorq    $dst, $src\t# long" %}
10569   opcode(0x33);
10570   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as the int form, with the _wide REX encoder
10571   ins_pipe(ialu_reg_reg);
10572 %}
10573 
// Long XOR of a register with the immL_M1 operand (immediate -1), emitted as
// a single notq through the assembler. No flags operand is declared here.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10584 
// Long XOR, register with immediate: dst = dst ^ src (src is an immL32 operand).
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);  // the flags register is declared clobbered

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10596 
// Long XOR, register with memory: dst = dst ^ LoadL(src).
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);  // the flags register is declared clobbered

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10609 
// Long XOR, memory with register: load from dst, XOR with src, store back to
// the same dst address (single read-modify-write instruction).
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);  // the flags register is declared clobbered

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x31); /* Opcode 31 /r */
  // Note the operand order: the register (src) comes first, memory second.
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10622 
// Long XOR, memory with immediate: load from dst, XOR with the immL32
// operand, store back to the same dst address.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);  // the flags register is declared clobbered

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10636 
// Convert Int to Boolean.
// Three-instruction sequence (see format): test src against itself, set the
// low byte of dst on "not zero", then zero-extend that byte into dst.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // testl writes the flags

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src),        // testl
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),               // setnz
             REX_reg_breg(dst, dst),       // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10652 
// Convert Pointer to Boolean.
// Same sequence as the int form, but the test uses the wide (testq) encoding
// because src is a pointer register.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // testq writes the flags

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src),   // testq
             opc_reg_reg(0x85, src, src),
             setNZ_reg(dst),               // setnz
             REX_reg_breg(dst, dst),       // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10668 
// CmpLTMask of two int registers.
// Sequence (see format): compare p with q, set the low byte of dst on
// "less than", zero-extend it, then negate dst.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400); // XXX
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode(REX_reg_reg(p, q),            // cmpl
             opc_reg_reg(0x3B, p, q),
             setLT_reg(dst),               // setlt
             REX_reg_breg(dst, dst),       // movzbl
             Opcode(0x0F),
             Opcode(0xB6),
             reg_reg(dst, dst),
             neg_reg(dst));                // negl
  ins_pipe(pipe_slow);
%}
10686 
// CmpLTMask of dst against the immI0 operand, done in place with a single
// arithmetic right shift of dst by 31 (0x1F).
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100); // XXX
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode(reg_opc_imm(dst, 0x1F));  // shift count 31
  ins_pipe(ialu_reg);
%}
10698 
10699 
// Fused form of p = ((p < q) mask & y) + (p - q).
// Per the format text this is sub / sbb / and / add, using tmp as a scratch
// register (declared TEMP); the whole sequence comes from enc_cmpLTP.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  ins_cost(400); // XXX
  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_encode(enc_cmpLTP(p, q, y, tmp));
  ins_pipe(pipe_cmplt);
%}
10715 
10716 /* If I enable this, I encourage spilling in the inner loop of compress.
10717 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10718 %{
10719   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10720   effect( TEMP tmp, KILL cr );
10721   ins_cost(400);
10722 
10723   format %{ "SUB    $p,$q\n\t"
10724             "SBB    RCX,RCX\n\t"
10725             "AND    RCX,$y\n\t"
10726             "ADD    $p,RCX" %}
10727   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10728 %}
10729 */
10730 
10731 //---------- FP Instructions------------------------------------------------
10732 
// Float compare of two registers into the rFlagsRegU flags operand.
// The ucomiss is followed by the cmpfp_fixup encoding; per the format text,
// the fixup is skipped when parity is clear and otherwise rewrites the flags
// through pushfq/andq/popfq ("saw NaN, set CF").
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP,
             OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10749 
// Float compare of two registers into the rFlagsRegUCF flags operand.
// Emits only the ucomiss (via the assembler); unlike cmpF_cc_reg there is no
// cmpfp_fixup sequence afterwards.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  // Cost 100, matching every other no-fixup "CF" compare in this file
  // (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_reg_CF, ...). It was 145, i.e.
  // priced the same as the longer fixup variant cmpF_cc_reg.
  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10760 
// Float compare of a register with a float loaded from memory, into the
// rFlagsRegU flags operand. The ucomiss is followed by cmpfp_fixup, which per
// the format text rewrites the flags when the compare saw a NaN.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10777 
// Float compare of a register with a float loaded from memory, into the
// rFlagsRegUCF flags operand. Only the ucomiss is emitted (no cmpfp_fixup).
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10787 
// Float compare of a register with a float constant, into the rFlagsRegU
// flags operand. The constant is referenced through load_immF, and the
// ucomiss is followed by cmpfp_fixup (NaN flag rewrite, see format).
instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  // The constant is printed as "[$src2]", the way cmpF_imm and cmpD_cc_imm
  // print theirs; the format previously showed a bare "$src2".
  format %{ "ucomiss $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10804 
// Float compare of a register with a float constant, into the rFlagsRegUCF
// flags operand. The constant is referenced through load_immF; only the
// ucomiss is emitted (no cmpfp_fixup).
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  // The constant is printed as "[$src2]", the way cmpD_cc_immCF prints its
  // constant; the format previously showed a bare "$src2".
  format %{ "ucomiss $src1, [$src2]" %}
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
  ins_pipe(pipe_slow);
%}
10814 
// Double compare of two registers into the rFlagsRegU flags operand.
// The ucomisd is followed by cmpfp_fixup, which per the format text rewrites
// the flags through pushfq/andq/popfq when the compare saw a NaN.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  // The 0x66 byte (OpcP) is emitted ahead of the REX encoding.
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS,
             OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10831 
// Double compare of two registers into the rFlagsRegUCF flags operand.
// Emits only the ucomisd (via the assembler); unlike cmpD_cc_reg there is no
// cmpfp_fixup sequence afterwards.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // A stray trailing " test" was removed from this format string so the
  // printed instruction reads like the other ucomisd forms.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10842 
// Double compare of a register with a double loaded from memory, into the
// rFlagsRegU flags operand. The ucomisd is followed by cmpfp_fixup, which per
// the format text rewrites the flags when the compare saw a NaN.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  // The 0x66 byte (OpcP) is emitted ahead of the REX encoding.
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10859 
// Double compare of a register with a double loaded from memory, into the
// rFlagsRegUCF flags operand. Only the ucomisd is emitted (no cmpfp_fixup).
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10869 
// Double compare of a register with a double constant, into the rFlagsRegU
// flags operand. The constant is referenced through load_immD, and the
// ucomisd is followed by cmpfp_fixup (NaN flag rewrite, see format).
instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(145);
  format %{ "ucomisd $src1, [$src2]\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             load_immD(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10886 
// Double compare of a register with a double constant, into the rFlagsRegUCF
// flags operand. The constant is referenced through load_immD; only the
// ucomisd is emitted (no cmpfp_fixup).
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, [$src2]" %}
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             load_immD(src1, src2));
  ins_pipe(pipe_slow);
%}
10896 
// Three-way float compare (CmpF3) of two registers into an int: -1, 0 or 1.
// Per the format text: dst is preset to -1 and kept on "parity" or "below";
// otherwise setne/movzbl leave 0 or 1 in dst. The tail comes from cmpfp3.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP,
             OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10917 
// Three-way float compare (CmpF3) of a register with a float loaded from
// memory, into an int: -1, 0 or 1. Per the format text: dst is preset to -1
// and kept on "parity" or "below"; otherwise setne/movzbl leave 0 or 1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10938 
// Three-way float compare (CmpF3) of a register with a float constant, into
// an int: -1, 0 or 1. The constant is referenced through load_immF; the
// result tail comes from cmpfp3 (see format for the sequence).
instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP,
             OpcS,
             load_immF(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10959 
// Three-way double compare (CmpD3) of two registers into an int: -1, 0 or 1.
// Per the format text: dst is preset to -1 and kept on "parity" or "below";
// otherwise setne/movzbl leave 0 or 1 in dst. The tail comes from cmpfp3.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS,
             OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10980 
// Three-way double compare (CmpD3) of a register with a double loaded from
// memory, into an int: -1, 0 or 1. Per the format text: dst is preset to -1
// and kept on "parity" or "below"; otherwise setne/movzbl leave 0 or 1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
11001 
// Three-way double compare (CmpD3) of a register with a double constant, into
// an int: -1, 0 or 1. The constant is referenced through load_immD; the
// result tail comes from cmpfp3 (see format for the sequence).
instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, [$src2]\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS,
             OpcT,
             load_immD(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
11022 
// Float add, register with register: dst = dst + src (addss).
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11033 
// Float add, register with memory: dst = dst + LoadF(src) (addss).
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11044 
// Float add, register with float constant (addss); the constant operand is
// referenced through load_immF.
instruct addF_imm(regF dst, immF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11055 
// Double add, register with register: dst = dst + src (addsd).
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11066 
// Double add, register with memory: dst = dst + LoadD(src) (addsd).
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11077 
// Double add, register with double constant (addsd); the constant operand is
// referenced through load_immD.
instruct addD_imm(regD dst, immD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11088 
// Float subtract, register with register: dst = dst - src (subss).
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11099 
// Float subtract, register with memory: dst = dst - LoadF(src) (subss).
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11110 
// Float subtract, register with float constant (subss); the constant operand
// is referenced through load_immF.
instruct subF_imm(regF dst, immF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11121 
// Double subtract, register with register: dst = dst - src (subsd).
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11132 
// Double subtract, register with memory: dst = dst - LoadD(src) (subsd).
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11143 
// Double subtract, register with double constant (subsd); the constant
// operand is referenced through load_immD.
instruct subD_imm(regD dst, immD src) %{
  match(Set dst (SubD dst src));

  ins_cost(150); // XXX
  format %{ "subsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11154 
// Float multiply, register with register: dst = dst * src (mulss).
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11165 
// Float multiply, register with memory: dst = dst * LoadF(src) (mulss).
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11176 
// Float multiply, register with float constant (mulss); the constant operand
// is referenced through load_immF.
instruct mulF_imm(regF dst, immF src) %{
  match(Set dst (MulF dst src));

  ins_cost(150); // XXX
  format %{ "mulss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11187 
// Double multiply, register with register: dst = dst * src (mulsd).
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11198 
// Double multiply, register with memory: dst = dst * LoadD(src) (mulsd).
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11209 
// Double multiply, register with double constant (mulsd); the constant
// operand is referenced through load_immD.
instruct mulD_imm(regD dst, immD src) %{
  match(Set dst (MulD dst src));

  ins_cost(150); // XXX
  format %{ "mulsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x59);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11220 
// Float divide, register by register: dst = dst / src (divss).
instruct divF_reg(regF dst, regF src) %{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11231 
// Float divide, register by memory: dst = dst / LoadF(src) (divss).
instruct divF_mem(regF dst, memory src) %{
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "divss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11242 
// Float divide, register by float constant (divss); the constant operand is
// referenced through load_immF.
instruct divF_imm(regF dst, immF src) %{
  match(Set dst (DivF dst src));

  ins_cost(150); // XXX
  format %{ "divss   $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11253 
// Double divide, register by register: dst = dst / src (divsd).
instruct divD_reg(regD dst, regD src) %{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11264 
// Double divide, register by memory: dst = dst / LoadD(src) (divsd).
instruct divD_mem(regD dst, memory src) %{
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11275 
// Double divide, register by double constant (divsd); the constant operand is
// referenced through load_immD.
instruct divD_imm(regD dst, immD src) %{
  match(Set dst (DivD dst src));

  ins_cost(150); // XXX
  format %{ "divsd   $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11286 
// Float square root, register source. Matches the ideal-graph shape
// ConvD2F(SqrtD(ConvF2D src)) and emits a single sqrtss for it.
instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11297 
// Float square root, memory source. Matches
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits a single sqrtss for it.
instruct sqrtF_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, $src" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11308 
// Float square root, float-constant source. Matches
// ConvD2F(SqrtD(ConvF2D src)) with an immF operand and emits a single sqrtss;
// the constant operand is referenced through load_immF.
instruct sqrtF_imm(regF dst, immF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(150); // XXX
  format %{ "sqrtss  $dst, [$src]" %}
  opcode(0xF3, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immF(dst, src));
  ins_pipe(pipe_slow);
%}
11319 
// Double square root, register source: dst = SqrtD(src) (sqrtsd).
instruct sqrtD_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
11330 
// Double square root, memory source: dst = SqrtD(LoadD(src)) (sqrtsd).
instruct sqrtD_mem(regD dst, memory src) %{
  match(Set dst (SqrtD (LoadD src)));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, $src" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
11341 
// Double square root, double-constant source (sqrtsd); the constant operand
// is referenced through load_immD.
instruct sqrtD_imm(regD dst, immD src) %{
  match(Set dst (SqrtD src));

  ins_cost(150); // XXX
  format %{ "sqrtsd  $dst, [$src]" %}
  opcode(0xF2, 0x0F, 0x51);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             load_immD(dst, src));
  ins_pipe(pipe_slow);
%}
11352 
// Float absolute value, in place. Per the format text this is an andps of dst
// with a 0x7fffffff mask; the bytes come from absF_encoding.
instruct absF_reg(regF dst) %{
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode(absF_encoding(dst));
  ins_pipe(pipe_slow);
%}
11361 
// Double absolute value, in place. Per the format text this is an andpd of
// dst with a 0x7fffffffffffffff mask; the bytes come from absD_encoding.
instruct absD_reg(regD dst) %{
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode(absD_encoding(dst));
  ins_pipe(pipe_slow);
%}
11371 
// Float negate, in place. Per the format text this is an xorps of dst with a
// 0x80000000 mask; the bytes come from negF_encoding.
instruct negF_reg(regF dst) %{
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode(negF_encoding(dst));
  ins_pipe(pipe_slow);
%}
11380 
// Double negate, in place. Per the format text this is an xorpd of dst with a
// 0x8000000000000000 mask; the bytes come from negD_encoding.
instruct negD_reg(regD dst) %{
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode(negD_encoding(dst));
  ins_pipe(pipe_slow);
%}
11390 
// -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place. The operand goes through Push_SrcXD, then the
// two-byte opcode D9 FF is emitted, then Push_ResultXD writes dst back.
instruct cosD_reg(regD dst)
%{
  match(Set dst (CosD dst));

  format %{ "dcos   $dst\n\t" %}
  opcode(0xD9, 0xFF);
  ins_encode(Push_SrcXD(dst),
             OpcP,
             OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11400 
// Double sine, in place. The operand goes through Push_SrcXD, then the
// two-byte opcode D9 FE is emitted, then Push_ResultXD writes dst back.
instruct sinD_reg(regD dst)
%{
  match(Set dst (SinD dst));

  format %{ "dsin   $dst\n\t" %}
  opcode(0xD9, 0xFE);
  ins_encode(Push_SrcXD(dst),
             OpcP,
             OpcS,
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11409 
// Double tangent, in place. Between Push_SrcXD and Push_ResultXD the encoding
// emits fptan followed by an fstp.
instruct tanD_reg(regD dst)
%{
  match(Set dst (TanD dst));

  format %{ "dtan   $dst\n\t" %}
  ins_encode(Push_SrcXD(dst),
             Opcode(0xD9),   //fptan
             Opcode(0xF2),
             Opcode(0xDD),   //fstp st
             Opcode(0xD8),
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11420 
// Base-10 logarithm of a double, in place.
// Source and result are Double operands in XMM registers.
//   fldlg2  ; push log_10(2) on the FPU stack; full 80-bit number
//   fyl2x   ; compute log_10(2) * log_2(x)
instruct log10D_reg(regD dst)
%{
  match(Set dst (Log10D dst));

  format %{ "fldlg2\t\t\t#Log10\n\t"
            "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
         %}
  // The constant is pushed before the source operand.
  ins_encode(Opcode(0xD9),   // fldlg2
             Opcode(0xEC),
             Push_SrcXD(dst),
             Opcode(0xD9),   // fyl2x
             Opcode(0xF1),
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11436 
// Natural logarithm of a double, in place.
// Source and result are Double operands in XMM registers.
//   fldln2  ; push log_e(2) on the FPU stack; full 80-bit number
//   fyl2x   ; compute log_e(2) * log_2(x)
instruct logD_reg(regD dst)
%{
  match(Set dst (LogD dst));

  format %{ "fldln2\t\t\t#Log_e\n\t"
            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
         %}
  // The constant is pushed before the source operand.
  ins_encode(Opcode(0xD9),   // fldln2
             Opcode(0xED),
             Push_SrcXD(dst),
             Opcode(0xD9),   // fyl2x
             Opcode(0xF1),
             Push_ResultXD(dst));
  ins_pipe(pipe_slow);
%}
11451 
11452 
11453 
11454 //----------Arithmetic Conversion Instructions---------------------------------
11455 
// RoundFloat on a float register: matched in place at zero cost with an
// empty encoding, so no bytes are emitted for it.
instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
11464 
// RoundDouble on a double register: matched in place at zero cost with an
// empty encoding, so no bytes are emitted for it.
instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
11473 
// Float to double conversion, register source (cvtss2sd).
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11483 
// Float to double conversion, source loaded from memory (cvtss2sd).
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11493 
// Double to float conversion, register source (cvtsd2ss).
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11503 
// Double to float conversion, source loaded from memory (cvtsd2ss).
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS,
             OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
11513 
11514 // XXX do mem variants
// Float to int conversion, register source.
// Per the format text: after cvttss2sil, a result other than 0x80000000 is
// final; otherwise src is stored on the stack, f2i_fixup is called, and the
// result is popped into dst. The fixup tail comes from the f2i_fixup encoding.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF3, 0x0F, 0x2C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src),
             f2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11533 
// Float to long conversion, register source (wide REX encoding).
// Per the format text: after cvttss2siq, a result other than
// 0x8000000000000000 is final; otherwise src is stored on the stack,
// f2l_fixup is called, and the result is popped into dst.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF3, 0x0F, 0x2C);
  ins_encode(OpcP,
             REX_reg_reg_wide(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src),
             f2l_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11552 
// Double to int conversion, register source.
// Per the format text: after cvttsd2sil, a result other than 0x80000000 is
// final; otherwise src is stored on the stack, d2i_fixup is called, and the
// result is popped into dst. The fixup tail comes from the d2i_fixup encoding.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  opcode(0xF2, 0x0F, 0x2C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS,
             OpcT,
             reg_reg(dst, src),
             d2i_fixup(dst, src));
  ins_pipe(pipe_slow);
%}
11571 
11572 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11573 %{
11574   match(Set dst (ConvD2L src));
11575   effect(KILL cr);
11576   // Truncating double->long. Per the format below, a raw result of 0x8000000000000000 takes the d2l_fixup path with $src passed on the stack.
11577   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11578             "cmpq    $dst, [0x8000000000000000]\n\t"
11579             "jne,s   done\n\t"
11580             "subq    rsp, #8\n\t"
11581             "movsd   [rsp], $src\n\t"
11582             "call    d2l_fixup\n\t"
11583             "popq    $dst\n"
11584     "done:   "%}
11585   opcode(0xF2, 0x0F, 0x2C); // same opcode bytes as convD2I_reg_reg; this form uses REX_reg_reg_wide instead of REX_reg_reg
11586   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11587              d2l_fixup(dst, src));
11588   ins_pipe(pipe_slow);
11589 %}
11590 
11591 instruct convI2F_reg_reg(regF dst, rRegI src)
11592 %{
11593   predicate(!UseXmmI2F); // convXI2F_reg matches the same ConvI2F when the flag is on
11594   match(Set dst (ConvI2F src));
11595   // int->float from a general register, printed as cvtsi2ssl.
11596   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11597   opcode(0xF3, 0x0F, 0x2A);
11598   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11599   ins_pipe(pipe_slow); // XXX
11600 %}
11601 
11602 instruct convI2F_reg_mem(regF dst, memory src)
11603 %{
11604   match(Set dst (ConvI2F (LoadI src)));
11605   // int->float with the LoadI folded in; unlike the register form, no UseXmmI2F predicate guards this rule.
11606   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11607   opcode(0xF3, 0x0F, 0x2A);
11608   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11609   ins_pipe(pipe_slow); // XXX
11610 %}
11611 
11612 instruct convI2D_reg_reg(regD dst, rRegI src)
11613 %{
11614   predicate(!UseXmmI2D); // convXI2D_reg matches the same ConvI2D when the flag is on
11615   match(Set dst (ConvI2D src));
11616   // int->double from a general register, printed as cvtsi2sdl.
11617   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11618   opcode(0xF2, 0x0F, 0x2A);
11619   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11620   ins_pipe(pipe_slow); // XXX
11621 %}
11622 
11623 instruct convI2D_reg_mem(regD dst, memory src)
11624 %{
11625   match(Set dst (ConvI2D (LoadI src)));
11626   // int->double with the LoadI folded in; unlike the register form, no UseXmmI2D predicate guards this rule.
11627   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11628   opcode(0xF2, 0x0F, 0x2A);
11629   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11630   ins_pipe(pipe_slow); // XXX
11631 %}
11632 
11633 instruct convXI2F_reg(regF dst, rRegI src)
11634 %{
11635   predicate(UseXmmI2F); // counterpart of convI2F_reg_reg, which requires !UseXmmI2F
11636   match(Set dst (ConvI2F src));
11637   // Two-step int->float: copy the int into the XMM destination, then convert it in place.
11638   format %{ "movdl $dst, $src\n\t"
11639             "cvtdq2psl $dst, $dst\t# i2f" %}
11640   ins_encode %{
11641     __ movdl($dst$$XMMRegister, $src$$Register);
11642     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11643   %}
11644   ins_pipe(pipe_slow); // XXX
11645 %}
11646 
11647 instruct convXI2D_reg(regD dst, rRegI src)
11648 %{
11649   predicate(UseXmmI2D); // counterpart of convI2D_reg_reg, which requires !UseXmmI2D
11650   match(Set dst (ConvI2D src));
11651   // Two-step int->double: copy the int into the XMM destination, then convert it in place.
11652   format %{ "movdl $dst, $src\n\t"
11653             "cvtdq2pdl $dst, $dst\t# i2d" %}
11654   ins_encode %{
11655     __ movdl($dst$$XMMRegister, $src$$Register);
11656     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11657   %}
11658   ins_pipe(pipe_slow); // XXX
11659 %}
11660 
11661 instruct convL2F_reg_reg(regF dst, rRegL src)
11662 %{
11663   match(Set dst (ConvL2F src));
11664   // long->float from a general register, printed as cvtsi2ssq.
11665   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11666   opcode(0xF3, 0x0F, 0x2A); // same opcode bytes as convI2F_reg_reg; this form uses REX_reg_reg_wide
11667   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11668   ins_pipe(pipe_slow); // XXX
11669 %}
11670 
11671 instruct convL2F_reg_mem(regF dst, memory src)
11672 %{
11673   match(Set dst (ConvL2F (LoadL src)));
11674   // long->float with the LoadL folded in as a memory operand.
11675   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11676   opcode(0xF3, 0x0F, 0x2A);
11677   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11678   ins_pipe(pipe_slow); // XXX
11679 %}
11680 
11681 instruct convL2D_reg_reg(regD dst, rRegL src)
11682 %{
11683   match(Set dst (ConvL2D src));
11684   // long->double from a general register, printed as cvtsi2sdq.
11685   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11686   opcode(0xF2, 0x0F, 0x2A); // same opcode bytes as convI2D_reg_reg; this form uses REX_reg_reg_wide
11687   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11688   ins_pipe(pipe_slow); // XXX
11689 %}
11690 
11691 instruct convL2D_reg_mem(regD dst, memory src)
11692 %{
11693   match(Set dst (ConvL2D (LoadL src)));
11694   // long->double with the LoadL folded in as a memory operand.
11695   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11696   opcode(0xF2, 0x0F, 0x2A);
11697   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11698   ins_pipe(pipe_slow); // XXX
11699 %}
11700 
11701 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11702 %{
11703   match(Set dst (ConvI2L src));
11704   // Plain int->long widening, emitted as a single movslq.
11705   ins_cost(125);
11706   format %{ "movslq  $dst, $src\t# i2l" %}
11707   ins_encode %{
11708     __ movslq($dst$$Register, $src$$Register);
11709   %}
11710   ins_pipe(ialu_reg_reg);
11711 %}
11712 
11713 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11714 // %{
11715 //   match(Set dst (ConvI2L src));
11716 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11717 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11718 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11719 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11720 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11721 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11722 
11723 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11724 //   ins_encode(enc_copy(dst, src));
11725 // //   opcode(0x63); // needs REX.W
11726 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11727 //   ins_pipe(ialu_reg_reg);
11728 // %}
11729 
11730 // Zero-extend convert int to long
11731 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11732 %{
11733   match(Set dst (AndL (ConvI2L src) mask));
11734   // ConvI2L followed by AndL with an immL_32bits constant is printed as one 32-bit movl; the mask operand is not passed to the encoding.
11735   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11736   ins_encode(enc_copy(dst, src)); // enc_copy is defined outside this chunk
11737   ins_pipe(ialu_reg_reg);
11738 %}
11739 
11740 // Zero-extend convert int to long
11741 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11742 %{
11743   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11744   // LoadI + ConvI2L + AndL with an immL_32bits constant is printed as one 32-bit load; the mask operand is not passed to the encoding.
11745   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11746   opcode(0x8B);
11747   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11748   ins_pipe(ialu_reg_mem);
11749 %}
11750 
11751 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11752 %{
11753   match(Set dst (AndL src mask));
11754   // AndL of a long with an immL_32bits constant, printed as a 32-bit movl instead of an and.
11755   format %{ "movl    $dst, $src\t# zero-extend long" %}
11756   ins_encode(enc_copy_always(dst, src)); // NOTE(review): presumably emits the move even when dst == src, unlike enc_copy -- confirm in the enc_class
11757   ins_pipe(ialu_reg_reg);
11758 %}
11759 
11760 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11761 %{
11762   match(Set dst (ConvL2I src));
11763   // long->int narrowing, printed as a 32-bit movl of $src into $dst.
11764   format %{ "movl    $dst, $src\t# l2i" %}
11765   ins_encode(enc_copy_always(dst, src)); // same enc_class as zerox_long_reg_reg
11766   ins_pipe(ialu_reg_reg);
11767 %}
11768 
11769 
11770 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11771   match(Set dst (MoveF2I src));
11772   effect(DEF dst, USE src);
11773   // MoveF2I from a float stack slot: an ordinary 32-bit integer load (movl, opcode 8B).
11774   ins_cost(125);
11775   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11776   opcode(0x8B);
11777   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11778   ins_pipe(ialu_reg_mem);
11779 %}
11780 
11781 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11782   match(Set dst (MoveI2F src));
11783   effect(DEF dst, USE src);
11784   // MoveI2F from an int stack slot: loaded into the float register with movss (F3 0F 10).
11785   ins_cost(125);
11786   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11787   opcode(0xF3, 0x0F, 0x10);
11788   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11789   ins_pipe(pipe_slow);
11790 %}
11791 
11792 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11793   match(Set dst (MoveD2L src));
11794   effect(DEF dst, USE src);
11795   // MoveD2L from a double stack slot: an ordinary 64-bit integer load (movq, opcode 8B with the wide REX form).
11796   ins_cost(125);
11797   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11798   opcode(0x8B);
11799   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11800   ins_pipe(ialu_reg_mem);
11801 %}
11802 
11803 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11804   predicate(!UseXmmLoadAndClearUpper); // MoveL2D_stack_reg handles the flag-on case
11805   match(Set dst (MoveL2D src));
11806   effect(DEF dst, USE src);
11807   // MoveL2D from a long stack slot using movlpd (66 0F 12).
11808   ins_cost(125);
11809   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11810   opcode(0x66, 0x0F, 0x12);
11811   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11812   ins_pipe(pipe_slow);
11813 %}
11814 
11815 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11816   predicate(UseXmmLoadAndClearUpper); // MoveL2D_stack_reg_partial handles the flag-off case
11817   match(Set dst (MoveL2D src));
11818   effect(DEF dst, USE src);
11819   // MoveL2D from a long stack slot using movsd (F2 0F 10).
11820   ins_cost(125);
11821   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11822   opcode(0xF2, 0x0F, 0x10);
11823   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11824   ins_pipe(pipe_slow);
11825 %}
11826 
11827 
11828 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11829   match(Set dst (MoveF2I src));
11830   effect(DEF dst, USE src);
11831   // MoveF2I into an int stack slot: the float register is stored with movss (F3 0F 11).
11832   ins_cost(95); // XXX
11833   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11834   opcode(0xF3, 0x0F, 0x11);
11835   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // (src, dst): the register operand comes first, the stack slot is the memory operand
11836   ins_pipe(pipe_slow);
11837 %}
11838 
11839 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11840   match(Set dst (MoveI2F src));
11841   effect(DEF dst, USE src);
11842   // MoveI2F into a float stack slot: an ordinary 32-bit integer store (movl, opcode 89).
11843   ins_cost(100);
11844   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11845   opcode(0x89);
11846   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // (src, dst): the register operand comes first, the stack slot is the memory operand
11847   ins_pipe( ialu_mem_reg );
11848 %}
11849 
11850 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11851   match(Set dst (MoveD2L src));
11852   effect(DEF dst, USE src);
11853   // MoveD2L into a long stack slot: the double register is stored with movsd (F2 0F 11).
11854   ins_cost(95); // XXX
11855   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11856   opcode(0xF2, 0x0F, 0x11);
11857   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // (src, dst): the register operand comes first, the stack slot is the memory operand
11858   ins_pipe(pipe_slow);
11859 %}
11860 
11861 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11862   match(Set dst (MoveL2D src));
11863   effect(DEF dst, USE src);
11864   // MoveL2D into a double stack slot: an ordinary 64-bit integer store (movq, opcode 89 with the wide REX form).
11865   ins_cost(100);
11866   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11867   opcode(0x89);
11868   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // (src, dst): the register operand comes first, the stack slot is the memory operand
11869   ins_pipe(ialu_mem_reg);
11870 %}
11871 
11872 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11873   match(Set dst (MoveF2I src));
11874   effect(DEF dst, USE src);
11875   ins_cost(85); // lower than the stack-slot MoveF2I forms in this file (125 and 95)
11876   format %{ "movd    $dst,$src\t# MoveF2I" %}
11877   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11878   ins_pipe( pipe_slow ); // direct XMM-to-general-register move, no stack slot involved
11879 %}
11880 
11881 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11882   match(Set dst (MoveD2L src));
11883   effect(DEF dst, USE src);
11884   ins_cost(85); // lower than the stack-slot MoveD2L forms in this file (125 and 95)
11885   format %{ "movd    $dst,$src\t# MoveD2L" %}
11886   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11887   ins_pipe( pipe_slow ); // NOTE(review): format prints "movd" while the encoding calls movdq()
11888 %}
11889 
11890 // The next instructions have long latency and use Int unit. Set high cost.
11891 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11892   match(Set dst (MoveI2F src));
11893   effect(DEF dst, USE src);
11894   ins_cost(300); // deliberately higher than the stack-slot MoveI2F forms (125 and 100)
11895   format %{ "movd    $dst,$src\t# MoveI2F" %}
11896   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11897   ins_pipe( pipe_slow ); // direct general-register-to-XMM move
11898 %}
11899 
11900 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11901   match(Set dst (MoveL2D src));
11902   effect(DEF dst, USE src);
11903   ins_cost(300); // deliberately higher than the stack-slot MoveL2D forms (125 and 100)
11904   format %{ "movd    $dst,$src\t# MoveL2D" %}
11905   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11906   ins_pipe( pipe_slow ); // NOTE(review): format prints "movd" while the encoding calls movdq()
11907 %}
11908 
11909 // Replicate scalar to packed byte (1 byte) values in xmm
11910 instruct Repl8B_reg(regD dst, regD src) %{
11911   match(Set dst (Replicate8B src)); // source value is already in an XMM (regD) register
11912   format %{ "MOVDQA  $dst,$src\n\t"
11913             "PUNPCKLBW $dst,$dst\n\t"
11914             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11915   ins_encode( pshufd_8x8(dst, src)); // enc_class defined outside this chunk; the format lists the intended sequence
11916   ins_pipe( pipe_slow );
11917 %}
11918 
11919 // Replicate scalar to packed byte (1 byte) values in xmm
11920 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11921   match(Set dst (Replicate8B src)); // source value is in a general register
11922   format %{ "MOVD    $dst,$src\n\t"
11923             "PUNPCKLBW $dst,$dst\n\t"
11924             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11925   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); // first move src into dst, then replicate within dst
11926   ins_pipe( pipe_slow );
11927 %}
11928 
11929 // Replicate scalar zero to packed byte (1 byte) values in xmm
11930 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11931   match(Set dst (Replicate8B zero)); // constant-zero case
11932   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11933   ins_encode( pxor(dst, dst)); // xor of dst with itself; the zero operand is not passed to the encoding
11934   ins_pipe( fpu_reg_reg );
11935 %}
11936 
11937 // Replicate scalar to packed short (2 byte) values in xmm
11938 instruct Repl4S_reg(regD dst, regD src) %{
11939   match(Set dst (Replicate4S src)); // source value is already in an XMM (regD) register
11940   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11941   ins_encode( pshufd_4x16(dst, src)); // enc_class defined outside this chunk
11942   ins_pipe( fpu_reg_reg );
11943 %}
11944 
11945 // Replicate scalar to packed short (2 byte) values in xmm
11946 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11947   match(Set dst (Replicate4S src)); // source value is in a general register
11948   format %{ "MOVD    $dst,$src\n\t"
11949             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11950   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // first move src into dst, then replicate within dst
11951   ins_pipe( fpu_reg_reg );
11952 %}
11953 
11954 // Replicate scalar zero to packed short (2 byte) values in xmm
11955 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11956   match(Set dst (Replicate4S zero)); // constant-zero case
11957   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11958   ins_encode( pxor(dst, dst)); // xor of dst with itself; the zero operand is not passed to the encoding
11959   ins_pipe( fpu_reg_reg );
11960 %}
11961 
11962 // Replicate scalar to packed char (2 byte) values in xmm
11963 instruct Repl4C_reg(regD dst, regD src) %{
11964   match(Set dst (Replicate4C src)); // source value is already in an XMM (regD) register
11965   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11966   ins_encode( pshufd_4x16(dst, src)); // same enc_class as Repl4S_reg
11967   ins_pipe( fpu_reg_reg );
11968 %}
11969 
11970 // Replicate scalar to packed char (2 byte) values in xmm
11971 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11972   match(Set dst (Replicate4C src)); // source value is in a general register
11973   format %{ "MOVD    $dst,$src\n\t"
11974             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11975   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // same encoding sequence as Repl4S_rRegI
11976   ins_pipe( fpu_reg_reg );
11977 %}
11978 
11979 // Replicate scalar zero to packed char (2 byte) values in xmm
11980 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11981   match(Set dst (Replicate4C zero)); // constant-zero case
11982   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11983   ins_encode( pxor(dst, dst)); // xor of dst with itself; the zero operand is not passed to the encoding
11984   ins_pipe( fpu_reg_reg );
11985 %}
11986 
11987 // Replicate scalar to packed integer (4 byte) values in xmm
11988 instruct Repl2I_reg(regD dst, regD src) %{
11989   match(Set dst (Replicate2I src)); // source value is already in an XMM (regD) register
11990   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11991   ins_encode( pshufd(dst, src, 0x00)); // shuffle immediate 0x00, matching the format string
11992   ins_pipe( fpu_reg_reg );
11993 %}
11994 
11995 // Replicate scalar to packed integer (4 byte) values in xmm
11996 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11997   match(Set dst (Replicate2I src)); // source value is in a general register
11998   format %{ "MOVD   $dst,$src\n\t"
11999             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12000   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); // first move src into dst, then shuffle within dst
12001   ins_pipe( fpu_reg_reg );
12002 %}
12003 
12004 // Replicate scalar zero to packed integer (4 byte) values in xmm
12005 instruct Repl2I_immI0(regD dst, immI0 zero) %{
12006   match(Set dst (Replicate2I zero)); // constant-zero case
12007   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
12008   ins_encode( pxor(dst, dst)); // xor of dst with itself; the zero operand is not passed to the encoding
12009   ins_pipe( fpu_reg_reg );
12010 %}
12011 
12012 // Replicate scalar to packed single precision floating point values in xmm
12013 instruct Repl2F_reg(regD dst, regD src) %{
12014   match(Set dst (Replicate2F src)); // source operand is typed regD here; Repl2F_regF takes regF
12015   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12016   ins_encode( pshufd(dst, src, 0xe0)); // shuffle immediate 0xe0, where Repl2I_reg uses 0x00
12017   ins_pipe( fpu_reg_reg );
12018 %}
12019 
12020 // Replicate scalar to packed single precision floating point values in xmm
12021 instruct Repl2F_regF(regD dst, regF src) %{
12022   match(Set dst (Replicate2F src)); // same rule as Repl2F_reg but with a regF source operand
12023   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12024   ins_encode( pshufd(dst, src, 0xe0)); // identical encoding to Repl2F_reg
12025   ins_pipe( fpu_reg_reg );
12026 %}
12027 
12028 // Replicate scalar zero to packed single precision floating point values in xmm
12029 instruct Repl2F_immF0(regD dst, immF0 zero) %{
12030   match(Set dst (Replicate2F zero)); // constant-zero case (immF0 operand)
12031   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
12032   ins_encode( pxor(dst, dst)); // xor of dst with itself; the zero operand is not passed to the encoding
12033   ins_pipe( fpu_reg_reg );
12034 %}
12035 
12036 
12037 // =======================================================================
12038 // fast clearing of an array
12039 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
12040                   rFlagsReg cr)
12041 %{
12042   match(Set dummy (ClearArray cnt base));
12043   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12044   // ClearArray: zero rax, then rep stosq. cnt and base are pinned to rcx/rdi and consumed; rax and the flags are clobbered.
12045   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
12046             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
12047   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
12048              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
12049   ins_pipe(pipe_slow);
12050 %}
12051 
12052 instruct string_compare(rdi_RegP str1, rsi_RegP str2, rbx_RegI cnt1, rax_RegI cnt2,
12053                         regD tmp1, regD tmp2, rcx_RegI result, rFlagsReg cr)
12054 %{
12055   match(Set result (StrComp (Binary str1 str2) (Binary cnt1 cnt2)));
12056   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12057   //ins_cost(300);
12058   // StrComp intrinsic: all four inputs sit in fixed registers and are destroyed; result is produced in rcx. Code comes from enc_String_Compare (defined outside this chunk).
12059   format %{ "String Compare $str1,$str2,$cnt1,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
12060   ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, tmp1, tmp2, result) );
12061   ins_pipe( pipe_slow );
12062 %}
12063 
12064 instruct string_indexof(rsi_RegP str1, rdi_RegP str2, rdx_RegI cnt1, rax_RegI cnt2,
12065                         regD tmp1, rcx_RegI tmp2, rbx_RegI result, rFlagsReg cr)
12066 %{
12067   predicate(UseSSE42Intrinsics); // only instruct in this group gated on a flag
12068   match(Set result (StrIndexOf (Binary str1 str2) (Binary cnt1 cnt2)));
12069   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12070   // StrIndexOf intrinsic: inputs in fixed registers are destroyed, rcx (tmp2) is clobbered, result is produced in rbx.
12071   format %{ "String IndexOf $str1,$str2,$cnt1,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
12072   ins_encode( enc_String_IndexOf(str1, str2, cnt1, cnt2, tmp1, tmp2, result) );
12073   ins_pipe( pipe_slow );
12074 %}
12075 
12076 // fast string equals
12077 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, regD tmp1, regD tmp2,
12078                        rbx_RegI tmp3, rax_RegI result, rFlagsReg cr)
12079 %{
12080   match(Set result (StrEquals (Binary str1 str2) cnt));
12081   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12082   // StrEquals intrinsic: takes a single count operand; inputs are destroyed, rbx (tmp3) is clobbered, result is produced in rax.
12083   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12084   ins_encode( enc_String_Equals(str1, str2, cnt, tmp1, tmp2, tmp3, result) );
12085   ins_pipe( pipe_slow );
12086 %}
12087 
12088 // fast array equals
12089 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
12090                       rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
12091 %{
12092   match(Set result (AryEq ary1 ary2));
12093   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12094   //ins_cost(300);
12095   // AryEq intrinsic: no length operands in the match rule; both array pointers are destroyed, rax/rbx are clobbered, result is produced in rcx.
12096   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
12097   ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
12098   ins_pipe( pipe_slow );
12099 %}
12100 
12101 //----------Control Flow Instructions------------------------------------------
12102 // Signed compare Instructions
12103 
12104 // XXX more variants!!
12105 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12106 %{
12107   match(Set cr (CmpI op1 op2));
12108   effect(DEF cr, USE op1, USE op2);
12109   // Signed 32-bit register/register compare. Also instantiated by the minI_rReg and maxI_rReg expand blocks.
12110   format %{ "cmpl    $op1, $op2" %}
12111   opcode(0x3B);  /* Opcode 3B /r */
12112   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12113   ins_pipe(ialu_cr_reg_reg);
12114 %}
12115 
12116 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12117 %{
12118   match(Set cr (CmpI op1 op2));
12119   // Signed 32-bit compare against an immediate. Presumably OpcSErm/Con8or32 select an 8- or 32-bit immediate form -- confirm in the enc_classes.
12120   format %{ "cmpl    $op1, $op2" %}
12121   opcode(0x81, 0x07); /* Opcode 81 /7 */
12122   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12123   ins_pipe(ialu_cr_reg_imm);
12124 %}
12125 
12126 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12127 %{
12128   match(Set cr (CmpI op1 (LoadI op2)));
12129   // Signed 32-bit compare with the LoadI folded in as the second operand.
12130   ins_cost(500); // XXX
12131   format %{ "cmpl    $op1, $op2" %}
12132   opcode(0x3B); /* Opcode 3B /r */
12133   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12134   ins_pipe(ialu_cr_reg_mem);
12135 %}
12136 
12137 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
12138 %{
12139   match(Set cr (CmpI src zero));
12140   // Compare with constant zero, done as testl of the register against itself; the zero operand is not encoded.
12141   format %{ "testl   $src, $src" %}
12142   opcode(0x85);
12143   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12144   ins_pipe(ialu_cr_reg_imm);
12145 %}
12146 
12147 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
12148 %{
12149   match(Set cr (CmpI (AndI src con) zero));
12150   // (src & con) compared with zero: the AndI is folded into one testl with a 32-bit immediate.
12151   format %{ "testl   $src, $con" %}
12152   opcode(0xF7, 0x00);
12153   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
12154   ins_pipe(ialu_cr_reg_imm);
12155 %}
12156 
12157 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
12158 %{
12159   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12160   // (src & *mem) compared with zero: both the AndI and the LoadI are folded into one testl.
12161   format %{ "testl   $src, $mem" %}
12162   opcode(0x85);
12163   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
12164   ins_pipe(ialu_cr_reg_mem);
12165 %}
12166 
12167 // Unsigned compare Instructions; really, same as signed except they
12168 // produce an rFlagsRegU instead of rFlagsReg.
12169 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12170 %{
12171   match(Set cr (CmpU op1 op2));
12172   // Same opcode and encoding as compI_rReg; only the flags operand type (rFlagsRegU) and the matched node differ.
12173   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12174   opcode(0x3B); /* Opcode 3B /r */
12175   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12176   ins_pipe(ialu_cr_reg_reg);
12177 %}
12178 
12179 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12180 %{
12181   match(Set cr (CmpU op1 op2));
12182   // Same opcode and encoding as compI_rReg_imm, producing an rFlagsRegU result.
12183   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12184   opcode(0x81,0x07); /* Opcode 81 /7 */
12185   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12186   ins_pipe(ialu_cr_reg_imm);
12187 %}
12188 
12189 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12190 %{
12191   match(Set cr (CmpU op1 (LoadI op2)));
12192   // Same opcode and encoding as compI_rReg_mem, producing an rFlagsRegU result.
12193   ins_cost(500); // XXX
12194   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12195   opcode(0x3B); /* Opcode 3B /r */
12196   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12197   ins_pipe(ialu_cr_reg_mem);
12198 %}
12199 
12200 // // // Cisc-spilled version of cmpU_rReg
12201 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12202 // //%{
12203 // //  match(Set cr (CmpU (LoadI op1) op2));
12204 // //
12205 // //  format %{ "CMPu   $op1,$op2" %}
12206 // //  ins_cost(500);
12207 // //  opcode(0x39);  /* Opcode 39 /r */
12208 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12209 // //%}
12210 
12211 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
12212 %{
12213   match(Set cr (CmpU src zero));
12214   // Unsigned compare with constant zero, done as testl of the register against itself (same encoding as testI_reg).
12215   format %{ "testl  $src, $src\t# unsigned" %}
12216   opcode(0x85);
12217   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12218   ins_pipe(ialu_cr_reg_imm);
12219 %}
12220 
12221 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12222 %{
12223   match(Set cr (CmpP op1 op2));
12224   // Pointer register/register compare: opcode 3B with the wide REX form (printed as cmpq), unsigned flags result.
12225   format %{ "cmpq    $op1, $op2\t# ptr" %}
12226   opcode(0x3B); /* Opcode 3B /r */
12227   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12228   ins_pipe(ialu_cr_reg_reg);
12229 %}
12230 
12231 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12232 %{
12233   match(Set cr (CmpP op1 (LoadP op2)));
12234   // Pointer compare with the LoadP folded in; no predicate, but costed at 500 (compP_mem_rReg matches the same tree with a predicate and no explicit cost).
12235   ins_cost(500); // XXX
12236   format %{ "cmpq    $op1, $op2\t# ptr" %}
12237   opcode(0x3B); /* Opcode 3B /r */
12238   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12239   ins_pipe(ialu_cr_reg_mem);
12240 %}
12241 
12242 // // // Cisc-spilled version of cmpP_rReg
12243 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12244 // //%{
12245 // //  match(Set cr (CmpP (LoadP op1) op2));
12246 // //
12247 // //  format %{ "CMPu   $op1,$op2" %}
12248 // //  ins_cost(500);
12249 // //  opcode(0x39);  /* Opcode 39 /r */
12250 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12251 // //%}
12252 
12253 // XXX this is generalized by compP_rReg_mem???
12254 // Compare raw pointer (used in out-of-heap check).
12255 // Only works because non-oop pointers must be raw pointers
12256 // and raw pointers have no anti-dependencies.
12257 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12258 %{
12259   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr()); // inspects input 2 of the node's input 2; the rule applies only when its type is not an oop pointer
12260   match(Set cr (CmpP op1 (LoadP op2)));
12261   // Same match tree and encoding as compP_rReg_mem, but without an explicit ins_cost.
12262   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12263   opcode(0x3B); /* Opcode 3B /r */
12264   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12265   ins_pipe(ialu_cr_reg_mem);
12266 %}
12267 
12268 // This will generate a signed flags result. This should be OK since
12269 // any compare to a zero should be eq/neq.
12270 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12271 %{
12272   match(Set cr (CmpP src zero));
12273   // Pointer null check: testq of the register against itself; the zero operand is not encoded. Result type is rFlagsReg, not rFlagsRegU.
12274   format %{ "testq   $src, $src\t# ptr" %}
12275   opcode(0x85);
12276   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12277   ins_pipe(ialu_cr_reg_imm);
12278 %}
12279 
12280 // This will generate a signed flags result. This should be OK since
12281 // any compare to a zero should be eq/neq.
12282 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12283 %{
12284   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); // exact complement of testP_mem_reg0's predicate
12285   match(Set cr (CmpP (LoadP op) zero));
12286   // Null check of a pointer in memory: F7 /0 with the wide REX form and a 32-bit immediate 0xFFFFFFFF (printed as testq against all-ones).
12287   ins_cost(500); // XXX
12288   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12289   opcode(0xF7); /* Opcode F7 /0 */
12290   ins_encode(REX_mem_wide(op),
12291              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
12292   ins_pipe(ialu_cr_reg_imm);
12293 %}
12294 
12295 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12296 %{
12297   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
12298   match(Set cr (CmpP (LoadP mem) zero));
12299   // Null check of a pointer in memory done as a compare against R12, which the format annotates as the heap base (zero under this predicate).
12300   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12301   ins_encode %{
12302     __ cmpq(r12, $mem$$Address);
12303   %}
12304   ins_pipe(ialu_cr_reg_mem);
12305 %}
12306 
12307 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12308 %{
12309   match(Set cr (CmpN op1 op2));
12310   // Compressed-pointer register/register compare, emitted as a 32-bit cmpl.
12311   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12312   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12313   ins_pipe(ialu_cr_reg_reg);
12314 %}
12315 
12316 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12317 %{
12318   match(Set cr (CmpN src (LoadN mem)));
12319   // Compressed-pointer compare with the LoadN folded in as the memory operand of cmpl.
12320   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12321   ins_encode %{
12322     __ cmpl($src$$Register, $mem$$Address);
12323   %}
12324   ins_pipe(ialu_cr_reg_mem);
12325 %}
12326 
12327 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
12328   match(Set cr (CmpN op1 op2));
12329   // Compressed-pointer compare against a constant; the constant is handed to cmp_narrow_oop as a jobject.
12330   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12331   ins_encode %{
12332     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12333   %}
12334   ins_pipe(ialu_cr_reg_imm);
12335 %}
12336 
12337 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12338 %{
12339   match(Set cr (CmpN src (LoadN mem)));
12340   // NOTE(review): the match rule has the constant as the first CmpN input, while the emitted compare takes memory first -- presumably only eq/ne users consume these flags; confirm.
12341   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12342   ins_encode %{
12343     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12344   %}
12345   ins_pipe(ialu_cr_reg_mem);
12346 %}
12347 
12348 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12349   match(Set cr (CmpN src zero));
12350   // Compressed-pointer null check: testl of the register against itself; the zero operand is not encoded.
12351   format %{ "testl   $src, $src\t# compressed ptr" %}
12352   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12353   ins_pipe(ialu_cr_reg_imm);
12354 %}
12355 
12356 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12357 %{
12358   predicate(Universe::narrow_oop_base() != NULL); // complement of testN_mem_reg0's predicate
12359   match(Set cr (CmpN (LoadN mem) zero));
12360   // NOTE(review): the format prints "testl $mem, 0xffffffff" but the encoding below calls cmpl() against 0xFFFFFFFF -- confirm which is intended.
12361   ins_cost(500); // XXX
12362   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12363   ins_encode %{
12364     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
12365   %}
12366   ins_pipe(ialu_cr_reg_mem);
12367 %}
12368 
12369 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12370 %{
12371   predicate(Universe::narrow_oop_base() == NULL);
12372   match(Set cr (CmpN (LoadN mem) zero));
12373   // Compressed-pointer null check in memory done as a 32-bit compare against R12, which the format annotates as the heap base (zero under this predicate).
12374   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12375   ins_encode %{
12376     __ cmpl(r12, $mem$$Address);
12377   %}
12378   ins_pipe(ialu_cr_reg_mem);
12379 %}
12380 
12381 // Yanked all unsigned pointer compare operations.
12382 // Pointer compares are done with CmpP which is already unsigned.
12383 
12384 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12385 %{
12386   match(Set cr (CmpL op1 op2));
12387   // Signed 64-bit register/register compare: opcode 3B with the wide REX form (printed as cmpq).
12388   format %{ "cmpq    $op1, $op2" %}
12389   opcode(0x3B);  /* Opcode 3B /r */
12390   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12391   ins_pipe(ialu_cr_reg_reg);
12392 %}
12393 
12394 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12395 %{
12396   match(Set cr (CmpL op1 op2));
12397   // Signed 64-bit compare against an immL32 constant; uses OpcSErm_wide where compI_rReg_imm uses OpcSErm.
12398   format %{ "cmpq    $op1, $op2" %}
12399   opcode(0x81, 0x07); /* Opcode 81 /7 */
12400   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12401   ins_pipe(ialu_cr_reg_imm);
12402 %}
12403 
12404 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12405 %{
12406   match(Set cr (CmpL op1 (LoadL op2)));
12407   // Signed 64-bit compare with the LoadL folded in; unlike compI_rReg_mem, no explicit ins_cost is set.
12408   format %{ "cmpq    $op1, $op2" %}
12409   opcode(0x3B); /* Opcode 3B /r */
12410   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12411   ins_pipe(ialu_cr_reg_mem);
12412 %}
12413 
12414 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12415 %{
12416   match(Set cr (CmpL src zero));
12417   // Long compare with constant zero, done as testq of the register against itself; the zero operand is not encoded.
12418   format %{ "testq   $src, $src" %}
12419   opcode(0x85);
12420   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12421   ins_pipe(ialu_cr_reg_imm);
12422 %}
12423 
12424 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12425 %{
12426   match(Set cr (CmpL (AndL src con) zero));
12427   // (src & con) compared with zero: the AndL is folded into one testq with a 32-bit immediate (con is restricted to immL32).
12428   format %{ "testq   $src, $con\t# long" %}
12429   opcode(0xF7, 0x00);
12430   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12431   ins_pipe(ialu_cr_reg_imm);
12432 %}
12433 
12434 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12435 %{
12436   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12437   // (src & *mem) compared with zero: both the AndL and the LoadL are folded into one testq.
12438   format %{ "testq   $src, $mem" %}
12439   opcode(0x85);
12440   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12441   ins_pipe(ialu_cr_reg_mem);
12442 %}
12443 
12444 // Manifest a CmpL3 result in an integer register.  Very painful.  Per the format
12445 // text: dst = -1 if src1 < src2, else 0 if equal, else 1.  This is the test to avoid.
12446 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12447 %{
12448   match(Set dst (CmpL3 src1 src2));
12449   effect(KILL flags);  // flags are not an output of the match rule, so they are declared as killed
12450 
12451   ins_cost(275); // XXX
12452   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12453             "movl    $dst, -1\n\t"
12454             "jl,s    done\n\t"
12455             "setne   $dst\n\t"
12456             "movzbl  $dst, $dst\n\t"
12457     "done:" %}
12458   ins_encode(cmpl3_flag(src1, src2, dst));  // the whole sequence comes from one encoding class defined elsewhere in this file
12459   ins_pipe(pipe_slow);
12460 %}
12461 
12462 //----------Max and Min--------------------------------------------------------
12463 // Min Instructions
12464 
12465 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)  // Helper with no match rule; within this section it is referenced only by the expand in minI_rReg.
12466 %{
12467   effect(USE_DEF dst, USE src, USE cr);  // dst is both read and written; the flags are consumed, not produced
12468 
12469   format %{ "cmovlgt $dst, $src\t# min" %}
12470   opcode(0x0F, 0x4F);  // two-byte opcode 0F 4F: conditional move on "greater"
12471   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // OpcP and OpcS stand for the primary and secondary opcode bytes given above
12472   ins_pipe(pipe_cmov_reg);
12473 %}
12474 
12475 
12476 instruct minI_rReg(rRegI dst, rRegI src)  // dst = min(dst, src), built from a compare followed by a conditional move.
12477 %{
12478   match(Set dst (MinI dst src));  // two-address form: dst is both an input and the result
12479 
12480   ins_cost(200);  // the expand below declares a temporary flags operand, compares dst with src (compI_rReg, defined elsewhere), then applies cmovI_reg_g
12481   expand %{
12482     rFlagsReg cr;
12483     compI_rReg(cr, dst, src);
12484     cmovI_reg_g(dst, src, cr);
12485   %}
12486 %}
12487 
12488 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)  // Helper with no match rule; within this section it is referenced only by the expand in maxI_rReg.
12489 %{
12490   effect(USE_DEF dst, USE src, USE cr);  // dst is both read and written; the flags are consumed, not produced
12491 
12492   format %{ "cmovllt $dst, $src\t# max" %}
12493   opcode(0x0F, 0x4C);  // two-byte opcode 0F 4C: conditional move on "less"
12494   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // OpcP and OpcS stand for the primary and secondary opcode bytes given above
12495   ins_pipe(pipe_cmov_reg);
12496 %}
12497 
12498 
12499 instruct maxI_rReg(rRegI dst, rRegI src)  // dst = max(dst, src), built from a compare followed by a conditional move.
12500 %{
12501   match(Set dst (MaxI dst src));  // two-address form: dst is both an input and the result
12502 
12503   ins_cost(200);  // the expand below declares a temporary flags operand, compares dst with src (compI_rReg, defined elsewhere), then applies cmovI_reg_l
12504   expand %{
12505     rFlagsReg cr;
12506     compI_rReg(cr, dst, src);
12507     cmovI_reg_l(dst, src, cr);
12508   %}
12509 %}
12510 
12511 // ============================================================================
12512 // Branch Instructions
12513 
12514 // Jump Direct (unconditional, long form) - Label defines a relative address from JMP+1
12515 instruct jmpDir(label labl)
12516 %{
12517   match(Goto);
12518   effect(USE labl);  // the label is not part of the match rule, so it is declared as a used operand
12519 
12520   ins_cost(300);
12521   format %{ "jmp     $labl" %}
12522   size(5);  // one opcode byte (0xE9) leaves 4 bytes for the displacement
12523   opcode(0xE9);
12524   ins_encode(OpcP, Lbl(labl));
12525   ins_pipe(pipe_jmp);
12526   ins_pc_relative(1);
12527 %}
12528 
12529 // Jump Direct Conditional (long form, rFlagsReg/cmpOp) - Label defines a relative address from Jcc+1
12530 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12531 %{
12532   match(If cop cr);
12533   effect(USE labl);  // the label is not part of the match rule, so it is declared as a used operand
12534 
12535   ins_cost(300);
12536   format %{ "j$cop     $labl" %}
12537   size(6);  // NOTE(review): presumably 0x0F, a condition-adjusted 0x80 byte and a 32-bit displacement, as in the inline encoding of jmpConUCF2 -- confirm in Jcc
12538   opcode(0x0F, 0x80);
12539   ins_encode(Jcc(cop, labl));
12540   ins_pipe(pipe_jcc);
12541   ins_pc_relative(1);
12542 %}
12543 
12544 // Jump Direct Conditional for a counted-loop end (long form) - Label defines a relative address from Jcc+1
12545 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12546 %{
12547   match(CountedLoopEnd cop cr);  // same encoding as jmpCon; only the matched ideal node differs
12548   effect(USE labl);
12549 
12550   ins_cost(300);
12551   format %{ "j$cop     $labl\t# loop end" %}
12552   size(6);
12553   opcode(0x0F, 0x80);
12554   ins_encode(Jcc(cop, labl));
12555   ins_pipe(pipe_jcc);
12556   ins_pc_relative(1);
12557 %}
12558 
12559 // Jump Direct Conditional for a counted-loop end, unsigned flags/condition operands (long form)
12560 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12561   match(CountedLoopEnd cop cmp);  // differs from jmpLoopEnd only in the operand classes (cmpOpU / rFlagsRegU)
12562   effect(USE labl);
12563 
12564   ins_cost(300);
12565   format %{ "j$cop,u   $labl\t# loop end" %}
12566   size(6);
12567   opcode(0x0F, 0x80);
12568   ins_encode(Jcc(cop, labl));
12569   ins_pipe(pipe_jcc);
12570   ins_pc_relative(1);
12571 %}
12572 
12573 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // Counted-loop-end conditional jump for the cmpOpUCF / rFlagsRegUCF operand classes (long form).
12574   match(CountedLoopEnd cop cmp);
12575   effect(USE labl);
12576 
12577   ins_cost(200);  // lower than the 300 used by jmpLoopEnd and jmpLoopEndU
12578   format %{ "j$cop,u   $labl\t# loop end" %}
12579   size(6);
12580   opcode(0x0F, 0x80);
12581   ins_encode(Jcc(cop, labl));
12582   ins_pipe(pipe_jcc);
12583   ins_pc_relative(1);
12584 %}
12585 
12586 // Jump Direct Conditional - using unsigned comparison (long form)
12587 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12588   match(If cop cmp);  // differs from jmpCon only in the operand classes (cmpOpU / rFlagsRegU)
12589   effect(USE labl);
12590 
12591   ins_cost(300);
12592   format %{ "j$cop,u  $labl" %}
12593   size(6);
12594   opcode(0x0F, 0x80);
12595   ins_encode(Jcc(cop, labl));
12596   ins_pipe(pipe_jcc);
12597   ins_pc_relative(1);
12598 %}
12599 
12600 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // Conditional jump for the cmpOpUCF / rFlagsRegUCF operand classes (long form).
12601   match(If cop cmp);
12602   effect(USE labl);
12603 
12604   ins_cost(200);  // lower than the 300 used by jmpCon and jmpConU
12605   format %{ "j$cop,u  $labl" %}
12606   size(6);
12607   opcode(0x0F, 0x80);
12608   ins_encode(Jcc(cop, labl));
12609   ins_pipe(pipe_jcc);
12610   ins_pc_relative(1);
12611 %}
12612 
12613 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{  // Conditional jump emitted as two long jumps: a parity jump followed by the j$cop jump; only equal / notEqual are handled.
12614   match(If cop cmp);
12615   effect(USE labl);
12616 
12617   ins_cost(200);
12618   format %{ $$template
12619     if ($cop$$cmpcode == Assembler::notEqual) {
12620       $$emit$$"jp,u   $labl\n\t"
12621       $$emit$$"j$cop,u   $labl"
12622     } else {
12623       $$emit$$"jp,u   done\n\t"
12624       $$emit$$"j$cop,u   $labl\n\t"
12625       $$emit$$"done:"
12626     }
12627   %}
12628   size(12);  // two jumps of 6 bytes each (two opcode bytes + 32-bit displacement)
12629   opcode(0x0F, 0x80);
12630   ins_encode %{
12631     Label* l = $labl$$label;
12632     $$$emit8$primary;
12633     emit_cc(cbuf, $secondary, Assembler::parity);  // first jump: the parity condition ("jp" in the format)
12634     int parity_disp = -1;
12635     if ($cop$$cmpcode == Assembler::notEqual) {
12636        // the two jumps are 6 bytes apart, so their displacements to the same label differ by 6 as well
12637        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;  // +4: measured from the end of the displacement field about to be emitted
12638     } else if ($cop$$cmpcode == Assembler::equal) {
12639        parity_disp = 6;  // skip the 6-byte jump that follows ("done" in the format)
12640     } else {
12641        ShouldNotReachHere();
12642     }
12643     emit_d32(cbuf, parity_disp);
12644     $$$emit8$primary;
12645     emit_cc(cbuf, $secondary, $cop$$cmpcode);  // second jump: the requested condition itself
12646     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;  // a null label yields displacement 0
12647     emit_d32(cbuf, disp);
12648   %}
12649   ins_pipe(pipe_jcc);
12650   ins_pc_relative(1);
12651 %}
12652 
12653 // ============================================================================
12654 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12655 // superklass array for an instance of the superklass.  Set a hidden
12656 // internal cache on a hit (cache is checked with exposed code in
12657 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12658 // encoding ALSO sets flags.
12659 
12660 instruct partialSubtypeCheck(rdi_RegP result,
12661                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12662                              rFlagsReg cr)
12663 %{  // Slow-path secondary-supers scan whose answer is left in rdi (zero = hit, non-zero = miss, per the format text).
12664   match(Set result (PartialSubtypeCheck sub super));
12665   effect(KILL rcx, KILL cr);  // rcx serves as the scan counter in the format text; the flags are clobbered as well
12666 
12667   ins_cost(1100);  // slightly larger than the next version
12668   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12669             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12670             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12671             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12672             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12673             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12674             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12675     "miss:\t" %}
12676 
12677   opcode(0x1); // Force a XOR of RDI
12678   ins_encode(enc_PartialSubtypeCheck());  // shared with partialSubtypeCheck_vs_Zero; the opcode value above selects the variant that zeroes rdi
12679   ins_pipe(pipe_slow);
12680 %}
12681 
12682 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12683                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12684                                      immP0 zero,
12685                                      rdi_RegP result)
12686 %{  // Variant that matches the subtype scan together with its compare against null, so the flags are the only result.
12687   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12688   effect(KILL rcx, KILL result);  // rdi is only scratch here, so 'result' is declared as killed rather than defined
12689 
12690   ins_cost(1000);  // cheaper than partialSubtypeCheck (1100)
12691   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12692             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12693             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12694             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx-- != 0\n\t"
12695             "jne,s   miss\t\t# Missed: flags nz\n\t"
12696             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12697     "miss:\t" %}
12698 
12699   opcode(0x0); // No need to XOR RDI
12700   ins_encode(enc_PartialSubtypeCheck());  // shared with partialSubtypeCheck; the opcode value above selects the variant without the xor
12701   ins_pipe(pipe_slow);
12702 %}
12703 
12704 // ============================================================================
12705 // Branch Instructions -- short offset versions
12706 //
12707 // These instructions are used to replace jumps of a long offset (the default
12708 // match) with jumps of a shorter offset.  These instructions are all tagged
12709 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12710 // match rules in general matching.  Instead, the ADLC generates a conversion
12711 // method in the MachNode which can be used to do in-place replacement of the
12712 // long variant with the shorter variant.  The compiler will determine if a
12713 // branch can be taken by the is_short_branch_offset() predicate in the machine
12714 // specific code section of the file.
12715 
12716 // Jump Direct (unconditional, short form) - Label defines a relative address from JMP+1
12717 instruct jmpDir_short(label labl) %{
12718   match(Goto);  // same match as jmpDir; the ins_short_branch tag below marks this as its short replacement
12719   effect(USE labl);
12720 
12721   ins_cost(300);
12722   format %{ "jmp,s   $labl" %}
12723   size(2);  // one opcode byte (0xEB) leaves 1 byte for the displacement
12724   opcode(0xEB);
12725   ins_encode(OpcP, LblShort(labl));
12726   ins_pipe(pipe_jmp);
12727   ins_pc_relative(1);
12728   ins_short_branch(1);
12729 %}
12730 
12731 // Jump Direct Conditional (short form of jmpCon) - Label defines a relative address from Jcc+1
12732 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12733   match(If cop cr);  // same match and operands as jmpCon
12734   effect(USE labl);
12735 
12736   ins_cost(300);
12737   format %{ "j$cop,s   $labl" %}
12738   size(2);  // NOTE(review): presumably one condition-adjusted 0x70 byte plus an 8-bit displacement, as in jmpConUCF2_short -- confirm in JccShort
12739   opcode(0x70);
12740   ins_encode(JccShort(cop, labl));
12741   ins_pipe(pipe_jcc);
12742   ins_pc_relative(1);
12743   ins_short_branch(1);
12744 %}
12745 
12746 // Jump Direct Conditional for a counted-loop end (short form of jmpLoopEnd)
12747 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12748   match(CountedLoopEnd cop cr);  // same match and operands as jmpLoopEnd
12749   effect(USE labl);
12750 
12751   ins_cost(300);
12752   format %{ "j$cop,s   $labl\t# loop end" %}
12753   size(2);
12754   opcode(0x70);
12755   ins_encode(JccShort(cop, labl));
12756   ins_pipe(pipe_jcc);
12757   ins_pc_relative(1);
12758   ins_short_branch(1);
12759 %}
12760 
12761 // Jump Direct Conditional for a counted-loop end, unsigned operands (short form of jmpLoopEndU)
12762 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12763   match(CountedLoopEnd cop cmp);  // same match and operands as jmpLoopEndU
12764   effect(USE labl);
12765 
12766   ins_cost(300);
12767   format %{ "j$cop,us  $labl\t# loop end" %}
12768   size(2);
12769   opcode(0x70);
12770   ins_encode(JccShort(cop, labl));
12771   ins_pipe(pipe_jcc);
12772   ins_pc_relative(1);
12773   ins_short_branch(1);
12774 %}
12775 
12776 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // Short form of jmpLoopEndUCF (same match rule and operand classes).
12777   match(CountedLoopEnd cop cmp);
12778   effect(USE labl);
12779 
12780   ins_cost(300);  // note: the long form jmpLoopEndUCF uses 200
12781   format %{ "j$cop,us  $labl\t# loop end" %}
12782   size(2);
12783   opcode(0x70);
12784   ins_encode(JccShort(cop, labl));
12785   ins_pipe(pipe_jcc);
12786   ins_pc_relative(1);
12787   ins_short_branch(1);
12788 %}
12789 
12790 // Jump Direct Conditional - using unsigned comparison (short form of jmpConU)
12791 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12792   match(If cop cmp);  // same match and operands as jmpConU
12793   effect(USE labl);
12794 
12795   ins_cost(300);
12796   format %{ "j$cop,us  $labl" %}
12797   size(2);
12798   opcode(0x70);
12799   ins_encode(JccShort(cop, labl));
12800   ins_pipe(pipe_jcc);
12801   ins_pc_relative(1);
12802   ins_short_branch(1);
12803 %}
12804 
12805 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // Short form of jmpConUCF (same match rule and operand classes).
12806   match(If cop cmp);
12807   effect(USE labl);
12808 
12809   ins_cost(300);  // note: the long form jmpConUCF uses 200
12810   format %{ "j$cop,us  $labl" %}
12811   size(2);
12812   opcode(0x70);
12813   ins_encode(JccShort(cop, labl));
12814   ins_pipe(pipe_jcc);
12815   ins_pc_relative(1);
12816   ins_short_branch(1);
12817 %}
12818 
12819 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{  // Short form of jmpConUCF2: a parity jump followed by the j$cop jump, each with an 8-bit displacement.
12820   match(If cop cmp);
12821   effect(USE labl);
12822 
12823   ins_cost(300);  // note: the long form jmpConUCF2 uses 200
12824   format %{ $$template
12825     if ($cop$$cmpcode == Assembler::notEqual) {
12826       $$emit$$"jp,u,s   $labl\n\t"
12827       $$emit$$"j$cop,u,s   $labl"
12828     } else {
12829       $$emit$$"jp,u,s   done\n\t"
12830       $$emit$$"j$cop,u,s  $labl\n\t"
12831       $$emit$$"done:"
12832     }
12833   %}
12834   size(4);  // two jumps of 2 bytes each (one opcode byte + 8-bit displacement)
12835   opcode(0x70);
12836   ins_encode %{
12837     Label* l = $labl$$label;
12838     emit_cc(cbuf, $primary, Assembler::parity);  // first jump: the parity condition ("jp" in the format)
12839     int parity_disp = -1;
12840     if ($cop$$cmpcode == Assembler::notEqual) {
12841       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;  // +1: measured from the end of the 1-byte displacement about to be emitted
12842     } else if ($cop$$cmpcode == Assembler::equal) {
12843       parity_disp = 2;  // skip the 2-byte jump that follows ("done" in the format)
12844     } else {
12845       ShouldNotReachHere();
12846     }
12847     emit_d8(cbuf, parity_disp);
12848     emit_cc(cbuf, $primary, $cop$$cmpcode);  // second jump: the requested condition itself
12849     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;  // a null label yields displacement 0
12850     emit_d8(cbuf, disp);
12851     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");  // range checks run after both bytes have already been emitted
12852     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12853   %}
12854   ins_pipe(pipe_jcc);
12855   ins_pc_relative(1);
12856   ins_short_branch(1);
12857 %}
12858 
12859 // ============================================================================
12860 // inlined locking and unlocking
12861 
12862 instruct cmpFastLock(rFlagsReg cr,
12863                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12864 %{  // Inlined locking: matches FastLock and delivers its outcome in the flags register.
12865   match(Set cr (FastLock object box));
12866   effect(TEMP tmp, TEMP scr);  // tmp (an rax operand class) and scr are scratch registers reserved for the encoding
12867 
12868   ins_cost(300);
12869   format %{ "fastlock $object,$box,$tmp,$scr" %}
12870   ins_encode(Fast_Lock(object, box, tmp, scr));  // the actual code sequence lives in the Fast_Lock encoding class, defined elsewhere in this file
12871   ins_pipe(pipe_slow);
12872   ins_pc_relative(1);
12873 %}
12874 
12875 instruct cmpFastUnlock(rFlagsReg cr,
12876                        rRegP object, rax_RegP box, rRegP tmp)
12877 %{  // Inlined unlocking: matches FastUnlock and delivers its outcome in the flags register.
12878   match(Set cr (FastUnlock object box));  // here 'box' uses an rax operand class, unlike cmpFastLock where the temp does
12879   effect(TEMP tmp);
12880 
12881   ins_cost(300);
12882   format %{ "fastunlock $object, $box, $tmp" %}
12883   ins_encode(Fast_Unlock(object, box, tmp));  // the actual code sequence lives in the Fast_Unlock encoding class, defined elsewhere in this file
12884   ins_pipe(pipe_slow);
12885   ins_pc_relative(1);
12886 %}
12887 
12888 
12889 // ============================================================================
12890 // Safepoint Instructions
12891 instruct safePoint_poll(rFlagsReg cr)  // Safepoint poll: per the format text, a testl against the polling page addressed relative to rip.
12892 %{
12893   match(SafePoint);
12894   effect(KILL cr);  // the testl shown in the format overwrites the flags
12895 
12896   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12897             "# Safepoint: poll for GC" %}
12898   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12899   ins_cost(125);
12900   ins_encode(enc_safepoint_poll);
12901   ins_pipe(ialu_reg_mem);
12902 %}
12903 
12904 // ============================================================================
12905 // Procedure Call/Return Instructions
12906 // Call Java Static Instruction: direct call for the CallStaticJava ideal node.
12907 // Note: If this code changes, the corresponding ret_addr_offset() and
12908 //       compute_padding() functions will have to be adjusted.
12909 instruct CallStaticJavaDirect(method meth)
12910 %{
12911   match(CallStaticJava);
12912   effect(USE meth);  // the call target is not part of the match rule, so it is declared as a used operand
12913 
12914   ins_cost(300);
12915   format %{ "call,static " %}
12916   opcode(0xE8); /* E8 cd */
12917   ins_encode(Java_Static_Call(meth), call_epilog);
12918   ins_pipe(pipe_slow);
12919   ins_pc_relative(1);
12920   ins_alignment(4);  // alignment request; see the note above about compute_padding()
12921 %}
12922 
12923 // Call Java Dynamic Instruction: call for the CallDynamicJava ideal node.
12924 // Note: If this code changes, the corresponding ret_addr_offset() and
12925 //       compute_padding() functions will have to be adjusted.
12926 instruct CallDynamicJavaDirect(method meth)
12927 %{
12928   match(CallDynamicJava);
12929   effect(USE meth);  // the call target is not part of the match rule, so it is declared as a used operand
12930 
12931   ins_cost(300);
12932   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12933             "call,dynamic " %}
12934   opcode(0xE8); /* E8 cd */
12935   ins_encode(Java_Dynamic_Call(meth), call_epilog);  // per the format text, rax is loaded with Universe::non_oop_word() ahead of the call
12936   ins_pipe(pipe_slow);
12937   ins_pc_relative(1);
12938   ins_alignment(4);  // alignment request; see the note above about compute_padding()
12939 %}
12940 
12941 // Call Runtime Instruction: call for the CallRuntime ideal node.
12942 instruct CallRuntimeDirect(method meth)
12943 %{
12944   match(CallRuntime);
12945   effect(USE meth);  // the call target is not part of the match rule, so it is declared as a used operand
12946 
12947   ins_cost(300);
12948   format %{ "call,runtime " %}
12949   opcode(0xE8); /* E8 cd */
12950   ins_encode(Java_To_Runtime(meth));  // unlike the two Java call rules, no call_epilog and no ins_alignment here
12951   ins_pipe(pipe_slow);
12952   ins_pc_relative(1);
12953 %}
12954 
12955 // Call runtime without safepoint (CallLeaf ideal node)
12956 instruct CallLeafDirect(method meth)
12957 %{
12958   match(CallLeaf);
12959   effect(USE meth);  // the call target is not part of the match rule, so it is declared as a used operand
12960 
12961   ins_cost(300);
12962   format %{ "call_leaf,runtime " %}
12963   opcode(0xE8); /* E8 cd */
12964   ins_encode(Java_To_Runtime(meth));  // same encoding class as CallRuntimeDirect
12965   ins_pipe(pipe_slow);
12966   ins_pc_relative(1);
12967 %}
12968 
12969 // Call runtime without safepoint (CallLeafNoFP ideal node)
12970 instruct CallLeafNoFPDirect(method meth)
12971 %{
12972   match(CallLeafNoFP);
12973   effect(USE meth);  // the call target is not part of the match rule, so it is declared as a used operand
12974 
12975   ins_cost(300);
12976   format %{ "call_leaf_nofp,runtime " %}
12977   opcode(0xE8); /* E8 cd */
12978   ins_encode(Java_To_Runtime(meth));  // same encoding class as CallRuntimeDirect and CallLeafDirect
12979   ins_pipe(pipe_slow);
12980   ins_pc_relative(1);
12981 %}
12982 
12983 // Return Instruction
12984 // Remove the return address & jump to it.
12985 // Notice: We always emit a nop after a ret to make sure there is room
12986 // for safepoint patching
12987 instruct Ret()  // NOTE(review): ins_encode below lists only OpcP (the 0xC3 opcode); no nop is visibly emitted here -- confirm the notice above is still accurate.
12988 %{
12989   match(Return);
12990 
12991   format %{ "ret" %}
12992   opcode(0xC3);  // single-byte opcode for the "ret" shown in the format
12993   ins_encode(OpcP);
12994   ins_pipe(pipe_jmp);
12995 %}
12996 
12997 // Tail Call; Jump from runtime stub to Java code.
12998 // Also known as an 'interprocedural jump'.
12999 // Target of jump will eventually return to caller.
13000 // TailJump below removes the return address; this rule leaves it in place.
13001 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
13002 %{
13003   match(TailCall jump_target method_oop);  // method_oop appears only here (an rbx operand class); the encoding never references it
13004 
13005   ins_cost(300);
13006   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
13007   opcode(0xFF, 0x4); /* Opcode FF /4 */
13008   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));  // indirect jump through the jump_target register
13009   ins_pipe(pipe_jmp);
13010 %}
13011 
13012 // Tail Jump; remove the return address (popped into rdx); jump to target.
13013 // TailCall above leaves the return address around.
13014 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13015 %{
13016   match(TailJump jump_target ex_oop);  // ex_oop appears only here (an rax operand class); the encoding never references it
13017 
13018   ins_cost(300);
13019   format %{ "popq    rdx\t# pop return address\n\t"
13020             "jmp     $jump_target" %}
13021   opcode(0xFF, 0x4); /* Opcode FF /4 */
13022   ins_encode(Opcode(0x5a), // popq rdx
13023              REX_reg(jump_target), OpcP, reg_opc(jump_target));  // then the same indirect jump as TailCalljmpInd
13024   ins_pipe(pipe_jmp);
13025 %}
13026 
13027 // Create exception oop: created by stack-crawling runtime code.
13028 // Created exception is now available to this handler, and is set up
13029 // just prior to jumping to this handler.  No code emitted.
13030 instruct CreateException(rax_RegP ex_oop)
13031 %{
13032   match(Set ex_oop (CreateEx));  // the result is tied to an rax operand class
13033 
13034   size(0);  // zero bytes: consistent with the empty ins_encode() below
13035   // use the following format syntax
13036   format %{ "# exception oop is in rax; no code emitted" %}
13037   ins_encode();
13038   ins_pipe(empty);
13039 %}
13040 
13041 // Rethrow exception:
13042 // The exception oop will come in the first argument position.
13043 // Then JUMP (not call) to the rethrow stub code.
13044 instruct RethrowException()
13045 %{
13046   match(Rethrow);  // no operands: the rule matches the bare Rethrow node
13047 
13048   // use the following format syntax
13049   format %{ "jmp     rethrow_stub" %}
13050   ins_encode(enc_rethrow);  // the jump itself is produced by the enc_rethrow encoding class, defined elsewhere in this file
13051   ins_pipe(pipe_jmp);
13052 %}
13053 
13054 
13055 //----------PEEPHOLE RULES-----------------------------------------------------
13056 // These must follow all instruction definitions as they use the names
13057 // defined in the instruction definitions.
13058 //
13059 // peepmatch ( root_instr_name [preceding_instruction]* );
13060 //
13061 // peepconstraint %{
13062 // (instruction_number.operand_name relational_op instruction_number.operand_name
13063 //  [, ...] );
13064 // // instruction numbers are zero-based using left to right order in peepmatch
13065 //
13066 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13067 // // provide an instruction_number.operand_name for each operand that appears
13068 // // in the replacement instruction's match rule
13069 //
13070 // ---------VM FLAGS---------------------------------------------------------
13071 //
13072 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13073 //
13074 // Each peephole rule is given an identifying number starting with zero and
13075 // increasing by one in the order seen by the parser.  An individual peephole
13076 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13077 // on the command-line.
13078 //
13079 // ---------CURRENT LIMITATIONS----------------------------------------------
13080 //
13081 // Only match adjacent instructions in same basic block
13082 // Only equality constraints
13083 // Only constraints between operands, not (0.dest_reg == RAX_enc)
13084 // Only one replacement instruction
13085 //
13086 // ---------EXAMPLE----------------------------------------------------------
13087 //
13088 // // pertinent parts of existing instructions in architecture description
13089 // instruct movI(rRegI dst, rRegI src)
13090 // %{
13091 //   match(Set dst (CopyI src));
13092 // %}
13093 //
13094 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
13095 // %{
13096 //   match(Set dst (AddI dst src));
13097 //   effect(KILL cr);
13098 // %}
13099 //
13100 // // Change (inc mov) to lea
13101 // peephole %{
13102 //   // increment preceded by register-register move
13103 //   peepmatch ( incI_rReg movI );
13104 //   // require that the destination register of the increment
13105 //   // match the destination register of the move
13106 //   peepconstraint ( 0.dst == 1.dst );
13107 //   // construct a replacement instruction that sets
13108 //   // the destination to ( move's source register + one )
13109 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13110 // %}
13111 //
13112 
13113 // Implementation no longer uses movX instructions since
13114 // machine-independent system no longer uses CopyX nodes.
13115 //
13116 // peephole
13117 // %{
13118 //   peepmatch (incI_rReg movI);
13119 //   peepconstraint (0.dst == 1.dst);
13120 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13121 // %}
13122 
13123 // peephole
13124 // %{
13125 //   peepmatch (decI_rReg movI);
13126 //   peepconstraint (0.dst == 1.dst);
13127 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13128 // %}
13129 
13130 // peephole
13131 // %{
13132 //   peepmatch (addI_rReg_imm movI);
13133 //   peepconstraint (0.dst == 1.dst);
13134 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13135 // %}
13136 
13137 // peephole
13138 // %{
13139 //   peepmatch (incL_rReg movL);
13140 //   peepconstraint (0.dst == 1.dst);
13141 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13142 // %}
13143 
13144 // peephole
13145 // %{
13146 //   peepmatch (decL_rReg movL);
13147 //   peepconstraint (0.dst == 1.dst);
13148 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13149 // %}
13150 
13151 // peephole
13152 // %{
13153 //   peepmatch (addL_rReg_imm movL);
13154 //   peepconstraint (0.dst == 1.dst);
13155 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13156 // %}
13157 
13158 // peephole
13159 // %{
13160 //   peepmatch (addP_rReg_imm movP);
13161 //   peepconstraint (0.dst == 1.dst);
13162 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13163 // %}
13164 
13165 // // Change load of spilled value to only a spill
13166 // instruct storeI(memory mem, rRegI src)
13167 // %{
13168 //   match(Set mem (StoreI mem src));
13169 // %}
13170 //
13171 // instruct loadI(rRegI dst, memory mem)
13172 // %{
13173 //   match(Set dst (LoadI mem));
13174 // %}
13175 //
13176 
13177 peephole  // Drop an int load that re-reads a value the preceding storeI wrote to the same memory operand.
13178 %{
13179   peepmatch (loadI storeI);  // instruction 0 is the root (loadI), instruction 1 is the storeI that precedes it
13180   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the store's source register is the load's destination, and both use the same memory operand
13181   peepreplace (storeI(1.mem 1.mem 1.src));  // keep only the store, rebuilt from the store's own operands
13182 %}
13183 
13184 peephole  // Long counterpart of the loadI/storeI rule: drop a long load that re-reads what the preceding storeL wrote.
13185 %{
13186   peepmatch (loadL storeL);  // instruction 0 is the root (loadL), instruction 1 is the storeL that precedes it
13187   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the store's source register is the load's destination, and both use the same memory operand
13188   peepreplace (storeL(1.mem 1.mem 1.src));  // keep only the store, rebuilt from the store's own operands
13189 %}
13190 
13191 //----------SMARTSPILL RULES---------------------------------------------------
13192 // These must follow all instruction definitions as they use the names
13193 // defined in the instruction definitions.