1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX, RSP, R12 and R15
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX, RSP, R12 and R15
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP, R12 and R15)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP, R12, R15)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP, R12, R15)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP, R12, R15)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP, R12 and R15)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP, R12, R15)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP, R12, R15)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 // Every kind of call node reports the number of bytes between the start
 555 // of its call sequence and the address the return address will point to.
 556 // For a static Java call that distance is 5 bytes.
 557 int MachCallStaticJavaNode::ret_addr_offset() {
 558   const int bytes_to_return_address = 5;
 559   return bytes_to_return_address;
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset() {
 563   const int bytes_to_return_address = 15;  // start of call -> return address
 564   return bytes_to_return_address;
 565 }
 566 
 567 // In os_cpu .ad file
 568 // int MachCallRuntimeNode::ret_addr_offset()
 569 
 570 // Tells the compiler whether a safepoint node must be given the polling
 571 // page address as an explicit input.  On amd64 it need not: the polling
 572 // page is within 2G and is reached with RIP-relative addressing.
 573 bool SafePointNode::needs_polling_address_input() {
 574   const bool polling_page_is_input = false;
 575   return polling_page_is_input;
 576 }
 577 
 578 //
 579 // Compute padding required for nodes which need alignment
 580 //
 581 
 582 // Padding needed so that the bytes following the one-byte call opcode
 583 // start on an alignment_required() boundary; this keeps the patchable
 584 // part of the call from spanning a cache line.
 585 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 586   const int after_opcode = current_offset + 1; // skip call opcode byte
 587   return round_to(after_opcode, alignment_required()) - after_opcode;
 588 }
 589 
 590 // Padding needed so that the bytes following the movq instruction and the
 591 // call opcode byte start on an alignment_required() boundary; this keeps
 592 // the patchable part of the call from spanning a cache line.
 593 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 594   const int after_opcode = current_offset + 11; // skip movq instruction + call opcode byte
 595   return round_to(after_opcode, alignment_required()) - after_opcode;
 596 }
 597 
 598 #ifndef PRODUCT
 599 // Non-product builds only: print the listing text for a breakpoint node.
 600 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 601   st->print("INT3");
 602 }
 603 #endif
 604 
 605 // EMIT_RM(): pack f1 (shifted left 6), f2 (shifted left 3) and f3 into one
 606 // byte and append it to the code buffer.  Used for R/M and SIB bytes.
 607 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 608   address end = cbuf.code_end();
 609   *end = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 610   cbuf.set_code_end(end + 1);
 611 }
 612 
 613 // EMIT_CC(): OR the two fields f1 and f2 together and append the result
 614 // to the code buffer as a single byte.
 615 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 616   address end = cbuf.code_end();
 617   *end = (unsigned char) (f1 | f2);
 618   cbuf.set_code_end(end + 1);
 619 }
 620 
 621 // EMIT_OPCODE(): append 'code', truncated to one byte, to the code buffer.
 622 void emit_opcode(CodeBuffer &cbuf, int code) {
 623   address end = cbuf.code_end();
 624   *end = (unsigned char) code;
 625   cbuf.set_code_end(end + 1);
 626 }
 627 
 628 // EMIT_OPCODE(): record a relocation at inst_mark() + offset, then emit.
 629 void emit_opcode(CodeBuffer &cbuf, int code,
 630                  relocInfo::relocType reloc, int offset, int format) {
 631   address reloc_at = cbuf.inst_mark() + offset;
 632   cbuf.relocate(reloc_at, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8(): append d8, truncated to one byte, to the code buffer.
 637 void emit_d8(CodeBuffer &cbuf, int d8) {
 638   address end = cbuf.code_end();
 639   *end = (unsigned char) d8;
 640   cbuf.set_code_end(end + 1);
 641 }
 642 
 643 // EMIT_D16(): store d16 as a short at the buffer end, advance by 2 bytes.
 644 void emit_d16(CodeBuffer &cbuf, int d16) {
 645   short* slot = (short*) cbuf.code_end();
 646   *slot = d16;
 647   cbuf.set_code_end(cbuf.code_end() + 2);
 648 }
 649 
 650 // EMIT_D32(): store d32 as an int at the buffer end, advance by 4 bytes.
 651 void emit_d32(CodeBuffer &cbuf, int d32) {
 652   int* slot = (int*) cbuf.code_end();
 653   *slot = d32;
 654   cbuf.set_code_end(cbuf.code_end() + 4);
 655 }
 656 
 657 // EMIT_D64(): store d64 as an int64_t at the buffer end, advance by 8 bytes.
 658 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 659   int64_t* slot = (int64_t*) cbuf.code_end();
 660   *slot = d64;
 661   cbuf.set_code_end(cbuf.code_end() + 8);
 662 }
 663 
 664 // Emit a 32-bit value and record a relocation of the given relocType at
 665 // the current instruction mark.  External-word relocations must go
 666 // through the two-argument overload instead (asserted).
 667 void emit_d32_reloc(CodeBuffer& cbuf, int d32,
 668                     relocInfo::relocType reloc, int format) {
 669   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 670   address mark = cbuf.inst_mark();
 671   cbuf.relocate(mark, reloc, format);
 672 
 673   // Same store-and-advance as emit_d32().
 674   emit_d32(cbuf, d32);
 675 }
 676 
 677 // Emit a 32-bit value and record the relocation described by 'rspec' at
 678 // the current instruction mark.
 679 void emit_d32_reloc(CodeBuffer& cbuf, int d32,
 680                     RelocationHolder const& rspec, int format) {
 681 #ifdef ASSERT
 682   // An oop-type value other than 0 or Universe::non_oop_word() must be a
 683   // perm oop.
 684   const bool is_oop_reloc = rspec.reloc()->type() == relocInfo::oop_type;
 685   if (is_oop_reloc && d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 686     assert(oop((intptr_t)d32)->is_oop() && oop((intptr_t)d32)->is_perm(), "cannot embed non-perm oops in code");
 687   }
 688 #endif
 689   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 690 
 691   // Same store-and-advance as emit_d32().
 692   emit_d32(cbuf, d32);
 693 }
 694 
 695 // Emit 'addr' as a 32-bit displacement from the end of the 4-byte slot.
 696 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 697   const int disp = (int) (addr - (cbuf.code_end() + 4));
 698   emit_d32_reloc(cbuf, disp, external_word_Relocation::spec(addr),
 699                  RELOC_DISP32);
 700 }
 701 
 702 
 703 // Emit a 64-bit value and record a relocation of the given relocType at
 704 // the current instruction mark.  The relocation is registered before the
 705 // value is stored.
 706 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64,
 707                     relocInfo::relocType reloc, int format) {
 708   address mark = cbuf.inst_mark();
 709   cbuf.relocate(mark, reloc, format);
 710 
 711   // Same store-and-advance as emit_d64().
 712   emit_d64(cbuf, d64);
 713 }
 714 
 715 // Emit a 64-bit value and record the relocation described by 'rspec' at
 716 // the current instruction mark.
 717 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64,
 718                     RelocationHolder const& rspec, int format) {
 719 #ifdef ASSERT
 720   // An oop-type value other than 0 or Universe::non_oop_word() must be a
 721   // perm oop.
 722   const bool is_oop_reloc = rspec.reloc()->type() == relocInfo::oop_type;
 723   if (is_oop_reloc && d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 724     assert(oop(d64)->is_oop() && oop(d64)->is_perm(),
 725            "cannot embed non-perm oops in code");
 726   }
 727 #endif
 728   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 729 
 730   // Same store-and-advance as emit_d64().
 731   emit_d64(cbuf, d64);
 732 }
 733 
 734 // Emit 'opcode' followed by a [RSP + disp] memory operand: R/M byte,
 735 // SIB byte, then the displacement, which is 8 bits wide when disp fits in
 736 // a signed byte and 32 bits wide otherwise.
 737 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 738   const bool fits_in_byte = (disp >= -0x80) && (disp < 0x80);
 739   emit_opcode(cbuf, opcode);                      // (e.g., FILD   [RSP+src])
 740   emit_rm(cbuf, fits_in_byte ? 0x01 : 0x02, rm_field, RSP_enc);  // R/M byte
 741   emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);          // SIB byte
 742   if (fits_in_byte) {
 743     emit_d8(cbuf, disp);                          // Displacement
 744   } else {
 745     emit_d32(cbuf, disp);                         // Displacement
 746   }
 747 }
 748 
 749 // Emit the R/M byte, optional SIB byte and displacement that address a
     // memory operand paired with register 'reg' (emit_reg_mem).
     //
     //   reg          - register number; only its low three bits are encoded
     //   base, index  - base/index register numbers; only the low three bits
     //                  are encoded
     //   scale        - value placed in the top two bits of the SIB byte
     //   disp         - displacement, emitted as 0, 8 or 32 bits
     //   disp_is_oop  - asserted false; the oop-relocation branches below are
     //                  presumably reachable only where assert() compiles away
     //
     // No opcode or REX prefix is emitted here.
 750 void encode_RegMem(CodeBuffer &cbuf,
 751                    int reg,
 752                    int base, int index, int scale, int disp, bool disp_is_oop)
 753 {
 754   assert(!disp_is_oop, "cannot have disp");
     // Keep only the three bits that fit in an R/M or SIB field.
 755   int regenc = reg & 7;
 756   int baseenc = base & 7;
 757   int indexenc = index & 7;
 758 
 759   // There is no index & no scale, use form without SIB byte
     // (taken only when index == 0x4, scale == 0 and base is neither RSP nor R12)
 760   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 761     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 762     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 763       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 764     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 765       // If 8-bit displacement, mode 0x1
 766       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 767       emit_d8(cbuf, disp);
 768     } else {
 769       // If 32-bit displacement
     // NOTE(review): base == -1 is tested only on this 32-bit path.  With a
     // zero or 8-bit disp the branches above encode baseenc (-1 & 7 == 7)
     // instead -- confirm callers never pass that combination.
 770       if (base == -1) { // Special flag for absolute address
 771         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 772         if (disp_is_oop) {
 773           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 774         } else {
 775           emit_d32(cbuf, disp);
 776         }
 777       } else {
 778         // Normal base + offset
 779         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 780         if (disp_is_oop) {
 781           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 782         } else {
 783           emit_d32(cbuf, disp);
 784         }
 785       }
 786     }
 787   } else {
 788     // Else, encode with the SIB byte
 789     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 790     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 791       // If no displacement
 792       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 793       emit_rm(cbuf, scale, indexenc, baseenc);
 794     } else {
 795       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 796         // If 8-bit displacement, mode 0x1
 797         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 798         emit_rm(cbuf, scale, indexenc, baseenc);
 799         emit_d8(cbuf, disp);
 800       } else {
 801         // If 32-bit displacement
     // Both arms below emit identical bytes: baseenc is already 0x04
     // whenever base == 0x04.
 802         if (base == 0x04 ) {
 803           emit_rm(cbuf, 0x2, regenc, 0x4);
 804           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 805         } else {
 806           emit_rm(cbuf, 0x2, regenc, 0x4);
 807           emit_rm(cbuf, scale, indexenc, baseenc); // *
 808         }
 809         if (disp_is_oop) {
 810           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 811         } else {
 812           emit_d32(cbuf, disp);
 813         }
 814       }
 815     }
 816   }
 817 }
 818 
 819 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 820 {
 821   if (dstenc != srcenc) {
 822     if (dstenc < 8) {
 823       if (srcenc >= 8) {
 824         emit_opcode(cbuf, Assembler::REX_B);
 825         srcenc -= 8;
 826       }
 827     } else {
 828       if (srcenc < 8) {
 829         emit_opcode(cbuf, Assembler::REX_R);
 830       } else {
 831         emit_opcode(cbuf, Assembler::REX_RB);
 832         srcenc -= 8;
 833       }
 834       dstenc -= 8;
 835     }
 836 
 837     emit_opcode(cbuf, 0x8B);
 838     emit_rm(cbuf, 0x3, dstenc, srcenc);
 839   }
 840 }
 841 
 842 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 843   if( dst_encoding == src_encoding ) {
 844     // reg-reg copy, use an empty encoding
 845   } else {
 846     MacroAssembler _masm(&cbuf);
 847 
 848     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 849   }
 850 }
 851 
 852 
 853 //=============================================================================
 854 #ifndef PRODUCT
 855 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 856 {
 857   Compile* C = ra_->C;
 858 
 859   int framesize = C->frame_slots() << LogBytesPerInt;
 860   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 861   // Remove wordSize for return adr already pushed
 862   // and another for the RBP we are going to save
 863   framesize -= 2*wordSize;
 864   bool need_nop = true;
 865 
 866   // Calls to C2R adapters often do not accept exceptional returns.
 867   // We require that their callers must bang for them.  But be
 868   // careful, because some VM calls (such as call site linkage) can
 869   // use several kilobytes of stack.  But the stack safety zone should
 870   // account for that.  See bugs 4446381, 4468289, 4497237.
 871   if (C->need_stack_bang(framesize)) {
 872     st->print_cr("# stack bang"); st->print("\t");
 873     need_nop = false;
 874   }
 875   st->print_cr("pushq   rbp"); st->print("\t");
 876 
 877   if (VerifyStackAtCalls) {
 878     // Majik cookie to verify stack depth
 879     st->print_cr("pushq   0xffffffffbadb100d"
 880                   "\t# Majik cookie for stack depth check");
 881     st->print("\t");
 882     framesize -= wordSize; // Remove 2 for cookie
 883     need_nop = false;
 884   }
 885 
 886   if (framesize) {
 887     st->print("subq    rsp, #%d\t# Create frame", framesize);
 888     if (framesize < 0x80 && need_nop) {
 889       st->print("\n\tnop\t# nop for patch_verified_entry");
 890     }
 891   }
 892 }
 893 #endif
 894 
 895 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 896 {
 897   Compile* C = ra_->C;
 898 
 899   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 900   // NativeJump::patch_verified_entry will be able to patch out the entry
 901   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 902   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 903   // 6 bytes. So if we don't do the fldcw or the push then we must
 904   // use the 6 byte frame allocation even if we have no frame. :-(
 905   // If method sets FPU control word do it now
 906 
 907   int framesize = C->frame_slots() << LogBytesPerInt;
 908   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 909   // Remove wordSize for return adr already pushed
 910   // and another for the RBP we are going to save
 911   framesize -= 2*wordSize;
 912   bool need_nop = true;
 913 
 914   // Calls to C2R adapters often do not accept exceptional returns.
 915   // We require that their callers must bang for them.  But be
 916   // careful, because some VM calls (such as call site linkage) can
 917   // use several kilobytes of stack.  But the stack safety zone should
 918   // account for that.  See bugs 4446381, 4468289, 4497237.
 919   if (C->need_stack_bang(framesize)) {
 920     MacroAssembler masm(&cbuf);
 921     masm.generate_stack_overflow_check(framesize);
 922     need_nop = false;
 923   }
 924 
 925   // We always push rbp so that on return to interpreter rbp will be
 926   // restored correctly and we can correct the stack.
 927   emit_opcode(cbuf, 0x50 | RBP_enc);
 928 
 929   if (VerifyStackAtCalls) {
 930     // Majik cookie to verify stack depth
 931     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 932     emit_d32(cbuf, 0xbadb100d);
 933     framesize -= wordSize; // Remove 2 for cookie
 934     need_nop = false;
 935   }
 936 
 937   if (framesize) {
 938     emit_opcode(cbuf, Assembler::REX_W);
 939     if (framesize < 0x80) {
 940       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 941       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 942       emit_d8(cbuf, framesize);
 943       if (need_nop) {
 944         emit_opcode(cbuf, 0x90); // nop
 945       }
 946     } else {
 947       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 948       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 949       emit_d32(cbuf, framesize);
 950     }
 951   }
 952 
 953   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 954 
 955 #ifdef ASSERT
 956   if (VerifyStackAtCalls) {
 957     Label L;
 958     MacroAssembler masm(&cbuf);
 959     masm.push(rax);
 960     masm.mov(rax, rsp);
 961     masm.andptr(rax, StackAlignmentInBytes-1);
 962     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 963     masm.pop(rax);
 964     masm.jcc(Assembler::equal, L);
 965     masm.stop("Stack is not properly aligned!");
 966     masm.bind(L);
 967   }
 968 #endif
 969 }
 970 
 971 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 972 {
 973   return MachNode::size(ra_); // too many variables; just compute it
 974                               // the hard way
 975 }
 976 
 977 int MachPrologNode::reloc() const
 978 {
 979   return 0; // a large enough number
 980 }
 981 
 982 //=============================================================================
 983 #ifndef PRODUCT
 984 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 985 {
 986   Compile* C = ra_->C;
 987   int framesize = C->frame_slots() << LogBytesPerInt;
 988   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 989   // Remove word for return adr already pushed
 990   // and RBP
 991   framesize -= 2*wordSize;
 992 
 993   if (framesize) {
 994     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 995     st->print("\t");
 996   }
 997 
 998   st->print_cr("popq\trbp");
 999   if (do_polling() && C->is_method_compilation()) {
1000     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1001                   "# Safepoint: poll for GC");
1002     st->print("\t");
1003   }
1004 }
1005 #endif
1006 
1007 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1008 {
1009   Compile* C = ra_->C;
1010   int framesize = C->frame_slots() << LogBytesPerInt;
1011   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1012   // Remove word for return adr already pushed
1013   // and RBP
1014   framesize -= 2*wordSize;
1015 
1016   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1017 
1018   if (framesize) {
1019     emit_opcode(cbuf, Assembler::REX_W);
1020     if (framesize < 0x80) {
1021       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1022       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1023       emit_d8(cbuf, framesize);
1024     } else {
1025       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1026       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1027       emit_d32(cbuf, framesize);
1028     }
1029   }
1030 
1031   // popq rbp
1032   emit_opcode(cbuf, 0x58 | RBP_enc);
1033 
1034   if (do_polling() && C->is_method_compilation()) {
1035     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1036     // XXX reg_mem doesn't support RIP-relative addressing yet
1037     cbuf.set_inst_mark();
1038     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1039     emit_opcode(cbuf, 0x85); // testl
1040     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1041     // cbuf.inst_mark() is beginning of instruction
1042     emit_d32_reloc(cbuf, os::get_polling_page());
1043 //                    relocInfo::poll_return_type,
1044   }
1045 }
1046 
1047 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1048 {
1049   Compile* C = ra_->C;
1050   int framesize = C->frame_slots() << LogBytesPerInt;
1051   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1052   // Remove word for return adr already pushed
1053   // and RBP
1054   framesize -= 2*wordSize;
1055 
1056   uint size = 0;
1057 
1058   if (do_polling() && C->is_method_compilation()) {
1059     size += 6;
1060   }
1061 
1062   // count popq rbp
1063   size++;
1064 
1065   if (framesize) {
1066     if (framesize < 0x80) {
1067       size += 4;
1068     } else if (framesize) {
1069       size += 7;
1070     }
1071   }
1072 
1073   return size;
1074 }
1075 
1076 int MachEpilogNode::reloc() const
1077 {
1078   return 2; // a large enough number
1079 }
1080 
1081 const Pipeline* MachEpilogNode::pipeline() const
1082 {
1083   return MachNode::pipeline_class();
1084 }
1085 
1086 int MachEpilogNode::safepoint_offset() const
1087 {
1088   return 0;
1089 }
1090 
1091 //=============================================================================
1092 
// Coarse register class of one half of a spill-copy operand, as computed by
// rc_class().  The numeric values match the implicit enumeration order.
enum RC {
  rc_bad   = 0, // not a valid OptoReg name
  rc_int   = 1, // general purpose register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack slot
};
1099 
1100 static enum RC rc_class(OptoReg::Name reg)
1101 {
1102   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1103 
1104   if (OptoReg::is_stack(reg)) return rc_stack;
1105 
1106   VMReg r = OptoReg::as_VMReg(reg);
1107 
1108   if (r->is_Register()) return rc_int;
1109 
1110   assert(r->is_XMMRegister(), "must be");
1111   return rc_float;
1112 }
1113 
1114 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1115                                        PhaseRegAlloc* ra_,
1116                                        bool do_size,
1117                                        outputStream* st) const
1118 {
1119 
1120   // Get registers to move
1121   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1122   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1123   OptoReg::Name dst_second = ra_->get_reg_second(this);
1124   OptoReg::Name dst_first = ra_->get_reg_first(this);
1125 
1126   enum RC src_second_rc = rc_class(src_second);
1127   enum RC src_first_rc = rc_class(src_first);
1128   enum RC dst_second_rc = rc_class(dst_second);
1129   enum RC dst_first_rc = rc_class(dst_first);
1130 
1131   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1132          "must move at least 1 register" );
1133 
1134   if (src_first == dst_first && src_second == dst_second) {
1135     // Self copy, no move
1136     return 0;
1137   } else if (src_first_rc == rc_stack) {
1138     // mem ->
1139     if (dst_first_rc == rc_stack) {
1140       // mem -> mem
1141       assert(src_second != dst_first, "overlap");
1142       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1143           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1144         // 64-bit
1145         int src_offset = ra_->reg2offset(src_first);
1146         int dst_offset = ra_->reg2offset(dst_first);
1147         if (cbuf) {
1148           emit_opcode(*cbuf, 0xFF);
1149           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1150 
1151           emit_opcode(*cbuf, 0x8F);
1152           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1153 
1154 #ifndef PRODUCT
1155         } else if (!do_size) {
1156           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1157                      "popq    [rsp + #%d]",
1158                      src_offset,
1159                      dst_offset);
1160 #endif
1161         }
1162         return
1163           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1164           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1165       } else {
1166         // 32-bit
1167         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1168         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1169         // No pushl/popl, so:
1170         int src_offset = ra_->reg2offset(src_first);
1171         int dst_offset = ra_->reg2offset(dst_first);
1172         if (cbuf) {
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x89);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179           emit_opcode(*cbuf, 0x8B);
1180           encode_RegMem(*cbuf,
1181                         RAX_enc,
1182                         RSP_enc, 0x4, 0, src_offset,
1183                         false);
1184 
1185           emit_opcode(*cbuf, 0x89);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, dst_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, Assembler::REX_W);
1192           emit_opcode(*cbuf, 0x8B);
1193           emit_opcode(*cbuf, 0x44);
1194           emit_opcode(*cbuf, 0x24);
1195           emit_opcode(*cbuf, 0xF8);
1196 
1197 #ifndef PRODUCT
1198         } else if (!do_size) {
1199           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1200                      "movl    rax, [rsp + #%d]\n\t"
1201                      "movl    [rsp + #%d], rax\n\t"
1202                      "movq    rax, [rsp - #8]",
1203                      src_offset,
1204                      dst_offset);
1205 #endif
1206         }
1207         return
1208           5 + // movq
1209           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1210           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1211           5; // movq
1212       }
1213     } else if (dst_first_rc == rc_int) {
1214       // mem -> gpr
1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1217         // 64-bit
1218         int offset = ra_->reg2offset(src_first);
1219         if (cbuf) {
1220           if (Matcher::_regEncode[dst_first] < 8) {
1221             emit_opcode(*cbuf, Assembler::REX_W);
1222           } else {
1223             emit_opcode(*cbuf, Assembler::REX_WR);
1224           }
1225           emit_opcode(*cbuf, 0x8B);
1226           encode_RegMem(*cbuf,
1227                         Matcher::_regEncode[dst_first],
1228                         RSP_enc, 0x4, 0, offset,
1229                         false);
1230 #ifndef PRODUCT
1231         } else if (!do_size) {
1232           st->print("movq    %s, [rsp + #%d]\t# spill",
1233                      Matcher::regName[dst_first],
1234                      offset);
1235 #endif
1236         }
1237         return
1238           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1239       } else {
1240         // 32-bit
1241         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1242         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1243         int offset = ra_->reg2offset(src_first);
1244         if (cbuf) {
1245           if (Matcher::_regEncode[dst_first] >= 8) {
1246             emit_opcode(*cbuf, Assembler::REX_R);
1247           }
1248           emit_opcode(*cbuf, 0x8B);
1249           encode_RegMem(*cbuf,
1250                         Matcher::_regEncode[dst_first],
1251                         RSP_enc, 0x4, 0, offset,
1252                         false);
1253 #ifndef PRODUCT
1254         } else if (!do_size) {
1255           st->print("movl    %s, [rsp + #%d]\t# spill",
1256                      Matcher::regName[dst_first],
1257                      offset);
1258 #endif
1259         }
1260         return
1261           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1262           ((Matcher::_regEncode[dst_first] < 8)
1263            ? 3
1264            : 4); // REX
1265       }
1266     } else if (dst_first_rc == rc_float) {
1267       // mem-> xmm
1268       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1269           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1270         // 64-bit
1271         int offset = ra_->reg2offset(src_first);
1272         if (cbuf) {
1273           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1274           if (Matcher::_regEncode[dst_first] >= 8) {
1275             emit_opcode(*cbuf, Assembler::REX_R);
1276           }
1277           emit_opcode(*cbuf, 0x0F);
1278           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1279           encode_RegMem(*cbuf,
1280                         Matcher::_regEncode[dst_first],
1281                         RSP_enc, 0x4, 0, offset,
1282                         false);
1283 #ifndef PRODUCT
1284         } else if (!do_size) {
1285           st->print("%s  %s, [rsp + #%d]\t# spill",
1286                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1287                      Matcher::regName[dst_first],
1288                      offset);
1289 #endif
1290         }
1291         return
1292           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1293           ((Matcher::_regEncode[dst_first] < 8)
1294            ? 5
1295            : 6); // REX
1296       } else {
1297         // 32-bit
1298         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1299         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1300         int offset = ra_->reg2offset(src_first);
1301         if (cbuf) {
1302           emit_opcode(*cbuf, 0xF3);
1303           if (Matcher::_regEncode[dst_first] >= 8) {
1304             emit_opcode(*cbuf, Assembler::REX_R);
1305           }
1306           emit_opcode(*cbuf, 0x0F);
1307           emit_opcode(*cbuf, 0x10);
1308           encode_RegMem(*cbuf,
1309                         Matcher::_regEncode[dst_first],
1310                         RSP_enc, 0x4, 0, offset,
1311                         false);
1312 #ifndef PRODUCT
1313         } else if (!do_size) {
1314           st->print("movss   %s, [rsp + #%d]\t# spill",
1315                      Matcher::regName[dst_first],
1316                      offset);
1317 #endif
1318         }
1319         return
1320           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1321           ((Matcher::_regEncode[dst_first] < 8)
1322            ? 5
1323            : 6); // REX
1324       }
1325     }
1326   } else if (src_first_rc == rc_int) {
1327     // gpr ->
1328     if (dst_first_rc == rc_stack) {
1329       // gpr -> mem
1330       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1331           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1332         // 64-bit
1333         int offset = ra_->reg2offset(dst_first);
1334         if (cbuf) {
1335           if (Matcher::_regEncode[src_first] < 8) {
1336             emit_opcode(*cbuf, Assembler::REX_W);
1337           } else {
1338             emit_opcode(*cbuf, Assembler::REX_WR);
1339           }
1340           emit_opcode(*cbuf, 0x89);
1341           encode_RegMem(*cbuf,
1342                         Matcher::_regEncode[src_first],
1343                         RSP_enc, 0x4, 0, offset,
1344                         false);
1345 #ifndef PRODUCT
1346         } else if (!do_size) {
1347           st->print("movq    [rsp + #%d], %s\t# spill",
1348                      offset,
1349                      Matcher::regName[src_first]);
1350 #endif
1351         }
1352         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1353       } else {
1354         // 32-bit
1355         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1356         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1357         int offset = ra_->reg2offset(dst_first);
1358         if (cbuf) {
1359           if (Matcher::_regEncode[src_first] >= 8) {
1360             emit_opcode(*cbuf, Assembler::REX_R);
1361           }
1362           emit_opcode(*cbuf, 0x89);
1363           encode_RegMem(*cbuf,
1364                         Matcher::_regEncode[src_first],
1365                         RSP_enc, 0x4, 0, offset,
1366                         false);
1367 #ifndef PRODUCT
1368         } else if (!do_size) {
1369           st->print("movl    [rsp + #%d], %s\t# spill",
1370                      offset,
1371                      Matcher::regName[src_first]);
1372 #endif
1373         }
1374         return
1375           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1376           ((Matcher::_regEncode[src_first] < 8)
1377            ? 3
1378            : 4); // REX
1379       }
1380     } else if (dst_first_rc == rc_int) {
1381       // gpr -> gpr
1382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1384         // 64-bit
1385         if (cbuf) {
1386           if (Matcher::_regEncode[dst_first] < 8) {
1387             if (Matcher::_regEncode[src_first] < 8) {
1388               emit_opcode(*cbuf, Assembler::REX_W);
1389             } else {
1390               emit_opcode(*cbuf, Assembler::REX_WB);
1391             }
1392           } else {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_WR);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WRB);
1397             }
1398           }
1399           emit_opcode(*cbuf, 0x8B);
1400           emit_rm(*cbuf, 0x3,
1401                   Matcher::_regEncode[dst_first] & 7,
1402                   Matcher::_regEncode[src_first] & 7);
1403 #ifndef PRODUCT
1404         } else if (!do_size) {
1405           st->print("movq    %s, %s\t# spill",
1406                      Matcher::regName[dst_first],
1407                      Matcher::regName[src_first]);
1408 #endif
1409         }
1410         return 3; // REX
1411       } else {
1412         // 32-bit
1413         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1414         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1415         if (cbuf) {
1416           if (Matcher::_regEncode[dst_first] < 8) {
1417             if (Matcher::_regEncode[src_first] >= 8) {
1418               emit_opcode(*cbuf, Assembler::REX_B);
1419             }
1420           } else {
1421             if (Matcher::_regEncode[src_first] < 8) {
1422               emit_opcode(*cbuf, Assembler::REX_R);
1423             } else {
1424               emit_opcode(*cbuf, Assembler::REX_RB);
1425             }
1426           }
1427           emit_opcode(*cbuf, 0x8B);
1428           emit_rm(*cbuf, 0x3,
1429                   Matcher::_regEncode[dst_first] & 7,
1430                   Matcher::_regEncode[src_first] & 7);
1431 #ifndef PRODUCT
1432         } else if (!do_size) {
1433           st->print("movl    %s, %s\t# spill",
1434                      Matcher::regName[dst_first],
1435                      Matcher::regName[src_first]);
1436 #endif
1437         }
1438         return
1439           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1440           ? 2
1441           : 3; // REX
1442       }
1443     } else if (dst_first_rc == rc_float) {
1444       // gpr -> xmm
1445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1447         // 64-bit
1448         if (cbuf) {
1449           emit_opcode(*cbuf, 0x66);
1450           if (Matcher::_regEncode[dst_first] < 8) {
1451             if (Matcher::_regEncode[src_first] < 8) {
1452               emit_opcode(*cbuf, Assembler::REX_W);
1453             } else {
1454               emit_opcode(*cbuf, Assembler::REX_WB);
1455             }
1456           } else {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_WR);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WRB);
1461             }
1462           }
1463           emit_opcode(*cbuf, 0x0F);
1464           emit_opcode(*cbuf, 0x6E);
1465           emit_rm(*cbuf, 0x3,
1466                   Matcher::_regEncode[dst_first] & 7,
1467                   Matcher::_regEncode[src_first] & 7);
1468 #ifndef PRODUCT
1469         } else if (!do_size) {
1470           st->print("movdq   %s, %s\t# spill",
1471                      Matcher::regName[dst_first],
1472                      Matcher::regName[src_first]);
1473 #endif
1474         }
1475         return 5; // REX
1476       } else {
1477         // 32-bit
1478         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1479         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1480         if (cbuf) {
1481           emit_opcode(*cbuf, 0x66);
1482           if (Matcher::_regEncode[dst_first] < 8) {
1483             if (Matcher::_regEncode[src_first] >= 8) {
1484               emit_opcode(*cbuf, Assembler::REX_B);
1485             }
1486           } else {
1487             if (Matcher::_regEncode[src_first] < 8) {
1488               emit_opcode(*cbuf, Assembler::REX_R);
1489             } else {
1490               emit_opcode(*cbuf, Assembler::REX_RB);
1491             }
1492           }
1493           emit_opcode(*cbuf, 0x0F);
1494           emit_opcode(*cbuf, 0x6E);
1495           emit_rm(*cbuf, 0x3,
1496                   Matcher::_regEncode[dst_first] & 7,
1497                   Matcher::_regEncode[src_first] & 7);
1498 #ifndef PRODUCT
1499         } else if (!do_size) {
1500           st->print("movdl   %s, %s\t# spill",
1501                      Matcher::regName[dst_first],
1502                      Matcher::regName[src_first]);
1503 #endif
1504         }
1505         return
1506           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1507           ? 4
1508           : 5; // REX
1509       }
1510     }
1511   } else if (src_first_rc == rc_float) {
1512     // xmm ->
1513     if (dst_first_rc == rc_stack) {
1514       // xmm -> mem
1515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1517         // 64-bit
1518         int offset = ra_->reg2offset(dst_first);
1519         if (cbuf) {
1520           emit_opcode(*cbuf, 0xF2);
1521           if (Matcher::_regEncode[src_first] >= 8) {
1522               emit_opcode(*cbuf, Assembler::REX_R);
1523           }
1524           emit_opcode(*cbuf, 0x0F);
1525           emit_opcode(*cbuf, 0x11);
1526           encode_RegMem(*cbuf,
1527                         Matcher::_regEncode[src_first],
1528                         RSP_enc, 0x4, 0, offset,
1529                         false);
1530 #ifndef PRODUCT
1531         } else if (!do_size) {
1532           st->print("movsd   [rsp + #%d], %s\t# spill",
1533                      offset,
1534                      Matcher::regName[src_first]);
1535 #endif
1536         }
1537         return
1538           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1539           ((Matcher::_regEncode[src_first] < 8)
1540            ? 5
1541            : 6); // REX
1542       } else {
1543         // 32-bit
1544         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1545         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1546         int offset = ra_->reg2offset(dst_first);
1547         if (cbuf) {
1548           emit_opcode(*cbuf, 0xF3);
1549           if (Matcher::_regEncode[src_first] >= 8) {
1550               emit_opcode(*cbuf, Assembler::REX_R);
1551           }
1552           emit_opcode(*cbuf, 0x0F);
1553           emit_opcode(*cbuf, 0x11);
1554           encode_RegMem(*cbuf,
1555                         Matcher::_regEncode[src_first],
1556                         RSP_enc, 0x4, 0, offset,
1557                         false);
1558 #ifndef PRODUCT
1559         } else if (!do_size) {
1560           st->print("movss   [rsp + #%d], %s\t# spill",
1561                      offset,
1562                      Matcher::regName[src_first]);
1563 #endif
1564         }
1565         return
1566           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1567           ((Matcher::_regEncode[src_first] < 8)
1568            ? 5
1569            : 6); // REX
1570       }
1571     } else if (dst_first_rc == rc_int) {
1572       // xmm -> gpr
1573       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1574           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1575         // 64-bit
1576         if (cbuf) {
1577           emit_opcode(*cbuf, 0x66);
1578           if (Matcher::_regEncode[dst_first] < 8) {
1579             if (Matcher::_regEncode[src_first] < 8) {
1580               emit_opcode(*cbuf, Assembler::REX_W);
1581             } else {
1582               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1583             }
1584           } else {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WRB);
1589             }
1590           }
1591           emit_opcode(*cbuf, 0x0F);
1592           emit_opcode(*cbuf, 0x7E);
1593           emit_rm(*cbuf, 0x3,
1594                   Matcher::_regEncode[dst_first] & 7,
1595                   Matcher::_regEncode[src_first] & 7);
1596 #ifndef PRODUCT
1597         } else if (!do_size) {
1598           st->print("movdq   %s, %s\t# spill",
1599                      Matcher::regName[dst_first],
1600                      Matcher::regName[src_first]);
1601 #endif
1602         }
1603         return 5; // REX
1604       } else {
1605         // 32-bit
1606         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1607         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1608         if (cbuf) {
1609           emit_opcode(*cbuf, 0x66);
1610           if (Matcher::_regEncode[dst_first] < 8) {
1611             if (Matcher::_regEncode[src_first] >= 8) {
1612               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1613             }
1614           } else {
1615             if (Matcher::_regEncode[src_first] < 8) {
1616               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1617             } else {
1618               emit_opcode(*cbuf, Assembler::REX_RB);
1619             }
1620           }
1621           emit_opcode(*cbuf, 0x0F);
1622           emit_opcode(*cbuf, 0x7E);
1623           emit_rm(*cbuf, 0x3,
1624                   Matcher::_regEncode[dst_first] & 7,
1625                   Matcher::_regEncode[src_first] & 7);
1626 #ifndef PRODUCT
1627         } else if (!do_size) {
1628           st->print("movdl   %s, %s\t# spill",
1629                      Matcher::regName[dst_first],
1630                      Matcher::regName[src_first]);
1631 #endif
1632         }
1633         return
1634           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1635           ? 4
1636           : 5; // REX
1637       }
1638     } else if (dst_first_rc == rc_float) {
1639       // xmm -> xmm
1640       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1641           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1642         // 64-bit
1643         if (cbuf) {
1644           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1645           if (Matcher::_regEncode[dst_first] < 8) {
1646             if (Matcher::_regEncode[src_first] >= 8) {
1647               emit_opcode(*cbuf, Assembler::REX_B);
1648             }
1649           } else {
1650             if (Matcher::_regEncode[src_first] < 8) {
1651               emit_opcode(*cbuf, Assembler::REX_R);
1652             } else {
1653               emit_opcode(*cbuf, Assembler::REX_RB);
1654             }
1655           }
1656           emit_opcode(*cbuf, 0x0F);
1657           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1658           emit_rm(*cbuf, 0x3,
1659                   Matcher::_regEncode[dst_first] & 7,
1660                   Matcher::_regEncode[src_first] & 7);
1661 #ifndef PRODUCT
1662         } else if (!do_size) {
1663           st->print("%s  %s, %s\t# spill",
1664                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1665                      Matcher::regName[dst_first],
1666                      Matcher::regName[src_first]);
1667 #endif
1668         }
1669         return
1670           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1671           ? 4
1672           : 5; // REX
1673       } else {
1674         // 32-bit
1675         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1676         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1677         if (cbuf) {
1678           if (!UseXmmRegToRegMoveAll)
1679             emit_opcode(*cbuf, 0xF3);
1680           if (Matcher::_regEncode[dst_first] < 8) {
1681             if (Matcher::_regEncode[src_first] >= 8) {
1682               emit_opcode(*cbuf, Assembler::REX_B);
1683             }
1684           } else {
1685             if (Matcher::_regEncode[src_first] < 8) {
1686               emit_opcode(*cbuf, Assembler::REX_R);
1687             } else {
1688               emit_opcode(*cbuf, Assembler::REX_RB);
1689             }
1690           }
1691           emit_opcode(*cbuf, 0x0F);
1692           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1693           emit_rm(*cbuf, 0x3,
1694                   Matcher::_regEncode[dst_first] & 7,
1695                   Matcher::_regEncode[src_first] & 7);
1696 #ifndef PRODUCT
1697         } else if (!do_size) {
1698           st->print("%s  %s, %s\t# spill",
1699                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1700                      Matcher::regName[dst_first],
1701                      Matcher::regName[src_first]);
1702 #endif
1703         }
1704         return
1705           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1706           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1707           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1708       }
1709     }
1710   }
1711 
1712   assert(0," foo ");
1713   Unimplemented();
1714 
1715   return 0;
1716 }
1717 
1718 #ifndef PRODUCT
1719 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1720 {
1721   implementation(NULL, ra_, false, st);
1722 }
1723 #endif
1724 
1725 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1726 {
1727   implementation(&cbuf, ra_, false, NULL);
1728 }
1729 
1730 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1731 {
1732   return implementation(NULL, ra_, true, NULL);
1733 }
1734 
1735 //=============================================================================
1736 #ifndef PRODUCT
1737 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1738 {
1739   st->print("nop \t# %d bytes pad for loops and calls", _count);
1740 }
1741 #endif
1742 
1743 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1744 {
1745   MacroAssembler _masm(&cbuf);
1746   __ nop(_count);
1747 }
1748 
1749 uint MachNopNode::size(PhaseRegAlloc*) const
1750 {
1751   return _count;
1752 }
1753 
1754 
1755 //=============================================================================
1756 #ifndef PRODUCT
1757 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1758 {
1759   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1760   int reg = ra_->get_reg_first(this);
1761   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1762             Matcher::regName[reg], offset);
1763 }
1764 #endif
1765 
1766 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1767 {
1768   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1769   int reg = ra_->get_encode(this);
1770   if (offset >= 0x80) {
1771     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1772     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1773     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1774     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1775     emit_d32(cbuf, offset);
1776   } else {
1777     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1780     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781     emit_d8(cbuf, offset);
1782   }
1783 }
1784 
1785 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1786 {
1787   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1788   return (offset < 0x80) ? 5 : 8; // REX
1789 }
1790 
1791 //=============================================================================
1792 
1793 // emit call stub, compiled java to interpreter
1794 void emit_java_to_interp(CodeBuffer& cbuf)
1795 {
1796   // Stub is fixed up when the corresponding call is converted from
1797   // calling compiled code to calling interpreted code.
1798   // movq rbx, 0
1799   // jmp -5 # to self
1800 
1801   address mark = cbuf.inst_mark();  // get mark within main instrs section
1802 
1803   // Note that the code buffer's inst_mark is always relative to insts.
1804   // That's why we must use the macroassembler to generate a stub.
1805   MacroAssembler _masm(&cbuf);
1806 
1807   address base =
1808   __ start_a_stub(Compile::MAX_stubs_size);
1809   if (base == NULL)  return;  // CodeBuffer::expand failed
1810   // static stub relocation stores the instruction address of the call
1811   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1812   // static stub relocation also tags the methodOop in the code-stream.
1813   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1814   // This is recognized as unresolved by relocs/nativeinst/ic code
1815   __ jump(RuntimeAddress(__ pc()));
1816 
1817   // Update current stubs pointer and restore code_end.
1818   __ end_a_stub();
1819 }
1820 
1821 // size of call stub, compiled java to interpretor
1822 uint size_java_to_interp()
1823 {
1824   return 15;  // movq (1+1+8); jmp (1+4)
1825 }
1826 
1827 // relocation entries for call stub, compiled java to interpretor
1828 uint reloc_java_to_interp()
1829 {
1830   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1831 }
1832 
1833 //=============================================================================
1834 #ifndef PRODUCT
1835 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1836 {
1837   if (UseCompressedOops) {
1838     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1839     if (Universe::narrow_oop_shift() != 0) {
1840       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1841     }
1842     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1843   } else {
1844     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1845                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1846   }
1847   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1848   st->print_cr("\tnop");
1849   if (!OptoBreakpoint) {
1850     st->print_cr("\tnop");
1851   }
1852 }
1853 #endif
1854 
1855 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1856 {
1857   MacroAssembler masm(&cbuf);
1858 #ifdef ASSERT
1859   uint code_size = cbuf.code_size();
1860 #endif
1861   if (UseCompressedOops) {
1862     masm.load_klass(rscratch1, j_rarg0);
1863     masm.cmpptr(rax, rscratch1);
1864   } else {
1865     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1866   }
1867 
1868   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1869 
1870   /* WARNING these NOPs are critical so that verified entry point is properly
1871      aligned for patching by NativeJump::patch_verified_entry() */
1872   int nops_cnt = 1;
1873   if (!OptoBreakpoint) {
1874     // Leave space for int3
1875      nops_cnt += 1;
1876   }
1877   if (UseCompressedOops) {
1878     // ??? divisible by 4 is aligned?
1879     nops_cnt += 1;
1880   }
1881   masm.nop(nops_cnt);
1882 
1883   assert(cbuf.code_size() - code_size == size(ra_),
1884          "checking code size of inline cache node");
1885 }
1886 
1887 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1888 {
1889   if (UseCompressedOops) {
1890     if (Universe::narrow_oop_shift() == 0) {
1891       return OptoBreakpoint ? 15 : 16;
1892     } else {
1893       return OptoBreakpoint ? 19 : 20;
1894     }
1895   } else {
1896     return OptoBreakpoint ? 11 : 12;
1897   }
1898 }
1899 
1900 
1901 //=============================================================================
1902 uint size_exception_handler()
1903 {
1904   // NativeCall instruction size is the same as NativeJump.
1905   // Note that this value is also credited (in output.cpp) to
1906   // the size of the code section.
1907   return NativeJump::instruction_size;
1908 }
1909 
1910 // Emit exception handler code.
1911 int emit_exception_handler(CodeBuffer& cbuf)
1912 {
1913 
1914   // Note that the code buffer's inst_mark is always relative to insts.
1915   // That's why we must use the macroassembler to generate a handler.
1916   MacroAssembler _masm(&cbuf);
1917   address base =
1918   __ start_a_stub(size_exception_handler());
1919   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1920   int offset = __ offset();
1921   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1922   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1923   __ end_a_stub();
1924   return offset;
1925 }
1926 
1927 uint size_deopt_handler()
1928 {
1929   // three 5 byte instructions
1930   return 15;
1931 }
1932 
1933 // Emit deopt handler code.
1934 int emit_deopt_handler(CodeBuffer& cbuf)
1935 {
1936 
1937   // Note that the code buffer's inst_mark is always relative to insts.
1938   // That's why we must use the macroassembler to generate a handler.
1939   MacroAssembler _masm(&cbuf);
1940   address base =
1941   __ start_a_stub(size_deopt_handler());
1942   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1943   int offset = __ offset();
1944   address the_pc = (address) __ pc();
1945   Label next;
1946   // push a "the_pc" on the stack without destroying any registers
1947   // as they all may be live.
1948 
1949   // push address of "next"
1950   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1951   __ bind(next);
1952   // adjust it so it matches "the_pc"
1953   __ subptr(Address(rsp, 0), __ offset() - offset);
1954   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1955   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1956   __ end_a_stub();
1957   return offset;
1958 }
1959 
1960 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1961   int mark = cbuf.insts()->mark_off();
1962   MacroAssembler _masm(&cbuf);
1963   address double_address = __ double_constant(x);
1964   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1965   emit_d32_reloc(cbuf,
1966                  (int) (double_address - cbuf.code_end() - 4),
1967                  internal_word_Relocation::spec(double_address),
1968                  RELOC_DISP32);
1969 }
1970 
1971 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1972   int mark = cbuf.insts()->mark_off();
1973   MacroAssembler _masm(&cbuf);
1974   address float_address = __ float_constant(x);
1975   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1976   emit_d32_reloc(cbuf,
1977                  (int) (float_address - cbuf.code_end() - 4),
1978                  internal_word_Relocation::spec(float_address),
1979                  RELOC_DISP32);
1980 }
1981 
1982 
1983 const bool Matcher::match_rule_supported(int opcode) {
1984   if (!has_match_rule(opcode))
1985     return false;
1986 
1987   return true;  // Per default match rules are supported.
1988 }
1989 
1990 int Matcher::regnum_to_fpu_offset(int regnum)
1991 {
1992   return regnum - 32; // The FP registers are in the second chunk
1993 }
1994 
1995 // This is UltraSparc specific, true just means we have fast l2f conversion
1996 const bool Matcher::convL2FSupported(void) {
1997   return true;
1998 }
1999 
2000 // Vector width in bytes
2001 const uint Matcher::vector_width_in_bytes(void) {
2002   return 8;
2003 }
2004 
2005 // Vector ideal reg
2006 const uint Matcher::vector_ideal_reg(void) {
2007   return Op_RegD;
2008 }
2009 
2010 // Is this branch offset short enough that a short branch can be used?
2011 //
2012 // NOTE: If the platform does not provide any short branch variants, then
2013 //       this method should return false for offset 0.
2014 bool Matcher::is_short_branch_offset(int rule, int offset) {
2015   // the short version of jmpConUCF2 contains multiple branches,
2016   // making the reach slightly less
2017   if (rule == jmpConUCF2_rule)
2018     return (-126 <= offset && offset <= 125);
2019   return (-128 <= offset && offset <= 127);
2020 }
2021 
2022 const bool Matcher::isSimpleConstant64(jlong value) {
2023   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2024   //return value == (int) value;  // Cf. storeImmL and immL32.
2025 
2026   // Probably always true, even if a temp register is required.
2027   return true;
2028 }
2029 
2030 // The ecx parameter to rep stosq for the ClearArray node is in words.
2031 const bool Matcher::init_array_count_is_in_bytes = false;
2032 
2033 // Threshold size for cleararray.
2034 const int Matcher::init_array_short_size = 8 * BytesPerLong;
2035 
2036 // Should the Matcher clone shifts on addressing modes, expecting them
2037 // to be subsumed into complex addressing expressions or compute them
2038 // into registers?  True for Intel but false for most RISCs
2039 const bool Matcher::clone_shift_expressions = true;
2040 
2041 // Is it better to copy float constants, or load them directly from
2042 // memory?  Intel can load a float constant from a direct address,
2043 // requiring no extra registers.  Most RISCs will have to materialize
2044 // an address into a register first, so they would do better to copy
2045 // the constant from stack.
2046 const bool Matcher::rematerialize_float_constants = true; // XXX
2047 
2048 // If CPU can load and store mis-aligned doubles directly then no
2049 // fixup is needed.  Else we split the double into 2 integer pieces
2050 // and move it piece-by-piece.  Only happens when passing doubles into
2051 // C code as the Java calling convention forces doubles to be aligned.
2052 const bool Matcher::misaligned_doubles_ok = true;
2053 
2054 // No-op on amd64
2055 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2056 
2057 // Advertise here if the CPU requires explicit rounding operations to
2058 // implement the UseStrictFP mode.
2059 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2060 
2061 // Do floats take an entire double register or just half?
2062 const bool Matcher::float_in_double = true;
2063 // Do ints take an entire long register or just half?
2064 const bool Matcher::int_in_long = true;
2065 
2066 // Return whether or not this register is ever used as an argument.
2067 // This function is used on startup to build the trampoline stubs in
2068 // generateOptoStub.  Registers not mentioned will be killed by the VM
2069 // call in the trampoline, and arguments in those registers not be
2070 // available to the callee.
2071 bool Matcher::can_be_java_arg(int reg)
2072 {
2073   return
2074     reg ==  RDI_num || reg ==  RDI_H_num ||
2075     reg ==  RSI_num || reg ==  RSI_H_num ||
2076     reg ==  RDX_num || reg ==  RDX_H_num ||
2077     reg ==  RCX_num || reg ==  RCX_H_num ||
2078     reg ==   R8_num || reg ==   R8_H_num ||
2079     reg ==   R9_num || reg ==   R9_H_num ||
2080     reg ==  R12_num || reg ==  R12_H_num ||
2081     reg == XMM0_num || reg == XMM0_H_num ||
2082     reg == XMM1_num || reg == XMM1_H_num ||
2083     reg == XMM2_num || reg == XMM2_H_num ||
2084     reg == XMM3_num || reg == XMM3_H_num ||
2085     reg == XMM4_num || reg == XMM4_H_num ||
2086     reg == XMM5_num || reg == XMM5_H_num ||
2087     reg == XMM6_num || reg == XMM6_H_num ||
2088     reg == XMM7_num || reg == XMM7_H_num;
2089 }
2090 
2091 bool Matcher::is_spillable_arg(int reg)
2092 {
2093   return can_be_java_arg(reg);
2094 }
2095 
2096 // Register for DIVI projection of divmodI
2097 RegMask Matcher::divI_proj_mask() {
2098   return INT_RAX_REG_mask;
2099 }
2100 
2101 // Register for MODI projection of divmodI
2102 RegMask Matcher::modI_proj_mask() {
2103   return INT_RDX_REG_mask;
2104 }
2105 
2106 // Register for DIVL projection of divmodL
2107 RegMask Matcher::divL_proj_mask() {
2108   return LONG_RAX_REG_mask;
2109 }
2110 
2111 // Register for MODL projection of divmodL
2112 RegMask Matcher::modL_proj_mask() {
2113   return LONG_RDX_REG_mask;
2114 }
2115 
2116 static Address build_address(int b, int i, int s, int d) {
2117   Register index = as_Register(i);
2118   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2119   if (index == rsp) {
2120     index = noreg;
2121     scale = Address::no_scale;
2122   }
2123   Address addr(as_Register(b), index, scale, d);
2124   return addr;
2125 }
2126 
2127 %}
2128 
2129 //----------ENCODING BLOCK-----------------------------------------------------
2130 // This block specifies the encoding classes used by the compiler to
2131 // output byte streams.  Encoding classes are parameterized macros
2132 // used by Machine Instruction Nodes in order to generate the bit
2133 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2136 // COND_INTER.  REG_INTER causes an operand to generate a function
2137 // which returns its register number when queried.  CONST_INTER causes
2138 // an operand to generate a function which returns the value of the
2139 // constant when queried.  MEMORY_INTER causes an operand to generate
2140 // four functions which return the Base Register, the Index Register,
2141 // the Scale Value, and the Offset Value of the operand when queried.
2142 // COND_INTER causes an operand to generate six functions which return
2143 // the encoding code (ie - encoding bits for the instruction)
2144 // associated with each basic boolean condition for a conditional
2145 // instruction.
2146 //
2147 // Instructions specify two basic values for encoding.  Again, a
2148 // function is available to check if the constant displacement is an
2149 // oop. They use the ins_encode keyword to specify their encoding
2150 // classes (which must be a sequence of enc_class names, and their
2151 // parameters, specified in the encoding block), and they use the
2152 // opcode keyword to specify, in order, their primary, secondary, and
2153 // tertiary opcode.  Only the opcode sections which a particular
2154 // instruction needs for encoding need to be specified.
2155 encode %{
2156   // Build emit functions for each basic byte or larger field in the
2157   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2158   // from C++ code in the enc_class source block.  Emit functions will
2159   // live in the main source block for now.  In future, we can
2160   // generalize this by adding a syntax that specifies the sizes of
2161   // fields in an order, so that the adlc can build the emit functions
2162   // automagically
2163 
2164   // Emit primary opcode
2165   enc_class OpcP
2166   %{
2167     emit_opcode(cbuf, $primary);
2168   %}
2169 
2170   // Emit secondary opcode
2171   enc_class OpcS
2172   %{
2173     emit_opcode(cbuf, $secondary);
2174   %}
2175 
2176   // Emit tertiary opcode
2177   enc_class OpcT
2178   %{
2179     emit_opcode(cbuf, $tertiary);
2180   %}
2181 
2182   // Emit opcode directly
2183   enc_class Opcode(immI d8)
2184   %{
2185     emit_opcode(cbuf, $d8$$constant);
2186   %}
2187 
2188   // Emit size prefix
2189   enc_class SizePrefix
2190   %{
2191     emit_opcode(cbuf, 0x66);
2192   %}
2193 
2194   enc_class reg(rRegI reg)
2195   %{
2196     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2197   %}
2198 
2199   enc_class reg_reg(rRegI dst, rRegI src)
2200   %{
2201     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2202   %}
2203 
2204   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2205   %{
2206     emit_opcode(cbuf, $opcode$$constant);
2207     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2208   %}
2209 
2210   enc_class cmpfp_fixup()
2211   %{
2212     // jnp,s exit
2213     emit_opcode(cbuf, 0x7B);
2214     emit_d8(cbuf, 0x0A);
2215 
2216     // pushfq
2217     emit_opcode(cbuf, 0x9C);
2218 
2219     // andq $0xffffff2b, (%rsp)
2220     emit_opcode(cbuf, Assembler::REX_W);
2221     emit_opcode(cbuf, 0x81);
2222     emit_opcode(cbuf, 0x24);
2223     emit_opcode(cbuf, 0x24);
2224     emit_d32(cbuf, 0xffffff2b);
2225 
2226     // popfq
2227     emit_opcode(cbuf, 0x9D);
2228 
2229     // nop (target for branch to avoid branch to branch)
2230     emit_opcode(cbuf, 0x90);
2231   %}
2232 
2233   enc_class cmpfp3(rRegI dst)
2234   %{
2235     int dstenc = $dst$$reg;
2236 
2237     // movl $dst, -1
2238     if (dstenc >= 8) {
2239       emit_opcode(cbuf, Assembler::REX_B);
2240     }
2241     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2242     emit_d32(cbuf, -1);
2243 
2244     // jp,s done
2245     emit_opcode(cbuf, 0x7A);
2246     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2247 
2248     // jb,s done
2249     emit_opcode(cbuf, 0x72);
2250     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2251 
2252     // setne $dst
2253     if (dstenc >= 4) {
2254       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2255     }
2256     emit_opcode(cbuf, 0x0F);
2257     emit_opcode(cbuf, 0x95);
2258     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2259 
2260     // movzbl $dst, $dst
2261     if (dstenc >= 4) {
2262       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2263     }
2264     emit_opcode(cbuf, 0x0F);
2265     emit_opcode(cbuf, 0xB6);
2266     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2267   %}
2268 
2269   enc_class cdql_enc(no_rax_rdx_RegI div)
2270   %{
2271     // Full implementation of Java idiv and irem; checks for
2272     // special case as described in JVM spec., p.243 & p.271.
2273     //
2274     //         normal case                           special case
2275     //
2276     // input : rax: dividend                         min_int
2277     //         reg: divisor                          -1
2278     //
2279     // output: rax: quotient  (= rax idiv reg)       min_int
2280     //         rdx: remainder (= rax irem reg)       0
2281     //
2282     //  Code sequnce:
2283     //
2284     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2285     //    5:   75 07/08                jne    e <normal>
2286     //    7:   33 d2                   xor    %edx,%edx
2287     //  [div >= 8 -> offset + 1]
2288     //  [REX_B]
2289     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2290     //    c:   74 03/04                je     11 <done>
2291     // 000000000000000e <normal>:
2292     //    e:   99                      cltd
2293     //  [div >= 8 -> offset + 1]
2294     //  [REX_B]
2295     //    f:   f7 f9                   idiv   $div
2296     // 0000000000000011 <done>:
2297 
2298     // cmp    $0x80000000,%eax
2299     emit_opcode(cbuf, 0x3d);
2300     emit_d8(cbuf, 0x00);
2301     emit_d8(cbuf, 0x00);
2302     emit_d8(cbuf, 0x00);
2303     emit_d8(cbuf, 0x80);
2304 
2305     // jne    e <normal>
2306     emit_opcode(cbuf, 0x75);
2307     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2308 
2309     // xor    %edx,%edx
2310     emit_opcode(cbuf, 0x33);
2311     emit_d8(cbuf, 0xD2);
2312 
2313     // cmp    $0xffffffffffffffff,%ecx
2314     if ($div$$reg >= 8) {
2315       emit_opcode(cbuf, Assembler::REX_B);
2316     }
2317     emit_opcode(cbuf, 0x83);
2318     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2319     emit_d8(cbuf, 0xFF);
2320 
2321     // je     11 <done>
2322     emit_opcode(cbuf, 0x74);
2323     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2324 
2325     // <normal>
2326     // cltd
2327     emit_opcode(cbuf, 0x99);
2328 
2329     // idivl (note: must be emitted by the user of this rule)
2330     // <done>
2331   %}
2332 
2333   enc_class cdqq_enc(no_rax_rdx_RegL div)
2334   %{
2335     // Full implementation of Java ldiv and lrem; checks for
2336     // special case as described in JVM spec., p.243 & p.271.
2337     //
2338     //         normal case                           special case
2339     //
2340     // input : rax: dividend                         min_long
2341     //         reg: divisor                          -1
2342     //
2343     // output: rax: quotient  (= rax idiv reg)       min_long
2344     //         rdx: remainder (= rax irem reg)       0
2345     //
2346     //  Code sequnce:
2347     //
2348     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2349     //    7:   00 00 80
2350     //    a:   48 39 d0                cmp    %rdx,%rax
2351     //    d:   75 08                   jne    17 <normal>
2352     //    f:   33 d2                   xor    %edx,%edx
2353     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2354     //   15:   74 05                   je     1c <done>
2355     // 0000000000000017 <normal>:
2356     //   17:   48 99                   cqto
2357     //   19:   48 f7 f9                idiv   $div
2358     // 000000000000001c <done>:
2359 
2360     // mov    $0x8000000000000000,%rdx
2361     emit_opcode(cbuf, Assembler::REX_W);
2362     emit_opcode(cbuf, 0xBA);
2363     emit_d8(cbuf, 0x00);
2364     emit_d8(cbuf, 0x00);
2365     emit_d8(cbuf, 0x00);
2366     emit_d8(cbuf, 0x00);
2367     emit_d8(cbuf, 0x00);
2368     emit_d8(cbuf, 0x00);
2369     emit_d8(cbuf, 0x00);
2370     emit_d8(cbuf, 0x80);
2371 
2372     // cmp    %rdx,%rax
2373     emit_opcode(cbuf, Assembler::REX_W);
2374     emit_opcode(cbuf, 0x39);
2375     emit_d8(cbuf, 0xD0);
2376 
2377     // jne    17 <normal>
2378     emit_opcode(cbuf, 0x75);
2379     emit_d8(cbuf, 0x08);
2380 
2381     // xor    %edx,%edx
2382     emit_opcode(cbuf, 0x33);
2383     emit_d8(cbuf, 0xD2);
2384 
2385     // cmp    $0xffffffffffffffff,$div
2386     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2387     emit_opcode(cbuf, 0x83);
2388     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2389     emit_d8(cbuf, 0xFF);
2390 
2391     // je     1e <done>
2392     emit_opcode(cbuf, 0x74);
2393     emit_d8(cbuf, 0x05);
2394 
2395     // <normal>
2396     // cqto
2397     emit_opcode(cbuf, Assembler::REX_W);
2398     emit_opcode(cbuf, 0x99);
2399 
2400     // idivq (note: must be emitted by the user of this rule)
2401     // <done>
2402   %}
2403 
2404   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2405   enc_class OpcSE(immI imm)
2406   %{
2407     // Emit primary opcode and set sign-extend bit
2408     // Check for 8-bit immediate, and set sign extend bit in opcode
2409     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2410       emit_opcode(cbuf, $primary | 0x02);
2411     } else {
2412       // 32-bit immediate
2413       emit_opcode(cbuf, $primary);
2414     }
2415   %}
2416 
2417   enc_class OpcSErm(rRegI dst, immI imm)
2418   %{
2419     // OpcSEr/m
2420     int dstenc = $dst$$reg;
2421     if (dstenc >= 8) {
2422       emit_opcode(cbuf, Assembler::REX_B);
2423       dstenc -= 8;
2424     }
2425     // Emit primary opcode and set sign-extend bit
2426     // Check for 8-bit immediate, and set sign extend bit in opcode
2427     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2428       emit_opcode(cbuf, $primary | 0x02);
2429     } else {
2430       // 32-bit immediate
2431       emit_opcode(cbuf, $primary);
2432     }
2433     // Emit r/m byte with secondary opcode, after primary opcode.
2434     emit_rm(cbuf, 0x3, $secondary, dstenc);
2435   %}
2436 
2437   enc_class OpcSErm_wide(rRegL dst, immI imm)
2438   %{
2439     // OpcSEr/m
2440     int dstenc = $dst$$reg;
2441     if (dstenc < 8) {
2442       emit_opcode(cbuf, Assembler::REX_W);
2443     } else {
2444       emit_opcode(cbuf, Assembler::REX_WB);
2445       dstenc -= 8;
2446     }
2447     // Emit primary opcode and set sign-extend bit
2448     // Check for 8-bit immediate, and set sign extend bit in opcode
2449     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2450       emit_opcode(cbuf, $primary | 0x02);
2451     } else {
2452       // 32-bit immediate
2453       emit_opcode(cbuf, $primary);
2454     }
2455     // Emit r/m byte with secondary opcode, after primary opcode.
2456     emit_rm(cbuf, 0x3, $secondary, dstenc);
2457   %}
2458 
2459   enc_class Con8or32(immI imm)
2460   %{
2461     // Check for 8-bit immediate, and set sign extend bit in opcode
2462     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2463       $$$emit8$imm$$constant;
2464     } else {
2465       // 32-bit immediate
2466       $$$emit32$imm$$constant;
2467     }
2468   %}
2469 
2470   enc_class Lbl(label labl)
2471   %{
2472     // JMP, CALL
2473     Label* l = $labl$$label;
2474     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2475   %}
2476 
2477   enc_class LblShort(label labl)
2478   %{
2479     // JMP, CALL
2480     Label* l = $labl$$label;
2481     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2482     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2483     emit_d8(cbuf, disp);
2484   %}
2485 
2486   enc_class opc2_reg(rRegI dst)
2487   %{
2488     // BSWAP
2489     emit_cc(cbuf, $secondary, $dst$$reg);
2490   %}
2491 
2492   enc_class opc3_reg(rRegI dst)
2493   %{
2494     // BSWAP
2495     emit_cc(cbuf, $tertiary, $dst$$reg);
2496   %}
2497 
2498   enc_class reg_opc(rRegI div)
2499   %{
2500     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2501     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2502   %}
2503 
2504   enc_class Jcc(cmpOp cop, label labl)
2505   %{
2506     // JCC
2507     Label* l = $labl$$label;
2508     $$$emit8$primary;
2509     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2510     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2511   %}
2512 
2513   enc_class JccShort (cmpOp cop, label labl)
2514   %{
2515   // JCC
2516     Label *l = $labl$$label;
2517     emit_cc(cbuf, $primary, $cop$$cmpcode);
2518     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2519     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2520     emit_d8(cbuf, disp);
2521   %}
2522 
2523   enc_class enc_cmov(cmpOp cop)
2524   %{
2525     // CMOV
2526     $$$emit8$primary;
2527     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2528   %}
2529 
2530   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2531   %{
2532     // Invert sense of branch from sense of cmov
2533     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2534     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2535                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2536                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2537     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2538     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2539     if ($dst$$reg < 8) {
2540       if ($src$$reg >= 8) {
2541         emit_opcode(cbuf, Assembler::REX_B);
2542       }
2543     } else {
2544       if ($src$$reg < 8) {
2545         emit_opcode(cbuf, Assembler::REX_R);
2546       } else {
2547         emit_opcode(cbuf, Assembler::REX_RB);
2548       }
2549     }
2550     emit_opcode(cbuf, 0x0F);
2551     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2552     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2553   %}
2554 
2555   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2556   %{
2557     // Invert sense of branch from sense of cmov
2558     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2559     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2560 
2561     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2562     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2563     if ($dst$$reg < 8) {
2564       if ($src$$reg >= 8) {
2565         emit_opcode(cbuf, Assembler::REX_B);
2566       }
2567     } else {
2568       if ($src$$reg < 8) {
2569         emit_opcode(cbuf, Assembler::REX_R);
2570       } else {
2571         emit_opcode(cbuf, Assembler::REX_RB);
2572       }
2573     }
2574     emit_opcode(cbuf, 0x0F);
2575     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2576     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2577   %}
2578 
2579   enc_class enc_PartialSubtypeCheck()
2580   %{
2581     Register Rrdi = as_Register(RDI_enc); // result register
2582     Register Rrax = as_Register(RAX_enc); // super class
2583     Register Rrcx = as_Register(RCX_enc); // killed
2584     Register Rrsi = as_Register(RSI_enc); // sub class
2585     Label miss;
2586     const bool set_cond_codes = true;
2587 
2588     MacroAssembler _masm(&cbuf);
2589     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2590                                      NULL, &miss,
2591                                      /*set_cond_codes:*/ true);
2592     if ($primary) {
2593       __ xorptr(Rrdi, Rrdi);
2594     }
2595     __ bind(miss);
2596   %}
2597 
2598   enc_class Java_To_Interpreter(method meth)
2599   %{
2600     // CALL Java_To_Interpreter
2601     // This is the instruction starting address for relocation info.
2602     cbuf.set_inst_mark();
2603     $$$emit8$primary;
2604     // CALL directly to the runtime
2605     emit_d32_reloc(cbuf,
2606                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2607                    runtime_call_Relocation::spec(),
2608                    RELOC_DISP32);
2609   %}
2610 
2611   enc_class Java_Static_Call(method meth)
2612   %{
2613     // JAVA STATIC CALL
2614     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2615     // determine who we intended to call.
2616     cbuf.set_inst_mark();
2617     $$$emit8$primary;
2618 
2619     if (!_method) {
2620       emit_d32_reloc(cbuf,
2621                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2622                      runtime_call_Relocation::spec(),
2623                      RELOC_DISP32);
2624     } else if (_optimized_virtual) {
2625       emit_d32_reloc(cbuf,
2626                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2627                      opt_virtual_call_Relocation::spec(),
2628                      RELOC_DISP32);
2629     } else {
2630       emit_d32_reloc(cbuf,
2631                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2632                      static_call_Relocation::spec(),
2633                      RELOC_DISP32);
2634     }
2635     if (_method) {
2636       // Emit stub for static call
2637       emit_java_to_interp(cbuf);
2638     }
2639   %}
2640 
2641   enc_class Java_Dynamic_Call(method meth)
2642   %{
2643     // JAVA DYNAMIC CALL
2644     // !!!!!
2645     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2646     // emit_call_dynamic_prologue( cbuf );
2647     cbuf.set_inst_mark();
2648 
2649     // movq rax, -1
2650     emit_opcode(cbuf, Assembler::REX_W);
2651     emit_opcode(cbuf, 0xB8 | RAX_enc);
2652     emit_d64_reloc(cbuf,
2653                    (int64_t) Universe::non_oop_word(),
2654                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2655     address virtual_call_oop_addr = cbuf.inst_mark();
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2657     // who we intended to call.
2658     cbuf.set_inst_mark();
2659     $$$emit8$primary;
2660     emit_d32_reloc(cbuf,
2661                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2662                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2663                    RELOC_DISP32);
2664   %}
2665 
2666   enc_class Java_Compiled_Call(method meth)
2667   %{
2668     // JAVA COMPILED CALL
2669     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2670 
2671     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2672     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2673 
2674     // callq *disp(%rax)
2675     cbuf.set_inst_mark();
2676     $$$emit8$primary;
2677     if (disp < 0x80) {
2678       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2679       emit_d8(cbuf, disp); // Displacement
2680     } else {
2681       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2682       emit_d32(cbuf, disp); // Displacement
2683     }
2684   %}
2685 
2686   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2687   %{
2688     // SAL, SAR, SHR
2689     int dstenc = $dst$$reg;
2690     if (dstenc >= 8) {
2691       emit_opcode(cbuf, Assembler::REX_B);
2692       dstenc -= 8;
2693     }
2694     $$$emit8$primary;
2695     emit_rm(cbuf, 0x3, $secondary, dstenc);
2696     $$$emit8$shift$$constant;
2697   %}
2698 
2699   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2700   %{
2701     // SAL, SAR, SHR
2702     int dstenc = $dst$$reg;
2703     if (dstenc < 8) {
2704       emit_opcode(cbuf, Assembler::REX_W);
2705     } else {
2706       emit_opcode(cbuf, Assembler::REX_WB);
2707       dstenc -= 8;
2708     }
2709     $$$emit8$primary;
2710     emit_rm(cbuf, 0x3, $secondary, dstenc);
2711     $$$emit8$shift$$constant;
2712   %}
2713 
2714   enc_class load_immI(rRegI dst, immI src)
2715   %{
2716     int dstenc = $dst$$reg;
2717     if (dstenc >= 8) {
2718       emit_opcode(cbuf, Assembler::REX_B);
2719       dstenc -= 8;
2720     }
2721     emit_opcode(cbuf, 0xB8 | dstenc);
2722     $$$emit32$src$$constant;
2723   %}
2724 
2725   enc_class load_immL(rRegL dst, immL src)
2726   %{
2727     int dstenc = $dst$$reg;
2728     if (dstenc < 8) {
2729       emit_opcode(cbuf, Assembler::REX_W);
2730     } else {
2731       emit_opcode(cbuf, Assembler::REX_WB);
2732       dstenc -= 8;
2733     }
2734     emit_opcode(cbuf, 0xB8 | dstenc);
2735     emit_d64(cbuf, $src$$constant);
2736   %}
2737 
2738   enc_class load_immUL32(rRegL dst, immUL32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     int dstenc = $dst$$reg;
2742     if (dstenc >= 8) {
2743       emit_opcode(cbuf, Assembler::REX_B);
2744       dstenc -= 8;
2745     }
2746     emit_opcode(cbuf, 0xB8 | dstenc);
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immL32(rRegL dst, immL32 src)
2751   %{
2752     int dstenc = $dst$$reg;
2753     if (dstenc < 8) {
2754       emit_opcode(cbuf, Assembler::REX_W);
2755     } else {
2756       emit_opcode(cbuf, Assembler::REX_WB);
2757       dstenc -= 8;
2758     }
2759     emit_opcode(cbuf, 0xC7);
2760     emit_rm(cbuf, 0x03, 0x00, dstenc);
2761     $$$emit32$src$$constant;
2762   %}
2763 
2764   enc_class load_immP31(rRegP dst, immP32 src)
2765   %{
2766     // same as load_immI, but this time we care about zeroes in the high word
2767     int dstenc = $dst$$reg;
2768     if (dstenc >= 8) {
2769       emit_opcode(cbuf, Assembler::REX_B);
2770       dstenc -= 8;
2771     }
2772     emit_opcode(cbuf, 0xB8 | dstenc);
2773     $$$emit32$src$$constant;
2774   %}
2775 
2776   enc_class load_immP(rRegP dst, immP src)
2777   %{
2778     int dstenc = $dst$$reg;
2779     if (dstenc < 8) {
2780       emit_opcode(cbuf, Assembler::REX_W);
2781     } else {
2782       emit_opcode(cbuf, Assembler::REX_WB);
2783       dstenc -= 8;
2784     }
2785     emit_opcode(cbuf, 0xB8 | dstenc);
2786     // This next line should be generated from ADLC
2787     if ($src->constant_is_oop()) {
2788       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2789     } else {
2790       emit_d64(cbuf, $src$$constant);
2791     }
2792   %}
2793 
2794   enc_class load_immF(regF dst, immF con)
2795   %{
2796     // XXX reg_mem doesn't support RIP-relative addressing yet
2797     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2798     emit_float_constant(cbuf, $con$$constant);
2799   %}
2800 
2801   enc_class load_immD(regD dst, immD con)
2802   %{
2803     // XXX reg_mem doesn't support RIP-relative addressing yet
2804     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2805     emit_double_constant(cbuf, $con$$constant);
2806   %}
2807 
2808   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2809     emit_opcode(cbuf, 0xF3);
2810     if ($dst$$reg >= 8) {
2811       emit_opcode(cbuf, Assembler::REX_R);
2812     }
2813     emit_opcode(cbuf, 0x0F);
2814     emit_opcode(cbuf, 0x10);
2815     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2816     emit_float_constant(cbuf, $con$$constant);
2817   %}
2818 
2819   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2820     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2821     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2822     if ($dst$$reg >= 8) {
2823       emit_opcode(cbuf, Assembler::REX_R);
2824     }
2825     emit_opcode(cbuf, 0x0F);
2826     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2827     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2828     emit_double_constant(cbuf, $con$$constant);
2829   %}
2830 
2831   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2832   enc_class enc_copy(rRegI dst, rRegI src)
2833   %{
2834     encode_copy(cbuf, $dst$$reg, $src$$reg);
2835   %}
2836 
2837   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2838   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2839     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2840   %}
2841 
2842   enc_class enc_copy_always(rRegI dst, rRegI src)
2843   %{
2844     int srcenc = $src$$reg;
2845     int dstenc = $dst$$reg;
2846 
2847     if (dstenc < 8) {
2848       if (srcenc >= 8) {
2849         emit_opcode(cbuf, Assembler::REX_B);
2850         srcenc -= 8;
2851       }
2852     } else {
2853       if (srcenc < 8) {
2854         emit_opcode(cbuf, Assembler::REX_R);
2855       } else {
2856         emit_opcode(cbuf, Assembler::REX_RB);
2857         srcenc -= 8;
2858       }
2859       dstenc -= 8;
2860     }
2861 
2862     emit_opcode(cbuf, 0x8B);
2863     emit_rm(cbuf, 0x3, dstenc, srcenc);
2864   %}
2865 
2866   enc_class enc_copy_wide(rRegL dst, rRegL src)
2867   %{
2868     int srcenc = $src$$reg;
2869     int dstenc = $dst$$reg;
2870 
2871     if (dstenc != srcenc) {
2872       if (dstenc < 8) {
2873         if (srcenc < 8) {
2874           emit_opcode(cbuf, Assembler::REX_W);
2875         } else {
2876           emit_opcode(cbuf, Assembler::REX_WB);
2877           srcenc -= 8;
2878         }
2879       } else {
2880         if (srcenc < 8) {
2881           emit_opcode(cbuf, Assembler::REX_WR);
2882         } else {
2883           emit_opcode(cbuf, Assembler::REX_WRB);
2884           srcenc -= 8;
2885         }
2886         dstenc -= 8;
2887       }
2888       emit_opcode(cbuf, 0x8B);
2889       emit_rm(cbuf, 0x3, dstenc, srcenc);
2890     }
2891   %}
2892 
2893   enc_class Con32(immI src)
2894   %{
2895     // Output immediate
2896     $$$emit32$src$$constant;
2897   %}
2898 
2899   enc_class Con64(immL src)
2900   %{
2901     // Output immediate
2902     emit_d64($src$$constant);
2903   %}
2904 
2905   enc_class Con32F_as_bits(immF src)
2906   %{
2907     // Output Float immediate bits
2908     jfloat jf = $src$$constant;
2909     jint jf_as_bits = jint_cast(jf);
2910     emit_d32(cbuf, jf_as_bits);
2911   %}
2912 
2913   enc_class Con16(immI src)
2914   %{
2915     // Output immediate
2916     $$$emit16$src$$constant;
2917   %}
2918 
2919   // How is this different from Con32??? XXX
2920   enc_class Con_d32(immI src)
2921   %{
2922     emit_d32(cbuf,$src$$constant);
2923   %}
2924 
2925   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2926     // Output immediate memory reference
2927     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2928     emit_d32(cbuf, 0x00);
2929   %}
2930 
2931   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2932     MacroAssembler masm(&cbuf);
2933 
2934     Register switch_reg = as_Register($switch_val$$reg);
2935     Register dest_reg   = as_Register($dest$$reg);
2936     address table_base  = masm.address_table_constant(_index2label);
2937 
2938     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2939     // to do that and the compiler is using that register as one it can allocate.
2940     // So we build it all by hand.
2941     // Address index(noreg, switch_reg, Address::times_1);
2942     // ArrayAddress dispatch(table, index);
2943 
2944     Address dispatch(dest_reg, switch_reg, Address::times_1);
2945 
2946     masm.lea(dest_reg, InternalAddress(table_base));
2947     masm.jmp(dispatch);
2948   %}
2949 
2950   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2951     MacroAssembler masm(&cbuf);
2952 
2953     Register switch_reg = as_Register($switch_val$$reg);
2954     Register dest_reg   = as_Register($dest$$reg);
2955     address table_base  = masm.address_table_constant(_index2label);
2956 
2957     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2958     // to do that and the compiler is using that register as one it can allocate.
2959     // So we build it all by hand.
2960     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2961     // ArrayAddress dispatch(table, index);
2962 
2963     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2964 
2965     masm.lea(dest_reg, InternalAddress(table_base));
2966     masm.jmp(dispatch);
2967   %}
2968 
2969   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2970     MacroAssembler masm(&cbuf);
2971 
2972     Register switch_reg = as_Register($switch_val$$reg);
2973     Register dest_reg   = as_Register($dest$$reg);
2974     address table_base  = masm.address_table_constant(_index2label);
2975 
2976     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2977     // to do that and the compiler is using that register as one it can allocate.
2978     // So we build it all by hand.
2979     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2980     // ArrayAddress dispatch(table, index);
2981 
2982     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2983     masm.lea(dest_reg, InternalAddress(table_base));
2984     masm.jmp(dispatch);
2985 
2986   %}
2987 
2988   enc_class lock_prefix()
2989   %{
2990     if (os::is_MP()) {
2991       emit_opcode(cbuf, 0xF0); // lock
2992     }
2993   %}
2994 
2995   enc_class REX_mem(memory mem)
2996   %{
2997     if ($mem$$base >= 8) {
2998       if ($mem$$index < 8) {
2999         emit_opcode(cbuf, Assembler::REX_B);
3000       } else {
3001         emit_opcode(cbuf, Assembler::REX_XB);
3002       }
3003     } else {
3004       if ($mem$$index >= 8) {
3005         emit_opcode(cbuf, Assembler::REX_X);
3006       }
3007     }
3008   %}
3009 
3010   enc_class REX_mem_wide(memory mem)
3011   %{
3012     if ($mem$$base >= 8) {
3013       if ($mem$$index < 8) {
3014         emit_opcode(cbuf, Assembler::REX_WB);
3015       } else {
3016         emit_opcode(cbuf, Assembler::REX_WXB);
3017       }
3018     } else {
3019       if ($mem$$index < 8) {
3020         emit_opcode(cbuf, Assembler::REX_W);
3021       } else {
3022         emit_opcode(cbuf, Assembler::REX_WX);
3023       }
3024     }
3025   %}
3026 
3027   // for byte regs
3028   enc_class REX_breg(rRegI reg)
3029   %{
3030     if ($reg$$reg >= 4) {
3031       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3032     }
3033   %}
3034 
3035   // for byte regs
3036   enc_class REX_reg_breg(rRegI dst, rRegI src)
3037   %{
3038     if ($dst$$reg < 8) {
3039       if ($src$$reg >= 4) {
3040         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3041       }
3042     } else {
3043       if ($src$$reg < 8) {
3044         emit_opcode(cbuf, Assembler::REX_R);
3045       } else {
3046         emit_opcode(cbuf, Assembler::REX_RB);
3047       }
3048     }
3049   %}
3050 
3051   // for byte regs
3052   enc_class REX_breg_mem(rRegI reg, memory mem)
3053   %{
3054     if ($reg$$reg < 8) {
3055       if ($mem$$base < 8) {
3056         if ($mem$$index >= 8) {
3057           emit_opcode(cbuf, Assembler::REX_X);
3058         } else if ($reg$$reg >= 4) {
3059           emit_opcode(cbuf, Assembler::REX);
3060         }
3061       } else {
3062         if ($mem$$index < 8) {
3063           emit_opcode(cbuf, Assembler::REX_B);
3064         } else {
3065           emit_opcode(cbuf, Assembler::REX_XB);
3066         }
3067       }
3068     } else {
3069       if ($mem$$base < 8) {
3070         if ($mem$$index < 8) {
3071           emit_opcode(cbuf, Assembler::REX_R);
3072         } else {
3073           emit_opcode(cbuf, Assembler::REX_RX);
3074         }
3075       } else {
3076         if ($mem$$index < 8) {
3077           emit_opcode(cbuf, Assembler::REX_RB);
3078         } else {
3079           emit_opcode(cbuf, Assembler::REX_RXB);
3080         }
3081       }
3082     }
3083   %}
3084 
3085   enc_class REX_reg(rRegI reg)
3086   %{
3087     if ($reg$$reg >= 8) {
3088       emit_opcode(cbuf, Assembler::REX_B);
3089     }
3090   %}
3091 
3092   enc_class REX_reg_wide(rRegI reg)
3093   %{
3094     if ($reg$$reg < 8) {
3095       emit_opcode(cbuf, Assembler::REX_W);
3096     } else {
3097       emit_opcode(cbuf, Assembler::REX_WB);
3098     }
3099   %}
3100 
3101   enc_class REX_reg_reg(rRegI dst, rRegI src)
3102   %{
3103     if ($dst$$reg < 8) {
3104       if ($src$$reg >= 8) {
3105         emit_opcode(cbuf, Assembler::REX_B);
3106       }
3107     } else {
3108       if ($src$$reg < 8) {
3109         emit_opcode(cbuf, Assembler::REX_R);
3110       } else {
3111         emit_opcode(cbuf, Assembler::REX_RB);
3112       }
3113     }
3114   %}
3115 
3116   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3117   %{
3118     if ($dst$$reg < 8) {
3119       if ($src$$reg < 8) {
3120         emit_opcode(cbuf, Assembler::REX_W);
3121       } else {
3122         emit_opcode(cbuf, Assembler::REX_WB);
3123       }
3124     } else {
3125       if ($src$$reg < 8) {
3126         emit_opcode(cbuf, Assembler::REX_WR);
3127       } else {
3128         emit_opcode(cbuf, Assembler::REX_WRB);
3129       }
3130     }
3131   %}
3132 
3133   enc_class REX_reg_mem(rRegI reg, memory mem)
3134   %{
3135     if ($reg$$reg < 8) {
3136       if ($mem$$base < 8) {
3137         if ($mem$$index >= 8) {
3138           emit_opcode(cbuf, Assembler::REX_X);
3139         }
3140       } else {
3141         if ($mem$$index < 8) {
3142           emit_opcode(cbuf, Assembler::REX_B);
3143         } else {
3144           emit_opcode(cbuf, Assembler::REX_XB);
3145         }
3146       }
3147     } else {
3148       if ($mem$$base < 8) {
3149         if ($mem$$index < 8) {
3150           emit_opcode(cbuf, Assembler::REX_R);
3151         } else {
3152           emit_opcode(cbuf, Assembler::REX_RX);
3153         }
3154       } else {
3155         if ($mem$$index < 8) {
3156           emit_opcode(cbuf, Assembler::REX_RB);
3157         } else {
3158           emit_opcode(cbuf, Assembler::REX_RXB);
3159         }
3160       }
3161     }
3162   %}
3163 
3164   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3165   %{
3166     if ($reg$$reg < 8) {
3167       if ($mem$$base < 8) {
3168         if ($mem$$index < 8) {
3169           emit_opcode(cbuf, Assembler::REX_W);
3170         } else {
3171           emit_opcode(cbuf, Assembler::REX_WX);
3172         }
3173       } else {
3174         if ($mem$$index < 8) {
3175           emit_opcode(cbuf, Assembler::REX_WB);
3176         } else {
3177           emit_opcode(cbuf, Assembler::REX_WXB);
3178         }
3179       }
3180     } else {
3181       if ($mem$$base < 8) {
3182         if ($mem$$index < 8) {
3183           emit_opcode(cbuf, Assembler::REX_WR);
3184         } else {
3185           emit_opcode(cbuf, Assembler::REX_WRX);
3186         }
3187       } else {
3188         if ($mem$$index < 8) {
3189           emit_opcode(cbuf, Assembler::REX_WRB);
3190         } else {
3191           emit_opcode(cbuf, Assembler::REX_WRXB);
3192         }
3193       }
3194     }
3195   %}
3196 
3197   enc_class reg_mem(rRegI ereg, memory mem)
3198   %{
3199     // High registers handle in encode_RegMem
3200     int reg = $ereg$$reg;
3201     int base = $mem$$base;
3202     int index = $mem$$index;
3203     int scale = $mem$$scale;
3204     int disp = $mem$$disp;
3205     bool disp_is_oop = $mem->disp_is_oop();
3206 
3207     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3208   %}
3209 
3210   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3211   %{
3212     int rm_byte_opcode = $rm_opcode$$constant;
3213 
3214     // High registers handle in encode_RegMem
3215     int base = $mem$$base;
3216     int index = $mem$$index;
3217     int scale = $mem$$scale;
3218     int displace = $mem$$disp;
3219 
3220     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3221                                             // working with static
3222                                             // globals
3223     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3224                   disp_is_oop);
3225   %}
3226 
3227   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3228   %{
3229     int reg_encoding = $dst$$reg;
3230     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3231     int index        = 0x04;            // 0x04 indicates no index
3232     int scale        = 0x00;            // 0x00 indicates no scale
3233     int displace     = $src1$$constant; // 0x00 indicates no displacement
3234     bool disp_is_oop = false;
3235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3236                   disp_is_oop);
3237   %}
3238 
3239   enc_class neg_reg(rRegI dst)
3240   %{
3241     int dstenc = $dst$$reg;
3242     if (dstenc >= 8) {
3243       emit_opcode(cbuf, Assembler::REX_B);
3244       dstenc -= 8;
3245     }
3246     // NEG $dst
3247     emit_opcode(cbuf, 0xF7);
3248     emit_rm(cbuf, 0x3, 0x03, dstenc);
3249   %}
3250 
3251   enc_class neg_reg_wide(rRegI dst)
3252   %{
3253     int dstenc = $dst$$reg;
3254     if (dstenc < 8) {
3255       emit_opcode(cbuf, Assembler::REX_W);
3256     } else {
3257       emit_opcode(cbuf, Assembler::REX_WB);
3258       dstenc -= 8;
3259     }
3260     // NEG $dst
3261     emit_opcode(cbuf, 0xF7);
3262     emit_rm(cbuf, 0x3, 0x03, dstenc);
3263   %}
3264 
3265   enc_class setLT_reg(rRegI dst)
3266   %{
3267     int dstenc = $dst$$reg;
3268     if (dstenc >= 8) {
3269       emit_opcode(cbuf, Assembler::REX_B);
3270       dstenc -= 8;
3271     } else if (dstenc >= 4) {
3272       emit_opcode(cbuf, Assembler::REX);
3273     }
3274     // SETLT $dst
3275     emit_opcode(cbuf, 0x0F);
3276     emit_opcode(cbuf, 0x9C);
3277     emit_rm(cbuf, 0x3, 0x0, dstenc);
3278   %}
3279 
3280   enc_class setNZ_reg(rRegI dst)
3281   %{
3282     int dstenc = $dst$$reg;
3283     if (dstenc >= 8) {
3284       emit_opcode(cbuf, Assembler::REX_B);
3285       dstenc -= 8;
3286     } else if (dstenc >= 4) {
3287       emit_opcode(cbuf, Assembler::REX);
3288     }
3289     // SETNZ $dst
3290     emit_opcode(cbuf, 0x0F);
3291     emit_opcode(cbuf, 0x95);
3292     emit_rm(cbuf, 0x3, 0x0, dstenc);
3293   %}
3294 
3295   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3296                        rcx_RegI tmp)
3297   %{
3298     // cadd_cmpLT
3299 
3300     int tmpReg = $tmp$$reg;
3301 
3302     int penc = $p$$reg;
3303     int qenc = $q$$reg;
3304     int yenc = $y$$reg;
3305 
3306     // subl $p,$q
3307     if (penc < 8) {
3308       if (qenc >= 8) {
3309         emit_opcode(cbuf, Assembler::REX_B);
3310       }
3311     } else {
3312       if (qenc < 8) {
3313         emit_opcode(cbuf, Assembler::REX_R);
3314       } else {
3315         emit_opcode(cbuf, Assembler::REX_RB);
3316       }
3317     }
3318     emit_opcode(cbuf, 0x2B);
3319     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3320 
3321     // sbbl $tmp, $tmp
3322     emit_opcode(cbuf, 0x1B);
3323     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3324 
3325     // andl $tmp, $y
3326     if (yenc >= 8) {
3327       emit_opcode(cbuf, Assembler::REX_B);
3328     }
3329     emit_opcode(cbuf, 0x23);
3330     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3331 
3332     // addl $p,$tmp
3333     if (penc >= 8) {
3334         emit_opcode(cbuf, Assembler::REX_R);
3335     }
3336     emit_opcode(cbuf, 0x03);
3337     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3338   %}
3339 
3340   // Compare the lonogs and set -1, 0, or 1 into dst
3341   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3342   %{
3343     int src1enc = $src1$$reg;
3344     int src2enc = $src2$$reg;
3345     int dstenc = $dst$$reg;
3346 
3347     // cmpq $src1, $src2
3348     if (src1enc < 8) {
3349       if (src2enc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_W);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_WB);
3353       }
3354     } else {
3355       if (src2enc < 8) {
3356         emit_opcode(cbuf, Assembler::REX_WR);
3357       } else {
3358         emit_opcode(cbuf, Assembler::REX_WRB);
3359       }
3360     }
3361     emit_opcode(cbuf, 0x3B);
3362     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3363 
3364     // movl $dst, -1
3365     if (dstenc >= 8) {
3366       emit_opcode(cbuf, Assembler::REX_B);
3367     }
3368     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3369     emit_d32(cbuf, -1);
3370 
3371     // jl,s done
3372     emit_opcode(cbuf, 0x7C);
3373     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3374 
3375     // setne $dst
3376     if (dstenc >= 4) {
3377       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3378     }
3379     emit_opcode(cbuf, 0x0F);
3380     emit_opcode(cbuf, 0x95);
3381     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3382 
3383     // movzbl $dst, $dst
3384     if (dstenc >= 4) {
3385       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3386     }
3387     emit_opcode(cbuf, 0x0F);
3388     emit_opcode(cbuf, 0xB6);
3389     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3390   %}
3391 
3392   enc_class Push_ResultXD(regD dst) %{
3393     int dstenc = $dst$$reg;
3394 
3395     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3396 
3397     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3398     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3399     if (dstenc >= 8) {
3400       emit_opcode(cbuf, Assembler::REX_R);
3401     }
3402     emit_opcode  (cbuf, 0x0F );
3403     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3404     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3405 
3406     // add rsp,8
3407     emit_opcode(cbuf, Assembler::REX_W);
3408     emit_opcode(cbuf,0x83);
3409     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3410     emit_d8(cbuf,0x08);
3411   %}
3412 
3413   enc_class Push_SrcXD(regD src) %{
3414     int srcenc = $src$$reg;
3415 
3416     // subq rsp,#8
3417     emit_opcode(cbuf, Assembler::REX_W);
3418     emit_opcode(cbuf, 0x83);
3419     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3420     emit_d8(cbuf, 0x8);
3421 
3422     // movsd [rsp],src
3423     emit_opcode(cbuf, 0xF2);
3424     if (srcenc >= 8) {
3425       emit_opcode(cbuf, Assembler::REX_R);
3426     }
3427     emit_opcode(cbuf, 0x0F);
3428     emit_opcode(cbuf, 0x11);
3429     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3430 
3431     // fldd [rsp]
3432     emit_opcode(cbuf, 0x66);
3433     emit_opcode(cbuf, 0xDD);
3434     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3435   %}
3436 
3437 
3438   enc_class movq_ld(regD dst, memory mem) %{
3439     MacroAssembler _masm(&cbuf);
3440     __ movq($dst$$XMMRegister, $mem$$Address);
3441   %}
3442 
3443   enc_class movq_st(memory mem, regD src) %{
3444     MacroAssembler _masm(&cbuf);
3445     __ movq($mem$$Address, $src$$XMMRegister);
3446   %}
3447 
3448   enc_class pshufd_8x8(regF dst, regF src) %{
3449     MacroAssembler _masm(&cbuf);
3450 
3451     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3452     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3453     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3454   %}
3455 
3456   enc_class pshufd_4x16(regF dst, regF src) %{
3457     MacroAssembler _masm(&cbuf);
3458 
3459     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3460   %}
3461 
3462   enc_class pshufd(regD dst, regD src, int mode) %{
3463     MacroAssembler _masm(&cbuf);
3464 
3465     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3466   %}
3467 
3468   enc_class pxor(regD dst, regD src) %{
3469     MacroAssembler _masm(&cbuf);
3470 
3471     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3472   %}
3473 
3474   enc_class mov_i2x(regD dst, rRegI src) %{
3475     MacroAssembler _masm(&cbuf);
3476 
3477     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3478   %}
3479 
3480   // obj: object to lock
3481   // box: box address (header location) -- killed
3482   // tmp: rax -- killed
3483   // scr: rbx -- killed
3484   //
3485   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3486   // from i486.ad.  See that file for comments.
3487   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3488   // use the shorter encoding.  (Movl clears the high-order 32 bits).
3489 
3490 
3491   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3492   %{
         // Emits the inline fast path for monitor enter.  EmitSync selects one
         // of three code shapes:
         //   EmitSync & 1 : store unused_mark into the box, then compare rsp
         //                  against 0.  NOTE(review): presumably this leaves
         //                  ZF=0 so the consumer always takes the slow path --
         //                  confirm against the matching instruct.
         //   EmitSync & 2 : stack-lock via CAS on the mark word, with a
         //                  recursive-lock check; no inflated-monitor path.
         //   otherwise    : stack-lock CAS plus an inline path that tries to
         //                  CAS r15_thread into the owner field of an inflated
         //                  monitor.
         // NOTE(review): the outcome appears to be reported through the
         // condition codes left by the last emitted instruction (see the ZF
         // comments in Fast_Unlock) -- confirm.
3493     Register objReg = as_Register((int)$obj$$reg);
3494     Register boxReg = as_Register((int)$box$$reg);
3495     Register tmpReg = as_Register($tmp$$reg);
3496     Register scrReg = as_Register($scr$$reg);
3497     MacroAssembler masm(&cbuf);
3498 
3499     // Verify uniqueness of register assignments -- necessary but not sufficient
3500     assert (objReg != boxReg && objReg != tmpReg &&
3501             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3502 
         // Optional statistics: count every entry into this fast path.
3503     if (_counters != NULL) {
3504       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3505     }
3506     if (EmitSync & 1) {
3507         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3508         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3509         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3510     } else
3511     if (EmitSync & 2) {
3512         Label DONE_LABEL;
3513         if (UseBiasedLocking) {
3514            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3515           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3516         }
3517         // QQQ was movl...
             // tmpReg = mark word | 1; save it in the box as the displaced
             // header, then try to CAS the box address into the mark word.
3518         masm.movptr(tmpReg, 0x1);
3519         masm.orptr(tmpReg, Address(objReg, 0));
3520         masm.movptr(Address(boxReg, 0), tmpReg);
3521         if (os::is_MP()) {
3522           masm.lock();
3523         }
3524         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3525         masm.jcc(Assembler::equal, DONE_LABEL);
3526 
3527         // Recursive locking
             // (mark - rsp) masked with (7 - page_size): for a power-of-two
             // page size the mask keeps the low three bits and all bits at or
             // above the page size, so the result is zero only when the
             // observed mark is 8-byte aligned and lies less than one page
             // above rsp.  The masked value is stored in the box.
3528         masm.subptr(tmpReg, rsp);
3529         masm.andptr(tmpReg, 7 - os::vm_page_size());
3530         masm.movptr(Address(boxReg, 0), tmpReg);
3531 
3532         masm.bind(DONE_LABEL);
3533         masm.nop(); // avoid branch to branch
3534     } else {
3535         Label DONE_LABEL, IsInflated, Egress;
             // NOTE: Egress is declared but never bound or used in this block.
3536 
3537         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3538         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3539         masm.jcc   (Assembler::notZero, IsInflated) ; 
3540          
3541         // it's stack-locked, biased or neutral
3542         // TODO: optimize markword triage order to reduce the number of
3543         // conditional branches in the most common cases.
3544         // Beware -- there's a subtle invariant that fetch of the markword
3545         // at [FETCH], below, will never observe a biased encoding (*101b).
3546         // If this invariant is not held we'll suffer exclusion (safety) failure.
3547 
3548         if (UseBiasedLocking && !UseOptoBiasInlining) {
3549           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3550           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3551         }
3552 
3553         // was q will it destroy high?
             // Same stack-lock attempt as the (EmitSync & 2) path: box holds
             // mark|1, then CAS the box address into the mark word.
3554         masm.orl   (tmpReg, 1) ; 
3555         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3556         if (os::is_MP()) { masm.lock(); } 
3557         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3558         if (_counters != NULL) {
3559            masm.cond_inc32(Assembler::equal,
3560                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3561         }
3562         masm.jcc   (Assembler::equal, DONE_LABEL);
3563 
3564         // Recursive locking
             // Same (mark - rsp) & (7 - page_size) test as in the simple path.
3565         masm.subptr(tmpReg, rsp);
3566         masm.andptr(tmpReg, 7 - os::vm_page_size());
3567         masm.movptr(Address(boxReg, 0), tmpReg);
3568         if (_counters != NULL) {
3569            masm.cond_inc32(Assembler::equal,
3570                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3571         }
3572         masm.jmp   (DONE_LABEL) ;
3573 
3574         masm.bind  (IsInflated) ;
3575         // It's inflated
3576 
3577         // TODO: someday avoid the ST-before-CAS penalty by
3578         // relocating (deferring) the following ST.
3579         // We should also think about trying a CAS without having
3580         // fetched _owner.  If the CAS is successful we may
3581         // avoid an RTO->RTS upgrade on the $line.
3582         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3583         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3584 
             // tmpReg still holds the mark word that tested non-zero against
             // 0x02.  NOTE(review): the "-2" on the field offsets presumably
             // strips that tag bit so the mark can be used directly as the
             // ObjectMonitor base -- confirm.  boxReg is overwritten with the
             // mark here, which is why box is documented as killed.
3585         masm.mov    (boxReg, tmpReg) ; 
3586         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3587         masm.testptr(tmpReg, tmpReg) ;   
3588         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3589 
3590         // It's inflated and appears unlocked
             // tmpReg (rax) is zero here, so the CAS installs r15_thread as
             // owner only if the owner field is still NULL.
3591         if (os::is_MP()) { masm.lock(); } 
3592         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3593         // Intentional fall-through into DONE_LABEL ...
3594 
3595         masm.bind  (DONE_LABEL) ;
3596         masm.nop   () ;                 // avoid jmp to jmp
3597     }
3598   %}
3599 
3600   // obj: object to unlock
3601   // box: box address (displaced header location), killed
3602   // RBX: killed tmp; cannot be obj nor box
3603   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3604   %{
         // Emits the inline fast path for monitor exit.  EmitSync selects the
         // code shape:
         //   EmitSync & 4 : only compare rsp against 0.  NOTE(review):
         //                  presumably forces ZF=0 so the slow path is always
         //                  taken -- confirm.
         //   EmitSync & 8 : recursive check on the displaced header, then CAS
         //                  the displaced header back into the mark word.
         //   otherwise    : additionally handles inflated monitors inline.
         // On the inflated path ZF=1 signals success and ZF=0 failure (see the
         // comments at LGoSlowPath / LSuccess below).
3605 
3606     Register objReg = as_Register($obj$$reg);
3607     Register boxReg = as_Register($box$$reg);
3608     Register tmpReg = as_Register($tmp$$reg);
3609     MacroAssembler masm(&cbuf);
3610 
3611     if (EmitSync & 4) { 
3612        masm.cmpptr(rsp, 0) ; 
3613     } else
3614     if (EmitSync & 8) {
3615        Label DONE_LABEL;
3616        if (UseBiasedLocking) {
3617          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3618        }
3619 
3620        // Check whether the displaced header is 0
3621        //(=> recursive unlock)
3622        masm.movptr(tmpReg, Address(boxReg, 0));
3623        masm.testptr(tmpReg, tmpReg);
3624        masm.jcc(Assembler::zero, DONE_LABEL);
3625 
3626        // If not recursive lock, reset the header to displaced header
3627        if (os::is_MP()) {
3628          masm.lock();
3629        }
3630        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3631        masm.bind(DONE_LABEL);
3632        masm.nop(); // avoid branch to branch
3633     } else {
3634        Label DONE_LABEL, Stacked, CheckSucc ;
3635 
3636        if (UseBiasedLocking && !UseOptoBiasInlining) {
3637          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3638        }
3639         
            // tmpReg = mark word.  A zero displaced header in the box means a
            // recursive unlock: nothing more to do.
3640        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3641        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3642        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3643        masm.testl (tmpReg, 0x02) ; 
3644        masm.jcc   (Assembler::zero, Stacked) ; 
3645         
3646        // It's inflated
            // NOTE(review): the "-2" on the field offsets presumably strips
            // the 0x02 tag bit from the mark held in tmpReg -- confirm.
            // boxReg = (owner ^ current thread) | recursions; non-zero means
            // we are not the owner or the lock is held recursively.
3647        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3648        masm.xorptr(boxReg, r15_thread) ; 
3649        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3650        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // boxReg = cxq | EntryList; non-zero means the queues are not
            // both empty, so go check for a successor.
3651        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3652        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3653        masm.jcc   (Assembler::notZero, CheckSucc) ; 
            // Both queues empty: release by storing NULL into the owner field.
3654        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3655        masm.jmp   (DONE_LABEL) ; 
3656         
            // The successor-check sequence is emitted unless EmitSync bit
            // 65536 is set; in that case CheckSucc is bound just before
            // DONE_LABEL at the end of this branch.
3657        if ((EmitSync & 65536) == 0) { 
3658          Label LSuccess, LGoSlowPath ;
3659          masm.bind  (CheckSucc) ;
3660          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3661          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3662 
3663          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3664          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3665          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3666          // are all faster when the write buffer is populated.
3667          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3668          if (os::is_MP()) {
3669             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3670          }
              // Re-check succ after the store and the locked add.
3671          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3672          masm.jcc   (Assembler::notZero, LSuccess) ;
3673 
              // succ is now NULL: try to CAS r15_thread back into the owner
              // field (expected value NULL in rax).  If the CAS fails the
              // branch below goes to LSuccess; if it succeeds control falls
              // into the slow path.
3674          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3675          if (os::is_MP()) { masm.lock(); }
3676          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3677          masm.jcc   (Assembler::notEqual, LSuccess) ;
3678          // Intentional fall-through into slow-path
3679 
3680          masm.bind  (LGoSlowPath) ;
3681          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3682          masm.jmp   (DONE_LABEL) ;
3683 
3684          masm.bind  (LSuccess) ;
3685          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3686          masm.jmp   (DONE_LABEL) ;
3687        }
3688 
            // Stack-locked: CAS the displaced header from the box back into
            // the mark word, expecting the box address (rax) to be there.
3689        masm.bind  (Stacked) ; 
3690        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3691        if (os::is_MP()) { masm.lock(); } 
3692        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3693 
3694        if (EmitSync & 65536) {
3695           masm.bind (CheckSucc) ;
3696        }
3697        masm.bind(DONE_LABEL);
3698        if (EmitSync & 32768) {
3699           masm.nop();                      // avoid branch to branch
3700        }
3701     }
3702   %}
3703 
3704   enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3705                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline string comparison of str1 (rdi) against str2 (rsi).
         // Result in rcx: the difference (str1 char - str2 char) at the first
         // mismatching position within the shorter length, otherwise the
         // difference of the counts (str1 count - str2 count).
         // rax, rbx, rsi, rdi and both XMM temps are overwritten; one stack
         // slot is used temporarily and released on every exit path.
         // NOTE: CONT_LABEL is declared but unused; RCX_GOOD_LABEL is bound
         // but never branched to within this block.
3706     Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3707           POP_LABEL, DONE_LABEL, CONT_LABEL,
3708           WHILE_HEAD_LABEL;
3709     MacroAssembler masm(&cbuf);
3710 
3711     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3712     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3713 
3714     // Get the first character position in both strings
3715     //         [8] char array, [12] offset, [16] count
3716     int value_offset  = java_lang_String::value_offset_in_bytes();
3717     int offset_offset = java_lang_String::offset_offset_in_bytes();
3718     int count_offset  = java_lang_String::count_offset_in_bytes();
3719     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3720 
         // rax = address of first char of str2, rbx = address of first char
         // of str1 (value array + base_offset + 2 * offset).
3721     masm.load_heap_oop(rax, Address(rsi, value_offset));
3722     masm.movl(rcx, Address(rsi, offset_offset));
3723     masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
3724     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3725     masm.movl(rcx, Address(rdi, offset_offset));
3726     masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3727 
3728     // Compute the minimum of the string lengths(rsi) and the
3729     // difference of the string lengths (stack)
3730 
3731     // do the conditional move stuff
         // rdi = count1 - count2 (pushed); the cmov reuses the flags from the
         // subl, so rsi becomes count1 when count1 <= count2.
3732     masm.movl(rdi, Address(rdi, count_offset));
3733     masm.movl(rsi, Address(rsi, count_offset));
3734     masm.movl(rcx, rdi);
3735     masm.subl(rdi, rsi);
3736     masm.push(rdi);
3737     masm.cmov(Assembler::lessEqual, rsi, rcx);
3738 
3739     // Is the minimum length zero?
3740     masm.bind(RCX_GOOD_LABEL);
3741     masm.testl(rsi, rsi);
3742     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3743 
3744     // Load first characters
3745     masm.load_unsigned_short(rcx, Address(rbx, 0));
3746     masm.load_unsigned_short(rdi, Address(rax, 0));
3747 
3748     // Compare first characters
3749     masm.subl(rcx, rdi);
3750     masm.jcc(Assembler::notZero,  POP_LABEL);
3751     masm.decrementl(rsi);
3752     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3753 
3754     {
3755       // Check after comparing first character to see if strings are equivalent
3756       Label LSkip2;
3757       // Check if the strings start at same location
3758       masm.cmpptr(rbx, rax);
3759       masm.jccb(Assembler::notEqual, LSkip2);
3760 
3761       // Check if the length difference is zero (from stack)
3762       masm.cmpl(Address(rsp, 0), 0x0);
3763       masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3764 
3765       // Strings might not be equivalent
3766       masm.bind(LSkip2);
3767     }
3768 
3769     // Advance to next character
3770     masm.addptr(rax, 2);
3771     masm.addptr(rbx, 2);
3772 
3773     if (UseSSE42Intrinsics) {
3774       // With SSE4.2, use double quad vector compare
3775       Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3776       // Setup to compare 16-byte vectors
3777       masm.movl(rdi, rsi);
3778       masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
3779       masm.andl(rdi, 0x00000007); // rdi holds the tail count
3780       masm.testl(rsi, rsi);
3781       masm.jccb(Assembler::zero, COMPARE_TAIL);
3782 
           // Point rax/rbx past the vector region and count rsi up from
           // -count to zero, 8 chars (16 bytes) per iteration.
3783       masm.lea(rax, Address(rax, rsi, Address::times_2));
3784       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3785       masm.negptr(rsi);
3786 
3787       masm.bind(COMPARE_VECTORS);
3788       masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
3789       masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
3790       masm.pxor(tmp1Reg, tmp2Reg);
3791       masm.ptest(tmp1Reg, tmp1Reg);
3792       masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
3793       masm.addptr(rsi, 8);
3794       masm.jcc(Assembler::notZero, COMPARE_VECTORS);
3795       masm.jmpb(COMPARE_TAIL);
3796 
3797       // Mismatched characters in the vectors
           // Rewind rax/rbx to the start of the mismatching vector and rescan
           // its 8 chars with the scalar loop below.
3798       masm.bind(VECTOR_NOT_EQUAL);
3799       masm.lea(rax, Address(rax, rsi, Address::times_2));
3800       masm.lea(rbx, Address(rbx, rsi, Address::times_2));
3801       masm.movl(rdi, 8);
3802 
3803       // Compare tail (< 8 chars), or rescan last vectors to
3804       // find 1st mismatched characters
3805       masm.bind(COMPARE_TAIL);
3806       masm.testl(rdi, rdi);
3807       masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
3808       masm.movl(rsi, rdi);
3809       // Fallthru to tail compare
3810     }
3811 
3812     // Shift RAX and RBX to the end of the arrays, negate min
3813     masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
3814     masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
3815     masm.negptr(rsi);
3816 
3817     // Compare the rest of the characters
3818     masm.bind(WHILE_HEAD_LABEL);
3819     masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
3820     masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
3821     masm.subl(rcx, rdi);
3822     masm.jccb(Assembler::notZero, POP_LABEL);
3823     masm.increment(rsi);
3824     masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3825 
3826     // Strings are equal up to min length.  Return the length difference.
3827     masm.bind(LENGTH_DIFF_LABEL);
3828     masm.pop(rcx);
3829     masm.jmpb(DONE_LABEL);
3830 
3831     // Discard the stored length difference
3832     masm.bind(POP_LABEL);
3833     masm.addptr(rsp, 8);
3834 
3835     // That's it
3836     masm.bind(DONE_LABEL);
3837   %}
3838 
3839  enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
3840                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
         // Emits an inline search for the substring str2 (rdi) inside the
         // string str1 (rsi).  Result in rbx: the char index of the match
         // relative to the first char of str1, or -1.
         // This encoding is unconditionally pcmpestri-based; there is no
         // UseSSE42Intrinsics check inside the block.
         // NOTE(review): per the Intel SDM, pcmpestri takes its two lengths
         // implicitly in rax/rdx, returns an index in rcx and sets CF/ZF/SF/OF;
         // imm8 0x0d selects unsigned-word, equal-ordered mode.  The branches
         // below rely on that -- confirm against the SDM before changing them.
3841     // SSE4.2 version
3842     Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
3843           SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
3844     MacroAssembler masm(&cbuf);
3845 
3846     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3847 
3848     // Get the first character position in both strings
3849     //         [8] char array, [12] offset, [16] count
3850     int value_offset  = java_lang_String::value_offset_in_bytes();
3851     int offset_offset = java_lang_String::offset_offset_in_bytes();
3852     int count_offset  = java_lang_String::count_offset_in_bytes();
3853     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3854 
3855     // Get counts for string and substr
3856     masm.movl(rdx, Address(rsi, count_offset));
3857     masm.movl(rax, Address(rdi, count_offset));
3858     // Check for substr count > string count
         // This early exit happens before anything is pushed, so it skips
         // the stack cleanup at CLEANUP.
3859     masm.cmpl(rax, rdx);
3860     masm.jcc(Assembler::greater, RET_NEG_ONE);
3861 
3862     // Start the indexOf operation
3863     // Get start addr of string
3864     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3865     masm.movl(rcx, Address(rsi, offset_offset));
3866     masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
3867     masm.push(rsi);
3868 
3869     // Get start addr of substr
3870     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3871     masm.movl(rcx, Address(rdi, offset_offset));
3872     masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
3873     masm.push(rdi);
3874     masm.push(rax);
3875     masm.jmpb(PREP_FOR_SCAN);
3876 
3877     // Substr count saved at sp
3878     // Substr saved at sp+8
3879     // String saved at sp+16
3880 
3881     // Prep to load substr for scan
         // Reached from SCAN_SUBSTR when a candidate fails: restore the
         // substring address and count from the stack and resume scanning.
3882     masm.bind(LOAD_SUBSTR);
3883     masm.movptr(rdi, Address(rsp, 8));
3884     masm.movl(rax, Address(rsp, 0));
3885 
3886     // Load substr
         // The add/sub pair pre-compensates for the adjustment made at the
         // top of SCAN_TO_SUBSTR on the first iteration.
3887     masm.bind(PREP_FOR_SCAN);
3888     masm.movdqu(tmp1Reg, Address(rdi, 0));
3889     masm.addq(rdx, 8);    // prime the loop
3890     masm.subptr(rsi, 16);
3891 
3892     // Scan string for substr in 16-byte vectors
         // Each iteration advances 8 chars (16 bytes) and reduces the
         // remaining string count in rdx by 8.
3893     masm.bind(SCAN_TO_SUBSTR);
3894     masm.subq(rdx, 8);
3895     masm.addptr(rsi, 16);
3896     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3897     masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
3898     masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
3899 
3900     // Fallthru: found a potential substr
3901 
3902     //Make sure string is still long enough
3903     masm.subl(rdx, rcx);
3904     masm.cmpl(rdx, rax);
3905     masm.jccb(Assembler::negative, RET_NOT_FOUND);
3906     // Compute start addr of substr
         // rbx remembers the candidate match address for the final offset
         // computation.
3907     masm.lea(rsi, Address(rsi, rcx, Address::times_2));
3908     masm.movptr(rbx, rsi);
3909 
3910     // Compare potential substr
3911     masm.addq(rdx, 8);        // prime the loop
3912     masm.addq(rax, 8);
3913     masm.subptr(rsi, 16);
3914     masm.subptr(rdi, 16);
3915 
3916     // Scan 16-byte vectors of string and substr
3917     masm.bind(SCAN_SUBSTR);
3918     masm.subq(rax, 8);
3919     masm.subq(rdx, 8);
3920     masm.addptr(rsi, 16);
3921     masm.addptr(rdi, 16);
3922     masm.movdqu(tmp1Reg, Address(rdi, 0));
3923     masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
3924     masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
3925     masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
3926 
3927     // Compute substr offset
         // (match address - string start address) / 2 = char index.
3928     masm.movptr(rsi, Address(rsp, 16));
3929     masm.subptr(rbx, rsi);
3930     masm.shrl(rbx, 1);
3931     masm.jmpb(CLEANUP);
3932 
3933     masm.bind(RET_NEG_ONE);
3934     masm.movl(rbx, -1);
3935     masm.jmpb(DONE);
3936 
3937     masm.bind(RET_NOT_FOUND);
3938     masm.movl(rbx, -1);
3939 
         // Drop the three 8-byte slots pushed above.
3940     masm.bind(CLEANUP);
3941     masm.addptr(rsp, 24);
3942 
3943     masm.bind(DONE);
3944   %}
3945 
3946   enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
3947                               rbx_RegI tmp3, rcx_RegI tmp4, rax_RegI result) %{
         // Emits an inline equality test of str1 (rdi) and str2 (rsi).
         // Result in rax: 1 if the strings are the same object or have equal
         // counts and identical chars, 0 otherwise.
         // rbx, rcx, rsi, rdi and both XMM temps are overwritten.
         //
         // The sixth formal used to be named tmp2 as well, which collided
         // with the regD tmp2 read through $tmp2$$reg below; it is now tmp4.
         // Arguments are bound positionally, so callers are unaffected.
3948     Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
3949     MacroAssembler masm(&cbuf);
3950 
3951     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
3952     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
3953 
3954     int value_offset  = java_lang_String::value_offset_in_bytes();
3955     int offset_offset = java_lang_String::offset_offset_in_bytes();
3956     int count_offset  = java_lang_String::count_offset_in_bytes();
3957     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3958 
3959     // does source == target string?
3960     masm.cmpptr(rdi, rsi);
3961     masm.jcc(Assembler::equal, RET_TRUE);
3962 
3963     // get and compare counts
3964     masm.movl(rcx, Address(rdi, count_offset));
3965     masm.movl(rax, Address(rsi, count_offset));
3966     masm.cmpl(rcx, rax);
3967     masm.jcc(Assembler::notEqual, RET_FALSE);
3968     masm.testl(rax, rax);
3969     masm.jcc(Assembler::zero, RET_TRUE);
3970 
3971     // get source string offset and value
3972     masm.load_heap_oop(rbx, Address(rsi, value_offset));
3973     masm.movl(rax, Address(rsi, offset_offset));
3974     masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset));
3975 
3976     // get compare string offset and value
3977     masm.load_heap_oop(rbx, Address(rdi, value_offset));
3978     masm.movl(rax, Address(rdi, offset_offset));
3979     masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset));
3980 
3981     // Set byte count
         // rcx = rax = 2 * count; rcx drives the vector loops, rax keeps the
         // tail size.
3982     masm.shll(rcx, 1);
3983     masm.movl(rax, rcx);
3984 
3985     if (UseSSE42Intrinsics) {
3986       // With SSE4.2, use double quad vector compare
3987       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3988       // Compare 16-byte vectors
3989       masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
3990       masm.andl(rax, 0x0000000e);  // tail count (in bytes)
3991       masm.testl(rcx, rcx);
3992       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point rdi/rsi past the vector region and count rcx up from
           // -bytes to zero, 16 bytes per iteration.
3993       masm.lea(rdi, Address(rdi, rcx, Address::times_1));
3994       masm.lea(rsi, Address(rsi, rcx, Address::times_1));
3995       masm.negptr(rcx);
3996 
3997       masm.bind(COMPARE_WIDE_VECTORS);
3998       masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
3999       masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
4000       masm.pxor(tmp1Reg, tmp2Reg);
4001       masm.ptest(tmp1Reg, tmp1Reg);
4002       masm.jccb(Assembler::notZero, RET_FALSE);
4003       masm.addptr(rcx, 16);
4004       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4005       masm.bind(COMPARE_TAIL);
4006       masm.movl(rcx, rax);
4007       // Fallthru to tail compare
4008     }
4009 
4010     // Compare 4-byte vectors
4011     masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
4012     masm.andl(rax, 0x00000002);  // tail char (in bytes)
4013     masm.testl(rcx, rcx);
4014     masm.jccb(Assembler::zero, COMPARE_CHAR);
4015     masm.lea(rdi, Address(rdi, rcx, Address::times_1));
4016     masm.lea(rsi, Address(rsi, rcx, Address::times_1));
4017     masm.negptr(rcx);
4018 
4019     masm.bind(COMPARE_VECTORS);
4020     masm.movl(rbx, Address(rdi, rcx, Address::times_1));
4021     masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
4022     masm.jccb(Assembler::notEqual, RET_FALSE);
4023     masm.addptr(rcx, 4);
4024     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4025 
4026     // Compare trailing char (final 2 bytes), if any
4027     masm.bind(COMPARE_CHAR);
4028     masm.testl(rax, rax);
4029     masm.jccb(Assembler::zero, RET_TRUE);
4030     masm.load_unsigned_short(rbx, Address(rdi, 0));
4031     masm.load_unsigned_short(rcx, Address(rsi, 0));
4032     masm.cmpl(rbx, rcx);
4033     masm.jccb(Assembler::notEqual, RET_FALSE);
4034 
4035     masm.bind(RET_TRUE);
4036     masm.movl(rax, 1);   // return true
4037     masm.jmpb(DONE);
4038 
4039     masm.bind(RET_FALSE);
4040     masm.xorl(rax, rax); // return false
4041 
4042     masm.bind(DONE);
4043   %}
4044 
4045   enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
4046                              rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
         // Emits an inline equality test of two arrays whose element base is
         // computed with T_CHAR.  Result in $result: 1 if both references are
         // identical (including both NULL) or both are non-NULL with equal
         // length and identical contents; 0 otherwise.
         // ary1, ary2, tmp3, tmp4 and both XMM temps are overwritten.
4047     Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
4048     MacroAssembler masm(&cbuf);
4049 
4050     XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
4051     XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
4052     Register ary1Reg      = as_Register($ary1$$reg);
4053     Register ary2Reg      = as_Register($ary2$$reg);
4054     Register tmp3Reg      = as_Register($tmp3$$reg);
4055     Register tmp4Reg      = as_Register($tmp4$$reg);
4056     Register resultReg    = as_Register($result$$reg);
4057 
4058     int length_offset  = arrayOopDesc::length_offset_in_bytes();
4059     int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
4060 
4061     // Check the input args
         // Same reference => equal; otherwise either one being NULL => not
         // equal.
4062     masm.cmpq(ary1Reg, ary2Reg);
4063     masm.jcc(Assembler::equal, TRUE_LABEL);
4064     masm.testq(ary1Reg, ary1Reg);
4065     masm.jcc(Assembler::zero, FALSE_LABEL);
4066     masm.testq(ary2Reg, ary2Reg);
4067     masm.jcc(Assembler::zero, FALSE_LABEL);
4068 
4069     // Check the lengths
4070     masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
4071     masm.movl(resultReg, Address(ary2Reg, length_offset));
4072     masm.cmpl(tmp4Reg, resultReg);
4073     masm.jcc(Assembler::notEqual, FALSE_LABEL);
4074     masm.testl(resultReg, resultReg);
4075     masm.jcc(Assembler::zero, TRUE_LABEL);
4076 
4077     //load array address
4078     masm.lea(ary1Reg, Address(ary1Reg, base_offset));
4079     masm.lea(ary2Reg, Address(ary2Reg, base_offset));
4080 
4081     //set byte count
         // tmp4 = result = 2 * length; tmp4 drives the vector loops, result
         // keeps the tail size.
4082     masm.shll(tmp4Reg, 1);
4083     masm.movl(resultReg,tmp4Reg);
4084 
4085     if (UseSSE42Intrinsics){
4086       // With SSE4.2, use double quad vector compare
4087       Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4088       // Compare 16-byte vectors
4089       masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
4090       masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
4091       masm.testl(tmp4Reg, tmp4Reg);
4092       masm.jccb(Assembler::zero, COMPARE_TAIL);
           // Point both array pointers past the vector region and count tmp4
           // up from -bytes to zero, 16 bytes per iteration.
4093       masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4094       masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4095       masm.negptr(tmp4Reg);
4096 
4097       masm.bind(COMPARE_WIDE_VECTORS);
4098       masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4099       masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4100       masm.pxor(tmp1Reg, tmp2Reg);
4101       masm.ptest(tmp1Reg, tmp1Reg);
4102 
4103       masm.jccb(Assembler::notZero, FALSE_LABEL);
4104       masm.addptr(tmp4Reg, 16);
4105       masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
4106       masm.bind(COMPARE_TAIL);
4107       masm.movl(tmp4Reg, resultReg);
4108       // Fallthru to tail compare
4109     }
4110 
4111    // Compare 4-byte vectors
4112     masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
4113     masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
4114     masm.testl(tmp4Reg, tmp4Reg); //if tmp4 == 0, only compare char
4115     masm.jccb(Assembler::zero, COMPARE_CHAR);
4116     masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4117     masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4118     masm.negptr(tmp4Reg);
4119 
4120     masm.bind(COMPARE_VECTORS);
4121     masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
4122     masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
4123     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4124     masm.addptr(tmp4Reg, 4);
4125     masm.jcc(Assembler::notZero, COMPARE_VECTORS);
4126 
4127     // Compare trailing char (final 2 bytes), if any
4128     masm.bind(COMPARE_CHAR);
4129     masm.testl(resultReg, resultReg);
4130     masm.jccb(Assembler::zero, TRUE_LABEL);
4131     masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
4132     masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
4133     masm.cmpl(tmp3Reg, tmp4Reg);
4134     masm.jccb(Assembler::notEqual, FALSE_LABEL);
4135 
4136     masm.bind(TRUE_LABEL);
4137     masm.movl(resultReg, 1);   // return true
4138     masm.jmpb(DONE);
4139 
4140     masm.bind(FALSE_LABEL);
4141     masm.xorl(resultReg, resultReg); // return false
4142 
4143     // That's it
4144     masm.bind(DONE);
4145   %}
4146 
4147   enc_class enc_rethrow()
4148   %{
         // Emits a 5-byte `jmp rel32` (opcode 0xE9) to
         // OptoRuntime::rethrow_stub().  The displacement is relative to the
         // end of the instruction, hence the "- 4" for the 32-bit field that
         // has not been emitted yet when code_end() is read.  The field is
         // recorded with a runtime-call relocation.
4149     cbuf.set_inst_mark();
4150     emit_opcode(cbuf, 0xE9); // jmp entry
4151     emit_d32_reloc(cbuf,
4152                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
4153                    runtime_call_Relocation::spec(),
4154                    RELOC_DISP32);
4155   %}
4156 
4157   enc_class absF_encoding(regF dst)
4158   %{
         // Hand-encodes opcode 0F 54 (ANDPS) with $dst as the register
         // operand and a RIP-relative memory operand addressing
         // StubRoutines::x86::float_sign_mask().
4159     int dstenc = $dst$$reg;
4160     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
4161 
4162     cbuf.set_inst_mark();
         // Encodings 8 and above need the REX.R prefix; only the low three
         // bits go into the ModRM reg field.
4163     if (dstenc >= 8) {
4164       emit_opcode(cbuf, Assembler::REX_R);
4165       dstenc -= 8;
4166     }
4167     // XXX reg_mem doesn't support RIP-relative addressing yet
4168     emit_opcode(cbuf, 0x0F);
4169     emit_opcode(cbuf, 0x54);
4170     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4171     emit_d32_reloc(cbuf, signmask_address);
4172   %}
4173 
4174   enc_class absD_encoding(regD dst)
4175   %{
         // Hand-encodes opcode 66 0F 54 (ANDPD) with $dst as the register
         // operand and a RIP-relative memory operand addressing
         // StubRoutines::x86::double_sign_mask().
4176     int dstenc = $dst$$reg;
4177     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
4178 
4179     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
4180     emit_opcode(cbuf, 0x66);
4181     if (dstenc >= 8) {
4182       emit_opcode(cbuf, Assembler::REX_R);
4183       dstenc -= 8;
4184     }
4185     // XXX reg_mem doesn't support RIP-relative addressing yet
4186     emit_opcode(cbuf, 0x0F);
4187     emit_opcode(cbuf, 0x54);
4188     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4189     emit_d32_reloc(cbuf, signmask_address);
4190   %}
4191 
4192   enc_class negF_encoding(regF dst)
4193   %{
         // Hand-encodes opcode 0F 57 (XORPS) with $dst as the register
         // operand and a RIP-relative memory operand addressing
         // StubRoutines::x86::float_sign_flip().
4194     int dstenc = $dst$$reg;
4195     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
4196 
4197     cbuf.set_inst_mark();
         // Encodings 8 and above need the REX.R prefix; only the low three
         // bits go into the ModRM reg field.
4198     if (dstenc >= 8) {
4199       emit_opcode(cbuf, Assembler::REX_R);
4200       dstenc -= 8;
4201     }
4202     // XXX reg_mem doesn't support RIP-relative addressing yet
4203     emit_opcode(cbuf, 0x0F);
4204     emit_opcode(cbuf, 0x57);
4205     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4206     emit_d32_reloc(cbuf, signflip_address);
4207   %}
4208 
4209   enc_class negD_encoding(regD dst)
4210   %{
         // Hand-encodes opcode 66 0F 57 (XORPD) with $dst as the register
         // operand and a RIP-relative memory operand addressing
         // StubRoutines::x86::double_sign_flip().
4211     int dstenc = $dst$$reg;
4212     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
4213 
4214     cbuf.set_inst_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
4215     emit_opcode(cbuf, 0x66);
4216     if (dstenc >= 8) {
4217       emit_opcode(cbuf, Assembler::REX_R);
4218       dstenc -= 8;
4219     }
4220     // XXX reg_mem doesn't support RIP-relative addressing yet
4221     emit_opcode(cbuf, 0x0F);
4222     emit_opcode(cbuf, 0x57);
4223     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
4224     emit_d32_reloc(cbuf, signflip_address);
4225   %}
4226 
4227   enc_class f2i_fixup(rRegI dst, regF src)
4228   %{
         // Emits a check-and-fixup sequence: if $dst equals 0x80000000, spill
         // $src to a fresh stack slot, call StubRoutines::x86::f2i_fixup()
         // and pop the slot into $dst; otherwise skip all of that.
         // NOTE(review): presumably this follows a float->int conversion that
         // yields 0x80000000 for out-of-range/NaN inputs -- the conversion
         // itself is not emitted here; confirm at the use site.
4229     int dstenc = $dst$$reg;
4230     int srcenc = $src$$reg;
4231 
4232     // cmpl $dst, #0x80000000
4233     if (dstenc >= 8) {
4234       emit_opcode(cbuf, Assembler::REX_B);
4235     }
4236     emit_opcode(cbuf, 0x81);
4237     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4238     emit_d32(cbuf, 0x80000000);
4239 
4240     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, +1 with REX for src >= 8) + call (5)
         // + pop (1, +1 with REX for dst >= 8) = 0xF / 0x10 / 0x11.  Any
         // change to the sequence below must update these constants.
4241     emit_opcode(cbuf, 0x75);
4242     if (srcenc < 8 && dstenc < 8) {
4243       emit_d8(cbuf, 0xF);
4244     } else if (srcenc >= 8 && dstenc >= 8) {
4245       emit_d8(cbuf, 0x11);
4246     } else {
4247       emit_d8(cbuf, 0x10);
4248     }
4249 
4250     // subq rsp, #8
4251     emit_opcode(cbuf, Assembler::REX_W);
4252     emit_opcode(cbuf, 0x83);
4253     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4254     emit_d8(cbuf, 8);
4255 
4256     // movss [rsp], $src
4257     emit_opcode(cbuf, 0xF3);
4258     if (srcenc >= 8) {
4259       emit_opcode(cbuf, Assembler::REX_R);
4260     }
4261     emit_opcode(cbuf, 0x0F);
4262     emit_opcode(cbuf, 0x11);
4263     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4264 
4265     // call f2i_fixup
         // rel32 is relative to the end of the call instruction, hence the
         // "- 4" for the displacement field not yet emitted.
4266     cbuf.set_inst_mark();
4267     emit_opcode(cbuf, 0xE8);
4268     emit_d32_reloc(cbuf,
4269                    (int)
4270                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
4271                    runtime_call_Relocation::spec(),
4272                    RELOC_DISP32);
4273 
4274     // popq $dst
4275     if (dstenc >= 8) {
4276       emit_opcode(cbuf, Assembler::REX_B);
4277     }
4278     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4279 
4280     // done:
4281   %}
4282 
4283   enc_class f2l_fixup(rRegL dst, regF src)
4284   %{
         // Emits a check-and-fixup sequence: compare $dst with the 64-bit
         // constant stored at StubRoutines::x86::double_sign_flip(); if they
         // are equal, spill $src to a fresh stack slot, call
         // StubRoutines::x86::f2l_fixup() and pop the slot into $dst.
         // NOTE(review): presumably this follows a float->long conversion
         // that yields 0x8000000000000000 for out-of-range/NaN inputs -- the
         // conversion itself is not emitted here; confirm at the use site.
4285     int dstenc = $dst$$reg;
4286     int srcenc = $src$$reg;
4287     address const_address = (address) StubRoutines::x86::double_sign_flip();
4288 
4289     // cmpq $dst, [0x8000000000000000]
         // Opcode 0x39 with a RIP-relative memory operand; REX.W selects the
         // 64-bit form and REX.R is added when dst >= 8.
4290     cbuf.set_inst_mark();
4291     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4292     emit_opcode(cbuf, 0x39);
4293     // XXX reg_mem doesn't support RIP-relative addressing yet
4294     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4295     emit_d32_reloc(cbuf, const_address);
4296 
4297 
4298     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, +1 with REX for src >= 8) + call (5)
         // + pop (1, +1 with REX for dst >= 8) = 0xF / 0x10 / 0x11.  Any
         // change to the sequence below must update these constants.
4299     emit_opcode(cbuf, 0x75);
4300     if (srcenc < 8 && dstenc < 8) {
4301       emit_d8(cbuf, 0xF);
4302     } else if (srcenc >= 8 && dstenc >= 8) {
4303       emit_d8(cbuf, 0x11);
4304     } else {
4305       emit_d8(cbuf, 0x10);
4306     }
4307 
4308     // subq rsp, #8
4309     emit_opcode(cbuf, Assembler::REX_W);
4310     emit_opcode(cbuf, 0x83);
4311     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4312     emit_d8(cbuf, 8);
4313 
4314     // movss [rsp], $src
4315     emit_opcode(cbuf, 0xF3);
4316     if (srcenc >= 8) {
4317       emit_opcode(cbuf, Assembler::REX_R);
4318     }
4319     emit_opcode(cbuf, 0x0F);
4320     emit_opcode(cbuf, 0x11);
4321     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4322 
4323     // call f2l_fixup
         // rel32 is relative to the end of the call instruction, hence the
         // "- 4" for the displacement field not yet emitted.
4324     cbuf.set_inst_mark();
4325     emit_opcode(cbuf, 0xE8);
4326     emit_d32_reloc(cbuf,
4327                    (int)
4328                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
4329                    runtime_call_Relocation::spec(),
4330                    RELOC_DISP32);
4331 
4332     // popq $dst
4333     if (dstenc >= 8) {
4334       emit_opcode(cbuf, Assembler::REX_B);
4335     }
4336     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4337 
4338     // done:
4339   %}
4340 
4341   enc_class d2i_fixup(rRegI dst, regD src)
4342   %{
4343     int dstenc = $dst$$reg;
4344     int srcenc = $src$$reg;
4345 
4346     // cmpl $dst, #0x80000000
4347     if (dstenc >= 8) {
4348       emit_opcode(cbuf, Assembler::REX_B);
4349     }
4350     emit_opcode(cbuf, 0x81);
4351     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
4352     emit_d32(cbuf, 0x80000000);
4353 
4354     // jne,s done
4355     emit_opcode(cbuf, 0x75);
4356     if (srcenc < 8 && dstenc < 8) {
4357       emit_d8(cbuf, 0xF);
4358     } else if (srcenc >= 8 && dstenc >= 8) {
4359       emit_d8(cbuf, 0x11);
4360     } else {
4361       emit_d8(cbuf, 0x10);
4362     }
4363 
4364     // subq rsp, #8
4365     emit_opcode(cbuf, Assembler::REX_W);
4366     emit_opcode(cbuf, 0x83);
4367     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4368     emit_d8(cbuf, 8);
4369 
4370     // movsd [rsp], $src
4371     emit_opcode(cbuf, 0xF2);
4372     if (srcenc >= 8) {
4373       emit_opcode(cbuf, Assembler::REX_R);
4374     }
4375     emit_opcode(cbuf, 0x0F);
4376     emit_opcode(cbuf, 0x11);
4377     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4378 
4379     // call d2i_fixup
4380     cbuf.set_inst_mark();
4381     emit_opcode(cbuf, 0xE8);
4382     emit_d32_reloc(cbuf,
4383                    (int)
4384                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
4385                    runtime_call_Relocation::spec(),
4386                    RELOC_DISP32);
4387 
4388     // popq $dst
4389     if (dstenc >= 8) {
4390       emit_opcode(cbuf, Assembler::REX_B);
4391     }
4392     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4393 
4394     // done:
4395   %}
4396 
4397   enc_class d2l_fixup(rRegL dst, regD src)
4398   %{
4399     int dstenc = $dst$$reg;
4400     int srcenc = $src$$reg;
4401     address const_address = (address) StubRoutines::x86::double_sign_flip();
4402 
4403     // cmpq $dst, [0x8000000000000000]
4404     cbuf.set_inst_mark();
4405     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4406     emit_opcode(cbuf, 0x39);
4407     // XXX reg_mem doesn't support RIP-relative addressing yet
4408     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4409     emit_d32_reloc(cbuf, const_address);
4410 
4411 
4412     // jne,s done
4413     emit_opcode(cbuf, 0x75);
4414     if (srcenc < 8 && dstenc < 8) {
4415       emit_d8(cbuf, 0xF);
4416     } else if (srcenc >= 8 && dstenc >= 8) {
4417       emit_d8(cbuf, 0x11);
4418     } else {
4419       emit_d8(cbuf, 0x10);
4420     }
4421 
4422     // subq rsp, #8
4423     emit_opcode(cbuf, Assembler::REX_W);
4424     emit_opcode(cbuf, 0x83);
4425     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4426     emit_d8(cbuf, 8);
4427 
4428     // movsd [rsp], $src
4429     emit_opcode(cbuf, 0xF2);
4430     if (srcenc >= 8) {
4431       emit_opcode(cbuf, Assembler::REX_R);
4432     }
4433     emit_opcode(cbuf, 0x0F);
4434     emit_opcode(cbuf, 0x11);
4435     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4436 
4437     // call d2l_fixup
4438     cbuf.set_inst_mark();
4439     emit_opcode(cbuf, 0xE8);
4440     emit_d32_reloc(cbuf,
4441                    (int)
4442                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4443                    runtime_call_Relocation::spec(),
4444                    RELOC_DISP32);
4445 
4446     // popq $dst
4447     if (dstenc >= 8) {
4448       emit_opcode(cbuf, Assembler::REX_B);
4449     }
4450     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4451 
4452     // done:
4453   %}
4454 
  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills
  // RFLAGS in the process.
  //
  // Emits a hand-encoded "testl" with a RIP-relative memory operand that
  // refers to os::get_polling_page(), and records a poll_type relocation at
  // the start of the instruction.
  enc_class enc_safepoint_poll
  %{
    // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
    // XXX reg_mem doesn't support RIP-relative addressing yet
    // Mark the instruction start first so the relocation is attached to the
    // beginning of the testl, before any of its bytes are emitted.
    cbuf.set_inst_mark();
    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
    emit_opcode(cbuf, 0x85); // testl
    emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
    // cbuf.inst_mark() is beginning of instruction
    emit_d32_reloc(cbuf, os::get_polling_page());
//                    relocInfo::poll_type,
  %}
4470 %}
4471 
4472 
4473 
4474 //----------FRAME--------------------------------------------------------------
4475 // Definition of frame structure and management information.
4476 //
4477 //  S T A C K   L A Y O U T    Allocators stack-slot number
4478 //                             |   (to get allocators register number
4479 //  G  Owned by    |        |  v    add OptoReg::stack0())
4480 //  r   CALLER     |        |
4481 //  o     |        +--------+      pad to even-align allocators stack-slot
4482 //  w     V        |  pad0  |        numbers; owned by CALLER
4483 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4484 //  h     ^        |   in   |  5
4485 //        |        |  args  |  4   Holes in incoming args owned by SELF
4486 //  |     |        |        |  3
4487 //  |     |        +--------+
4488 //  V     |        | old out|      Empty on Intel, window on Sparc
4489 //        |    old |preserve|      Must be even aligned.
4490 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4491 //        |        |   in   |  3   area for Intel ret address
4492 //     Owned by    |preserve|      Empty on Sparc.
4493 //       SELF      +--------+
4494 //        |        |  pad2  |  2   pad to align old SP
4495 //        |        +--------+  1
4496 //        |        | locks  |  0
4497 //        |        +--------+----> OptoReg::stack0(), even aligned
4498 //        |        |  pad1  | 11   pad to align new SP
4499 //        |        +--------+
4500 //        |        |        | 10
4501 //        |        | spills |  9   spills
4502 //        V        |        |  8   (pad0 slot for callee)
4503 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4504 //        ^        |  out   |  7
4505 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4506 //     Owned by    +--------+
4507 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4508 //        |    new |preserve|      Must be even-aligned.
4509 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4510 //        |        |        |
4511 //
4512 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4513 //         known from SELF's arguments and the Java calling convention.
4514 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
4522 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4523 //         even aligned with pad0 as needed.
4524 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4525 //         region 6-11 is even aligned; it may be padded out more so that
4526 //         the region from SP to FP meets the minimum stack alignment.
4527 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4528 //         alignment.  Region 11, pad1, may be dynamically extended so that
4529 //         SP meets the minimum alignment.
4530 
// Frame description: stack growth direction, the registers shared with the
// interpreter, stack-slot bookkeeping (lock slots, preserved slots, return
// address location), the Java and C calling conventions, and the location of
// return values.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // NOTE(review): the value below is 4 slots (plus 2 when VerifyStackAtCalls
  // is set), not 2; presumably the extra two slots hold the saved frame
  // pointer - confirm against the prolog/epilog code.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot count (2 + verification slots + fixed slots) is rounded up to a
  // multiple of WordsPerLong * 2 before the 2 return-address slots are
  // subtracted again.
  return_addr(STACK - 2 +
              round_to(2 + 2 * VerifyStackAtCalls +
                       Compile::current()->fixed_slots(),
                       WordsPerLong * 2));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    // The return value of c_calling_convention is deliberately discarded.
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  // Both tables are indexed by the ideal register opcode; "lo" gives the
  // register holding the low half and "hi" the high half (OptoReg::Bad when
  // the type has no high half).  The first two entries are placeholders.
  return_value
  %{
    // NOTE(review): the lower bound here is Op_RegI, yet the tables below
    // also carry an Op_RegN entry at a smaller index - confirm whether
    // narrow-oop return values are ever requested.
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0_H_num,   // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Guards against a new ideal register type being added without a
    // matching table entry.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4630 
4631 //----------ATTRIBUTES---------------------------------------------------------
4632 //----------Operand Attributes-------------------------------------------------
// Attribute declarations.  Individual operands and instructions below set
// these by name (e.g. "op_cost(10);"); presumably the value given here is
// the default used when a definition omits the attribute - confirm in ADLC.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
4650 //----------OPERANDS-----------------------------------------------------------
4651 // Operand definitions must precede instruction definitions for correct parsing
4652 // in the ADLC because operands constitute user defined types which are used in
4653 // instruction definitions.
4654 
4655 //----------Simple Operands----------------------------------------------------
4656 // Immediate Operands
// Integer Immediate
// Matches any ConI node; no predicate restricts the value.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4666 
// Constant for test vs zero
// Matches a ConI node only when its value is exactly 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4677 
// Constant for increment
// Matches a ConI node only when its value is exactly 1.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4688 
// Constant for decrement
// Matches a ConI node only when its value is exactly -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4699 
// Valid scale values for addressing modes
// Matches a ConI node whose value is in the range 0..3 inclusive.  No
// op_cost is given here.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4709 
// Integer Immediate 8-bit signed
// Matches a ConI node whose value is in the range -0x80..0x7F inclusive.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4719 
// Integer Immediate 16-bit signed
// Matches a ConI node whose value is in the range -32768..32767 inclusive.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4729 
// Constant for long shifts
// Matches a ConI node only when its value is exactly 32.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4740 
// Constant for long shifts
// Matches a ConI node only when its value is exactly 64.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4751 
// Pointer Immediate
// Matches any ConP node; no predicate restricts the value.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4761 
// NULL Pointer Immediate
// Matches a ConP node only when its pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4772 
// Narrow Pointer Immediate
// Matches any ConN node; no predicate restricts the value.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4781 
// NULL Narrow Pointer Immediate
// Matches a ConN node only when its narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4791 
// Pointer Immediate below 2^31
// Matches a ConP node whose type is not an oop pointer and whose value has
// no bits set at or above bit 31 (value >> 31 == 0).
operand immP31()
%{
  predicate(!n->as_Type()->type()->isa_oopptr()
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4802 
4803 
// Long Immediate
// Matches any ConL node; no predicate restricts the value.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
4813 
// Long Immediate 8-bit
// Matches a ConL node whose value is in the range -0x80..0x7F inclusive.
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4824 
// Long Immediate 32-bit unsigned
// Matches a ConL node whose value is unchanged by a round trip through
// "unsigned int", i.e. it is representable as an unsigned 32-bit value.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4835 
// Long Immediate 32-bit signed
// Matches a ConL node whose value is unchanged by a round trip through
// "int", i.e. it is representable as a signed 32-bit value.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4846 
// Long Immediate zero
// Matches a ConL node only when its value is exactly 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4857 
// Constant for increment
// Matches a ConL node only when its value is exactly 1.  No op_cost is
// given here.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4867 
// Constant for decrement
// Matches a ConL node only when its value is exactly -1.  No op_cost is
// given here.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4877 
// Long Immediate: the value 10
// Matches a ConL node only when its value is exactly 10.  No op_cost is
// given here.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
4887 
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
// Matches a ConL node whose value is in the range 0..0x7F inclusive.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4899 
// Long Immediate: low 32-bit mask
// Matches a ConL node only when its value is exactly 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4910 
// Float Immediate zero
// Matches a ConF node when jint_cast of its value equals 0.
// NOTE(review): jint_cast presumably reinterprets the float's bits, in which
// case only +0.0f (not -0.0f) matches - confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4921 
// Float Immediate
// Matches any ConF node; no predicate restricts the value.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4931 
// Double Immediate zero
// Matches a ConD node when jlong_cast of its value equals 0.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, in
// which case only +0.0 (not -0.0) matches - confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4942 
// Double Immediate
// Matches any ConD node; no predicate restricts the value.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
4952 
4953 // Immediates for special shifts (sign extend)
4954 
// Integer constant 16 (one of the immediates for special shifts)
// Matches a ConI node only when its value is exactly 16.  No op_cost is
// given here.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4964 
// Integer constant 24
// Matches a ConI node only when its value is exactly 24.  No op_cost is
// given here.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4973 
// Constant for byte-wide masking
// Matches a ConI node only when its value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4983 
// Constant for short-wide masking
// Matches a ConI node only when its value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4993 
// Constant for byte-wide masking (long)
// Matches a ConL node only when its value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5003 
// Constant for short-wide masking (long)
// Matches a ConL node only when its value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5013 
5014 // Register Operands
// Integer Register
// Allocated from register class int_reg.  Besides RegI it also accepts the
// fixed-register int operands listed below as inputs.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5030 
// Special Registers
// Int operand allocated from register class int_rax_reg; printed as "RAX".
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5041 
// Special Registers
// Int operand allocated from register class int_rbx_reg; printed as "RBX".
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
5052 
// Int operand allocated from register class int_rcx_reg; printed as "RCX".
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
5062 
// Int operand allocated from register class int_rdx_reg; printed as "RDX".
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
5072 
// Int operand allocated from register class int_rdi_reg; printed as "RDI".
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
5082 
// Int operand allocated from register class int_no_rcx_reg.  Accepts the
// fixed-register int operands other than rcx_RegI as inputs.
operand no_rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  match(RegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5095 
// Int operand allocated from register class int_no_rax_rdx_reg.  Accepts the
// fixed-register int operands other than rax_RegI and rdx_RegI as inputs.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5107 
// Pointer Register
// Allocated from register class any_reg.  Accepts RegP, every fixed-register
// pointer operand listed below, and rRegP as inputs.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5124 
// General pointer register operand, allocated from register class ptr_reg.
// Accepts RegP and the fixed-register pointer operands listed below as
// inputs.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);  // See Q&A below about r15_RegP.

  format %{ %}
  interface(REG_INTER);
%}
5139 
// Narrow pointer register operand (RegN).  Note that it is allocated from
// the int register class int_reg rather than a pointer class.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5147 
5148 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5149 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5150 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
5151 // The output of an instruction is controlled by the allocator, which respects
5152 // register class masks, not match rules.  Unless an instruction mentions
5153 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5154 // by the allocator as an input.
5155 
// Pointer operand allocated from register class ptr_no_rax_reg.  Accepts
// RegP plus rbx_RegP, rsi_RegP and rdi_RegP as inputs.
operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5167 
// Pointer operand allocated from register class ptr_no_rbp_reg.  Accepts
// RegP plus rbx_RegP, rsi_RegP and rdi_RegP as inputs.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5179 
// Pointer operand allocated from register class ptr_no_rax_rbx_reg.  Accepts
// RegP plus rsi_RegP and rdi_RegP as inputs.
operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5190 
// Special Registers
// Return a pointer value
// Pointer operand allocated from register class ptr_rax_reg.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5202 
// Special Registers
// Return a compressed pointer value
// Narrow pointer operand allocated from the int register class int_rax_reg.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
5214 
// Used in AtomicAdd
// Pointer operand allocated from register class ptr_rbx_reg.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5225 
// Pointer operand allocated from register class ptr_rsi_reg.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5235 
// Used in rep stosq
// Pointer operand allocated from register class ptr_rdi_reg.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5246 
// Pointer operand allocated from register class ptr_rbp_reg.
operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5256 
// Pointer operand allocated from register class ptr_r15_reg.  The Q&A
// comment earlier in this file describes r15 as the read-only TLS register.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5266 
// Long Register
// Allocated from register class long_reg.  Accepts RegL plus rax_RegL and
// rdx_RegL as inputs.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5277 
// Special Registers
// Long operand allocated from register class long_no_rax_rdx_reg.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5288 
// Long operand that also accepts rdx_RegL as an input.
// NOTE(review): the allocation class is long_no_rax_rdx_reg, the same class
// used by no_rax_rdx_RegL, although this operand's name only excludes RAX
// and rdx_RegL is listed as a match - confirm this is intended.
operand no_rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5299 
// Long operand allocated from register class long_no_rcx_reg.
operand no_rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5309 
// Long operand allocated from register class long_rax_reg; printed as "RAX".
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
5319 
// Long operand allocated from register class long_rcx_reg.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5329 
// Long operand allocated from register class long_rdx_reg.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
5339 
// Flags register, used as output of compare instructions
// Allocated from register class int_flags; printed as "RFLAGS".
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
5349 
// Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and match rule as rFlagsReg, but a
// distinct operand type; printed as "RFLAGS_U".
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
5359 
// Flags register variant printed as "RFLAGS_U_CF", allocated from register
// class int_flags.  Its predicate is the constant false, so the RegFlags
// match rule below never succeeds on its own.
// NOTE(review): presumably instructions name this operand explicitly as
// their result type - confirm at the use sites.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
5368 
// Float register operands
// Allocated from register class float_reg; matches RegF.
operand regF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}
5378 
// Double register operands
// Allocated from register class double_reg; matches RegD.
operand regD() 
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5388 
5389 
5390 //----------Memory Operands----------------------------------------------------
5391 // Direct Memory Operand
5392 // operand direct(immP addr)
5393 // %{
5394 //   match(addr);
5395 
5396 //   format %{ "[$addr]" %}
5397 //   interface(MEMORY_INTER) %{
5398 //     base(0xFFFFFFFF);
5399 //     index(0x4);
5400 //     scale(0x0);
5401 //     disp($addr);
5402 //   %}
5403 // %}
5404 
// Indirect Memory Operand
// Address is the pointer register alone: base = $reg, displacement 0.
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // presumably the "no index register" encoding - confirm
    scale(0x0);
    disp(0x0);
  %}
%}
5419 
// Indirect Memory Plus Short Offset Operand
// Address is base register plus a constant accepted by immL8.
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // presumably the "no index register" encoding - confirm
    scale(0x0);
    disp($off);
  %}
%}
5434 
// Indirect Memory Plus Long Offset Operand
// Address is base register plus a constant accepted by immL32.  The frame
// section names this operand as the one used for cisc-spilling.
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // presumably the "no index register" encoding - confirm
    scale(0x0);
    disp($off);
  %}
%}
5449 
// Indirect Memory Plus Index Register Plus Offset Operand
// Address is base register + unscaled long index register + a constant
// accepted by immL32.
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}
5465 
// Indirect Memory Plus Index Register Operand
// Address is base register + unscaled long index register; the displacement
// is fixed at 0.
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5481 
// Indirect Memory Times Scale Plus Index Register
// Address is base register + (long index register << scale); the scale
// constant must be accepted by immI2 and the displacement is fixed at 0.
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5497 
5498 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: $reg + ($lreg << $scale) + $off
5499 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5500 %{
5501   constraint(ALLOC_IN_RC(ptr_reg));
5502   match(AddP (AddP reg (LShiftL lreg scale)) off);
5503 
5504   op_cost(10);
5505   format %{"[$reg + $off + $lreg << $scale]" %}
5506   interface(MEMORY_INTER) %{
5507     base($reg);
5508     index($lreg);
5509     scale($scale);  // Shift amount of the matched LShiftL becomes the scale
5510     disp($off);
5511   %}
5512 %}
5513 
5514 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: like indIndexScaleOffset but the index is an int register widened by ConvI2L
5515 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5516 %{
5517   constraint(ALLOC_IN_RC(ptr_reg));
5518   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // Requires the long type's lower bound (_lo) to be >= 0; the in(2)->in(3)->in(1) path presumably selects the ConvI2L node -- TODO confirm
5519   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5520 
5521   op_cost(10);
5522   format %{"[$reg + $off + $idx << $scale]" %}
5523   interface(MEMORY_INTER) %{
5524     base($reg);
5525     index($idx);    // The int register itself is the index; the ConvI2L is absorbed by the match
5526     scale($scale);
5527     disp($off);
5528   %}
5529 %}
5530 
5531 // Indirect Narrow Oop Plus Offset Operand: folds (DecodeN reg) + off into one address, R12 + ($reg << 3) + $off
5532 // Note: x86 architecture doesn't support "scale * index + offset" without a base, so
5533 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
5534 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5535   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // Only when compressed oops are on and the shift is non-zero
5536   constraint(ALLOC_IN_RC(ptr_reg));
5537   match(AddP (DecodeN reg) off);
5538 
5539   op_cost(10);
5540   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5541   interface(MEMORY_INTER) %{
5542     base(0xc); // R12
5543     index($reg);  // The narrow oop register is used as the index
5544     scale(0x3);   // Fixed shift of 3, matching the "<< 3" in the format string
5545     disp($off);
5546   %}
5547 %}
5548 
5549 // Indirect Memory Operand for a narrow oop: (DecodeN reg) is used directly as the base; only when Universe::narrow_oop_shift() == 0
5550 operand indirectNarrow(rRegN reg)
5551 %{
5552   predicate(Universe::narrow_oop_shift() == 0);
5553   constraint(ALLOC_IN_RC(ptr_reg));
5554   match(DecodeN reg);
5555 
5556   format %{ "[$reg]" %}
5557   interface(MEMORY_INTER) %{
5558     base($reg);
5559     index(0x4);  // No Index
5560     scale(0x0);  // No Scale
5561     disp(0x0);   // No displacement
5562   %}
5563 %}
5564 
5565 // Indirect Memory Plus Short Offset Operand for a narrow oop base: (DecodeN reg) + immL8 $off; only when Universe::narrow_oop_shift() == 0
5566 operand indOffset8Narrow(rRegN reg, immL8 off)
5567 %{
5568   predicate(Universe::narrow_oop_shift() == 0);
5569   constraint(ALLOC_IN_RC(ptr_reg));
5570   match(AddP (DecodeN reg) off);
5571 
5572   format %{ "[$reg + $off (8-bit)]" %}
5573   interface(MEMORY_INTER) %{
5574     base($reg);
5575     index(0x4);  // No Index
5576     scale(0x0);  // No Scale
5577     disp($off);
5578   %}
5579 %}
5580 
5581 // Indirect Memory Plus Long Offset Operand for a narrow oop base: (DecodeN reg) + immL32 $off; only when Universe::narrow_oop_shift() == 0
5582 operand indOffset32Narrow(rRegN reg, immL32 off)
5583 %{
5584   predicate(Universe::narrow_oop_shift() == 0);
5585   constraint(ALLOC_IN_RC(ptr_reg));
5586   match(AddP (DecodeN reg) off);
5587 
5588   format %{ "[$reg + $off (32-bit)]" %}
5589   interface(MEMORY_INTER) %{
5590     base($reg);
5591     index(0x4);  // No Index
5592     scale(0x0);  // No Scale
5593     disp($off);
5594   %}
5595 %}
5596 
5597 // Indirect Memory Plus Index Register Plus Offset Operand for a narrow oop base: (DecodeN reg) + $lreg + $off; only when Universe::narrow_oop_shift() == 0
5598 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5599 %{
5600   predicate(Universe::narrow_oop_shift() == 0);
5601   constraint(ALLOC_IN_RC(ptr_reg));
5602   match(AddP (AddP (DecodeN reg) lreg) off);
5603 
5604   op_cost(10);
5605   format %{"[$reg + $off + $lreg]" %}
5606   interface(MEMORY_INTER) %{
5607     base($reg);
5608     index($lreg);
5609     scale(0x0);  // Index is used unscaled
5610     disp($off);
5611   %}
5612 %}
5613 
5614 // Indirect Memory Plus Index Register Operand for a narrow oop base: (DecodeN reg) + $lreg, no displacement; only when Universe::narrow_oop_shift() == 0
5615 operand indIndexNarrow(rRegN reg, rRegL lreg)
5616 %{
5617   predicate(Universe::narrow_oop_shift() == 0);
5618   constraint(ALLOC_IN_RC(ptr_reg));
5619   match(AddP (DecodeN reg) lreg);
5620 
5621   op_cost(10);
5622   format %{"[$reg + $lreg]" %}
5623   interface(MEMORY_INTER) %{
5624     base($reg);
5625     index($lreg);
5626     scale(0x0);  // Index is used unscaled
5627     disp(0x0);   // No displacement
5628   %}
5629 %}
5630 
5631 // Indirect Memory Times Scale Plus Index Register for a narrow oop base: (DecodeN reg) + ($lreg << $scale); only when Universe::narrow_oop_shift() == 0
5632 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5633 %{
5634   predicate(Universe::narrow_oop_shift() == 0);
5635   constraint(ALLOC_IN_RC(ptr_reg));
5636   match(AddP (DecodeN reg) (LShiftL lreg scale));
5637 
5638   op_cost(10);
5639   format %{"[$reg + $lreg << $scale]" %}
5640   interface(MEMORY_INTER) %{
5641     base($reg);
5642     index($lreg);
5643     scale($scale);  // Shift amount of the matched LShiftL becomes the scale
5644     disp(0x0);      // No displacement
5645   %}
5646 %}
5647 
5648 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand for a narrow oop base: (DecodeN reg) + ($lreg << $scale) + $off; only when Universe::narrow_oop_shift() == 0
5649 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5650 %{
5651   predicate(Universe::narrow_oop_shift() == 0);
5652   constraint(ALLOC_IN_RC(ptr_reg));
5653   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5654 
5655   op_cost(10);
5656   format %{"[$reg + $off + $lreg << $scale]" %}
5657   interface(MEMORY_INTER) %{
5658     base($reg);
5659     index($lreg);
5660     scale($scale);  // Shift amount of the matched LShiftL becomes the scale
5661     disp($off);
5662   %}
5663 %}
5664 
5665 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand for a narrow oop base (DecodeN reg); index is an int register widened by ConvI2L
5666 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5667 %{
5668   constraint(ALLOC_IN_RC(ptr_reg));
5669   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // Zero narrow-oop shift AND long type lower bound (_lo) >= 0; node path presumably selects the ConvI2L -- TODO confirm
5670   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5671 
5672   op_cost(10);
5673   format %{"[$reg + $off + $idx << $scale]" %}
5674   interface(MEMORY_INTER) %{
5675     base($reg);
5676     index($idx);    // The int register itself is the index; the ConvI2L is absorbed by the match
5677     scale($scale);
5678     disp($off);
5679   %}
5680 %}
5681 
5682 
5683 //----------Special Memory Operands--------------------------------------------
5684 // Stack Slot Operand - This operand is used for loading and storing temporary
5685 //                      values on the stack where a match requires a value to
5686 //                      flow through memory.  This variant takes an sRegP.
5687 operand stackSlotP(sRegP reg)
5688 %{
5689   constraint(ALLOC_IN_RC(stack_slots));
5690   // No match rule because this operand is only generated in matching
5691 
5692   format %{ "[$reg]" %}
5693   interface(MEMORY_INTER) %{
5694     base(0x4);   // RSP
5695     index(0x4);  // No Index
5696     scale(0x0);  // No Scale
5697     disp($reg);  // Stack Offset, supplied by the $reg operand
5698   %}
5699 %}
5700 
5701 operand stackSlotI(sRegI reg)  // RSP-relative stack slot operand taking an sRegI
5702 %{
5703   constraint(ALLOC_IN_RC(stack_slots));
5704   // No match rule because this operand is only generated in matching
5705 
5706   format %{ "[$reg]" %}
5707   interface(MEMORY_INTER) %{
5708     base(0x4);   // RSP
5709     index(0x4);  // No Index
5710     scale(0x0);  // No Scale
5711     disp($reg);  // Stack Offset, supplied by the $reg operand
5712   %}
5713 %}
5714 
5715 operand stackSlotF(sRegF reg)  // RSP-relative stack slot operand taking an sRegF
5716 %{
5717   constraint(ALLOC_IN_RC(stack_slots));
5718   // No match rule because this operand is only generated in matching
5719 
5720   format %{ "[$reg]" %}
5721   interface(MEMORY_INTER) %{
5722     base(0x4);   // RSP
5723     index(0x4);  // No Index
5724     scale(0x0);  // No Scale
5725     disp($reg);  // Stack Offset, supplied by the $reg operand
5726   %}
5727 %}
5728 
5729 operand stackSlotD(sRegD reg)  // RSP-relative stack slot operand taking an sRegD
5730 %{
5731   constraint(ALLOC_IN_RC(stack_slots));
5732   // No match rule because this operand is only generated in matching
5733 
5734   format %{ "[$reg]" %}
5735   interface(MEMORY_INTER) %{
5736     base(0x4);   // RSP
5737     index(0x4);  // No Index
5738     scale(0x0);  // No Scale
5739     disp($reg);  // Stack Offset, supplied by the $reg operand
5740   %}
5741 %}
5742 operand stackSlotL(sRegL reg)  // RSP-relative stack slot operand taking an sRegL
5743 %{
5744   constraint(ALLOC_IN_RC(stack_slots));
5745   // No match rule because this operand is only generated in matching
5746 
5747   format %{ "[$reg]" %}
5748   interface(MEMORY_INTER) %{
5749     base(0x4);   // RSP
5750     index(0x4);  // No Index
5751     scale(0x0);  // No Scale
5752     disp($reg);  // Stack Offset, supplied by the $reg operand
5753   %}
5754 %}
5755 
5756 //----------Conditional Branch Operands----------------------------------------
5757 // Comparison Op  - This is the operation of the comparison, and is limited to
5758 //                  the following set of codes:
5759 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5760 //
5761 // Other attributes of the comparison, such as unsignedness, are specified
5762 // by the comparison instruction that sets a condition code flags register.
5763 // That result is represented by a flags operand whose subtype is appropriate
5764 // to the unsignedness (etc.) of the comparison.
5765 //
5766 // Later, the instruction which matches both the Comparison Op (a Bool) and
5767 // the flags (produced by the Cmp) specifies the coding of the comparison op
5768 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5769 
5770 // Comparison Code: matches any Bool and maps each test to the e/ne/l/ge/le/g condition encodings
5771 operand cmpOp()
5772 %{
5773   match(Bool);
5774 
5775   format %{ "" %}
5776   interface(COND_INTER) %{
5777     equal(0x4, "e");
5778     not_equal(0x5, "ne");
5779     less(0xC, "l");
5780     greater_equal(0xD, "ge");
5781     less_equal(0xE, "le");
5782     greater(0xF, "g");
5783   %}
5784 %}
5785 
5786 // Comparison Code, unsigned compare.  Used by FP also, with
5787 // C2 (unordered) turned into GT or LT already.  The other bits
5788 // C0 and C3 are turned into Carry & Zero flags.
5789 operand cmpOpU()
5790 %{
5791   match(Bool);
5792 
5793   format %{ "" %}
5794   interface(COND_INTER) %{
5795     equal(0x4, "e");
5796     not_equal(0x5, "ne");
5797     less(0x2, "b");            // below
5798     greater_equal(0x3, "nb");  // not below
5799     less_equal(0x6, "be");     // below or equal
5800     greater(0x7, "nbe");       // not below or equal
5801   %}
5802 %}
5803 
5804 
5805 // Floating comparisons that don't require any fixup for the unordered case; the predicate admits only lt, ge, le and gt tests
5806 operand cmpOpUCF() %{
5807   match(Bool);
5808   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5809             n->as_Bool()->_test._test == BoolTest::ge ||
5810             n->as_Bool()->_test._test == BoolTest::le ||
5811             n->as_Bool()->_test._test == BoolTest::gt);
5812   format %{ "" %}
5813   interface(COND_INTER) %{  // Same encodings as cmpOpU
5814     equal(0x4, "e");
5815     not_equal(0x5, "ne");
5816     less(0x2, "b");
5817     greater_equal(0x3, "nb");
5818     less_equal(0x6, "be");
5819     greater(0x7, "nbe");
5820   %}
5821 %}
5822 
5823 
5824 // Floating comparisons that can be fixed up with extra conditional jumps; the predicate admits only ne and eq tests
5825 operand cmpOpUCF2() %{
5826   match(Bool);
5827   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5828             n->as_Bool()->_test._test == BoolTest::eq);
5829   format %{ "" %}
5830   interface(COND_INTER) %{  // Same encodings as cmpOpU
5831     equal(0x4, "e");
5832     not_equal(0x5, "ne");
5833     less(0x2, "b");
5834     greater_equal(0x3, "nb");
5835     less_equal(0x6, "be");
5836     greater(0x7, "nbe");
5837   %}
5838 %}
5839 
5840 
5841 //----------OPERAND CLASSES----------------------------------------------------
5842 // Operand Classes are groups of operands that are used to simplify
5843 // instruction definitions by not requiring the AD writer to specify separate
5844 // instructions for every form of operand when the instruction accepts
5845 // multiple operand types with the same basic encoding and format.  The classic
5846 // case of this is memory operands.
5847 
5848 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // any_RegP-based forms
5849                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5850                indCompressedOopOffset,  // R12-based compressed oop form
5851                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // DecodeN forms guarded by narrow_oop_shift() == 0
5852                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5853                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5854 
5855 //----------PIPELINE-----------------------------------------------------------
5856 // Rules which define the behavior of the target architectures pipeline.
5857 pipeline %{
5858 
5859 //----------ATTRIBUTES---------------------------------------------------------
5860 attributes %{
5861   variable_size_instructions;        // Variable size instructions
5862   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5863   instruction_unit_size = 1;         // Instruction unit size is 1 byte
5864   instruction_fetch_unit_size = 16;  // The processor fetches one line
5865   instruction_fetch_units = 1;       // of 16 bytes
5866 
5867   // List of nop instructions
5868   nops( MachNop );
5869 %}
5870 
5871 //----------RESOURCES----------------------------------------------------------
5872 // Resources are the functional units available to the machine
5873 
5874 // Generic P2/P3 pipeline
5875 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5876 // 3 instructions decoded per cycle.
5877 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5878 // 3 ALU ops, only ALU0 handles mul instructions.
5879 resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE means any of the three decoders
5880            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // NOTE(review): three MS units are declared although the comment above says 2 load/store ops per cycle -- confirm
5881            BR, FPU,
5882            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALU means any of the three ALUs
5883 
5884 //----------PIPELINE DESCRIPTION-----------------------------------------------
5885 // Pipeline Description specifies the stages in the machine's pipeline
5886 
5887 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes below
5888 pipe_desc(S0, S1, S2, S3, S4, S5);
5889 
5890 //----------PIPELINE CLASSES---------------------------------------------------
5891 // Pipeline Classes describe the stages in which input and output are
5892 // referenced by the hardware pipeline.
5893 
5894 // Naming convention: ialu or fpu
5895 // Then: _reg
5896 // Then: _reg if there is a 2nd register
5897 // Then: _long if it's a pair of instructions implementing a long
5898 // Then: _fat if it requires the big decoder
5899 //   Or: _mem if it requires the big decoder and a memory unit.
5900 
5901 // Integer ALU reg operation: single instruction that reads dst in S3 and writes it in S4
5902 pipe_class ialu_reg(rRegI dst)
5903 %{
5904     single_instruction;
5905     dst    : S4(write);
5906     dst    : S3(read);
5907     DECODE : S0;        // any decoder
5908     ALU    : S3;        // any alu
5909 %}
5910 
5911 // Long ALU reg operation: modeled as two instructions that read dst in S3 and write it in S4
5912 pipe_class ialu_reg_long(rRegL dst)
5913 %{
5914     instruction_count(2);
5915     dst    : S4(write);
5916     dst    : S3(read);
5917     DECODE : S0(2);     // any 2 decoders
5918     ALU    : S3(2);     // both alus
5919 %}
5920 
5921 // Integer ALU reg operation using big decoder: same timing as ialu_reg but decode is restricted to D0
5922 pipe_class ialu_reg_fat(rRegI dst)
5923 %{
5924     single_instruction;
5925     dst    : S4(write);
5926     dst    : S3(read);
5927     D0     : S0;        // big decoder only
5928     ALU    : S3;        // any alu
5929 %}
5930 
5931 // Long ALU reg operation using big decoder: two instructions, both decoded on D0
5932 pipe_class ialu_reg_long_fat(rRegL dst)
5933 %{
5934     instruction_count(2);
5935     dst    : S4(write);
5936     dst    : S3(read);
5937     D0     : S0(2);     // big decoder only; twice
5938     ALU    : S3(2);     // any 2 alus
5939 %}
5940 
5941 // Integer ALU reg-reg operation: single instruction, src read in S3, dst written in S4
5942 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5943 %{
5944     single_instruction;
5945     dst    : S4(write);
5946     src    : S3(read);
5947     DECODE : S0;        // any decoder
5948     ALU    : S3;        // any alu
5949 %}
5950 
5951 // Long ALU reg-reg operation: modeled as two instructions, src read in S3, dst written in S4
5952 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5953 %{
5954     instruction_count(2);
5955     dst    : S4(write);
5956     src    : S3(read);
5957     DECODE : S0(2);     // any 2 decoders
5958     ALU    : S3(2);     // both alus
5959 %}
5960 
5961 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here)
5962 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5963 %{
5964     single_instruction;
5965     dst    : S4(write);
5966     src    : S3(read);
5967     D0     : S0;        // big decoder only
5968     ALU    : S3;        // any alu
5969 %}
5970 
5971 // Long ALU reg-reg operation using big decoder: two instructions, both decoded on D0
5972 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5973 %{
5974     instruction_count(2);
5975     dst    : S4(write);
5976     src    : S3(read);
5977     D0     : S0(2);     // big decoder only; twice
5978     ALU    : S3(2);     // both alus
5979 %}
5980 
5981 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
5982 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5983 %{
5984     single_instruction;
5985     dst    : S5(write);
5986     mem    : S3(read);
5987     D0     : S0;        // big decoder only
5988     ALU    : S4;        // any alu
5989     MEM    : S3;        // any mem
5990 %}
5991 
5992 // Integer mem operation (prefetch): only a memory operand, no ALU resource is used
5993 pipe_class ialu_mem(memory mem)
5994 %{
5995     single_instruction;
5996     mem    : S3(read);
5997     D0     : S0;        // big decoder only
5998     MEM    : S3;        // any mem
5999 %}
6000 
6001 // Integer Store to Memory: mem operand read in S3, register source read in S5
6002 pipe_class ialu_mem_reg(memory mem, rRegI src)
6003 %{
6004     single_instruction;
6005     mem    : S3(read);
6006     src    : S5(read);
6007     D0     : S0;        // big decoder only
6008     ALU    : S4;        // any alu
6009     MEM    : S3;        // any mem
6010 %}
6011 
6012 // // Long Store to Memory
6013 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6014 // %{
6015 //     instruction_count(2);
6016 //     mem    : S3(read);
6017 //     src    : S5(read);
6018 //     D0     : S0(2);          // big decoder only; twice
6019 //     ALU    : S4(2);     // any 2 alus
6020 //     MEM    : S3(2);  // Both mems
6021 // %}
6022 
6023 // Integer Store to Memory with no register source operand (the name suggests an immediate source)
6024 pipe_class ialu_mem_imm(memory mem)
6025 %{
6026     single_instruction;
6027     mem    : S3(read);
6028     D0     : S0;        // big decoder only
6029     ALU    : S4;        // any alu
6030     MEM    : S3;        // any mem
6031 %}
6032 
6033 // Integer ALU0 reg-reg operation: like ialu_reg_reg but pinned to the big decoder and ALU0
6034 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6035 %{
6036     single_instruction;
6037     dst    : S4(write);
6038     src    : S3(read);
6039     D0     : S0;        // Big decoder only
6040     ALU0   : S3;        // only alu0
6041 %}
6042 
6043 // Integer ALU0 reg-mem operation: like ialu_reg_mem but the ALU stage is pinned to ALU0
6044 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6045 %{
6046     single_instruction;
6047     dst    : S5(write);
6048     mem    : S3(read);
6049     D0     : S0;        // big decoder only
6050     ALU0   : S4;        // ALU0 only
6051     MEM    : S3;        // any mem
6052 %}
6053 
6054 // Integer ALU reg-reg operation that writes the flags register cr (in S4) from two register sources
6055 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6056 %{
6057     single_instruction;
6058     cr     : S4(write);
6059     src1   : S3(read);
6060     src2   : S3(read);
6061     DECODE : S0;        // any decoder
6062     ALU    : S3;        // any alu
6063 %}
6064 
6065 // Integer ALU reg-imm operation that writes the flags register cr (in S4) from one register source
6066 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6067 %{
6068     single_instruction;
6069     cr     : S4(write);
6070     src1   : S3(read);
6071     DECODE : S0;        // any decoder
6072     ALU    : S3;        // any alu
6073 %}
6074 
6075 // Integer ALU reg-mem operation that writes the flags register cr (in S4); src2 is a memory operand
6076 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6077 %{
6078     single_instruction;
6079     cr     : S4(write);
6080     src1   : S3(read);
6081     src2   : S3(read);
6082     D0     : S0;        // big decoder only
6083     ALU    : S4;        // any alu
6084     MEM    : S3;        // any mem
6085 %}
6086 
6087 // Four-instruction sequence that reads p, q and y; no operand is declared as written and only decoders are reserved
6088 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6089 %{
6090     instruction_count(4);
6091     y      : S4(read);
6092     q      : S3(read);
6093     p      : S3(read);
6094     DECODE : S0(4);     // any decoder
6095 %}
6096 
6097 // Conditional move reg-reg: reads src and the flags cr in S3, writes dst in S4
6098 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6099 %{
6100     single_instruction;
6101     dst    : S4(write);
6102     src    : S3(read);
6103     cr     : S3(read);
6104     DECODE : S0;        // any decoder
6105 %}
6106 
6107 // Conditional move reg-mem: src is a memory operand; note the parameter order is (cr, dst, src)
6108 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6109 %{
6110     single_instruction;
6111     dst    : S4(write);
6112     src    : S3(read);
6113     cr     : S3(read);
6114     DECODE : S0;        // any decoder
6115     MEM    : S3;        // any mem
6116 %}
6117 
6118 // Conditional move reg-reg long: reads src and the flags cr in S3, writes dst in S4
6119 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6120 %{
6121     single_instruction;  // NOTE(review): declared single_instruction although DECODE below is reserved with a count of 2 -- confirm intent
6122     dst    : S4(write);
6123     src    : S3(read);
6124     cr     : S3(read);
6125     DECODE : S0(2);     // any 2 decoders
6126 %}
6127 
6128 // XXX
6129 // // Conditional move double reg-reg
6130 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
6131 // %{
6132 //     single_instruction;
6133 //     dst    : S4(write);
6134 //     src    : S3(read);
6135 //     cr     : S3(read);
6136 //     DECODE : S0;     // any decoder
6137 // %}
6138 
6139 // Float reg operation: single register operand, declared only as read (in S3)
6140 pipe_class fpu_reg(regD dst)
6141 %{
6142     instruction_count(2);
6143     dst    : S3(read);
6144     DECODE : S0(2);     // any 2 decoders
6145     FPU    : S3;
6146 %}
6147 
6148 // Float reg-reg operation: two instructions, src read in S3, dst written in S4
6149 pipe_class fpu_reg_reg(regD dst, regD src)
6150 %{
6151     instruction_count(2);
6152     dst    : S4(write);
6153     src    : S3(read);
6154     DECODE : S0(2);     // any 2 decoders
6155     FPU    : S3;
6156 %}
6157 
6158 // Float reg-reg-reg operation: three instructions, two register sources read in S3, dst written in S4
6159 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6160 %{
6161     instruction_count(3);
6162     dst    : S4(write);
6163     src1   : S3(read);
6164     src2   : S3(read);
6165     DECODE : S0(3);     // any 3 decoders
6166     FPU    : S3(2);
6167 %}
6168 
6169 // Float reg-reg-reg-reg operation: four instructions, three register sources read in S3, dst written in S4
6170 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6171 %{
6172     instruction_count(4);
6173     dst    : S4(write);
6174     src1   : S3(read);
6175     src2   : S3(read);
6176     src3   : S3(read);
6177     DECODE : S0(4);     // decoder count of 4, matching instruction_count(4)
6178     FPU    : S3(2);
6179 %}
6180 
6181 // Float reg-mem-reg-reg operation: src1 is a memory operand, src2 and src3 are registers; four instructions
6182 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6183 %{
6184     instruction_count(4);
6185     dst    : S4(write);
6186     src1   : S3(read);
6187     src2   : S3(read);
6188     src3   : S3(read);
6189     DECODE : S1(3);     // any 3 decoders
6190     D0     : S0;        // Big decoder only
6191     FPU    : S3(2);
6192     MEM    : S3;        // any mem
6193 %}
6194 
6195 // Float reg-mem operation: two instructions, memory read in S3, FPU in S4, dst written in S5
6196 pipe_class fpu_reg_mem(regD dst, memory mem)
6197 %{
6198     instruction_count(2);
6199     dst    : S5(write);
6200     mem    : S3(read);
6201     D0     : S0;        // big decoder only
6202     DECODE : S1;        // any decoder for FPU POP
6203     FPU    : S4;
6204     MEM    : S3;        // any mem
6205 %}
6206 
6207 // Float reg-reg-mem operation: three instructions, register src1 and memory operand read in S3, dst written in S5
6208 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6209 %{
6210     instruction_count(3);
6211     dst    : S5(write);
6212     src1   : S3(read);
6213     mem    : S3(read);
6214     D0     : S0;        // big decoder only
6215     DECODE : S1(2);     // any decoder for FPU POP
6216     FPU    : S4;
6217     MEM    : S3;        // any mem
6218 %}
6219 
6220 // Float mem-reg operation: two instructions, mem operand read in S3, register src read in S5
6221 pipe_class fpu_mem_reg(memory mem, regD src)
6222 %{
6223     instruction_count(2);
6224     src    : S5(read);
6225     mem    : S3(read);
6226     DECODE : S0;        // any decoder for FPU PUSH
6227     D0     : S1;        // big decoder only
6228     FPU    : S4;
6229     MEM    : S3;        // any mem
6230 %}
6231 
6232 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation: three instructions, all operands read in S3
6233 %{
6234     instruction_count(3);
6235     src1   : S3(read);
6236     src2   : S3(read);
6237     mem    : S3(read);
6238     DECODE : S0(2);     // any decoder for FPU PUSH
6239     D0     : S1;        // big decoder only
6240     FPU    : S4;
6241     MEM    : S3;        // any mem
6242 %}
6243 
6244 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation: three instructions, two memory operands
6245 %{
6246     instruction_count(3);
6247     src1   : S3(read);
6248     src2   : S3(read);
6249     mem    : S4(read);
6250     DECODE : S0;        // any decoder for FPU PUSH
6251     D0     : S0(2);     // big decoder only
6252     FPU    : S4;
6253     MEM    : S3(2);     // any mem
6254 %}
6255 
6256 pipe_class fpu_mem_mem(memory dst, memory src1)  // Float mem-mem operation: two instructions, no FPU resource is reserved
6257 %{
6258     instruction_count(2);
6259     src1   : S3(read);
6260     dst    : S4(read);  // dst is a memory operand and is declared as a read here
6261     D0     : S0(2);     // big decoder only
6262     MEM    : S3(2);     // any mem
6263 %}
6264 
6265 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float mem-mem-mem operation: three instructions, three memory operands
6266 %{
6267     instruction_count(3);
6268     src1   : S3(read);
6269     src2   : S3(read);
6270     dst    : S4(read);  // dst is a memory operand and is declared as a read here
6271     D0     : S0(3);     // big decoder only
6272     FPU    : S4;
6273     MEM    : S3(3);     // any mem
6274 %}
6275 
6276 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float mem-reg operation, three instructions; the _con suffix presumably denotes a constant operand -- confirm
6277 %{
6278     instruction_count(3);
6279     src1   : S4(read);
6280     mem    : S4(read);
6281     DECODE : S0;        // any decoder for FPU PUSH
6282     D0     : S0(2);     // big decoder only
6283     FPU    : S4;
6284     MEM    : S3(2);     // any mem
6285 %}
6286 
6287 // Float load constant: two instructions, a memory unit is used in S3 and dst is written in S5
6288 pipe_class fpu_reg_con(regD dst)
6289 %{
6290     instruction_count(2);
6291     dst    : S5(write);
6292     D0     : S0;        // big decoder only for the load
6293     DECODE : S1;        // any decoder for FPU POP
6294     FPU    : S4;
6295     MEM    : S3;        // any mem
6296 %}
6297 
6298 // Float load constant with an additional register source: three instructions, src read in S3, dst written in S5
6299 pipe_class fpu_reg_reg_con(regD dst, regD src)
6300 %{
6301     instruction_count(3);
6302     dst    : S5(write);
6303     src    : S3(read);
6304     D0     : S0;        // big decoder only for the load
6305     DECODE : S1(2);     // any decoder for FPU POP
6306     FPU    : S4;
6307     MEM    : S3;        // any mem
6308 %}
6309 
6310 // Unconditional branch: single instruction using only the branch unit, in S3
6311 pipe_class pipe_jmp(label labl)
6312 %{
6313     single_instruction;
6314     BR   : S3;
6315 %}
6316 
6317 // Conditional branch: reads the flags register cr in S1 and uses the branch unit in S3
6318 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6319 %{
6320     single_instruction;
6321     cr    : S1(read);
6322     BR    : S3;
6323 %}
6324 
6325 // Allocation idiom: one serialized instruction with a fixed latency of 6; reads heap_ptr in S3, writes dst in S5
6326 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6327 %{
6328     instruction_count(1); force_serialization;
6329     fixed_latency(6);
6330     heap_ptr : S3(read);
6331     DECODE   : S0(3);
6332     D0       : S2;
6333     MEM      : S3;
6334     ALU      : S3(2);
6335     dst      : S5(write);
6336     BR       : S5;
6337 %}
6338 
6339 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serialized, fixed latency of 100
6340 pipe_class pipe_slow()
6341 %{
6342     instruction_count(10); multiple_bundles; force_serialization;
6343     fixed_latency(100);
6344     D0  : S0(2);
6345     MEM : S3(2);
6346 %}
6347 
6348 // The real do-nothing guy: zero instructions and no resources reserved
6349 pipe_class empty()
6350 %{
6351     instruction_count(0);
6352 %}
6353 
6354 // Define the class for the Nop node: MachNop (listed in the attributes' nops(...)) uses the 'empty' pipe class
6355 define
6356 %{
6357    MachNop = empty;
6358 %}
6359 
6360 %}
6361 
6362 //----------INSTRUCTIONS-------------------------------------------------------
6363 //
6364 // match      -- States which machine-independent subtree may be replaced
6365 //               by this instruction.
6366 // ins_cost   -- The estimated cost of this instruction is used by instruction
6367 //               selection to identify a minimum cost tree of machine
6368 //               instructions that matches a tree of machine-independent
6369 //               instructions.
6370 // format     -- A string providing the disassembly for this instruction.
6371 //               The value of an instruction's operand may be inserted
6372 //               by referring to it with a '$' prefix.
6373 // opcode     -- Three instruction opcodes may be provided.  These are referred
6374 //               to within an encode class as $primary, $secondary, and $tertiary
6375 //               respectively.  The primary opcode is commonly used to
6376 //               indicate the type of machine instruction, while secondary
6377 //               and tertiary are often used for prefix options or addressing
6378 //               modes.
6379 // ins_encode -- A list of encode classes with parameters. The encode class
6380 //               name must have been defined in an 'enc_class' specification
6381 //               in the encode section of the architecture description.
6382 
6383 
6384 //----------Load/Store/Move Instructions---------------------------------------
6385 //----------Load Instructions--------------------------------------------------
6386 
6387 // Load Byte (8 bit signed): matches LoadB and emits movsbl from the memory operand into dst
6388 instruct loadB(rRegI dst, memory mem)
6389 %{
6390   match(Set dst (LoadB mem));
6391 
6392   ins_cost(125);
6393   format %{ "movsbl  $dst, $mem\t# byte" %}
6394 
6395   ins_encode %{
6396     __ movsbl($dst$$Register, $mem$$Address);
6397   %}
6398 
6399   ins_pipe(ialu_reg_mem);
6400 %}
6401 
6402 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a single movsbq
6403 instruct loadB2L(rRegL dst, memory mem)
6404 %{
6405   match(Set dst (ConvI2L (LoadB mem)));
6406 
6407   ins_cost(125);
6408   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6409 
6410   ins_encode %{
6411     __ movsbq($dst$$Register, $mem$$Address);
6412   %}
6413 
6414   ins_pipe(ialu_reg_mem);
6415 %}
6416 
6417 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl from the memory operand into dst
6418 instruct loadUB(rRegI dst, memory mem)
6419 %{
6420   match(Set dst (LoadUB mem));
6421 
6422   ins_cost(125);
6423   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6424 
6425   ins_encode %{
6426     __ movzbl($dst$$Register, $mem$$Address);
6427   %}
6428 
6429   ins_pipe(ialu_reg_mem);
6430 %}
6431 
6432 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into a single movzbq
6433 instruct loadUB2L(rRegL dst, memory mem)
6434 %{
6435   match(Set dst (ConvI2L (LoadUB mem)));
6436 
6437   ins_cost(125);
6438   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6439 
6440   ins_encode %{
6441     __ movzbq($dst$$Register, $mem$$Address);
6442   %}
6443 
6444   ins_pipe(ialu_reg_mem);
6445 %}
6446 
6447 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register: movzbq followed by andl with the mask constant
6448 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6449   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6450   effect(KILL cr);  // Flags are declared killed, presumably because of the andl emitted below
6451 
6452   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6453             "andl    $dst, $mask" %}
6454   ins_encode %{
6455     Register Rdst = $dst$$Register;
6456     __ movzbq(Rdst, $mem$$Address);  // Load the byte into dst first
6457     __ andl(Rdst, $mask$$constant);  // Then apply the mask in place
6458   %}
6459   ins_pipe(ialu_reg_mem);
6460 %}
6461 
6462 // Load Short (16 bit signed): matches LoadS and emits movswl from the memory operand into dst
6463 instruct loadS(rRegI dst, memory mem)
6464 %{
6465   match(Set dst (LoadS mem));
6466 
6467   ins_cost(125);
6468   format %{ "movswl $dst, $mem\t# short" %}
6469 
6470   ins_encode %{
6471     __ movswl($dst$$Register, $mem$$Address);
6472   %}
6473 
6474   ins_pipe(ialu_reg_mem);
6475 %}
6476 
6477 // Load Short (16 bit signed) to Byte (8 bit signed): the shift-left-then-shift-right pattern over LoadS is replaced by a single movsbl
6478 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6479   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));  // Both shifts use the same immI_24 operand
6480 
6481   ins_cost(125);
6482   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6483   ins_encode %{
6484     __ movsbl($dst$$Register, $mem$$Address);
6485   %}
6486   ins_pipe(ialu_reg_mem);
6487 %}
6488 
6489 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a single movswq
6490 instruct loadS2L(rRegL dst, memory mem)
6491 %{
6492   match(Set dst (ConvI2L (LoadS mem)));
6493 
6494   ins_cost(125);
6495   format %{ "movswq $dst, $mem\t# short -> long" %}
6496 
6497   ins_encode %{
6498     __ movswq($dst$$Register, $mem$$Address);
6499   %}
6500 
6501   ins_pipe(ialu_reg_mem);
6502 %}
6503 
6504 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits movzwl from the memory operand into dst
6505 instruct loadUS(rRegI dst, memory mem)
6506 %{
6507   match(Set dst (LoadUS mem));
6508 
6509   ins_cost(125);
6510   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6511 
6512   ins_encode %{
6513     __ movzwl($dst$$Register, $mem$$Address);
6514   %}
6515 
6516   ins_pipe(ialu_reg_mem);
6517 %}
6518 
6519 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// loadUS2B: folds the shift pair (LoadUS << c) >> c into one movsbl load.
// Both shift counts use the immI_24 operand (presumably the constant 24 --
// confirm against its definition).
6520 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6521   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6522 
6523   ins_cost(125);
6524   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6525   ins_encode %{
6526     __ movsbl($dst$$Register, $mem$$Address);
6527   %}
6528   ins_pipe(ialu_reg_mem);
6529 %}
6530 
6531 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// loadUS2L: folds ConvI2L(LoadUS mem) into a single movzwq load into an rRegL
// destination.
6532 instruct loadUS2L(rRegL dst, memory mem)
6533 %{
6534   match(Set dst (ConvI2L (LoadUS mem)));
6535 
6536   ins_cost(125);
6537   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6538 
6539   ins_encode %{
6540     __ movzwq($dst$$Register, $mem$$Address);
6541   %}
6542 
6543   ins_pipe(ialu_reg_mem);
6544 %}
6545 
6546 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// loadUS2L_immI_255: matches ConvI2L(AndI(LoadUS mem, mask)) when the mask is an
// immI_255 operand and emits only movzbq -- no separate AND instruction.
// No ins_cost is given, so the default cost applies.
6547 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6548   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6549 
6550   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6551   ins_encode %{
6552     __ movzbq($dst$$Register, $mem$$Address);
6553   %}
6554   ins_pipe(ialu_reg_mem);
6555 %}
6556 
6557 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
// loadUS2L_immI16: matches ConvI2L(AndI(LoadUS mem, mask)) with an immI16 mask and
// emits movzwq followed by andl on the destination register.
6558 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6559   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6560   effect(KILL cr);
6561 
6562   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6563             "andl    $dst, $mask" %}
6564   ins_encode %{
6565     // Load into the destination first, then mask that same register in place.
6566     __ movzwq($dst$$Register, $mem$$Address);
6567     __ andl($dst$$Register, $mask$$constant);
6568   %}
6569   ins_pipe(ialu_reg_mem);
6570 %}
6571 
6572 // Load Integer
// loadI: matches a plain LoadI and emits a single movl from $mem into $dst.
6573 instruct loadI(rRegI dst, memory mem)
6574 %{
6575   match(Set dst (LoadI mem));
6576 
6577   ins_cost(125);
6578   format %{ "movl    $dst, $mem\t# int" %}
6579 
6580   ins_encode %{
6581     __ movl($dst$$Register, $mem$$Address);
6582   %}
6583 
6584   ins_pipe(ialu_reg_mem);
6585 %}
6586 
6587 // Load Integer (32 bit signed) to Byte (8 bit signed)
// loadI2B: folds the shift pair (LoadI << c) >> c into one movsbl load.
// Both shift counts use the immI_24 operand (presumably the constant 24 --
// confirm against its definition).
6588 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6589   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6590 
6591   ins_cost(125);
6592   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6593   ins_encode %{
6594     __ movsbl($dst$$Register, $mem$$Address);
6595   %}
6596   ins_pipe(ialu_reg_mem);
6597 %}
6598 
6599 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// loadI2UB: matches AndI(LoadI mem, mask) when the mask is an immI_255 operand
// and emits only movzbl -- the AND is absorbed into the load.
6600 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6601   match(Set dst (AndI (LoadI mem) mask));
6602 
6603   ins_cost(125);
6604   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6605   ins_encode %{
6606     __ movzbl($dst$$Register, $mem$$Address);
6607   %}
6608   ins_pipe(ialu_reg_mem);
6609 %}
6610 
6611 // Load Integer (32 bit signed) to Short (16 bit signed)
// loadI2S: folds the shift pair (LoadI << c) >> c into one movswl load.
// Both shift counts use the immI_16 operand (presumably the constant 16 --
// confirm against its definition).
6612 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6613   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6614 
6615   ins_cost(125);
6616   format %{ "movswl  $dst, $mem\t# int -> short" %}
6617   ins_encode %{
6618     __ movswl($dst$$Register, $mem$$Address);
6619   %}
6620   ins_pipe(ialu_reg_mem);
6621 %}
6622 
6623 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// loadI2US: matches AndI(LoadI mem, mask) when the mask is an immI_65535 operand
// and emits only movzwl -- the AND is absorbed into the load.
6624 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6625   match(Set dst (AndI (LoadI mem) mask));
6626 
6627   ins_cost(125);
6628   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6629   ins_encode %{
6630     __ movzwl($dst$$Register, $mem$$Address);
6631   %}
6632   ins_pipe(ialu_reg_mem);
6633 %}
6634 
6635 // Load Integer into Long Register
// loadI2L: folds ConvI2L(LoadI mem) into a single movslq load into an rRegL
// destination.
6636 instruct loadI2L(rRegL dst, memory mem)
6637 %{
6638   match(Set dst (ConvI2L (LoadI mem)));
6639 
6640   ins_cost(125);
6641   format %{ "movslq  $dst, $mem\t# int -> long" %}
6642 
6643   ins_encode %{
6644     __ movslq($dst$$Register, $mem$$Address);
6645   %}
6646 
6647   ins_pipe(ialu_reg_mem);
6648 %}
6649 
6650 // Load Integer with mask 0xFF into Long Register
// loadI2L_immI_255: matches ConvI2L(AndI(LoadI mem, mask)) when the mask is an
// immI_255 operand and emits only movzbq. No ins_cost is given, so the default
// cost applies.
6651 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6652   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6653 
6654   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6655   ins_encode %{
6656     __ movzbq($dst$$Register, $mem$$Address);
6657   %}
6658   ins_pipe(ialu_reg_mem);
6659 %}
6660 
6661 // Load Integer with mask 0xFFFF into Long Register
// loadI2L_immI_65535: matches ConvI2L(AndI(LoadI mem, mask)) when the mask is an
// immI_65535 operand and emits only movzwq. No ins_cost is given, so the
// default cost applies.
6662 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6663   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6664 
6665   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6666   ins_encode %{
6667     __ movzwq($dst$$Register, $mem$$Address);
6668   %}
6669   ins_pipe(ialu_reg_mem);
6670 %}
6671 
6672 // Load Integer with a 32-bit mask into Long Register
// loadI2L_immI: general form of ConvI2L(AndI(LoadI mem, mask)) for any immI mask;
// emits movl followed by andl on the destination register.
6673 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6674   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6675   effect(KILL cr);
6676 
6677   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6678             "andl    $dst, $mask" %}
6679   ins_encode %{
6680     // Load into the destination first, then mask that same register in place.
6681     __ movl($dst$$Register, $mem$$Address);
6682     __ andl($dst$$Register, $mask$$constant);
6683   %}
6684   ins_pipe(ialu_reg_mem);
6685 %}
6686 
6687 // Load Unsigned Integer into Long Register
// loadUI2L: matches LoadUI2L and emits a plain movl into an rRegL destination.
// NOTE(review): presumably relies on the 32-bit move clearing the upper half of
// the 64-bit register -- confirm.
6688 instruct loadUI2L(rRegL dst, memory mem)
6689 %{
6690   match(Set dst (LoadUI2L mem));
6691 
6692   ins_cost(125);
6693   format %{ "movl    $dst, $mem\t# uint -> long" %}
6694 
6695   ins_encode %{
6696     __ movl($dst$$Register, $mem$$Address);
6697   %}
6698 
6699   ins_pipe(ialu_reg_mem);
6700 %}
6701 
6702 // Load Long
// loadL: matches a plain LoadL and emits a single movq from $mem into $dst.
6703 instruct loadL(rRegL dst, memory mem)
6704 %{
6705   match(Set dst (LoadL mem));
6706 
6707   ins_cost(125);
6708   format %{ "movq    $dst, $mem\t# long" %}
6709 
6710   ins_encode %{
6711     __ movq($dst$$Register, $mem$$Address);
6712   %}
6713 
6714   ins_pipe(ialu_reg_mem); // XXX
6715 %}
6716 
6717 // Load Range
// loadRange: matches LoadRange and prints as movl. The instruction is assembled
// from primary opcode 0x8B through the REX_reg_mem / OpcP / reg_mem encode
// classes (their definitions are not visible in this chunk).
6718 instruct loadRange(rRegI dst, memory mem)
6719 %{
6720   match(Set dst (LoadRange mem));
6721 
6722   ins_cost(125); // XXX
6723   format %{ "movl    $dst, $mem\t# range" %}
6724   opcode(0x8B);
6725   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6726   ins_pipe(ialu_reg_mem);
6727 %}
6728 
6729 // Load Pointer
// loadP: matches LoadP and prints as movq. Same primary opcode 0x8B as the
// 32-bit loads here, but it uses the REX_reg_mem_wide encode class instead of
// REX_reg_mem.
6730 instruct loadP(rRegP dst, memory mem)
6731 %{
6732   match(Set dst (LoadP mem));
6733 
6734   ins_cost(125); // XXX
6735   format %{ "movq    $dst, $mem\t# ptr" %}
6736   opcode(0x8B);
6737   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6738   ins_pipe(ialu_reg_mem); // XXX
6739 %}
6740 
6741 // Load Compressed Pointer
// loadN: matches LoadN and emits a movl into an rRegN destination; the
// compressed pointer is loaded as-is, with no decode step in this rule.
6742 instruct loadN(rRegN dst, memory mem)
6743 %{
6744    match(Set dst (LoadN mem));
6745 
6746    ins_cost(125); // XXX
6747    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6748    ins_encode %{
6749      __ movl($dst$$Register, $mem$$Address);
6750    %}
6751    ins_pipe(ialu_reg_mem); // XXX
6752 %}
6753 
6754 
6755 // Load Klass Pointer
// loadKlass: matches LoadKlass and prints as movq; the encoding (opcode 0x8B
// with REX_reg_mem_wide) is identical to loadP's.
6756 instruct loadKlass(rRegP dst, memory mem)
6757 %{
6758   match(Set dst (LoadKlass mem));
6759 
6760   ins_cost(125); // XXX
6761   format %{ "movq    $dst, $mem\t# class" %}
6762   opcode(0x8B);
6763   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6764   ins_pipe(ialu_reg_mem); // XXX
6765 %}
6766 
6767 // Load narrow Klass Pointer
// loadNKlass: matches LoadNKlass and emits a movl into an rRegN destination;
// no decode of the narrow klass value happens in this rule.
6768 instruct loadNKlass(rRegN dst, memory mem)
6769 %{
6770   match(Set dst (LoadNKlass mem));
6771 
6772   ins_cost(125); // XXX
6773   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6774   ins_encode %{
6775     __ movl($dst$$Register, $mem$$Address);
6776   %}
6777   ins_pipe(ialu_reg_mem); // XXX
6778 %}
6779 
6780 // Load Float
// loadF: matches LoadF into a regF destination and prints as movss.
// Bytes are emitted in the order: OpcP (0xF3), the REX prefix from REX_reg_mem,
// then OpcS (0x0F), OpcT (0x10) and the reg/mem operand bytes.
6781 instruct loadF(regF dst, memory mem)
6782 %{
6783   match(Set dst (LoadF mem));
6784 
6785   ins_cost(145); // XXX
6786   format %{ "movss   $dst, $mem\t# float" %}
6787   opcode(0xF3, 0x0F, 0x10);
6788   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6789   ins_pipe(pipe_slow); // XXX
6790 %}
6791 
6792 // Load Double
// loadD_partial: LoadD variant selected when UseXmmLoadAndClearUpper is false;
// prints as movlpd (opcode bytes 0x66 0x0F 0x12). The loadD rule covers the
// opposite predicate value with the same match pattern.
6793 instruct loadD_partial(regD dst, memory mem)
6794 %{
6795   predicate(!UseXmmLoadAndClearUpper);
6796   match(Set dst (LoadD mem));
6797 
6798   ins_cost(145); // XXX
6799   format %{ "movlpd  $dst, $mem\t# double" %}
6800   opcode(0x66, 0x0F, 0x12);
6801   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6802   ins_pipe(pipe_slow); // XXX
6803 %}
6804 
// loadD: LoadD variant selected when UseXmmLoadAndClearUpper is true; prints as
// movsd (opcode bytes 0xF2 0x0F 0x10). The loadD_partial rule covers the
// opposite predicate value with the same match pattern.
6805 instruct loadD(regD dst, memory mem)
6806 %{
6807   predicate(UseXmmLoadAndClearUpper);
6808   match(Set dst (LoadD mem));
6809 
6810   ins_cost(145); // XXX
6811   format %{ "movsd   $dst, $mem\t# double" %}
6812   opcode(0xF2, 0x0F, 0x10);
6813   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6814   ins_pipe(pipe_slow); // XXX
6815 %}
6816 
6817 // Load Aligned Packed Byte to XMM register
// loadA8B: matches Load8B into a regD destination; encoded by the shared
// movq_ld encode class (defined outside this chunk) and printed as MOVQ.
6818 instruct loadA8B(regD dst, memory mem) %{
6819   match(Set dst (Load8B mem));
6820   ins_cost(125);
6821   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6822   ins_encode( movq_ld(dst, mem));
6823   ins_pipe( pipe_slow );
6824 %}
6825 
6826 // Load Aligned Packed Short to XMM register
// loadA4S: matches Load4S into a regD destination; encoded by the shared
// movq_ld encode class (defined outside this chunk) and printed as MOVQ.
6827 instruct loadA4S(regD dst, memory mem) %{
6828   match(Set dst (Load4S mem));
6829   ins_cost(125);
6830   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6831   ins_encode( movq_ld(dst, mem));
6832   ins_pipe( pipe_slow );
6833 %}
6834 
6835 // Load Aligned Packed Char to XMM register
// loadA4C: matches Load4C into a regD destination; encoded by the shared
// movq_ld encode class (defined outside this chunk) and printed as MOVQ.
6836 instruct loadA4C(regD dst, memory mem) %{
6837   match(Set dst (Load4C mem));
6838   ins_cost(125);
6839   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6840   ins_encode( movq_ld(dst, mem));
6841   ins_pipe( pipe_slow );
6842 %}
6843 
6844 // Load Aligned Packed Integer to XMM register
// load2IU: matches Load2I into a regD destination; encoded by the shared
// movq_ld encode class (defined outside this chunk) and printed as MOVQ.
// Note the name does not follow the loadA* pattern of the sibling packed loads.
6845 instruct load2IU(regD dst, memory mem) %{
6846   match(Set dst (Load2I mem));
6847   ins_cost(125);
6848   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6849   ins_encode( movq_ld(dst, mem));
6850   ins_pipe( pipe_slow );
6851 %}
6852 
6853 // Load Aligned Packed Single to XMM
// loadA2F: matches Load2F into a regD destination; encoded by the shared
// movq_ld encode class (defined outside this chunk) and printed as MOVQ.
// Costed at 145, versus 125 for the integer packed loads.
6854 instruct loadA2F(regD dst, memory mem) %{
6855   match(Set dst (Load2F mem));
6856   ins_cost(145);
6857   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6858   ins_encode( movq_ld(dst, mem));
6859   ins_pipe( pipe_slow );
6860 %}
6861 
6862 // Load Effective Address
// leaP8: materializes the address described by an indOffset8 operand into a
// pointer register. The match is on the operand itself (Set dst mem), and the
// rule prints as leaq (primary opcode 0x8D, REX_reg_mem_wide prefix).
6863 instruct leaP8(rRegP dst, indOffset8 mem)
6864 %{
6865   match(Set dst mem);
6866 
6867   ins_cost(110); // XXX
6868   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6869   opcode(0x8D);
6870   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6871   ins_pipe(ialu_reg_reg_fat);
6872 %}
6873 
// leaP32: same as leaP8 but for the indOffset32 addressing operand; prints as
// leaq (primary opcode 0x8D, REX_reg_mem_wide prefix).
6874 instruct leaP32(rRegP dst, indOffset32 mem)
6875 %{
6876   match(Set dst mem);
6877 
6878   ins_cost(110);
6879   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6880   opcode(0x8D);
6881   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6882   ins_pipe(ialu_reg_reg_fat);
6883 %}
6884 
6885 // instruct leaPIdx(rRegP dst, indIndex mem)
6886 // %{
6887 //   match(Set dst mem);
6888 
6889 //   ins_cost(110);
6890 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6891 //   opcode(0x8D);
6892 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6893 //   ins_pipe(ialu_reg_reg_fat);
6894 // %}
6895 
// leaPIdxOff: leaq for the indIndexOffset addressing operand (primary opcode
// 0x8D, REX_reg_mem_wide prefix).
6896 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6897 %{
6898   match(Set dst mem);
6899 
6900   ins_cost(110);
6901   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6902   opcode(0x8D);
6903   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6904   ins_pipe(ialu_reg_reg_fat);
6905 %}
6906 
// leaPIdxScale: leaq for the indIndexScale addressing operand (primary opcode
// 0x8D, REX_reg_mem_wide prefix).
6907 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6908 %{
6909   match(Set dst mem);
6910 
6911   ins_cost(110);
6912   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6913   opcode(0x8D);
6914   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6915   ins_pipe(ialu_reg_reg_fat);
6916 %}
6917 
// leaPIdxScaleOff: leaq for the indIndexScaleOffset addressing operand (primary
// opcode 0x8D, REX_reg_mem_wide prefix).
6918 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6919 %{
6920   match(Set dst mem);
6921 
6922   ins_cost(110);
6923   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6924   opcode(0x8D);
6925   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6926   ins_pipe(ialu_reg_reg_fat);
6927 %}
6928 
// leaPPosIdxScaleOff: leaq for the indPosIndexScaleOffset addressing operand
// (primary opcode 0x8D, REX_reg_mem_wide prefix).
6929 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6930 %{
6931   match(Set dst mem);
6932 
6933   ins_cost(110);
6934   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6935   opcode(0x8D);
6936   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6937   ins_pipe(ialu_reg_reg_fat);
6938 %}
6939 
6940 // Load Effective Address which uses Narrow (32-bits) oop
// leaPCompressedOopOffset: leaq for the indCompressedOopOffset operand. Only
// eligible when UseCompressedOops is on and the narrow-oop shift is non-zero;
// the *Narrow lea rules in this section cover the shift == 0 case.
6941 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6942 %{
6943   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6944   match(Set dst mem);
6945 
6946   ins_cost(110);
6947   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6948   opcode(0x8D);
6949   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6950   ins_pipe(ialu_reg_reg_fat);
6951 %}
6952 
// leaP8Narrow: leaq for the indOffset8Narrow operand; only eligible when the
// narrow-oop shift is 0. Encoding is the same 0x8D / REX_reg_mem_wide form as
// the other lea rules.
6953 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6954 %{
6955   predicate(Universe::narrow_oop_shift() == 0);
6956   match(Set dst mem);
6957 
6958   ins_cost(110); // XXX
6959   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6960   opcode(0x8D);
6961   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6962   ins_pipe(ialu_reg_reg_fat);
6963 %}
6964 
// leaP32Narrow: leaq for the indOffset32Narrow operand; only eligible when the
// narrow-oop shift is 0.
6965 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6966 %{
6967   predicate(Universe::narrow_oop_shift() == 0);
6968   match(Set dst mem);
6969 
6970   ins_cost(110);
6971   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6972   opcode(0x8D);
6973   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6974   ins_pipe(ialu_reg_reg_fat);
6975 %}
6976 
// leaPIdxOffNarrow: leaq for the indIndexOffsetNarrow operand; only eligible
// when the narrow-oop shift is 0.
6977 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6978 %{
6979   predicate(Universe::narrow_oop_shift() == 0);
6980   match(Set dst mem);
6981 
6982   ins_cost(110);
6983   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6984   opcode(0x8D);
6985   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6986   ins_pipe(ialu_reg_reg_fat);
6987 %}
6988 
// leaPIdxScaleNarrow: leaq for the indIndexScaleNarrow operand; only eligible
// when the narrow-oop shift is 0.
6989 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6990 %{
6991   predicate(Universe::narrow_oop_shift() == 0);
6992   match(Set dst mem);
6993 
6994   ins_cost(110);
6995   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6996   opcode(0x8D);
6997   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6998   ins_pipe(ialu_reg_reg_fat);
6999 %}
7000 
// leaPIdxScaleOffNarrow: leaq for the indIndexScaleOffsetNarrow operand; only
// eligible when the narrow-oop shift is 0.
7001 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7002 %{
7003   predicate(Universe::narrow_oop_shift() == 0);
7004   match(Set dst mem);
7005 
7006   ins_cost(110);
7007   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
7008   opcode(0x8D);
7009   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7010   ins_pipe(ialu_reg_reg_fat);
7011 %}
7012 
// leaPPosIdxScaleOffNarrow: leaq for the indPosIndexScaleOffsetNarrow operand;
// only eligible when the narrow-oop shift is 0.
7013 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7014 %{
7015   predicate(Universe::narrow_oop_shift() == 0);
7016   match(Set dst mem);
7017 
7018   ins_cost(110);
7019   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7020   opcode(0x8D);
7021   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7022   ins_pipe(ialu_reg_reg_fat);
7023 %}
7024 
// loadConI: materializes an arbitrary int constant (immI) into a register via
// the load_immI encode class; prints as movl. No ins_cost is given, so the
// default applies; loadConI0 offers a cost-50 alternative for immI0.
7025 instruct loadConI(rRegI dst, immI src)
7026 %{
7027   match(Set dst src);
7028 
7029   format %{ "movl    $dst, $src\t# int" %}
7030   ins_encode(load_immI(dst, src));
7031   ins_pipe(ialu_reg_fat); // XXX
7032 %}
7033 
// loadConI0: materializes the immI0 constant by xor-ing the destination with
// itself (primary opcode 0x33, both operands are dst). The flags register is
// declared killed, and the rule is costed at 50.
7034 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
7035 %{
7036   match(Set dst src);
7037   effect(KILL cr);
7038 
7039   ins_cost(50);
7040   format %{ "xorl    $dst, $dst\t# int" %}
7041   opcode(0x33); /* + rd */
7042   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
7043   ins_pipe(ialu_reg);
7044 %}
7045 
// loadConL: materializes an arbitrary long constant (immL) via the load_immL
// encode class; prints as movq. At cost 150 it is the most expensive of the
// long-constant rules here (loadConL0: 50, loadConUL32: 60, loadConL32: 70).
7046 instruct loadConL(rRegL dst, immL src)
7047 %{
7048   match(Set dst src);
7049 
7050   ins_cost(150);
7051   format %{ "movq    $dst, $src\t# long" %}
7052   ins_encode(load_immL(dst, src));
7053   ins_pipe(ialu_reg);
7054 %}
7055 
// loadConL0: materializes the immL0 constant by xor-ing the destination with
// itself. The format prints xorl and the prefix comes from REX_reg_reg rather
// than a *_wide encode class.
// NOTE(review): presumably the 32-bit xor is enough to clear the whole 64-bit
// register -- confirm.
7056 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7057 %{
7058   match(Set dst src);
7059   effect(KILL cr);
7060 
7061   ins_cost(50);
7062   format %{ "xorl    $dst, $dst\t# long" %}
7063   opcode(0x33); /* + rd */
7064   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
7065   ins_pipe(ialu_reg); // XXX
7066 %}
7067 
// loadConUL32: materializes a long constant accepted by the immUL32 operand via
// the load_immUL32 encode class; prints as movl. Costed at 60.
7068 instruct loadConUL32(rRegL dst, immUL32 src)
7069 %{
7070   match(Set dst src);
7071 
7072   ins_cost(60);
7073   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
7074   ins_encode(load_immUL32(dst, src));
7075   ins_pipe(ialu_reg);
7076 %}
7077 
// loadConL32: materializes a long constant accepted by the immL32 operand via
// the load_immL32 encode class; prints as movq. Costed at 70.
7078 instruct loadConL32(rRegL dst, immL32 src)
7079 %{
7080   match(Set dst src);
7081 
7082   ins_cost(70);
7083   format %{ "movq    $dst, $src\t# long (32-bit)" %}
7084   ins_encode(load_immL32(dst, src));
7085   ins_pipe(ialu_reg);
7086 %}
7087 
// loadConP: materializes an arbitrary pointer constant (immP) via the load_immP
// encode class; prints as movq. No ins_cost is given, so the default applies;
// loadConP0 (50) and loadConP31 (60) are the explicitly costed special cases.
7088 instruct loadConP(rRegP dst, immP src)
7089 %{
7090   match(Set dst src);
7091 
7092   format %{ "movq    $dst, $src\t# ptr" %}
7093   ins_encode(load_immP(dst, src));
7094   ins_pipe(ialu_reg_fat); // XXX
7095 %}
7096 
// loadConP0: materializes the immP0 constant by xor-ing the destination with
// itself (primary opcode 0x33, REX_reg_reg prefix). The flags register is
// declared killed, and the rule is costed at 50.
7097 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7098 %{
7099   match(Set dst src);
7100   effect(KILL cr);
7101 
7102   ins_cost(50);
7103   format %{ "xorl    $dst, $dst\t# ptr" %}
7104   opcode(0x33); /* + rd */
7105   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
7106   ins_pipe(ialu_reg);
7107 %}
7108 
// loadConP31: materializes a pointer constant accepted by the immP31 operand via
// the load_immP31 encode class; prints as movl. Costed at 60.
// NOTE(review): the rule declares KILL cr although the format shows only a
// movl; the reason is not visible in this chunk -- check load_immP31.
7109 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7110 %{
7111   match(Set dst src);
7112   effect(KILL cr);
7113 
7114   ins_cost(60);
7115   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
7116   ins_encode(load_immP31(dst, src));
7117   ins_pipe(ialu_reg);
7118 %}
7119 
// loadConF: materializes a float constant (immF) via the load_conF encode
// class. The format prints it as a movss from a memory location ([$src]).
7120 instruct loadConF(regF dst, immF src)
7121 %{
7122   match(Set dst src);
7123   ins_cost(125);
7124 
7125   format %{ "movss   $dst, [$src]" %}
7126   ins_encode(load_conF(dst, src));
7127   ins_pipe(pipe_slow);
7128 %}
7129 
// loadConN0: materializes the immN0 constant by xor-ing the destination with
// itself. The flags register is declared killed.
// The format now names $dst for both operands, matching what the encoding
// actually emits (it previously printed $src as the second operand).
7130 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7131   match(Set dst src);
7132   effect(KILL cr);
7133   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
7134   ins_encode %{
7135     __ xorq($dst$$Register, $dst$$Register);
7136   %}
7137   ins_pipe(ialu_reg);
7138 %}
7139 
// loadConN: materializes a compressed-pointer constant (immN) with
// set_narrow_oop. A NULL constant must never reach this rule; that case trips
// ShouldNotReachHere().
7140 instruct loadConN(rRegN dst, immN src) %{
7141   match(Set dst src);
7142 
7143   ins_cost(125);
7144   format %{ "movl    $dst, $src\t# compressed ptr" %}
7145   ins_encode %{
7146     address oop_con = (address)$src$$constant;
7147     if (oop_con != NULL) {
7148       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7149     } else {
7150       ShouldNotReachHere();
7151     }
7152   %}
7153   ins_pipe(ialu_reg_fat); // XXX
7154 %}
7155 
// loadConF0: materializes the immF0 constant by xor-ing the register with
// itself; prints as xorps (opcode bytes 0x0F 0x57). Costed at 100, below the
// 125 of the general loadConF.
7156 instruct loadConF0(regF dst, immF0 src)
7157 %{
7158   match(Set dst src);
7159   ins_cost(100);
7160 
7161   format %{ "xorps   $dst, $dst\t# float 0.0" %}
7162   opcode(0x0F, 0x57);
7163   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
7164   ins_pipe(pipe_slow);
7165 %}
7166 
7167 // Use the same format since predicate() can not be used here.
// loadConD: materializes a double constant (immD) via the load_conD encode
// class. The format always prints movsd.
// NOTE(review): the actual instruction may differ depending on what load_conD
// emits (defined outside this chunk) -- confirm.
7168 instruct loadConD(regD dst, immD src)
7169 %{
7170   match(Set dst src);
7171   ins_cost(125);
7172 
7173   format %{ "movsd   $dst, [$src]" %}
7174   ins_encode(load_conD(dst, src));
7175   ins_pipe(pipe_slow);
7176 %}
7177 
// loadConD0: materializes the immD0 constant by xor-ing the register with
// itself; prints as xorpd (opcode bytes 0x66 0x0F 0x57). Costed at 100, below
// the 125 of the general loadConD.
7178 instruct loadConD0(regD dst, immD0 src)
7179 %{
7180   match(Set dst src);
7181   ins_cost(100);
7182 
7183   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
7184   opcode(0x66, 0x0F, 0x57);
7185   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
7186   ins_pipe(pipe_slow);
7187 %}
7188 
// loadSSI: moves an int from a stackSlotI operand into a register; prints as
// movl (primary opcode 0x8B, REX_reg_mem prefix).
7189 instruct loadSSI(rRegI dst, stackSlotI src)
7190 %{
7191   match(Set dst src);
7192 
7193   ins_cost(125);
7194   format %{ "movl    $dst, $src\t# int stk" %}
7195   opcode(0x8B);
7196   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7197   ins_pipe(ialu_reg_mem);
7198 %}
7199 
// loadSSL: moves a long from a stackSlotL operand into a register; prints as
// movq (primary opcode 0x8B, REX_reg_mem_wide prefix).
7200 instruct loadSSL(rRegL dst, stackSlotL src)
7201 %{
7202   match(Set dst src);
7203 
7204   ins_cost(125);
7205   format %{ "movq    $dst, $src\t# long stk" %}
7206   opcode(0x8B);
7207   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7208   ins_pipe(ialu_reg_mem);
7209 %}
7210 
// loadSSP: moves a pointer from a stackSlotP operand into a register; prints as
// movq (primary opcode 0x8B, REX_reg_mem_wide prefix).
7211 instruct loadSSP(rRegP dst, stackSlotP src)
7212 %{
7213   match(Set dst src);
7214 
7215   ins_cost(125);
7216   format %{ "movq    $dst, $src\t# ptr stk" %}
7217   opcode(0x8B);
7218   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7219   ins_pipe(ialu_reg_mem);
7220 %}
7221 
// loadSSF: moves a float from a stackSlotF operand into a regF register; prints
// as movss (opcode bytes 0xF3 0x0F 0x10, same byte order as loadF).
7222 instruct loadSSF(regF dst, stackSlotF src)
7223 %{
7224   match(Set dst src);
7225 
7226   ins_cost(125);
7227   format %{ "movss   $dst, $src\t# float stk" %}
7228   opcode(0xF3, 0x0F, 0x10);
7229   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
7230   ins_pipe(pipe_slow); // XXX
7231 %}
7232 
7233 // Use the same format since predicate() can not be used here.
// loadSSD: moves a double from a stackSlotD operand into a regD register. The
// address is built directly as rsp + the slot's displacement, and the move is
// delegated to the assembler's movdbl helper. The format always prints movsd.
// NOTE(review): movdbl presumably chooses the actual load instruction itself,
// so the printed mnemonic may not match -- confirm.
7234 instruct loadSSD(regD dst, stackSlotD src)
7235 %{
7236   match(Set dst src);
7237 
7238   ins_cost(125);
7239   format %{ "movsd   $dst, $src\t# double stk" %}
7240   ins_encode  %{
7241     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7242   %}
7243   ins_pipe(pipe_slow); // XXX
7244 %}
7245 
7246 // Prefetch instructions.
7247 // Must be safe to execute with invalid address (cannot fault).
7248 
// prefetchr: PrefetchRead rule selected when ReadPrefetchInstr == 3. Emits
// opcode bytes 0x0F 0x0D with 0x00 passed to RM_opc_mem (the /0 form noted on
// the opcode line). The rule has no destination operand.
7249 instruct prefetchr( memory mem ) %{
7250   predicate(ReadPrefetchInstr==3);
7251   match(PrefetchRead mem);
7252   ins_cost(125);
7253 
7254   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
7255   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
7256   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7257   ins_pipe(ialu_mem);
7258 %}
7259 
// prefetchrNTA: PrefetchRead rule selected when ReadPrefetchInstr == 0. Emits
// opcode bytes 0x0F 0x18 with 0x00 passed to RM_opc_mem (the /0 form).
7260 instruct prefetchrNTA( memory mem ) %{
7261   predicate(ReadPrefetchInstr==0);
7262   match(PrefetchRead mem);
7263   ins_cost(125);
7264 
7265   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
7266   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7267   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7268   ins_pipe(ialu_mem);
7269 %}
7270 
// prefetchrT0: PrefetchRead rule selected when ReadPrefetchInstr == 1. Emits
// opcode bytes 0x0F 0x18 with 0x01 passed to RM_opc_mem (the /1 form).
7271 instruct prefetchrT0( memory mem ) %{
7272   predicate(ReadPrefetchInstr==1);
7273   match(PrefetchRead mem);
7274   ins_cost(125);
7275 
7276   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
7277   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7278   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7279   ins_pipe(ialu_mem);
7280 %}
7281 
// prefetchrT2: PrefetchRead rule selected when ReadPrefetchInstr == 2. Emits
// opcode bytes 0x0F 0x18 with 0x03 passed to RM_opc_mem (the /3 form).
7282 instruct prefetchrT2( memory mem ) %{
7283   predicate(ReadPrefetchInstr==2);
7284   match(PrefetchRead mem);
7285   ins_cost(125);
7286 
7287   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
7288   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7289   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7290   ins_pipe(ialu_mem);
7291 %}
7292 
// prefetchw: PrefetchWrite rule selected when AllocatePrefetchInstr == 3. Emits
// opcode bytes 0x0F 0x0D with 0x01 passed to RM_opc_mem (the /1 form).
7293 instruct prefetchw( memory mem ) %{
7294   predicate(AllocatePrefetchInstr==3);
7295   match(PrefetchWrite mem);
7296   ins_cost(125);
7297 
7298   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
7299   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
7300   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7301   ins_pipe(ialu_mem);
7302 %}
7303 
// prefetchwNTA: PrefetchWrite rule selected when AllocatePrefetchInstr == 0.
// Uses the same opcode bytes and /0 digit as prefetchrNTA; only the matched
// node and the format text differ.
7304 instruct prefetchwNTA( memory mem ) %{
7305   predicate(AllocatePrefetchInstr==0);
7306   match(PrefetchWrite mem);
7307   ins_cost(125);
7308 
7309   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
7310   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7311   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
7312   ins_pipe(ialu_mem);
7313 %}
7314 
// prefetchwT0: PrefetchWrite rule selected when AllocatePrefetchInstr == 1.
// Uses the same opcode bytes and /1 digit as prefetchrT0.
7315 instruct prefetchwT0( memory mem ) %{
7316   predicate(AllocatePrefetchInstr==1);
7317   match(PrefetchWrite mem);
7318   ins_cost(125);
7319 
7320   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
7321   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
7322   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
7323   ins_pipe(ialu_mem);
7324 %}
7325 
// prefetchwT2: PrefetchWrite rule selected when AllocatePrefetchInstr == 2.
// Uses the same opcode bytes and /3 digit as prefetchrT2.
7326 instruct prefetchwT2( memory mem ) %{
7327   predicate(AllocatePrefetchInstr==2);
7328   match(PrefetchWrite mem);
7329   ins_cost(125);
7330 
7331   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
7332   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
7333   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
7334   ins_pipe(ialu_mem);
7335 %}
7336 
7337 //----------Store Instructions-------------------------------------------------
7338 
7339 // Store Byte
// storeB: matches StoreB of a register value; prints as movb (primary opcode
// 0x88). It uses the byte-register prefix class REX_breg_mem rather than
// REX_reg_mem.
7340 instruct storeB(memory mem, rRegI src)
7341 %{
7342   match(Set mem (StoreB mem src));
7343 
7344   ins_cost(125); // XXX
7345   format %{ "movb    $mem, $src\t# byte" %}
7346   opcode(0x88);
7347   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
7348   ins_pipe(ialu_mem_reg);
7349 %}
7350 
7351 // Store Char/Short
// storeC: matches StoreC of a register value; prints as movw. Encoded like
// storeI (primary opcode 0x89) but with SizePrefix emitted first.
7352 instruct storeC(memory mem, rRegI src)
7353 %{
7354   match(Set mem (StoreC mem src));
7355 
7356   ins_cost(125); // XXX
7357   format %{ "movw    $mem, $src\t# char/short" %}
7358   opcode(0x89);
7359   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7360   ins_pipe(ialu_mem_reg);
7361 %}
7362 
7363 // Store Integer
// storeI: matches StoreI of a register value; prints as movl (primary opcode
// 0x89, REX_reg_mem prefix).
7364 instruct storeI(memory mem, rRegI src)
7365 %{
7366   match(Set mem (StoreI mem src));
7367 
7368   ins_cost(125); // XXX
7369   format %{ "movl    $mem, $src\t# int" %}
7370   opcode(0x89);
7371   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
7372   ins_pipe(ialu_mem_reg);
7373 %}
7374 
7375 // Store Long
// storeL: matches StoreL of a register value; prints as movq (primary opcode
// 0x89, REX_reg_mem_wide prefix).
7376 instruct storeL(memory mem, rRegL src)
7377 %{
7378   match(Set mem (StoreL mem src));
7379 
7380   ins_cost(125); // XXX
7381   format %{ "movq    $mem, $src\t# long" %}
7382   opcode(0x89);
7383   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7384   ins_pipe(ialu_mem_reg); // XXX
7385 %}
7386 
7387 // Store Pointer
// storeP: matches StoreP of a register value; prints as movq (primary opcode
// 0x89, REX_reg_mem_wide prefix). The source operand class is any_RegP, not
// the rRegP used by the pointer loads.
7388 instruct storeP(memory mem, any_RegP src)
7389 %{
7390   match(Set mem (StoreP mem src));
7391 
7392   ins_cost(125); // XXX
7393   format %{ "movq    $mem, $src\t# ptr" %}
7394   opcode(0x89);
7395   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
7396   ins_pipe(ialu_mem_reg);
7397 %}
7398 
// storeImmP0: stores a null pointer (immP0) by writing r12 with movq rather
// than an immediate. Only eligible with UseCompressedOops and a NULL
// narrow-oop base; the format text describes this as R12_heapbase==0.
// NOTE(review): assumes r12 holds the heap base and is therefore zero under
// this predicate -- confirm.
7399 instruct storeImmP0(memory mem, immP0 zero)
7400 %{
7401   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7402   match(Set mem (StoreP mem zero));
7403 
7404   ins_cost(125); // XXX
7405   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
7406   ins_encode %{
7407     __ movq($mem$$Address, r12);
7408   %}
7409   ins_pipe(ialu_mem_reg);
7410 %}
7411 
7412 // Store NULL Pointer, mark word, or other simple pointer constant.
// storeImmP: stores a pointer constant accepted by the immP31 operand as an
// immediate: primary opcode 0xC7 with digit /0, REX_mem_wide prefix and a
// trailing Con32(src). Costed at 150, above the register-source stores (125).
7413 instruct storeImmP(memory mem, immP31 src)
7414 %{
7415   match(Set mem (StoreP mem src));
7416 
7417   ins_cost(150); // XXX
7418   format %{ "movq    $mem, $src\t# ptr" %}
7419   opcode(0xC7); /* C7 /0 */
7420   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7421   ins_pipe(ialu_mem_imm);
7422 %}
7423 
7424 // Store Compressed Pointer
// storeN: matches StoreN of an rRegN register value and emits a movl to memory.
7425 instruct storeN(memory mem, rRegN src)
7426 %{
7427   match(Set mem (StoreN mem src));
7428 
7429   ins_cost(125); // XXX
7430   format %{ "movl    $mem, $src\t# compressed ptr" %}
7431   ins_encode %{
7432     __ movl($mem$$Address, $src$$Register);
7433   %}
7434   ins_pipe(ialu_mem_reg);
7435 %}
7436 
// storeImmN0: stores a null compressed pointer (immN0) by writing r12 with movl.
// Only eligible when the narrow-oop base is NULL. Unlike the sibling *0 store
// rules, the predicate does not also test UseCompressedOops.
// NOTE(review): assumes r12 holds the heap base and is therefore zero under
// this predicate -- confirm.
7437 instruct storeImmN0(memory mem, immN0 zero)
7438 %{
7439   predicate(Universe::narrow_oop_base() == NULL);
7440   match(Set mem (StoreN mem zero));
7441 
7442   ins_cost(125); // XXX
7443   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7444   ins_encode %{
7445     __ movl($mem$$Address, r12);
7446   %}
7447   ins_pipe(ialu_mem_reg);
7448 %}
7449 
// storeImmN: stores a compressed-pointer constant (immN) straight to memory.
// A non-NULL constant goes through set_narrow_oop; a NULL constant is written
// as a 32-bit zero immediate.
7450 instruct storeImmN(memory mem, immN src)
7451 %{
7452   match(Set mem (StoreN mem src));
7453 
7454   ins_cost(150); // XXX
7455   format %{ "movl    $mem, $src\t# compressed ptr" %}
7456   ins_encode %{
7457     address oop_con = (address)$src$$constant;
7458     if (oop_con != NULL) {
7459       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7460     } else {
7461       __ movl($mem$$Address, (int32_t)0);
7462     }
7463   %}
7464   ins_pipe(ialu_mem_imm);
7465 %}
7466 
7467 // Store Integer Immediate
// storeImmI0: stores int zero (immI0) by writing r12 with movl rather than an
// immediate. Only eligible with UseCompressedOops and a NULL narrow-oop base.
// Costed at 125, below the 150 of the immediate form storeImmI.
// NOTE(review): assumes r12 is zero under this predicate (the format says
// R12_heapbase==0) -- confirm.
7468 instruct storeImmI0(memory mem, immI0 zero)
7469 %{
7470   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7471   match(Set mem (StoreI mem zero));
7472 
7473   ins_cost(125); // XXX
7474   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7475   ins_encode %{
7476     __ movl($mem$$Address, r12);
7477   %}
7478   ins_pipe(ialu_mem_reg);
7479 %}
7480 
// storeImmI: stores an int constant (immI) as an immediate: primary opcode 0xC7
// with digit /0, REX_mem prefix and a trailing Con32(src). Costed at 150.
7481 instruct storeImmI(memory mem, immI src)
7482 %{
7483   match(Set mem (StoreI mem src));
7484 
7485   ins_cost(150);
7486   format %{ "movl    $mem, $src\t# int" %}
7487   opcode(0xC7); /* C7 /0 */
7488   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7489   ins_pipe(ialu_mem_imm);
7490 %}
7491 
7492 // Store Long Immediate
// storeImmL0: stores long zero (immL0) by writing r12 with movq rather than an
// immediate. Only eligible with UseCompressedOops and a NULL narrow-oop base.
// NOTE(review): assumes r12 is zero under this predicate (the format says
// R12_heapbase==0) -- confirm.
7493 instruct storeImmL0(memory mem, immL0 zero)
7494 %{
7495   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7496   match(Set mem (StoreL mem zero));
7497 
7498   ins_cost(125); // XXX
7499   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7500   ins_encode %{
7501     __ movq($mem$$Address, r12);
7502   %}
7503   ins_pipe(ialu_mem_reg);
7504 %}
7505 
// storeImmL: stores a long constant accepted by the immL32 operand as an
// immediate: primary opcode 0xC7 with digit /0, REX_mem_wide prefix and a
// trailing Con32(src). Only a 32-bit constant is emitted. Costed at 150.
7506 instruct storeImmL(memory mem, immL32 src)
7507 %{
7508   match(Set mem (StoreL mem src));
7509 
7510   ins_cost(150);
7511   format %{ "movq    $mem, $src\t# long" %}
7512   opcode(0xC7); /* C7 /0 */
7513   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7514   ins_pipe(ialu_mem_imm);
7515 %}
7516 
7517 // Store Short/Char Immediate
// storeImmC0: stores a zero short/char (StoreC of immI0) by writing r12 with
// movw. Only eligible with UseCompressedOops and a NULL narrow-oop base.
// NOTE(review): assumes r12 is zero under this predicate (the format says
// R12_heapbase==0) -- confirm.
7518 instruct storeImmC0(memory mem, immI0 zero)
7519 %{
7520   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7521   match(Set mem (StoreC mem zero));
7522 
7523   ins_cost(125); // XXX
7524   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7525   ins_encode %{
7526     __ movw($mem$$Address, r12);
7527   %}
7528   ins_pipe(ialu_mem_reg);
7529 %}
7530 
// storeImmI16: stores a 16-bit constant (StoreC of immI16) as an immediate; only
// eligible when UseStoreImmI16 is set. Same 0xC7 /0 form as storeImmI, but with
// SizePrefix emitted first and a trailing Con16(src) instead of Con32.
7531 instruct storeImmI16(memory mem, immI16 src)
7532 %{
7533   predicate(UseStoreImmI16);
7534   match(Set mem (StoreC mem src));
7535 
7536   ins_cost(150);
7537   format %{ "movw    $mem, $src\t# short/char" %}
7538   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7539   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7540   ins_pipe(ialu_mem_imm);
7541 %}
7542 
7543 // Store Byte Immediate
// storeImmB0: stores a zero byte (StoreB of immI0) by writing r12 with movb
// rather than an immediate. Only eligible with UseCompressedOops and a NULL
// narrow-oop base.
// The format comment now says "byte"; it previously said "short/char", copied
// from the StoreC variant.
// NOTE(review): assumes r12 is zero under this predicate (the format says
// R12_heapbase==0) -- confirm.
7544 instruct storeImmB0(memory mem, immI0 zero)
7545 %{
7546   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7547   match(Set mem (StoreB mem zero));
7548 
7549   ins_cost(125); // XXX
7550   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7551   ins_encode %{
7552     __ movb($mem$$Address, r12);
7553   %}
7554   ins_pipe(ialu_mem_reg);
7555 %}
7556 
// storeImmB: stores a byte constant (StoreB of immI8) as an immediate: primary
// opcode 0xC6 with digit /0, REX_mem prefix and a trailing Con8or32(src).
// Costed at 150.
7557 instruct storeImmB(memory mem, immI8 src)
7558 %{
7559   match(Set mem (StoreB mem src));
7560 
7561   ins_cost(150); // XXX
7562   format %{ "movb    $mem, $src\t# byte" %}
7563   opcode(0xC6); /* C6 /0 */
7564   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7565   ins_pipe(ialu_mem_imm);
7566 %}
7567 
7568 // Store Aligned Packed Byte XMM register to memory
// storeA8B: matches Store8B from a regD source; encoded by the shared movq_st
// encode class (defined outside this chunk) and printed as MOVQ.
7569 instruct storeA8B(memory mem, regD src) %{
7570   match(Set mem (Store8B mem src));
7571   ins_cost(145);
7572   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7573   ins_encode( movq_st(mem, src));
7574   ins_pipe( pipe_slow );
7575 %}
7576 
7577 // Store Aligned Packed Char/Short XMM register to memory
7578 instruct storeA4C(memory mem, regD src) %{  // Store4C from a regD operand via the movq_st encoding
7579   match(Set mem (Store4C mem src));
7580   ins_cost(145);
7581   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7582   ins_encode( movq_st(mem, src));  // same encoding class as the other packed stores (8B, 2I, 2F)
7583   ins_pipe( pipe_slow );
7584 %}
7585 
7586 // Store Aligned Packed Integer XMM register to memory
7587 instruct storeA2I(memory mem, regD src) %{  // Store2I from a regD operand via the movq_st encoding
7588   match(Set mem (Store2I mem src));
7589   ins_cost(145);
7590   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7591   ins_encode( movq_st(mem, src));  // same encoding class as the other packed stores (8B, 4C, 2F)
7592   ins_pipe( pipe_slow );
7593 %}
7594 
7595 // Store CMS card-mark Immediate
7596 instruct storeImmCM0_reg(memory mem, immI0 zero)  // StoreCM of the immI0 operand, emitted as a byte store of r12
7597 %{
7598   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));  // only with compressed oops and a NULL narrow-oop base
7599   match(Set mem (StoreCM mem zero));
7600 
7601   ins_cost(125); // XXX
7602   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7603   ins_encode %{
7604     __ movb($mem$$Address, r12);  // r12 is the value source; the format text states it holds 0 under this predicate
7605   %}
7606   ins_pipe(ialu_mem_reg);
7607 %}
7608 
7609 instruct storeImmCM0(memory mem, immI0 src)  // StoreCM of the immI0 operand as a byte store-immediate (no predicate)
7610 %{
7611   match(Set mem (StoreCM mem src));
7612 
7613   ins_cost(150); // XXX
7614   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7615   opcode(0xC6); /* C6 /0 */
7616   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));  // same encoding sequence as storeImmB
7617   ins_pipe(ialu_mem_imm);
7618 %}
7619 
7620 // Store Aligned Packed Single Float XMM register to memory
7621 instruct storeA2F(memory mem, regD src) %{  // Store2F from a regD operand via the movq_st encoding
7622   match(Set mem (Store2F mem src));
7623   ins_cost(145);
7624   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7625   ins_encode( movq_st(mem, src));  // same encoding class as the other packed stores (8B, 4C, 2I)
7626   ins_pipe( pipe_slow );
7627 %}
7628 
7629 // Store Float
7630 instruct storeF(memory mem, regF src)  // StoreF from a regF operand (movss to memory)
7631 %{
7632   match(Set mem (StoreF mem src));
7633 
7634   ins_cost(95); // XXX
7635   format %{ "movss   $mem, $src\t# float" %}
7636   opcode(0xF3, 0x0F, 0x11);  // F3 0F 11
7637   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));  // F3 is emitted before the REX prefix, 0F 11 after it
7638   ins_pipe(pipe_slow); // XXX
7639 %}
7640 
7641 // Store immediate Float value (it is faster than store from XMM register)
7642 instruct storeF0(memory mem, immF0 zero)  // StoreF of the immF0 operand, emitted as a 32-bit store of r12
7643 %{
7644   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));  // only with compressed oops and a NULL narrow-oop base
7645   match(Set mem (StoreF mem zero));
7646 
7647   ins_cost(25); // XXX
7648   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7649   ins_encode %{
7650     __ movl($mem$$Address, r12);  // r12 is the value source; the format text states it holds 0 under this predicate
7651   %}
7652   ins_pipe(ialu_mem_reg);
7653 %}
7654 
7655 instruct storeF_imm(memory mem, immF src)  // StoreF of an immF operand as a 32-bit store-immediate
7656 %{
7657   match(Set mem (StoreF mem src));
7658 
7659   ins_cost(50);
7660   format %{ "movl    $mem, $src\t# float" %}
7661   opcode(0xC7); /* C7 /0 */
7662   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));  // constant goes through the Con32F_as_bits encoding class
7663   ins_pipe(ialu_mem_imm);
7664 %}
7665 
7666 // Store Double
7667 instruct storeD(memory mem, regD src)  // StoreD from a regD operand (movsd to memory)
7668 %{
7669   match(Set mem (StoreD mem src));
7670 
7671   ins_cost(95); // XXX
7672   format %{ "movsd   $mem, $src\t# double" %}
7673   opcode(0xF2, 0x0F, 0x11);  // F2 0F 11
7674   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));  // F2 is emitted before the REX prefix, 0F 11 after it
7675   ins_pipe(pipe_slow); // XXX
7676 %}
7677 
7678 // Store immediate double 0.0 (it is faster than store from XMM register)
7679 instruct storeD0_imm(memory mem, immD0 src)  // StoreD of the immD0 operand as a REX.W store-immediate
7680 %{
7681   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));  // exact complement of the storeD0 predicate
7682   match(Set mem (StoreD mem src));
7683 
7684   ins_cost(50);
7685   format %{ "movq    $mem, $src\t# double 0." %}
7686   opcode(0xC7); /* C7 /0 */
7687   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));  // wide REX form; constant via Con32F_as_bits
7688   ins_pipe(ialu_mem_imm);
7689 %}
7690 
7691 instruct storeD0(memory mem, immD0 zero)  // StoreD of the immD0 operand, emitted as a 64-bit store of r12
7692 %{
7693   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));  // only with compressed oops and a NULL narrow-oop base
7694   match(Set mem (StoreD mem zero));
7695 
7696   ins_cost(25); // XXX
7697   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7698   ins_encode %{
7699     __ movq($mem$$Address, r12);  // r12 is the value source; the format text states it holds 0 under this predicate
7700   %}
7701   ins_pipe(ialu_mem_reg);
7702 %}
7703 
7704 instruct storeSSI(stackSlotI dst, rRegI src)  // copy an int register into an int stack slot
7705 %{
7706   match(Set dst src);
7707 
7708   ins_cost(100);
7709   format %{ "movl    $dst, $src\t# int stk" %}
7710   opcode(0x89);  // 89 /r
7711   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // non-wide REX form
7712   ins_pipe( ialu_mem_reg );
7713 %}
7714 
7715 instruct storeSSL(stackSlotL dst, rRegL src)  // copy a long register into a long stack slot
7716 %{
7717   match(Set dst src);
7718 
7719   ins_cost(100);
7720   format %{ "movq    $dst, $src\t# long stk" %}
7721   opcode(0x89);  // 89 /r
7722   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // wide REX form
7723   ins_pipe(ialu_mem_reg);
7724 %}
7725 
7726 instruct storeSSP(stackSlotP dst, rRegP src)  // copy a pointer register into a pointer stack slot
7727 %{
7728   match(Set dst src);
7729 
7730   ins_cost(100);
7731   format %{ "movq    $dst, $src\t# ptr stk" %}
7732   opcode(0x89);  // 89 /r
7733   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // wide REX form, same sequence as storeSSL
7734   ins_pipe(ialu_mem_reg);
7735 %}
7736 
7737 instruct storeSSF(stackSlotF dst, regF src)  // copy a float register into a float stack slot
7738 %{
7739   match(Set dst src);
7740 
7741   ins_cost(95); // XXX
7742   format %{ "movss   $dst, $src\t# float stk" %}
7743   opcode(0xF3, 0x0F, 0x11);  // F3 0F 11, same opcode bytes as storeF
7744   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));  // F3 before the REX prefix, 0F 11 after it
7745   ins_pipe(pipe_slow); // XXX
7746 %}
7747 
7748 instruct storeSSD(stackSlotD dst, regD src)  // copy a double register into a double stack slot
7749 %{
7750   match(Set dst src);
7751 
7752   ins_cost(95); // XXX
7753   format %{ "movsd   $dst, $src\t# double stk" %}
7754   opcode(0xF2, 0x0F, 0x11);  // F2 0F 11, same opcode bytes as storeD
7755   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));  // F2 before the REX prefix, 0F 11 after it
7756   ins_pipe(pipe_slow); // XXX
7757 %}
7758 
7759 //----------BSWAP Instructions-------------------------------------------------
7760 instruct bytes_reverse_int(rRegI dst) %{  // ReverseBytesI performed in place on dst
7761   match(Set dst (ReverseBytesI dst));
7762 
7763   format %{ "bswapl  $dst" %}
7764   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7765   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );  // non-wide REX, 0F, then the second opcode combined with dst by opc2_reg
7766   ins_pipe( ialu_reg );
7767 %}
7768 
7769 instruct bytes_reverse_long(rRegL dst) %{  // ReverseBytesL performed in place on dst
7770   match(Set dst (ReverseBytesL dst));
7771 
7772   format %{ "bswapq  $dst" %}
7773 
7774   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7775   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );  // as bytes_reverse_int but with the wide REX form
7776   ins_pipe( ialu_reg);
7777 %}
7778 
7779 instruct loadI_reversed(rRegI dst, memory src) %{  // fuses LoadI with ReverseBytesI: load into dst, then byte-swap dst
7780   match(Set dst (ReverseBytesI (LoadI src)));
7781 
7782   format %{ "bswap_movl $dst, $src" %}
7783   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7784   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));  // two instructions: 8B /r, then 0F C8+reg
7785   ins_pipe( ialu_reg_mem );
7786 %}
7787 
7788 instruct loadL_reversed(rRegL dst, memory src) %{  // fuses LoadL with ReverseBytesL: load into dst, then byte-swap dst
7789   match(Set dst (ReverseBytesL (LoadL src)));
7790 
7791   format %{ "bswap_movq $dst, $src" %}
7792   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7793   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));  // wide-REX variant of loadI_reversed
7794   ins_pipe( ialu_reg_mem );
7795 %}
7796 
7797 instruct storeI_reversed(memory dst, rRegI src) %{  // fuses ReverseBytesI with StoreI: byte-swap src, then store it
7798   match(Set dst (StoreI dst (ReverseBytesI  src)));
7799 
7800   format %{ "movl_bswap $dst, $src" %}
7801   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7802   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );  // NOTE(review): the 0F C8 step is applied to src itself and no effect() is declared - confirm src may be modified
7803   ins_pipe( ialu_mem_reg );
7804 %}
7805 
7806 instruct storeL_reversed(memory dst, rRegL src) %{  // fuses ReverseBytesL with StoreL: byte-swap src, then store it
7807   match(Set dst (StoreL dst (ReverseBytesL  src)));
7808 
7809   format %{ "movq_bswap $dst, $src" %}
7810   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7811   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );  // NOTE(review): the 0F C8 step is applied to src itself and no effect() is declared - confirm src may be modified
7812   ins_pipe( ialu_mem_reg );
7813 %}
7814 
7815 
7816 //---------- Zeros Count Instructions ------------------------------------------
7817 
7818 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{  // CountLeadingZerosI as a single lzcntl
7819   predicate(UseCountLeadingZerosInstruction);  // the _bsr variant covers the opposite setting
7820   match(Set dst (CountLeadingZerosI src));
7821   effect(KILL cr);  // declares the flags operand as clobbered
7822 
7823   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7824   ins_encode %{
7825     __ lzcntl($dst$$Register, $src$$Register);
7826   %}
7827   ins_pipe(ialu_reg);
7828 %}
7829 
7830 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{  // CountLeadingZerosI built from bsrl when lzcnt is not enabled
7831   predicate(!UseCountLeadingZerosInstruction);  // complement of the countLeadingZerosI predicate
7832   match(Set dst (CountLeadingZerosI src));
7833   effect(KILL cr);  // declares the flags operand as clobbered
7834 
7835   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7836             "jnz     skip\n\t"
7837             "movl    $dst, -1\n"
7838       "skip:\n\t"
7839             "negl    $dst\n\t"
7840             "addl    $dst, 31" %}
7841   ins_encode %{
7842     const Register result = $dst$$Register;
7843     const Register input  = $src$$Register;
7844     Label have_index;  // reached directly when the branch on notZero is taken
7845     __ bsrl(result, input);
7846     __ jccb(Assembler::notZero, have_index);
7847     __ movl(result, -1);  // fall-through case: use -1 so the arithmetic below yields BitsPerInt
7848     __ bind(have_index);
7849     __ negl(result);  // negate, then add (BitsPerInt - 1): result = (BitsPerInt - 1) - index
7850     __ addl(result, BitsPerInt - 1);
7851   %}
7852   ins_pipe(ialu_reg);
7853 %}
7854 
7855 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{  // CountLeadingZerosL as a single lzcntq; result operand is an int register
7856   predicate(UseCountLeadingZerosInstruction);  // the _bsr variant covers the opposite setting
7857   match(Set dst (CountLeadingZerosL src));
7858   effect(KILL cr);  // declares the flags operand as clobbered
7859 
7860   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7861   ins_encode %{
7862     __ lzcntq($dst$$Register, $src$$Register);
7863   %}
7864   ins_pipe(ialu_reg);
7865 %}
7866 
7867 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{  // CountLeadingZerosL built from bsrq when lzcnt is not enabled
7868   predicate(!UseCountLeadingZerosInstruction);  // complement of the countLeadingZerosL predicate
7869   match(Set dst (CountLeadingZerosL src));
7870   effect(KILL cr);  // declares the flags operand as clobbered
7871 
7872   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7873             "jnz     skip\n\t"
7874             "movl    $dst, -1\n"
7875       "skip:\n\t"
7876             "negl    $dst\n\t"
7877             "addl    $dst, 63" %}
7878   ins_encode %{
7879     const Register result = $dst$$Register;
7880     const Register input  = $src$$Register;
7881     Label have_index;  // reached directly when the branch on notZero is taken
7882     __ bsrq(result, input);
7883     __ jccb(Assembler::notZero, have_index);
7884     __ movl(result, -1);  // fall-through case: use -1 so the arithmetic below yields BitsPerLong
7885     __ bind(have_index);
7886     __ negl(result);  // negate, then add (BitsPerLong - 1): result = (BitsPerLong - 1) - index
7887     __ addl(result, BitsPerLong - 1);
7888   %}
7889   ins_pipe(ialu_reg);
7890 %}
7891 
7892 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{  // CountTrailingZerosI built from bsfl plus a fix-up branch
7893   match(Set dst (CountTrailingZerosI src));
7894   effect(KILL cr);  // declares the flags operand as clobbered
7895 
7896   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7897             "jnz     done\n\t"
7898             "movl    $dst, 32\n"
7899       "done:" %}
7900   ins_encode %{
7901     const Register result = $dst$$Register;
7902     Label finished;
7903     __ bsfl(result, $src$$Register);
7904     __ jccb(Assembler::notZero, finished);  // branch taken: keep the bsfl result as is
7905     __ movl(result, BitsPerInt);  // fall-through case: the answer is the full bit width
7906     __ bind(finished);
7907   %}
7908   ins_pipe(ialu_reg);
7909 %}
7910 
7911 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{  // CountTrailingZerosL built from bsfq plus a fix-up branch
7912   match(Set dst (CountTrailingZerosL src));
7913   effect(KILL cr);  // declares the flags operand as clobbered
7914 
7915   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7916             "jnz     done\n\t"
7917             "movl    $dst, 64\n"
7918       "done:" %}
7919   ins_encode %{
7920     const Register result = $dst$$Register;
7921     Label finished;
7922     __ bsfq(result, $src$$Register);
7923     __ jccb(Assembler::notZero, finished);  // branch taken: keep the bsfq result as is
7924     __ movl(result, BitsPerLong);  // fall-through case: the answer is the full bit width
7925     __ bind(finished);
7926   %}
7927   ins_pipe(ialu_reg);
7928 %}
7929 
7930 
7931 //---------- Population Count Instructions -------------------------------------
7932 
7933 instruct popCountI(rRegI dst, rRegI src) %{  // PopCountI of a register as popcntl
7934   predicate(UsePopCountInstruction);  // rule is available only when the flag is set
7935   match(Set dst (PopCountI src));
7936 
7937   format %{ "popcnt  $dst, $src" %}
7938   ins_encode %{
7939     __ popcntl($dst$$Register, $src$$Register);
7940   %}
7941   ins_pipe(ialu_reg);
7942 %}
7943 
7944 instruct popCountI_mem(rRegI dst, memory mem) %{  // PopCountI fused with its LoadI: popcntl with a memory operand
7945   predicate(UsePopCountInstruction);  // rule is available only when the flag is set
7946   match(Set dst (PopCountI (LoadI mem)));
7947 
7948   format %{ "popcnt  $dst, $mem" %}
7949   ins_encode %{
7950     __ popcntl($dst$$Register, $mem$$Address);
7951   %}
7952   ins_pipe(ialu_reg);
7953 %}
7954 
7955 // Note: Long.bitCount(long) returns an int.
7956 instruct popCountL(rRegI dst, rRegL src) %{  // PopCountL of a register as popcntq; dst is an int register
7957   predicate(UsePopCountInstruction);  // rule is available only when the flag is set
7958   match(Set dst (PopCountL src));
7959 
7960   format %{ "popcnt  $dst, $src" %}
7961   ins_encode %{
7962     __ popcntq($dst$$Register, $src$$Register);
7963   %}
7964   ins_pipe(ialu_reg);
7965 %}
7966 
7967 // Note: Long.bitCount(long) returns an int.
7968 instruct popCountL_mem(rRegI dst, memory mem) %{  // PopCountL fused with its LoadL: popcntq with a memory operand
7969   predicate(UsePopCountInstruction);  // rule is available only when the flag is set
7970   match(Set dst (PopCountL (LoadL mem)));
7971 
7972   format %{ "popcnt  $dst, $mem" %}
7973   ins_encode %{
7974     __ popcntq($dst$$Register, $mem$$Address);
7975   %}
7976   ins_pipe(ialu_reg);
7977 %}
7978 
7979 
7980 //----------MemBar Instructions-----------------------------------------------
7981 // Memory barrier flavors
7982 
7983 instruct membar_acquire()  // MemBarAcquire with no emitted code
7984 %{
7985   match(MemBarAcquire);
7986   ins_cost(0);
7987 
7988   size(0);  // zero bytes, consistent with the empty ins_encode below
7989   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7990   ins_encode();
7991   ins_pipe(empty);
7992 %}
7993 
7994 instruct membar_acquire_lock()  // MemBarAcquire variant selected by Matcher::prior_fast_lock; no emitted code
7995 %{
7996   match(MemBarAcquire);
7997   predicate(Matcher::prior_fast_lock(n));  // per the format text: a prior CMPXCHG in FastLock makes the barrier redundant
7998   ins_cost(0);
7999 
8000   size(0);  // zero bytes, consistent with the empty ins_encode below
8001   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8002   ins_encode();
8003   ins_pipe(empty);
8004 %}
8005 
8006 instruct membar_release()  // MemBarRelease with no emitted code
8007 %{
8008   match(MemBarRelease);
8009   ins_cost(0);
8010 
8011   size(0);  // zero bytes, consistent with the empty ins_encode below
8012   format %{ "MEMBAR-release ! (empty encoding)" %}
8013   ins_encode();
8014   ins_pipe(empty);
8015 %}
8016 
8017 instruct membar_release_lock()  // MemBarRelease variant selected by Matcher::post_fast_unlock; no emitted code
8018 %{
8019   match(MemBarRelease);
8020   predicate(Matcher::post_fast_unlock(n));  // per the format text: a FastUnlock follows, so nothing is emitted
8021   ins_cost(0);
8022 
8023   size(0);  // zero bytes, consistent with the empty ins_encode below
8024   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8025   ins_encode();
8026   ins_pipe(empty);
8027 %}
8028 
8029 instruct membar_volatile(rFlagsReg cr) %{  // MemBarVolatile emitted through the macro assembler's StoreLoad membar
8030   match(MemBarVolatile);
8031   effect(KILL cr);  // declares the flags operand as clobbered
8032   ins_cost(400);  // far above the cost-0 unnecessary_membar_volatile rule
8033 
8034   format %{ 
8035     $$template
8036     if (os::is_MP()) {
8037       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8038     } else {
8039       $$emit$$"MEMBAR-volatile ! (empty encoding)"
8040     }
8041   %}
8042   ins_encode %{
8043     __ membar(Assembler::StoreLoad);  // the format template above prints a locked add on MP and an empty encoding otherwise
8044   %}
8045   ins_pipe(pipe_slow);
8046 %}
8047 
8048 instruct unnecessary_membar_volatile()  // MemBarVolatile variant selected by Matcher::post_store_load_barrier; no emitted code
8049 %{
8050   match(MemBarVolatile);
8051   predicate(Matcher::post_store_load_barrier(n));  // when this holds the barrier is treated as unnecessary (see format text)
8052   ins_cost(0);
8053 
8054   size(0);  // zero bytes, consistent with the empty ins_encode below
8055   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8056   ins_encode();
8057   ins_pipe(empty);
8058 %}
8059 
8060 //----------Move Instructions--------------------------------------------------
8061 
8062 instruct castX2P(rRegP dst, rRegL src)  // CastX2P: long register copied to a pointer register
8063 %{
8064   match(Set dst (CastX2P src));
8065 
8066   format %{ "movq    $dst, $src\t# long->ptr" %}
8067   ins_encode(enc_copy_wide(dst, src));  // same encoding class as castP2X
8068   ins_pipe(ialu_reg_reg); // XXX
8069 %}
8070 
8071 instruct castP2X(rRegL dst, rRegP src)  // CastP2X: pointer register copied to a long register
8072 %{
8073   match(Set dst (CastP2X src));
8074 
8075   format %{ "movq    $dst, $src\t# ptr -> long" %}
8076   ins_encode(enc_copy_wide(dst, src));  // same encoding class as castX2P
8077   ins_pipe(ialu_reg_reg); // XXX
8078 %}
8079 
8080 
8081 // Convert oop pointer into compressed form
8082 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{  // EncodeP for pointers whose type is not known to be NotNull
8083   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);  // the NotNull case goes to encodeHeapOop_not_null
8084   match(Set dst (EncodeP src));
8085   effect(KILL cr);  // declares the flags operand as clobbered
8086   format %{ "encode_heap_oop $dst,$src" %}
8087   ins_encode %{
8088     const Register narrow = $dst$$Register;
8089     const Register wide   = $src$$Register;
8090     if (narrow != wide) {  // encode_heap_oop works in place, so copy first when the registers differ
8091       __ movq(narrow, wide);
8092     }
8093     __ encode_heap_oop(narrow);
8094   %}
8095   ins_pipe(ialu_reg_long);
8096 %}
8097 
8098 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{  // EncodeP for pointers typed NotNull
8099   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);  // complement of the encodeHeapOop predicate
8100   match(Set dst (EncodeP src));
8101   effect(KILL cr);  // declares the flags operand as clobbered
8102   format %{ "encode_heap_oop_not_null $dst,$src" %}
8103   ins_encode %{
8104     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);  // two-register form, so no explicit copy is needed here
8105   %}
8106   ins_pipe(ialu_reg_long);
8107 %}
8108 
8109 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{  // DecodeN for oops typed neither NotNull nor Constant
8110   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
8111             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
8112   match(Set dst (DecodeN src));
8113   effect(KILL cr);  // declares the flags operand as clobbered
8114   format %{ "decode_heap_oop $dst,$src" %}
8115   ins_encode %{
8116     const Register wide   = $dst$$Register;
8117     const Register narrow = $src$$Register;
8118     if (wide != narrow) {  // decode_heap_oop works in place, so copy first when the registers differ
8119       __ movq(wide, narrow);
8120     }
8121     __ decode_heap_oop(wide);
8122   %}
8123   ins_pipe(ialu_reg_long);
8124 %}
8125 
8126 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{  // DecodeN for oops typed NotNull or Constant; NOTE(review): no flags effect is declared here, unlike decodeHeapOop - confirm
8127   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
8128             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
8129   match(Set dst (DecodeN src));
8130   format %{ "decode_heap_oop_not_null $dst,$src" %}
8131   ins_encode %{
8132     const Register wide   = $dst$$Register;
8133     const Register narrow = $src$$Register;
8134     if (wide == narrow) {  // same register: use the in-place form
8135       __ decode_heap_oop_not_null(wide);
8136     } else {  // distinct registers: use the two-register form
8137       __ decode_heap_oop_not_null(wide, narrow);
8138     }
8139   %}
8140   ins_pipe(ialu_reg_long);
8141 %}
8142 
8143 
8144 //----------Conditional Move---------------------------------------------------
8145 // Jump
8146 // dummy instruction for generating temp registers
8147 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{  // Jump on a left-shifted switch value
8148   match(Jump (LShiftL switch_val shift));
8149   ins_cost(350);
8150   predicate(false);  // constant false: this rule is never selected
8151   effect(TEMP dest);  // dest is a scratch register, not an input or result
8152 
8153   format %{ "leaq    $dest, table_base\n\t"
8154             "jmp     [$dest + $switch_val << $shift]\n\t" %}
8155   ins_encode(jump_enc_offset(switch_val, shift, dest));
8156   ins_pipe(pipe_jmp);
8157   ins_pc_relative(1);
8158 %}
8159 
8160 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{  // Jump on (switch value << shift) + offset
8161   match(Jump (AddL (LShiftL switch_val shift) offset));
8162   ins_cost(350);
8163   effect(TEMP dest);  // dest is a scratch register, not an input or result
8164 
8165   format %{ "leaq    $dest, table_base\n\t"
8166             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
8167   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
8168   ins_pipe(pipe_jmp);
8169   ins_pc_relative(1);
8170 %}
8171 
8172 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{  // Jump on a plain switch value
8173   match(Jump switch_val);
8174   ins_cost(350);
8175   effect(TEMP dest);  // dest is a scratch register, not an input or result
8176 
8177   format %{ "leaq    $dest, table_base\n\t"
8178             "jmp     [$dest + $switch_val]\n\t" %}
8179   ins_encode(jump_enc(switch_val, dest));
8180   ins_pipe(pipe_jmp);
8181   ins_pc_relative(1);
8182 %}
8183 
8184 // Conditional move
8185 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)  // CMoveI, register source, cmpOp/rFlagsReg operands
8186 %{
8187   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both the fall-back input and the result
8188 
8189   ins_cost(200); // XXX
8190   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8191   opcode(0x0F, 0x40);
8192   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // non-wide REX, enc_cmov driven by cop, then ModRM
8193   ins_pipe(pipe_cmov_reg);
8194 %}
8195 
8196 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{  // CMoveI, register source, cmpOpU/rFlagsRegU operands
8197   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both the fall-back input and the result
8198 
8199   ins_cost(200); // XXX
8200   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8201   opcode(0x0F, 0x40);
8202   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovI_reg
8203   ins_pipe(pipe_cmov_reg);
8204 %}
8205 
8206 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{  // CMoveI for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8207   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8208   ins_cost(200);  // same cost as cmovI_regU, to which this expands
8209   expand %{
8210     cmovI_regU(cop, cr, dst, src);
8211   %}
8212 %}
8213 
8214 // Conditional move
8215 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{  // CMoveI fused with LoadI, cmpOp/rFlagsReg operands
8216   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8217 
8218   ins_cost(250); // XXX
8219   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8220   opcode(0x0F, 0x40);
8221   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));  // memory-operand counterpart of cmovI_reg
8222   ins_pipe(pipe_cmov_mem);
8223 %}
8224 
8225 // Conditional move
8226 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)  // CMoveI fused with LoadI, cmpOpU/rFlagsRegU operands
8227 %{
8228   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8229 
8230   ins_cost(250); // XXX
8231   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
8232   opcode(0x0F, 0x40);
8233   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));  // same encoding sequence as cmovI_mem
8234   ins_pipe(pipe_cmov_mem);
8235 %}
8236 
8237 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{  // CMoveI+LoadI for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8238   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
8239   ins_cost(250);  // same cost as cmovI_memU, to which this expands
8240   expand %{
8241     cmovI_memU(cop, cr, dst, src);
8242   %}
8243 %}
8244 
8245 // Conditional move
8246 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)  // CMoveN (compressed pointer), cmpOp/rFlagsReg operands
8247 %{
8248   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8249 
8250   ins_cost(200); // XXX
8251   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
8252   opcode(0x0F, 0x40);
8253   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // non-wide REX: same sequence as the int cmov
8254   ins_pipe(pipe_cmov_reg);
8255 %}
8256 
8257 // Conditional move
8258 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)  // CMoveN (compressed pointer), cmpOpU/rFlagsRegU operands
8259 %{
8260   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8261 
8262   ins_cost(200); // XXX
8263   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
8264   opcode(0x0F, 0x40);
8265   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovN_reg
8266   ins_pipe(pipe_cmov_reg);
8267 %}
8268 
8269 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{  // CMoveN for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8270   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
8271   ins_cost(200);  // same cost as cmovN_regU, to which this expands
8272   expand %{
8273     cmovN_regU(cop, cr, dst, src);
8274   %}
8275 %}
8276 
8277 // Conditional move
8278 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)  // CMoveP, register source, cmpOp/rFlagsReg operands
8279 %{
8280   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8281 
8282   ins_cost(200); // XXX
8283   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
8284   opcode(0x0F, 0x40);
8285   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // wide REX form for the 64-bit move
8286   ins_pipe(pipe_cmov_reg);  // XXX
8287 %}
8288 
8289 // Conditional move
8290 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)  // CMoveP, register source, cmpOpU/rFlagsRegU operands
8291 %{
8292   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8293 
8294   ins_cost(200); // XXX
8295   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
8296   opcode(0x0F, 0x40);
8297   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovP_reg
8298   ins_pipe(pipe_cmov_reg); // XXX
8299 %}
8300 
8301 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{  // CMoveP for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8302   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
8303   ins_cost(200);  // same cost as cmovP_regU, to which this expands
8304   expand %{
8305     cmovP_regU(cop, cr, dst, src);
8306   %}
8307 %}
8308 
8309 // DISABLED: Requires the ADLC to emit a bottom_type call that
8310 // correctly meets the two pointer arguments; one is an incoming
8311 // register but the other is a memory operand.  ALSO appears to
8312 // be buggy with implicit null checks.
8313 //
8314 //// Conditional move
8315 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
8316 //%{
8317 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8318 //  ins_cost(250);
8319 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8320 //  opcode(0x0F,0x40);
8321 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8322 //  ins_pipe( pipe_cmov_mem );
8323 //%}
8324 //
8325 //// Conditional move
8326 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
8327 //%{
8328 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8329 //  ins_cost(250);
8330 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8331 //  opcode(0x0F,0x40);
8332 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
8333 //  ins_pipe( pipe_cmov_mem );
8334 //%}
8335 
8336 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)  // CMoveL, register source, cmpOp/rFlagsReg operands
8337 %{
8338   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8339 
8340   ins_cost(200); // XXX
8341   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8342   opcode(0x0F, 0x40);
8343   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // wide REX form for the 64-bit move
8344   ins_pipe(pipe_cmov_reg);  // XXX
8345 %}
8346 
8347 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)  // CMoveL fused with LoadL, cmpOp/rFlagsReg operands
8348 %{
8349   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8350 
8351   ins_cost(200); // XXX (NOTE(review): the int memory forms use 250 - confirm 200 is intended here)
8352   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
8353   opcode(0x0F, 0x40);
8354   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));  // memory-operand counterpart of cmovL_reg
8355   ins_pipe(pipe_cmov_mem);  // XXX
8356 %}
8357 
8358 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)  // CMoveL, register source, cmpOpU/rFlagsRegU operands
8359 %{
8360   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8361 
8362   ins_cost(200); // XXX
8363   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8364   opcode(0x0F, 0x40);
8365   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovL_reg
8366   ins_pipe(pipe_cmov_reg); // XXX
8367 %}
8368 
8369 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{  // CMoveL for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8370   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8371   ins_cost(200);  // same cost as cmovL_regU, to which this expands
8372   expand %{
8373     cmovL_regU(cop, cr, dst, src);
8374   %}
8375 %}
8376 
8377 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)  // CMoveL fused with LoadL, cmpOpU/rFlagsRegU operands
8378 %{
8379   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8380 
8381   ins_cost(200); // XXX
8382   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8383   opcode(0x0F, 0x40);
8384   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));  // same encoding sequence as cmovL_mem
8385   ins_pipe(pipe_cmov_mem); // XXX
8386 %}
8387 
8388 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{  // CMoveL+LoadL for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8389   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8390   ins_cost(200);  // same cost as cmovL_memU, to which this expands
8391   expand %{
8392     cmovL_memU(cop, cr, dst, src);
8393   %}
8394 %}
8395 
8396 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)  // CMoveF, cmpOp/rFlagsReg operands; formatted as a branch around movss
8397 %{
8398   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8399 
8400   ins_cost(200); // XXX
8401   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8402             "movss     $dst, $src\n"
8403     "skip:" %}
8404   ins_encode(enc_cmovf_branch(cop, dst, src));  // branch-based encoding class rather than a cmov opcode
8405   ins_pipe(pipe_slow);
8406 %}
8407 
8408 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8409 // %{
8410 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8411 
8412 //   ins_cost(200); // XXX
8413 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8414 //             "movss     $dst, $src\n"
8415 //     "skip:" %}
8416 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8417 //   ins_pipe(pipe_slow);
8418 // %}
8419 
8420 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)  // CMoveF, cmpOpU/rFlagsRegU operands; formatted as a branch around movss
8421 %{
8422   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8423 
8424   ins_cost(200); // XXX
8425   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8426             "movss     $dst, $src\n"
8427     "skip:" %}
8428   ins_encode(enc_cmovf_branch(cop, dst, src));  // same encoding class as cmovF_reg
8429   ins_pipe(pipe_slow);
8430 %}
8431 
8432 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{  // CMoveF for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8433   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8434   ins_cost(200);  // same cost as cmovF_regU, to which this expands
8435   expand %{
8436     cmovF_regU(cop, cr, dst, src);
8437   %}
8438 %}
8439 
8440 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)  // CMoveD, cmpOp/rFlagsReg operands; formatted as a branch around movsd
8441 %{
8442   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8443 
8444   ins_cost(200); // XXX
8445   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8446             "movsd     $dst, $src\n"
8447     "skip:" %}
8448   ins_encode(enc_cmovd_branch(cop, dst, src));  // branch-based encoding class rather than a cmov opcode
8449   ins_pipe(pipe_slow);
8450 %}
8451 
8452 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)  // CMoveD, cmpOpU/rFlagsRegU operands; formatted as a branch around movsd
8453 %{
8454   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8455 
8456   ins_cost(200); // XXX
8457   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8458             "movsd     $dst, $src\n"
8459     "skip:" %}
8460   ins_encode(enc_cmovd_branch(cop, dst, src));  // same encoding class as cmovD_reg
8461   ins_pipe(pipe_slow);
8462 %}
8463 
8464 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{  // CMoveD for cmpOpUCF/rFlagsRegUCF operands; no encoding of its own
8465   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8466   ins_cost(200);  // same cost as cmovD_regU, to which this expands
8467   expand %{
8468     cmovD_regU(cop, cr, dst, src);
8469   %}
8470 %}
8471 
8472 //----------Arithmetic Instructions--------------------------------------------
8473 //----------Addition Instructions----------------------------------------------
8474 
8475 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)  // AddI: dst = dst + src, both int registers
8476 %{
8477   match(Set dst (AddI dst src));
8478   effect(KILL cr);  // declares the flags operand as clobbered
8479 
8480   format %{ "addl    $dst, $src\t# int" %}
8481   opcode(0x03);  // 03 /r
8482   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8483   ins_pipe(ialu_reg_reg);
8484 %}
8485 
8486 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)  // AddI: dst = dst + int immediate
8487 %{
8488   match(Set dst (AddI dst src));
8489   effect(KILL cr);  // declares the flags operand as clobbered
8490 
8491   format %{ "addl    $dst, $src\t# int" %}
8492   opcode(0x81, 0x00); /* /0 id */
8493   ins_encode(OpcSErm(dst, src), Con8or32(src));  // OpcSErm handles prefix/opcode/ModRM; Con8or32 emits the constant
8494   ins_pipe( ialu_reg );
8495 %}
8496 
8497 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)  // AddI fused with LoadI: dst = dst + [src]
8498 %{
8499   match(Set dst (AddI dst (LoadI src)));
8500   effect(KILL cr);  // declares the flags operand as clobbered
8501 
8502   ins_cost(125); // XXX
8503   format %{ "addl    $dst, $src\t# int" %}
8504   opcode(0x03);  // 03 /r, same opcode as addI_rReg
8505   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8506   ins_pipe(ialu_reg_mem);
8507 %}
8508 
8509 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)  // load-add-store on the same address: [dst] = [dst] + src
8510 %{
8511   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // the load and the store must name the same memory operand
8512   effect(KILL cr);  // declares the flags operand as clobbered
8513 
8514   ins_cost(150); // XXX
8515   format %{ "addl    $dst, $src\t# int" %}
8516   opcode(0x01); /* Opcode 01 /r */
8517   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8518   ins_pipe(ialu_mem_reg);
8519 %}
8520 
8521 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)  // load-add-store on the same address: [dst] = [dst] + int immediate
8522 %{
8523   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // the load and the store must name the same memory operand
8524   effect(KILL cr);  // declares the flags operand as clobbered
8525 
8526   ins_cost(125); // XXX
8527   format %{ "addl    $dst, $src\t# int" %}
8528   opcode(0x81); /* Opcode 81 /0 id */
8529   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // opcode emitted via OpcSE(src), /0 ModRM, then the constant
8530   ins_pipe(ialu_mem_imm);
8531 %}
8532 
8533 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)  // AddI of the immI1 operand, emitted as incl on a register
8534 %{
8535   predicate(UseIncDec);  // rule is available only when the UseIncDec flag is set
8536   match(Set dst (AddI dst src));
8537   effect(KILL cr);  // declares the flags operand as clobbered
8538 
8539   format %{ "incl    $dst\t# int" %}
8540   opcode(0xFF, 0x00); // FF /0
8541   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // src is not encoded; the instruction itself implies the constant
8542   ins_pipe(ialu_reg);
8543 %}
8544 
8545 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)  // load-add-store of the immI1 operand, emitted as incl on memory
8546 %{
8547   predicate(UseIncDec);  // rule is available only when the UseIncDec flag is set
8548   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8549   effect(KILL cr);  // declares the flags operand as clobbered
8550 
8551   ins_cost(125); // XXX
8552   format %{ "incl    $dst\t# int" %}
8553   opcode(0xFF); /* Opcode FF /0 */
8554   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));  // src is not encoded; /0 selects the operation
8555   ins_pipe(ialu_mem_imm);
8556 %}
8557 
8558 // Decrement is matched as AddI with the immI_M1 operand (presumably the constant -1), hence AddI rather than SubI.
8559 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)  // AddI of the immI_M1 operand, emitted as decl on a register
8560 %{
8561   predicate(UseIncDec);  // rule is available only when the UseIncDec flag is set
8562   match(Set dst (AddI dst src));
8563   effect(KILL cr);  // declares the flags operand as clobbered
8564 
8565   format %{ "decl    $dst\t# int" %}
8566   opcode(0xFF, 0x01); // FF /1
8567   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // src is not encoded; the instruction itself implies the constant
8568   ins_pipe(ialu_reg);
8569 %}
8570 
8571 // XXX why does that use AddI
// Int memory decrement ("decl mem"), selected only when UseIncDec is set.
// Matched as load/AddI/store on one memory operand with an immI_M1 addend
// (presumably the constant -1); the /1 extension is passed to RM_opc_mem.
8572 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8573 %{
8574   predicate(UseIncDec);
8575   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8576   effect(KILL cr);
8577 
8578   ins_cost(125); // XXX
8579   format %{ "decl    $dst\t# int" %}
8580   opcode(0xFF); /* Opcode FF /1 */
8581   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8582   ins_pipe(ialu_mem_imm);
8583 %}
8584 
// Three-operand int add of register + immediate via LEA: dst = src0 + src1.
// Unlike the add forms, dst is a separate operand and no flags operand is
// declared.  A 0x67 byte is emitted first (shown as "addr32" in the format),
// followed by the REX helper, opcode 0x8D and the reg_lea operand bytes.
8585 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8586 %{
8587   match(Set dst (AddI src0 src1));
8588 
8589   ins_cost(110);
8590   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8591   opcode(0x8D); /* 0x8D /r */
8592   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8593   ins_pipe(ialu_reg_reg);
8594 %}
8595 
// Long add, register += register ("addq reg, reg").
// Same opcode (0x03) as the int memory-source add, but using the *_wide REX
// helper.  The flags register is declared killed.
8596 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8597 %{
8598   match(Set dst (AddL dst src));
8599   effect(KILL cr);
8600 
8601   format %{ "addq    $dst, $src\t# long" %}
8602   opcode(0x03);
8603   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8604   ins_pipe(ialu_reg_reg);
8605 %}
8606 
// Long add of an immediate to a register ("addq reg, imm").
// The addend is an immL32 operand (presumably a long constant that fits in
// 32 bits; defined outside this chunk).  Opcode 0x81 with /0.
8607 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8608 %{
8609   match(Set dst (AddL dst src));
8610   effect(KILL cr);
8611 
8612   format %{ "addq    $dst, $src\t# long" %}
8613   opcode(0x81, 0x00); /* /0 id */
8614   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8615   ins_pipe( ialu_reg );
8616 %}
8617 
// Long add with a memory source: dst = dst + LoadL(src) ("addq reg, mem").
// Opcode 0x03 with the wide REX reg/mem helper; flags are declared killed.
8618 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8619 %{
8620   match(Set dst (AddL dst (LoadL src)));
8621   effect(KILL cr);
8622 
8623   ins_cost(125); // XXX
8624   format %{ "addq    $dst, $src\t# long" %}
8625   opcode(0x03);
8626   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8627   ins_pipe(ialu_reg_mem);
8628 %}
8629 
// Long read-modify-write add: StoreL to $dst of (LoadL $dst) + $src, covered
// by a single "addq mem, reg" (opcode 0x01).  The encode helpers take the
// register operand first: (src, dst).
8630 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8631 %{
8632   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8633   effect(KILL cr);
8634 
8635   ins_cost(150); // XXX
8636   format %{ "addq    $dst, $src\t# long" %}
8637   opcode(0x01); /* Opcode 01 /r */
8638   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8639   ins_pipe(ialu_mem_reg);
8640 %}
8641 
// Long read-modify-write add of an immL32 immediate to memory
// ("addq mem, imm").  Opcode 0x81 with /0 passed literally to RM_opc_mem.
8642 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8643 %{
8644   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8645   effect(KILL cr);
8646 
8647   ins_cost(125); // XXX
8648   format %{ "addq    $dst, $src\t# long" %}
8649   opcode(0x81); /* Opcode 81 /0 id */
8650   ins_encode(REX_mem_wide(dst),
8651              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8652   ins_pipe(ialu_mem_imm);
8653 %}
8654 
// Long register increment ("incq reg"), selected only when UseIncDec is set.
// Matches AddL of $dst with an immL1 operand (presumably the long constant 1;
// defined outside this chunk).  Encoded as opcode 0xFF with /0 and the wide
// REX helper.
// dst is a long register operand (rRegL): the rule consumes and produces the
// result of an AddL, in line with decL_rReg and the other long add forms.
8655 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8656 %{
8657   predicate(UseIncDec);
8658   match(Set dst (AddL dst src));
8659   effect(KILL cr);
8660 
8661   format %{ "incq    $dst\t# long" %}
8662   opcode(0xFF, 0x00); // FF /0
8663   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8664   ins_pipe(ialu_reg);
8665 %}
8666 
// Long memory increment ("incq mem"), selected only when UseIncDec is set.
// Matched as load/AddL/store on one memory operand with an immL1 addend;
// the /0 opcode extension is passed literally to RM_opc_mem.
8667 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8668 %{
8669   predicate(UseIncDec);
8670   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8671   effect(KILL cr);
8672 
8673   ins_cost(125); // XXX
8674   format %{ "incq    $dst\t# long" %}
8675   opcode(0xFF); /* Opcode FF /0 */
8676   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8677   ins_pipe(ialu_mem_imm);
8678 %}
8679 
8680 // XXX why does that use AddL
// Long register decrement ("decq reg"), selected only when UseIncDec is set.
// Written as an AddL with an immL_M1 addend (presumably the long constant -1;
// defined outside this chunk).  Encoded as opcode 0xFF with /1.
8681 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8682 %{
8683   predicate(UseIncDec);
8684   match(Set dst (AddL dst src));
8685   effect(KILL cr);
8686 
8687   format %{ "decq    $dst\t# long" %}
8688   opcode(0xFF, 0x01); // FF /1
8689   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8690   ins_pipe(ialu_reg);
8691 %}
8692 
8693 // XXX why does that use AddL
// Long memory decrement ("decq mem"), selected only when UseIncDec is set.
// Matched as load/AddL/store on one memory operand with an immL_M1 addend;
// the /1 opcode extension is passed literally to RM_opc_mem.
8694 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8695 %{
8696   predicate(UseIncDec);
8697   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8698   effect(KILL cr);
8699 
8700   ins_cost(125); // XXX
8701   format %{ "decq    $dst\t# long" %}
8702   opcode(0xFF); /* Opcode FF /1 */
8703   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8704   ins_pipe(ialu_mem_imm);
8705 %}
8706 
// Three-operand long add of register + immL32 via LEA: dst = src0 + src1.
// dst is a separate operand and no flags operand is declared.  Unlike the
// int form, no 0x67 byte is emitted and the wide REX helper is used.
8707 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8708 %{
8709   match(Set dst (AddL src0 src1));
8710 
8711   ins_cost(110);
8712   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8713   opcode(0x8D); /* 0x8D /r */
8714   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8715   ins_pipe(ialu_reg_reg);
8716 %}
8717 
// Pointer add: pointer register += long register ("addq reg, reg").
// Matches AddP with the pointer as both base and destination; encoding is
// the same as addL_rReg (opcode 0x03, wide REX helper).
8718 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8719 %{
8720   match(Set dst (AddP dst src));
8721   effect(KILL cr);
8722 
8723   format %{ "addq    $dst, $src\t# ptr" %}
8724   opcode(0x03);
8725   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8726   ins_pipe(ialu_reg_reg);
8727 %}
8728 
// Pointer add of an immL32 immediate to a pointer register ("addq reg, imm").
// Opcode 0x81 with /0; flags are declared killed.
8729 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8730 %{
8731   match(Set dst (AddP dst src));
8732   effect(KILL cr);
8733 
8734   format %{ "addq    $dst, $src\t# ptr" %}
8735   opcode(0x81, 0x00); /* /0 id */
8736   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8737   ins_pipe( ialu_reg );
8738 %}
8739 
8740 // XXX addP mem ops ????
8741 
// Three-operand pointer add of register + immL32 via LEA: dst = src0 + src1.
// dst is a separate operand and no flags operand is declared.
8742 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8743 %{
8744   match(Set dst (AddP src0 src1));
8745 
8746   ins_cost(110);
8747   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8748   opcode(0x8D); /* 0x8D /r */
8749   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8750   ins_pipe(ialu_reg_reg);
8751 %}
8752 
// CheckCastPP on a pointer register: matched in place (dst is both input and
// output) and emits no code -- size is 0 and the encoding is empty.
8753 instruct checkCastPP(rRegP dst)
8754 %{
8755   match(Set dst (CheckCastPP dst));
8756 
8757   size(0);
8758   format %{ "# checkcastPP of $dst" %}
8759   ins_encode(/* empty encoding */);
8760   ins_pipe(empty);
8761 %}
8762 
// CastPP on a pointer register: matched in place and emits no code
// (size 0, empty encoding).
8763 instruct castPP(rRegP dst)
8764 %{
8765   match(Set dst (CastPP dst));
8766 
8767   size(0);
8768   format %{ "# castPP of $dst" %}
8769   ins_encode(/* empty encoding */);
8770   ins_pipe(empty);
8771 %}
8772 
// CastII on an int register: matched in place and emits no code (size 0,
// empty encoding).  Unlike the pointer casts it also sets an explicit cost of 0.
8773 instruct castII(rRegI dst)
8774 %{
8775   match(Set dst (CastII dst));
8776 
8777   size(0);
8778   format %{ "# castII of $dst" %}
8779   ins_encode(/* empty encoding */);
8780   ins_cost(0);
8781   ins_pipe(empty);
8782 %}
8783 
8784 // LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked and emits a plain 64-bit load ("movq reg, mem",
// opcode 0x8B with the wide REX helper); no lock prefix is emitted here.
8785 instruct loadPLocked(rRegP dst, memory mem)
8786 %{
8787   match(Set dst (LoadPLocked mem));
8788 
8789   ins_cost(125); // XXX
8790   format %{ "movq    $dst, $mem\t# ptr locked" %}
8791   opcode(0x8B);
8792   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8793   ins_pipe(ialu_reg_mem); // XXX
8794 %}
8795 
8796 // LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked and emits a plain 64-bit load ("movq reg, mem",
// opcode 0x8B with the wide REX helper); no lock prefix is emitted here.
8797 instruct loadLLocked(rRegL dst, memory mem)
8798 %{
8799   match(Set dst (LoadLLocked mem));
8800 
8801   ins_cost(125); // XXX
8802   format %{ "movq    $dst, $mem\t# long locked" %}
8803   opcode(0x8B);
8804   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8805   ins_pipe(ialu_reg_mem); // XXX
8806 %}
8807 
8808 // Conditional-store of the updated heap-top.
8809 // Used during allocation of the shared heap.
8810 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8811 
// The result of the rule is the flags register itself (Set cr ...): the
// expected value is pinned to rax (rax_RegP) and the new value may be in any
// pointer register.  Emits lock prefix + 64-bit cmpxchg (0x0F 0xB1).
// NOTE(review): unlike storeIConditional/storeLConditional, no
// effect(KILL oldval) is declared here -- confirm this is intentional.
8812 instruct storePConditional(memory heap_top_ptr,
8813                            rax_RegP oldval, rRegP newval,
8814                            rFlagsReg cr)
8815 %{
8816   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8817  
8818   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8819             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8820   opcode(0x0F, 0xB1);
8821   ins_encode(lock_prefix,
8822              REX_reg_mem_wide(newval, heap_top_ptr),
8823              OpcP, OpcS,
8824              reg_mem(newval, heap_top_ptr));
8825   ins_pipe(pipe_cmpxchg);
8826 %}
8827 
8828 // Conditional-store of an int value.
8829 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The rule's result is the flags register (Set cr ...).  The expected value
// is pinned to rax (rax_RegI) and declared killed; the encoding is a lock
// prefix followed by a 32-bit cmpxchg (0x0F 0xB1, non-wide REX helper).
8830 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8831 %{
8832   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8833   effect(KILL oldval);
8834 
8835   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8836   opcode(0x0F, 0xB1);
8837   ins_encode(lock_prefix,
8838              REX_reg_mem(newval, mem),
8839              OpcP, OpcS,
8840              reg_mem(newval, mem));
8841   ins_pipe(pipe_cmpxchg);
8842 %}
8843 
8844 // Conditional-store of a long value.
8845 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The rule's result is the flags register (Set cr ...).  The expected value
// is pinned to rax (rax_RegL) and declared killed; the encoding is a lock
// prefix followed by a 64-bit cmpxchg (0x0F 0xB1, wide REX helper).
8846 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8847 %{
8848   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8849   effect(KILL oldval);
8850 
8851   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8852   opcode(0x0F, 0xB1);
8853   ins_encode(lock_prefix,
8854              REX_reg_mem_wide(newval, mem),
8855              OpcP, OpcS,
8856              reg_mem(newval, mem));
8857   ins_pipe(pipe_cmpxchg);
8858 %}
8859 
8860 
8861 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result register.
// Three instructions are emitted back to back: lock cmpxchgq on the memory
// operand (expected value pinned to rax), then sete on $res (0x0F 0x94) and
// movzbl $res, $res (0x0F 0xB6) to widen the byte result.
// Both the flags and oldval (rax) are declared killed.
8862 instruct compareAndSwapP(rRegI res,
8863                          memory mem_ptr,
8864                          rax_RegP oldval, rRegP newval,
8865                          rFlagsReg cr)
8866 %{
8867   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8868   effect(KILL cr, KILL oldval);
8869 
8870   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8871             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8872             "sete    $res\n\t"
8873             "movzbl  $res, $res" %}
8874   opcode(0x0F, 0xB1);
8875   ins_encode(lock_prefix,
8876              REX_reg_mem_wide(newval, mem_ptr),
8877              OpcP, OpcS,
8878              reg_mem(newval, mem_ptr),
8879              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8880              REX_reg_breg(res, res), // movzbl
8881              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8882   ins_pipe( pipe_cmpxchg );
8883 %}
8884 
// Long compare-and-swap producing an int result register.
// Emits lock cmpxchgq on the memory operand (expected value pinned to rax),
// then sete on $res (0x0F 0x94) and movzbl $res, $res (0x0F 0xB6).
// Both the flags and oldval (rax) are declared killed.
8885 instruct compareAndSwapL(rRegI res,
8886                          memory mem_ptr,
8887                          rax_RegL oldval, rRegL newval,
8888                          rFlagsReg cr)
8889 %{
8890   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8891   effect(KILL cr, KILL oldval);
8892 
8893   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8894             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8895             "sete    $res\n\t"
8896             "movzbl  $res, $res" %}
8897   opcode(0x0F, 0xB1);
8898   ins_encode(lock_prefix,
8899              REX_reg_mem_wide(newval, mem_ptr),
8900              OpcP, OpcS,
8901              reg_mem(newval, mem_ptr),
8902              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8903              REX_reg_breg(res, res), // movzbl
8904              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8905   ins_pipe( pipe_cmpxchg );
8906 %}
8907 
// Int compare-and-swap producing an int result register.
// Emits lock cmpxchgl (non-wide REX helper) on the memory operand with the
// expected value pinned to rax, then sete on $res (0x0F 0x94) and
// movzbl $res, $res (0x0F 0xB6).  Flags and oldval (rax) are declared killed.
8908 instruct compareAndSwapI(rRegI res,
8909                          memory mem_ptr,
8910                          rax_RegI oldval, rRegI newval,
8911                          rFlagsReg cr)
8912 %{
8913   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8914   effect(KILL cr, KILL oldval);
8915 
8916   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8917             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8918             "sete    $res\n\t"
8919             "movzbl  $res, $res" %}
8920   opcode(0x0F, 0xB1);
8921   ins_encode(lock_prefix,
8922              REX_reg_mem(newval, mem_ptr),
8923              OpcP, OpcS,
8924              reg_mem(newval, mem_ptr),
8925              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8926              REX_reg_breg(res, res), // movzbl
8927              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8928   ins_pipe( pipe_cmpxchg );
8929 %}
8930 
8931 
// Compare-and-swap on rRegN operands (presumably narrow/compressed oops;
// the operand classes are defined outside this chunk), producing an int result.
// Uses the same 32-bit sequence as compareAndSwapI: lock cmpxchgl, then
// sete on $res and movzbl $res, $res.  Flags and oldval (rax) are killed.
8932 instruct compareAndSwapN(rRegI res,
8933                           memory mem_ptr,
8934                           rax_RegN oldval, rRegN newval,
8935                           rFlagsReg cr) %{
8936   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8937   effect(KILL cr, KILL oldval);
8938 
8939   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8940             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8941             "sete    $res\n\t"
8942             "movzbl  $res, $res" %}
8943   opcode(0x0F, 0xB1);
8944   ins_encode(lock_prefix,
8945              REX_reg_mem(newval, mem_ptr),
8946              OpcP, OpcS,
8947              reg_mem(newval, mem_ptr),
8948              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8949              REX_reg_breg(res, res), // movzbl
8950              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8951   ins_pipe( pipe_cmpxchg );
8952 %}
8953 
8954 //----------Subtraction Instructions-------------------------------------------
8955 
8956 // Integer Subtraction Instructions
// Int subtract, register -= register ("subl reg, reg"), opcode 0x2B.
// The flags register is declared killed.
8957 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8958 %{
8959   match(Set dst (SubI dst src));
8960   effect(KILL cr);
8961 
8962   format %{ "subl    $dst, $src\t# int" %}
8963   opcode(0x2B);
8964   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8965   ins_pipe(ialu_reg_reg);
8966 %}
8967 
// Int subtract of an immediate from a register ("subl reg, imm").
// Opcode 0x81 with /5 as the secondary opcode.
8968 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8969 %{
8970   match(Set dst (SubI dst src));
8971   effect(KILL cr);
8972 
8973   format %{ "subl    $dst, $src\t# int" %}
8974   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8975   ins_encode(OpcSErm(dst, src), Con8or32(src));
8976   ins_pipe(ialu_reg);
8977 %}
8978 
// Int subtract with a memory source: dst = dst - LoadI(src)
// ("subl reg, mem"), opcode 0x2B with the reg/mem encode helpers.
8979 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8980 %{
8981   match(Set dst (SubI dst (LoadI src)));
8982   effect(KILL cr);
8983 
8984   ins_cost(125);
8985   format %{ "subl    $dst, $src\t# int" %}
8986   opcode(0x2B);
8987   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8988   ins_pipe(ialu_reg_mem);
8989 %}
8990 
// Int read-modify-write subtract: StoreI to $dst of (LoadI $dst) - $src,
// covered by one "subl mem, reg" (opcode 0x29).  The encode helpers take the
// register operand first: (src, dst).
8991 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8992 %{
8993   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8994   effect(KILL cr);
8995 
8996   ins_cost(150);
8997   format %{ "subl    $dst, $src\t# int" %}
8998   opcode(0x29); /* Opcode 29 /r */
8999   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9000   ins_pipe(ialu_mem_reg);
9001 %}
9002 
// Int read-modify-write subtract of an immediate from memory
// ("subl mem, imm").  Opcode 0x81 with /5 passed literally to RM_opc_mem.
9003 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
9004 %{
9005   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
9006   effect(KILL cr);
9007 
9008   ins_cost(125); // XXX
9009   format %{ "subl    $dst, $src\t# int" %}
9010   opcode(0x81); /* Opcode 81 /5 id */
9011   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
9012   ins_pipe(ialu_mem_imm);
9013 %}
9014 
// Long subtract, register -= register ("subq reg, reg"), opcode 0x2B with
// the wide REX helper.  Also used as the last step of the divL_10 expansion.
9015 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9016 %{
9017   match(Set dst (SubL dst src));
9018   effect(KILL cr);
9019 
9020   format %{ "subq    $dst, $src\t# long" %}
9021   opcode(0x2B);
9022   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9023   ins_pipe(ialu_reg_reg);
9024 %}
9025 
// Long subtract of an immL32 immediate from a register ("subq reg, imm").
// Opcode 0x81 with /5 as the secondary opcode, wide encode helper.
// dst is a long register operand (rRegL): the rule consumes and produces the
// result of a SubL, in line with subL_rReg and subL_rReg_mem.
9026 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9027 %{
9028   match(Set dst (SubL dst src));
9029   effect(KILL cr);
9030 
9031   format %{ "subq    $dst, $src\t# long" %}
9032   opcode(0x81, 0x05);  /* Opcode 81 /5 */
9033   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9034   ins_pipe(ialu_reg);
9035 %}
9036 
// Long subtract with a memory source: dst = dst - LoadL(src)
// ("subq reg, mem"), opcode 0x2B with the wide REX reg/mem helper.
9037 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9038 %{
9039   match(Set dst (SubL dst (LoadL src)));
9040   effect(KILL cr);
9041 
9042   ins_cost(125);
9043   format %{ "subq    $dst, $src\t# long" %}
9044   opcode(0x2B);
9045   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9046   ins_pipe(ialu_reg_mem);
9047 %}
9048 
// Long read-modify-write subtract: StoreL to $dst of (LoadL $dst) - $src,
// covered by one "subq mem, reg" (opcode 0x29).  The encode helpers take the
// register operand first: (src, dst).
9049 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9050 %{
9051   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
9052   effect(KILL cr);
9053 
9054   ins_cost(150);
9055   format %{ "subq    $dst, $src\t# long" %}
9056   opcode(0x29); /* Opcode 29 /r */
9057   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9058   ins_pipe(ialu_mem_reg);
9059 %}
9060 
// Long read-modify-write subtract of an immL32 immediate from memory
// ("subq mem, imm").  Opcode 0x81 with /5 passed literally to RM_opc_mem.
9061 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9062 %{
9063   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
9064   effect(KILL cr);
9065 
9066   ins_cost(125); // XXX
9067   format %{ "subq    $dst, $src\t# long" %}
9068   opcode(0x81); /* Opcode 81 /5 id */
9069   ins_encode(REX_mem_wide(dst),
9070              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
9071   ins_pipe(ialu_mem_imm);
9072 %}
9073 
9074 // Subtract from a pointer
9075 // XXX hmpf???
// Matches a pointer add whose offset is a negated int, AddP(dst, 0 - src),
// and emits a single 64-bit "subq dst, src" (opcode 0x2B, wide REX helper).
// NOTE(review): src is an int register (rRegI) consumed by a 64-bit subtract;
// confirm what guarantees the upper 32 bits of src hold here.
9076 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
9077 %{
9078   match(Set dst (AddP dst (SubI zero src)));
9079   effect(KILL cr);
9080 
9081   format %{ "subq    $dst, $src\t# ptr - int" %}
9082   opcode(0x2B);
9083   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9084   ins_pipe(ialu_reg_reg);
9085 %}
9086 
// Int negate of a register ("negl reg"): matches SubI(zero, dst) where zero
// is an immI0 operand.  Encoded as opcode 0xF7 with /3.
9087 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
9088 %{
9089   match(Set dst (SubI zero dst));
9090   effect(KILL cr);
9091 
9092   format %{ "negl    $dst\t# int" %}
9093   opcode(0xF7, 0x03);  // Opcode F7 /3
9094   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9095   ins_pipe(ialu_reg);
9096 %}
9097 
// Int negate in memory ("negl mem"): matches StoreI of SubI(zero, LoadI dst)
// back to the same memory operand.  Opcode 0xF7 with the /3 extension taken
// from the secondary opcode.
// NOTE(review): this memory form uses the ialu_reg pipe class, whereas other
// single-operand memory forms in this file (e.g. incI_mem) use ialu_mem_imm
// -- confirm whether that is intended.
9098 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
9099 %{
9100   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
9101   effect(KILL cr);
9102 
9103   format %{ "negl    $dst\t# int" %}
9104   opcode(0xF7, 0x03);  // Opcode F7 /3
9105   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9106   ins_pipe(ialu_reg);
9107 %}
9108 
// Long negate of a register ("negq reg"): matches SubL(zero, dst) where zero
// is an immL0 operand.  Opcode 0xF7 with /3 and the wide REX helper.
9109 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
9110 %{
9111   match(Set dst (SubL zero dst));
9112   effect(KILL cr);
9113 
9114   format %{ "negq    $dst\t# long" %}
9115   opcode(0xF7, 0x03);  // Opcode F7 /3
9116   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9117   ins_pipe(ialu_reg);
9118 %}
9119 
// Long negate in memory ("negq mem"): matches StoreL of SubL(zero, LoadL dst)
// back to the same memory operand.  Opcode 0xF7 with the /3 extension taken
// from the secondary opcode, wide REX helper.
// NOTE(review): this memory form uses the ialu_reg pipe class, whereas other
// single-operand memory forms in this file (e.g. incL_mem) use ialu_mem_imm
// -- confirm whether that is intended.
9120 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
9121 %{
9122   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
9123   effect(KILL cr);
9124 
9125   format %{ "negq    $dst\t# long" %}
9126   opcode(0xF7, 0x03);  // Opcode F7 /3
9127   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9128   ins_pipe(ialu_reg);
9129 %}
9130 
9131 
9132 //----------Multiplication/Division Instructions-------------------------------
9133 // Integer Multiplication Instructions
9134 // Multiply Register
9135 
// Int multiply, register *= register ("imull reg, reg").
// Two-byte opcode 0x0F 0xAF (primary then secondary); cost 300.
9136 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9137 %{
9138   match(Set dst (MulI dst src));
9139   effect(KILL cr);
9140 
9141   ins_cost(300);
9142   format %{ "imull   $dst, $src\t# int" %}
9143   opcode(0x0F, 0xAF);
9144   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9145   ins_pipe(ialu_reg_reg_alu0);
9146 %}
9147 
// Three-operand int multiply by an immediate: dst = src * imm
// ("imull dst, src, imm").  dst is a separate operand from src.
// Opcode 0x69, emitted through OpcSE(imm) with the constant via Con8or32.
9148 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
9149 %{
9150   match(Set dst (MulI src imm));
9151   effect(KILL cr);
9152 
9153   ins_cost(300);
9154   format %{ "imull   $dst, $src, $imm\t# int" %}
9155   opcode(0x69); /* 69 /r id */
9156   ins_encode(REX_reg_reg(dst, src),
9157              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9158   ins_pipe(ialu_reg_reg_alu0);
9159 %}
9160 
// Int multiply with a memory source: dst = dst * LoadI(src)
// ("imull reg, mem").  Two-byte opcode 0x0F 0xAF; cost 350.
9161 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
9162 %{
9163   match(Set dst (MulI dst (LoadI src)));
9164   effect(KILL cr);
9165 
9166   ins_cost(350);
9167   format %{ "imull   $dst, $src\t# int" %}
9168   opcode(0x0F, 0xAF);
9169   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
9170   ins_pipe(ialu_reg_mem_alu0);
9171 %}
9172 
// Three-operand int multiply of a loaded value by an immediate:
// dst = LoadI(src) * imm ("imull dst, mem, imm").  Opcode 0x69.
9173 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
9174 %{
9175   match(Set dst (MulI (LoadI src) imm));
9176   effect(KILL cr);
9177 
9178   ins_cost(300);
9179   format %{ "imull   $dst, $src, $imm\t# int" %}
9180   opcode(0x69); /* 69 /r id */
9181   ins_encode(REX_reg_mem(dst, src),
9182              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9183   ins_pipe(ialu_reg_mem_alu0);
9184 %}
9185 
// Long multiply, register *= register ("imulq reg, reg").
// Two-byte opcode 0x0F 0xAF with the wide REX helper; cost 300.
9186 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9187 %{
9188   match(Set dst (MulL dst src));
9189   effect(KILL cr);
9190 
9191   ins_cost(300);
9192   format %{ "imulq   $dst, $src\t# long" %}
9193   opcode(0x0F, 0xAF);
9194   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
9195   ins_pipe(ialu_reg_reg_alu0);
9196 %}
9197 
// Three-operand long multiply by an immL32 immediate: dst = src * imm
// ("imulq dst, src, imm").  Opcode 0x69 with the wide REX helper.
9198 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
9199 %{
9200   match(Set dst (MulL src imm));
9201   effect(KILL cr);
9202 
9203   ins_cost(300);
9204   format %{ "imulq   $dst, $src, $imm\t# long" %}
9205   opcode(0x69); /* 69 /r id */
9206   ins_encode(REX_reg_reg_wide(dst, src),
9207              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
9208   ins_pipe(ialu_reg_reg_alu0);
9209 %}
9210 
// Long multiply with a memory source: dst = dst * LoadL(src)
// ("imulq reg, mem").  Two-byte opcode 0x0F 0xAF, wide REX helper; cost 350.
9211 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
9212 %{
9213   match(Set dst (MulL dst (LoadL src)));
9214   effect(KILL cr);
9215 
9216   ins_cost(350);
9217   format %{ "imulq   $dst, $src\t# long" %}
9218   opcode(0x0F, 0xAF);
9219   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
9220   ins_pipe(ialu_reg_mem_alu0);
9221 %}
9222 
// Three-operand long multiply of a loaded value by an immL32 immediate:
// dst = LoadL(src) * imm ("imulq dst, mem, imm").  Opcode 0x69, wide REX helper.
9223 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
9224 %{
9225   match(Set dst (MulL (LoadL src) imm));
9226   effect(KILL cr);
9227 
9228   ins_cost(300);
9229   format %{ "imulq   $dst, $src, $imm\t# long" %}
9230   opcode(0x69); /* 69 /r id */
9231   ins_encode(REX_reg_mem_wide(dst, src),
9232              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
9233   ins_pipe(ialu_reg_mem_alu0);
9234 %}
9235 
// High half of a long multiply (MulHiL).  One input is pinned to rax and is
// both used and killed; the result is pinned to rdx; the other input may be
// any long register except rax.  Emits the one-operand form, opcode 0xF7 /5,
// on $src only (rax and rdx are implicit in the encoding).
9236 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9237 %{
9238   match(Set dst (MulHiL src rax));
9239   effect(USE_KILL rax, KILL cr);
9240 
9241   ins_cost(300);
9242   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
9243   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9244   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9245   ins_pipe(ialu_reg_reg_alu0);
9246 %}
9247 
// Int division.  The dividend and the quotient share rax; rdx is declared
// killed; the divisor may be any int register except rax and rdx.
// Per the format string, the sequence first special-cases
// rax == 0x80000000 with divisor -1 (skipping the idiv), otherwise it runs
// cdql + idivl.  The guard/cdql part comes from cdql_enc (defined outside this
// chunk -- presumably emitting exactly that prologue); the idivl itself is
// opcode 0xF7 /7 on $div.
9248 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9249                    rFlagsReg cr)
9250 %{
9251   match(Set rax (DivI rax div));
9252   effect(KILL rdx, KILL cr);
9253 
9254   ins_cost(30*100+10*100); // XXX
9255   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9256             "jne,s   normal\n\t"
9257             "xorl    rdx, rdx\n\t"
9258             "cmpl    $div, -1\n\t"
9259             "je,s    done\n"
9260     "normal: cdql\n\t"
9261             "idivl   $div\n"
9262     "done:"        %}
9263   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9264   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9265   ins_pipe(ialu_reg_reg_alu0);
9266 %}
9267 
// Long division.  The dividend and the quotient share rax; rdx is declared
// killed; the divisor may be any long register except rax and rdx.
// Per the format string, the sequence special-cases
// rax == 0x8000000000000000 with divisor -1 (skipping the idiv), otherwise it
// runs cdqq + idivq.  The guard/cdqq part comes from cdqq_enc (defined outside
// this chunk -- presumably emitting exactly that prologue); the idivq itself
// is opcode 0xF7 /7 on $div with the wide REX helper.
9268 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9269                    rFlagsReg cr)
9270 %{
9271   match(Set rax (DivL rax div));
9272   effect(KILL rdx, KILL cr);
9273 
9274   ins_cost(30*100+10*100); // XXX
9275   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9276             "cmpq    rax, rdx\n\t"
9277             "jne,s   normal\n\t"
9278             "xorl    rdx, rdx\n\t"
9279             "cmpq    $div, -1\n\t"
9280             "je,s    done\n"
9281     "normal: cdqq\n\t"
9282             "idivq   $div\n"
9283     "done:"        %}
9284   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9285   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9286   ins_pipe(ialu_reg_reg_alu0);
9287 %}
9288 
9289 // Integer DIVMOD with Register, both quotient and mod results
// The match rule has no "Set": DivModI is matched as a whole, and only the
// flags are declared killed (rdx is an operand here, not a KILL, unlike in
// divI_rReg).  The emitted sequence and encoding are the same as divI_rReg;
// only the pipe class differs (pipe_slow).
9290 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
9291                              rFlagsReg cr)
9292 %{
9293   match(DivModI rax div);
9294   effect(KILL cr);
9295 
9296   ins_cost(30*100+10*100); // XXX
9297   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
9298             "jne,s   normal\n\t"
9299             "xorl    rdx, rdx\n\t"
9300             "cmpl    $div, -1\n\t"
9301             "je,s    done\n"
9302     "normal: cdql\n\t"
9303             "idivl   $div\n"
9304     "done:"        %}
9305   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9306   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9307   ins_pipe(pipe_slow);
9308 %}
9309 
9310 // Long DIVMOD with Register, both quotient and mod results
// The match rule has no "Set": DivModL is matched as a whole, and only the
// flags are declared killed (rdx is an operand here, not a KILL, unlike in
// divL_rReg).  The emitted sequence and encoding are the same as divL_rReg;
// only the pipe class differs (pipe_slow).
9311 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
9312                              rFlagsReg cr)
9313 %{
9314   match(DivModL rax div);
9315   effect(KILL cr);
9316 
9317   ins_cost(30*100+10*100); // XXX
9318   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
9319             "cmpq    rax, rdx\n\t"
9320             "jne,s   normal\n\t"
9321             "xorl    rdx, rdx\n\t"
9322             "cmpq    $div, -1\n\t"
9323             "je,s    done\n"
9324     "normal: cdqq\n\t"
9325             "idivq   $div\n"
9326     "done:"        %}
9327   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9328   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9329   ins_pipe(pipe_slow);
9330 %}
9331 
9332 //----------- DivL-By-Constant-Expansions--------------------------------------
9333 // DivI cases are handled by the compiler
9334 
9335 // Magic constant, reciprocal of 10
// Loads the 64-bit constant 0x6666666666666667 into $dst.
// There is no match rule: the instruct only defines dst (effect DEF) and is
// instantiated from the divL_10 expansion.  The printed constant in the
// format string is kept identical to the value actually encoded.
9336 instruct loadConL_0x6666666666666667(rRegL dst)
9337 %{
9338   effect(DEF dst);
9339 
9340   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
9341   ins_encode(load_immL(dst, 0x6666666666666667));
9342   ins_pipe(ialu_reg);
9343 %}
9344 
// Helper for the divL_10 expansion (no match rule).  Defines $dst (pinned to
// rdx), uses $src, and both uses and kills rax; flags are killed.
// Same encoding as mulHiL_rReg: one-operand opcode 0xF7 /5 on $src.
9345 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
9346 %{
9347   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
9348 
9349   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
9350   opcode(0xF7, 0x5); /* Opcode F7 /5 */
9351   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
9352   ins_pipe(ialu_reg_reg_alu0);
9353 %}
9354 
// Helper for the divL_10 expansion (no match rule): arithmetic right shift of
// $dst by the fixed count 63 (0x3F), in place (USE_DEF).  Opcode 0xC1 /7.
9355 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
9356 %{
9357   effect(USE_DEF dst, KILL cr);
9358 
9359   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
9360   opcode(0xC1, 0x7); /* C1 /7 ib */
9361   ins_encode(reg_opc_imm_wide(dst, 0x3F));
9362   ins_pipe(ialu_reg);
9363 %}
9364 
// Helper for the divL_10 expansion (no match rule): arithmetic right shift of
// $dst by the fixed count 2, in place (USE_DEF).  Opcode 0xC1 /7.
9365 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
9366 %{
9367   effect(USE_DEF dst, KILL cr);
9368 
9369   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
9370   opcode(0xC1, 0x7); /* C1 /7 ib */
9371   ins_encode(reg_opc_imm_wide(dst, 0x2));
9372   ins_pipe(ialu_reg);
9373 %}
9374 
// Long division by the immL10 operand (presumably the constant 10; defined
// outside this chunk), expanded into a multiply-high by a magic constant plus
// shifts and a subtract instead of an idiv.  The result is pinned to rdx.
// Note that the expansion shifts $src in place (sarL_rReg_63 is USE_DEF).
9375 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
9376 %{
9377   match(Set dst (DivL src div));
9378 
9379   ins_cost((5+8)*100);
9380   expand %{
9381     rax_RegL rax;                     // Killed temp
9382     rFlagsReg cr;                     // Killed
9383     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9384     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9385     sarL_rReg_63(src, cr);            // sarq  src, 63
9386     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9387     subL_rReg(dst, src, cr);          // subq  rdx, src
9388   %}
9389 %}
9390 
9391 //-----------------------------------------------------------------------------
9392 
// Int remainder.  The dividend is pinned to rax (declared killed) and the
// result is pinned to rdx; the divisor may be any int register except rax
// and rdx.  The emitted sequence and encoding are the same as divI_rReg
// (cdql_enc prologue + idivl, opcode 0xF7 /7); only the result register and
// the declared cost (300) differ.
9393 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9394                    rFlagsReg cr)
9395 %{
9396   match(Set rdx (ModI rax div));
9397   effect(KILL rax, KILL cr);
9398 
9399   ins_cost(300); // XXX
9400   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9401             "jne,s   normal\n\t"
9402             "xorl    rdx, rdx\n\t"
9403             "cmpl    $div, -1\n\t"
9404             "je,s    done\n"
9405     "normal: cdql\n\t"
9406             "idivl   $div\n"
9407     "done:"        %}
9408   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9409   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9410   ins_pipe(ialu_reg_reg_alu0);
9411 %}
9412 
// Long remainder.  The dividend is pinned to rax (declared killed) and the
// result is pinned to rdx; the divisor may be any long register except rax
// and rdx.  The emitted sequence and encoding are the same as divL_rReg
// (cdqq_enc prologue + idivq, opcode 0xF7 /7); only the result register and
// the declared cost (300) differ.
9413 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9414                    rFlagsReg cr)
9415 %{
9416   match(Set rdx (ModL rax div));
9417   effect(KILL rax, KILL cr);
9418 
9419   ins_cost(300); // XXX
9420   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9421             "cmpq    rax, rdx\n\t"
9422             "jne,s   normal\n\t"
9423             "xorl    rdx, rdx\n\t"
9424             "cmpq    $div, -1\n\t"
9425             "je,s    done\n"
9426     "normal: cdqq\n\t"
9427             "idivq   $div\n"
9428     "done:"        %}
9429   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9430   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9431   ins_pipe(ialu_reg_reg_alu0);
9432 %}
9433 
9434 // Integer Shift Instructions
9435 // Shift Left by one
// Int left shift of a register by an immI1 count (presumably the constant 1).
// Uses the count-less shift form, opcode 0xD1 /4, so no immediate byte is
// emitted.  Flags are declared killed.
9436 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9437 %{
9438   match(Set dst (LShiftI dst shift));
9439   effect(KILL cr);
9440 
9441   format %{ "sall    $dst, $shift" %}
9442   opcode(0xD1, 0x4); /* D1 /4 */
9443   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9444   ins_pipe(ialu_reg);
9445 %}
9446 
9447 // Shift Left by one
// Int left shift in memory by an immI1 count: matches StoreI of
// LShiftI(LoadI dst, shift) back to the same memory operand.
// Opcode 0xD1 with the /4 extension taken from the secondary opcode.
9448 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9449 %{
9450   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9451   effect(KILL cr);
9452 
9453   format %{ "sall    $dst, $shift\t" %}
9454   opcode(0xD1, 0x4); /* D1 /4 */
9455   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9456   ins_pipe(ialu_mem_imm);
9457 %}
9458 
9459 // Shift Left by 8-bit immediate
// Int left shift of a register by an immI8 count ("sall reg, imm8").
// Opcode 0xC1 /4; the whole encoding is produced by reg_opc_imm.
9460 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9461 %{
9462   match(Set dst (LShiftI dst shift));
9463   effect(KILL cr);
9464 
9465   format %{ "sall    $dst, $shift" %}
9466   opcode(0xC1, 0x4); /* C1 /4 ib */
9467   ins_encode(reg_opc_imm(dst, shift));
9468   ins_pipe(ialu_reg);
9469 %}
9470 
9471 // Shift Left by 8-bit immediate
// Int left shift in memory by an immI8 count: matches StoreI of
// LShiftI(LoadI dst, shift) back to the same memory operand.
// Opcode 0xC1 with /4 from the secondary opcode, followed by the count.
9472 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9473 %{
9474   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9475   effect(KILL cr);
9476 
9477   format %{ "sall    $dst, $shift" %}
9478   opcode(0xC1, 0x4); /* C1 /4 ib */
9479   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9480   ins_pipe(ialu_mem_imm);
9481 %}
9482 
9483 // Shift Left by variable
// Int left shift of a register by a variable count.  The count operand is
// pinned to rcx (rcx_RegI) and does not appear in the encoding: opcode
// 0xD3 /4 only names $dst.
9484 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9485 %{
9486   match(Set dst (LShiftI dst shift));
9487   effect(KILL cr);
9488 
9489   format %{ "sall    $dst, $shift" %}
9490   opcode(0xD3, 0x4); /* D3 /4 */
9491   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9492   ins_pipe(ialu_reg_reg);
9493 %}
9494 
9495 // Shift Left by variable
// Int left shift in memory by a variable count pinned to rcx: matches StoreI
// of LShiftI(LoadI dst, shift) back to the same memory operand.
// Opcode 0xD3 with /4 from the secondary opcode; the count is implicit.
9496 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9497 %{
9498   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9499   effect(KILL cr);
9500 
9501   format %{ "sall    $dst, $shift" %}
9502   opcode(0xD3, 0x4); /* D3 /4 */
9503   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9504   ins_pipe(ialu_mem_reg);
9505 %}
9506 
9507 // Arithmetic shift right by one
// Int arithmetic right shift of a register by an immI1 count (presumably the
// constant 1).  Count-less shift form, opcode 0xD1 /7.
9508 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9509 %{
9510   match(Set dst (RShiftI dst shift));
9511   effect(KILL cr);
9512 
9513   format %{ "sarl    $dst, $shift" %}
9514   opcode(0xD1, 0x7); /* D1 /7 */
9515   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9516   ins_pipe(ialu_reg);
9517 %}
9518 
9519 // Arithmetic shift right by one
// Int arithmetic right shift in memory by an immI1 count: matches StoreI of
// RShiftI(LoadI dst, shift) back to the same memory operand.
// Opcode 0xD1 with the /7 extension taken from the secondary opcode.
9520 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9521 %{
9522   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9523   effect(KILL cr);
9524 
9525   format %{ "sarl    $dst, $shift" %}
9526   opcode(0xD1, 0x7); /* D1 /7 */
9527   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9528   ins_pipe(ialu_mem_imm);
9529 %}
9530 
9531 // Arithmetic Shift Right by 8-bit immediate
9532 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9533 %{  // In-place arithmetic right shift of int register $dst by the immediate $shift.
9534   match(Set dst (RShiftI dst shift));  // dst is both the shifted input and the result
9535   effect(KILL cr);  // the flags register is declared clobbered
9536 
9537   format %{ "sarl    $dst, $shift" %}
9538   opcode(0xC1, 0x7); /* C1 /7 ib */
9539   ins_encode(reg_opc_imm(dst, shift));
9540   ins_pipe(ialu_reg);  // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
9541 %}
9542 
9543 // Arithmetic Shift Right by 8-bit immediate
9544 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9545 %{  // Read-modify-write form: arithmetic right shift of the int at memory operand $dst by the immediate $shift.
9546   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load and store use the same dst operand
9547   effect(KILL cr);  // the flags register is declared clobbered
9548 
9549   format %{ "sarl    $dst, $shift" %}
9550   opcode(0xC1, 0x7); /* C1 /7 ib */
9551   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary is the 0x7 given to opcode()
9552   ins_pipe(ialu_mem_imm);
9553 %}
9554 
9555 // Arithmetic Shift Right by variable
9556 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9557 %{  // In-place arithmetic right shift of int register $dst by a variable count held in an rcx_RegI operand.
9558   match(Set dst (RShiftI dst shift));
9559   effect(KILL cr);  // the flags register is declared clobbered
9560 
9561   format %{ "sarl    $dst, $shift" %}
9562   opcode(0xD3, 0x7); /* D3 /7 */
9563   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded; only dst appears in the encoding
9564   ins_pipe(ialu_reg_reg);
9565 %}
9566 
9567 // Arithmetic Shift Right by variable
9568 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9569 %{  // Read-modify-write form: arithmetic right shift of the int at memory operand $dst by a variable count (rcx_RegI).
9570   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load and store use the same dst operand
9571   effect(KILL cr);  // the flags register is declared clobbered
9572 
9573   format %{ "sarl    $dst, $shift" %}
9574   opcode(0xD3, 0x7); /* D3 /7 */
9575   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded; secondary is 0x7
9576   ins_pipe(ialu_mem_reg);
9577 %}
9578 
9579 // Logical shift right by one
9580 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9581 %{  // In-place logical (unsigned) right shift of int register $dst by one (immI1 count).
9582   match(Set dst (URShiftI dst shift));
9583   effect(KILL cr);  // the flags register is declared clobbered
9584 
9585   format %{ "shrl    $dst, $shift" %}
9586   opcode(0xD1, 0x5); /* D1 /5 */
9587   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // no immediate is emitted: $shift is absent from the encoding
9588   ins_pipe(ialu_reg);
9589 %}
9590 
9591 // Logical shift right by one
9592 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9593 %{  // Read-modify-write form: logical right shift by one of the int at memory operand $dst.
9594   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load and store use the same dst operand
9595   effect(KILL cr);  // the flags register is declared clobbered
9596 
9597   format %{ "shrl    $dst, $shift" %}
9598   opcode(0xD1, 0x5); /* D1 /5 */
9599   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is emitted; secondary is 0x5
9600   ins_pipe(ialu_mem_imm);
9601 %}
9602 
9603 // Logical Shift Right by 8-bit immediate
9604 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9605 %{  // In-place logical (unsigned) right shift of int register $dst by the immediate $shift.
9606   match(Set dst (URShiftI dst shift));  // dst is both the shifted input and the result
9607   effect(KILL cr);  // the flags register is declared clobbered
9608 
9609   format %{ "shrl    $dst, $shift" %}
9610   opcode(0xC1, 0x5); /* C1 /5 ib */
9611   ins_encode(reg_opc_imm(dst, shift));  // encoding class is defined outside this section
9612   ins_pipe(ialu_reg);
9613 %}
9614 
9615 // Logical Shift Right by 8-bit immediate
9616 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9617 %{  // Read-modify-write form: logical right shift of the int at memory operand $dst by the immediate $shift.
9618   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load and store use the same dst operand
9619   effect(KILL cr);  // the flags register is declared clobbered
9620 
9621   format %{ "shrl    $dst, $shift" %}
9622   opcode(0xC1, 0x5); /* C1 /5 ib */
9623   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary is the 0x5 given to opcode()
9624   ins_pipe(ialu_mem_imm);
9625 %}
9626 
9627 // Logical Shift Right by variable
9628 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9629 %{  // In-place logical right shift of int register $dst by a variable count held in an rcx_RegI operand.
9630   match(Set dst (URShiftI dst shift));
9631   effect(KILL cr);  // the flags register is declared clobbered
9632 
9633   format %{ "shrl    $dst, $shift" %}
9634   opcode(0xD3, 0x5); /* D3 /5 */
9635   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded; only dst appears in the encoding
9636   ins_pipe(ialu_reg_reg);
9637 %}
9638 
9639 // Logical Shift Right by variable
9640 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9641 %{  // Read-modify-write form: logical right shift of the int at memory operand $dst by a variable count (rcx_RegI).
9642   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load and store use the same dst operand
9643   effect(KILL cr);  // the flags register is declared clobbered
9644 
9645   format %{ "shrl    $dst, $shift" %}
9646   opcode(0xD3, 0x5); /* D3 /5 */
9647   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded; secondary is 0x5
9648   ins_pipe(ialu_mem_reg);
9649 %}
9650 
9651 // Long Shift Instructions
9652 // Shift Left by one
9653 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9654 %{  // In-place left shift of long register $dst by one (immI1 count).
9655   match(Set dst (LShiftL dst shift));
9656   effect(KILL cr);  // the flags register is declared clobbered
9657 
9658   format %{ "salq    $dst, $shift" %}
9659   opcode(0xD1, 0x4); /* D1 /4 */
9660   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no immediate emitted; the _wide REX class presumably selects 64-bit operand size
9661   ins_pipe(ialu_reg);
9662 %}
9663 
9664 // Shift Left by one
9665 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9666 %{  // Read-modify-write form: left shift by one of the long at memory operand $dst.
9667   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9668   effect(KILL cr);  // the flags register is declared clobbered
9669 
9670   format %{ "salq    $dst, $shift" %}
9671   opcode(0xD1, 0x4); /* D1 /4 */
9672   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is emitted; secondary is 0x4
9673   ins_pipe(ialu_mem_imm);
9674 %}
9675 
9676 // Shift Left by 8-bit immediate
9677 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9678 %{  // In-place left shift of long register $dst by the immediate $shift.
9679   match(Set dst (LShiftL dst shift));  // dst is both the shifted input and the result
9680   effect(KILL cr);  // the flags register is declared clobbered
9681 
9682   format %{ "salq    $dst, $shift" %}
9683   opcode(0xC1, 0x4); /* C1 /4 ib */
9684   ins_encode(reg_opc_imm_wide(dst, shift));  // _wide counterpart of the encoding used by salI_rReg_imm
9685   ins_pipe(ialu_reg);
9686 %}
9687 
9688 // Shift Left by 8-bit immediate
9689 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9690 %{  // Read-modify-write form: left shift of the long at memory operand $dst by the immediate $shift.
9691   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9692   effect(KILL cr);  // the flags register is declared clobbered
9693 
9694   format %{ "salq    $dst, $shift" %}
9695   opcode(0xC1, 0x4); /* C1 /4 ib */
9696   ins_encode(REX_mem_wide(dst), OpcP,
9697              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary is the 0x4 given to opcode()
9698   ins_pipe(ialu_mem_imm);
9699 %}
9700 
9701 // Shift Left by variable
9702 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9703 %{  // In-place left shift of long register $dst by a variable count held in an rcx_RegI operand.
9704   match(Set dst (LShiftL dst shift));
9705   effect(KILL cr);  // the flags register is declared clobbered
9706 
9707   format %{ "salq    $dst, $shift" %}
9708   opcode(0xD3, 0x4); /* D3 /4 */
9709   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded; only dst appears in the encoding
9710   ins_pipe(ialu_reg_reg);
9711 %}
9712 
9713 // Shift Left by variable
9714 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9715 %{  // Read-modify-write form: left shift of the long at memory operand $dst by a variable count (rcx_RegI).
9716   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9717   effect(KILL cr);  // the flags register is declared clobbered
9718 
9719   format %{ "salq    $dst, $shift" %}
9720   opcode(0xD3, 0x4); /* D3 /4 */
9721   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded; secondary is 0x4
9722   ins_pipe(ialu_mem_reg);
9723 %}
9724 
9725 // Arithmetic shift right by one
9726 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9727 %{  // In-place arithmetic right shift of long register $dst by one (immI1 count).
9728   match(Set dst (RShiftL dst shift));
9729   effect(KILL cr);  // the flags register is declared clobbered
9730 
9731   format %{ "sarq    $dst, $shift" %}
9732   opcode(0xD1, 0x7); /* D1 /7 */
9733   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // no immediate is emitted: $shift is absent from the encoding
9734   ins_pipe(ialu_reg);
9735 %}
9736 
9737 // Arithmetic shift right by one
9738 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9739 %{  // Read-modify-write form: arithmetic right shift by one of the long at memory operand $dst.
9740   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9741   effect(KILL cr);  // the flags register is declared clobbered
9742 
9743   format %{ "sarq    $dst, $shift" %}
9744   opcode(0xD1, 0x7); /* D1 /7 */
9745   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is emitted; secondary is 0x7
9746   ins_pipe(ialu_mem_imm);
9747 %}
9748 
9749 // Arithmetic Shift Right by 8-bit immediate
9750 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9751 %{  // In-place arithmetic right shift of long register $dst by the immediate $shift.
9752   match(Set dst (RShiftL dst shift));  // dst is both the shifted input and the result
9753   effect(KILL cr);  // the flags register is declared clobbered
9754 
9755   format %{ "sarq    $dst, $shift" %}
9756   opcode(0xC1, 0x7); /* C1 /7 ib */
9757   ins_encode(reg_opc_imm_wide(dst, shift));
9758   ins_pipe(ialu_reg);  // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
9759 %}
9760 
9761 // Arithmetic Shift Right by 8-bit immediate
9762 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9763 %{  // Read-modify-write form: arithmetic right shift of the long at memory operand $dst by the immediate $shift.
9764   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9765   effect(KILL cr);  // the flags register is declared clobbered
9766 
9767   format %{ "sarq    $dst, $shift" %}
9768   opcode(0xC1, 0x7); /* C1 /7 ib */
9769   ins_encode(REX_mem_wide(dst), OpcP,
9770              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary is the 0x7 given to opcode()
9771   ins_pipe(ialu_mem_imm);
9772 %}
9773 
9774 // Arithmetic Shift Right by variable
9775 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9776 %{  // In-place arithmetic right shift of long register $dst by a variable count held in an rcx_RegI operand.
9777   match(Set dst (RShiftL dst shift));
9778   effect(KILL cr);  // the flags register is declared clobbered
9779 
9780   format %{ "sarq    $dst, $shift" %}
9781   opcode(0xD3, 0x7); /* D3 /7 */
9782   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded; only dst appears in the encoding
9783   ins_pipe(ialu_reg_reg);
9784 %}
9785 
9786 // Arithmetic Shift Right by variable
9787 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9788 %{  // Read-modify-write form: arithmetic right shift of the long at memory operand $dst by a variable count (rcx_RegI).
9789   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9790   effect(KILL cr);  // the flags register is declared clobbered
9791 
9792   format %{ "sarq    $dst, $shift" %}
9793   opcode(0xD3, 0x7); /* D3 /7 */
9794   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded; secondary is 0x7
9795   ins_pipe(ialu_mem_reg);
9796 %}
9797 
9798 // Logical shift right by one
9799 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9800 %{  // In-place logical (unsigned) right shift of long register $dst by one (immI1 count).
9801   match(Set dst (URShiftL dst shift));
9802   effect(KILL cr);  // the flags register is declared clobbered
9803 
9804   format %{ "shrq    $dst, $shift" %}
9805   opcode(0xD1, 0x5); /* D1 /5 */
9806   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));  // no immediate is emitted: $shift is absent from the encoding
9807   ins_pipe(ialu_reg);
9808 %}
9809 
9810 // Logical shift right by one
9811 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9812 %{  // Read-modify-write form: logical right shift by one of the long at memory operand $dst.
9813   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9814   effect(KILL cr);  // the flags register is declared clobbered
9815 
9816   format %{ "shrq    $dst, $shift" %}
9817   opcode(0xD1, 0x5); /* D1 /5 */
9818   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // no immediate is emitted; secondary is 0x5
9819   ins_pipe(ialu_mem_imm);
9820 %}
9821 
9822 // Logical Shift Right by 8-bit immediate
9823 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9824 %{  // In-place logical (unsigned) right shift of long register $dst by the immediate $shift.
9825   match(Set dst (URShiftL dst shift));  // dst is both the shifted input and the result
9826   effect(KILL cr);  // the flags register is declared clobbered
9827 
9828   format %{ "shrq    $dst, $shift" %}
9829   opcode(0xC1, 0x5); /* C1 /5 ib */
9830   ins_encode(reg_opc_imm_wide(dst, shift));  // _wide counterpart of the encoding used by shrI_rReg_imm
9831   ins_pipe(ialu_reg);
9832 %}
9833 
9834 
9835 // Logical Shift Right by 8-bit immediate
9836 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9837 %{  // Read-modify-write form: logical right shift of the long at memory operand $dst by the immediate $shift.
9838   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9839   effect(KILL cr);  // the flags register is declared clobbered
9840 
9841   format %{ "shrq    $dst, $shift" %}
9842   opcode(0xC1, 0x5); /* C1 /5 ib */
9843   ins_encode(REX_mem_wide(dst), OpcP,
9844              RM_opc_mem(secondary, dst), Con8or32(shift));  // secondary is the 0x5 given to opcode()
9845   ins_pipe(ialu_mem_imm);
9846 %}
9847 
9848 // Logical Shift Right by variable
9849 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9850 %{  // In-place logical right shift of long register $dst by a variable count held in an rcx_RegI operand.
9851   match(Set dst (URShiftL dst shift));
9852   effect(KILL cr);  // the flags register is declared clobbered
9853 
9854   format %{ "shrq    $dst, $shift" %}
9855   opcode(0xD3, 0x5); /* D3 /5 */
9856   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded; only dst appears in the encoding
9857   ins_pipe(ialu_reg_reg);
9858 %}
9859 
9860 // Logical Shift Right by variable
9861 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9862 %{  // Read-modify-write form: logical right shift of the long at memory operand $dst by a variable count (rcx_RegI).
9863   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));  // load and store use the same dst operand
9864   effect(KILL cr);  // the flags register is declared clobbered
9865 
9866   format %{ "shrq    $dst, $shift" %}
9867   opcode(0xD3, 0x5); /* D3 /5 */
9868   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not encoded; secondary is 0x5
9869   ins_pipe(ialu_mem_reg);
9870 %}
9871 
9872 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9873 // This idiom is used by the compiler for the i2b bytecode.
9874 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9875 %{  // Replaces the shift pair with one movsbl from $src into $dst; no flags effect is declared.
9876   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));  // (src << 24) then arithmetic >> 24, same constant operand
9877 
9878   format %{ "movsbl  $dst, $src\t# i2b" %}
9879   opcode(0x0F, 0xBE);  // two-byte opcode, emitted via OpcP and OpcS below
9880   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // NOTE(review): REX_reg_breg presumably handles the byte-register source - confirm
9881   ins_pipe(ialu_reg_reg);
9882 %}
9883 
9884 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9885 // This idiom is used by the compiler for the i2s bytecode.
9886 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9887 %{  // Replaces the shift pair with one movswl from $src into $dst; no flags effect is declared.
9888   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));  // (src << 16) then arithmetic >> 16, same constant operand
9889 
9890   format %{ "movswl  $dst, $src\t# i2s" %}
9891   opcode(0x0F, 0xBF);  // two-byte opcode, emitted via OpcP and OpcS below
9892   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9893   ins_pipe(ialu_reg_reg);
9894 %}
9895 
9896 // ROL/ROR instructions
9897 
9898 // ROL expand
9899 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{  // Helper with no match rule: rotates int $dst left by one.
9900   effect(KILL cr, USE_DEF dst);  // dst is read and written; flags are declared clobbered
9901 
9902   format %{ "roll    $dst" %}
9903   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9904   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9905   ins_pipe(ialu_reg);  // instantiated from the expand rule of rolI_rReg_i1
9906 %}
9907 
9908 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{  // Helper with no match rule: rotates int $dst left by $shift.
9909   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
9910 
9911   format %{ "roll    $dst, $shift" %}
9912   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9913   ins_encode( reg_opc_imm(dst, shift) );
9914   ins_pipe(ialu_reg);  // instantiated from the expand rule of rolI_rReg_i8
9915 %}
9916 
9917 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9918 %{  // Helper with no match rule: rotates int $dst left by a variable count held in an rcx_RegI operand.
9919   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
9920 
9921   format %{ "roll    $dst, $shift" %}
9922   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9923   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded; dst comes from no_rcx_RegI, presumably to keep it out of RCX
9924   ins_pipe(ialu_reg_reg);  // instantiated from the expand rules of rolI_rReg_Var_C0 and rolI_rReg_Var_C32
9925 %}
9926 // end of ROL expand
9927 
9928 // Rotate Left by one
9929 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9930 %{  // Recognizes the shift-and-OR idiom for an int rotate left by one and expands it to rolI_rReg_imm1.
9931   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));  // both shifts take the same dst; counts are immI1 and immI_M1
9932 
9933   expand %{
9934     rolI_rReg_imm1(dst, cr);  // neither shift constant is forwarded to the helper
9935   %}
9936 %}
9937 
9938 // Rotate Left by 8-bit immediate
9939 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9940 %{  // Recognizes the shift-and-OR idiom for an int rotate left by a constant and expands it to rolI_rReg_imm8.
9941   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // low 5 bits of (lshift + rshift) must be zero
9942   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9943 
9944   expand %{
9945     rolI_rReg_imm8(dst, lshift, cr);  // the left-shift count becomes the rotate count
9946   %}
9947 %}
9948 
9949 // Rotate Left by variable
9950 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9951 %{  // Recognizes (dst << shift) | (dst >>> (0 - shift)) on ints and expands it to rolI_rReg_CL.
9952   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9953 
9954   expand %{
9955     rolI_rReg_CL(dst, shift, cr);  // the zero constant is not forwarded
9956   %}
9957 %}
9958 
9959 // Rotate Left by variable
9960 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9961 %{  // Recognizes (dst << shift) | (dst >>> (c32 - shift)) on ints and expands it to rolI_rReg_CL.
9962   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));  // c32 is an immI_32 operand (presumably the constant 32)
9963 
9964   expand %{
9965     rolI_rReg_CL(dst, shift, cr);  // the c32 constant is not forwarded
9966   %}
9967 %}
9968 
9969 // ROR expand
9970 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9971 %{  // Helper with no match rule: rotates int $dst right by one.
9972   effect(USE_DEF dst, KILL cr);  // dst is read and written; flags are declared clobbered
9973 
9974   format %{ "rorl    $dst" %}
9975   opcode(0xD1, 0x1); /* D1 /1 */
9976   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9977   ins_pipe(ialu_reg);  // instantiated from the expand rule of rorI_rReg_i1
9978 %}
9979 
9980 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9981 %{  // Helper with no match rule: rotates int $dst right by the immediate $shift.
9982   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
9983 
9984   format %{ "rorl    $dst, $shift" %}
9985   opcode(0xC1, 0x1); /* C1 /1 ib */
9986   ins_encode(reg_opc_imm(dst, shift));
9987   ins_pipe(ialu_reg);  // instantiated from the expand rule of rorI_rReg_i8
9988 %}
9989 
9990 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9991 %{  // Helper with no match rule: rotates int $dst right by a variable count held in an rcx_RegI operand.
9992   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
9993 
9994   format %{ "rorl    $dst, $shift" %}
9995   opcode(0xD3, 0x1); /* D3 /1 */
9996   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not encoded; dst comes from no_rcx_RegI, presumably to keep it out of RCX
9997   ins_pipe(ialu_reg_reg);  // instantiated from the expand rules of rorI_rReg_Var_C0 and rorI_rReg_Var_C32
9998 %}
9999 // end of ROR expand
10000 
10001 // Rotate Right by one
10002 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
10003 %{  // Recognizes the shift-and-OR idiom for an int rotate right by one and expands it to rorI_rReg_imm1.
10004   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));  // both shifts take the same dst; counts are immI1 and immI_M1
10005 
10006   expand %{
10007     rorI_rReg_imm1(dst, cr);  // neither shift constant is forwarded to the helper
10008   %}
10009 %}
10010 
10011 // Rotate Right by 8-bit immediate
10012 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
10013 %{  // Recognizes the shift-and-OR idiom for an int rotate right by a constant and expands it to rorI_rReg_imm8.
10014   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));  // low 5 bits of (rshift + lshift) must be zero
10015   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
10016 
10017   expand %{
10018     rorI_rReg_imm8(dst, rshift, cr);  // the right-shift count becomes the rotate count
10019   %}
10020 %}
10021 
10022 // Rotate Right by variable
10023 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10024 %{  // Recognizes (dst >>> shift) | (dst << (0 - shift)) on ints and expands it to rorI_rReg_CL.
10025   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
10026 
10027   expand %{
10028     rorI_rReg_CL(dst, shift, cr);  // the zero constant is not forwarded
10029   %}
10030 %}
10031 
10032 // Rotate Right by variable
10033 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
10034 %{  // Recognizes (dst >>> shift) | (dst << (c32 - shift)) on ints and expands it to rorI_rReg_CL.
10035   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));  // c32 is an immI_32 operand (presumably the constant 32)
10036 
10037   expand %{
10038     rorI_rReg_CL(dst, shift, cr);  // the c32 constant is not forwarded
10039   %}
10040 %}
10041 
10042 // for long rotate
10043 // ROL expand
10044 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{  // Helper with no match rule: rotates long $dst left by one.
10045   effect(USE_DEF dst, KILL cr);  // dst is read and written; flags are declared clobbered
10046 
10047   format %{ "rolq    $dst" %}
10048   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
10049   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
10050   ins_pipe(ialu_reg);  // instantiated from the expand rule of rolL_rReg_i1
10051 %}
10052 
10053 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{  // Helper with no match rule: rotates long $dst left by $shift.
10054   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
10055 
10056   format %{ "rolq    $dst, $shift" %}
10057   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
10058   ins_encode( reg_opc_imm_wide(dst, shift) );
10059   ins_pipe(ialu_reg);  // instantiated from the expand rule of rolL_rReg_i8
10060 %}
10061 
10062 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
10063 %{  // Helper with no match rule: rotates long $dst left by a variable count held in an rcx_RegI operand.
10064   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
10065 
10066   format %{ "rolq    $dst, $shift" %}
10067   opcode(0xD3, 0x0); /* Opcode D3 /0 */
10068   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded; dst comes from no_rcx_RegL, presumably to keep it out of RCX
10069   ins_pipe(ialu_reg_reg);  // instantiated from the expand rules of rolL_rReg_Var_C0 and rolL_rReg_Var_C64
10070 %}
10071 // end of ROL expand
10072 
10073 // Rotate Left by one
10074 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
10075 %{  // Recognizes the shift-and-OR idiom for a long rotate left by one and expands it to rolL_rReg_imm1.
10076   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));  // both shifts take the same dst; counts are immI1 and immI_M1
10077 
10078   expand %{
10079     rolL_rReg_imm1(dst, cr);  // neither shift constant is forwarded to the helper
10080   %}
10081 %}
10082 
10083 // Rotate Left by 8-bit immediate
10084 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
10085 %{  // Recognizes the shift-and-OR idiom for a long rotate left by a constant and expands it to rolL_rReg_imm8.
10086   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // low 6 bits of (lshift + rshift) must be zero
10087   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
10088 
10089   expand %{
10090     rolL_rReg_imm8(dst, lshift, cr);  // the left-shift count becomes the rotate count
10091   %}
10092 %}
10093 
10094 // Rotate Left by variable
10095 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10096 %{  // Recognizes (dst << shift) | (dst >>> (0 - shift)) on longs and expands it to rolL_rReg_CL.
10097   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));  // the count arithmetic is an int SubI
10098 
10099   expand %{
10100     rolL_rReg_CL(dst, shift, cr);  // the zero constant is not forwarded
10101   %}
10102 %}
10103 
10104 // Rotate Left by variable
10105 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
10106 %{  // Recognizes (dst << shift) | (dst >>> (c64 - shift)) on longs and expands it to rolL_rReg_CL.
10107   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));  // c64 is an immI_64 operand (presumably the constant 64)
10108 
10109   expand %{
10110     rolL_rReg_CL(dst, shift, cr);  // the c64 constant is not forwarded
10111   %}
10112 %}
10113 
10114 // ROR expand
10115 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
10116 %{  // Helper with no match rule: rotates long $dst right by one.
10117   effect(USE_DEF dst, KILL cr);  // dst is read and written; flags are declared clobbered
10118 
10119   format %{ "rorq    $dst" %}
10120   opcode(0xD1, 0x1); /* D1 /1 */
10121   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
10122   ins_pipe(ialu_reg);  // instantiated from the expand rule of rorL_rReg_i1
10123 %}
10124 
10125 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
10126 %{  // Helper with no match rule: rotates long $dst right by the immediate $shift.
10127   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
10128 
10129   format %{ "rorq    $dst, $shift" %}
10130   opcode(0xC1, 0x1); /* C1 /1 ib */
10131   ins_encode(reg_opc_imm_wide(dst, shift));
10132   ins_pipe(ialu_reg);  // instantiated from the expand rule of rorL_rReg_i8
10133 %}
10134 
10135 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
10136 %{  // Helper with no match rule: rotates long $dst right by a variable count held in an rcx_RegI operand.
10137   effect(USE_DEF dst, USE shift, KILL cr);  // dst is read and written; flags are declared clobbered
10138 
10139   format %{ "rorq    $dst, $shift" %}
10140   opcode(0xD3, 0x1); /* D3 /1 */
10141   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not encoded; dst comes from no_rcx_RegL, presumably to keep it out of RCX
10142   ins_pipe(ialu_reg_reg);  // instantiated from the expand rules of rorL_rReg_Var_C0 and rorL_rReg_Var_C64
10143 %}
10144 // end of ROR expand
10145 
10146 // Rotate Right by one
10147 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
10148 %{  // Recognizes the shift-and-OR idiom for a long rotate right by one and expands it to rorL_rReg_imm1.
10149   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));  // both shifts take the same dst; counts are immI1 and immI_M1
10150 
10151   expand %{
10152     rorL_rReg_imm1(dst, cr);  // neither shift constant is forwarded to the helper
10153   %}
10154 %}
10155 
10156 // Rotate Right by 8-bit immediate
10157 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
10158 %{  // Recognizes the shift-and-OR idiom for a long rotate right by a constant and expands it to rorL_rReg_imm8.
10159   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));  // low 6 bits of (rshift + lshift) must be zero
10160   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
10161 
10162   expand %{
10163     rorL_rReg_imm8(dst, rshift, cr);  // the right-shift count becomes the rotate count
10164   %}
10165 %}
10166 
10167 // Rotate Right by variable
10168 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
10169 %{  // Recognizes (dst >>> shift) | (dst << (0 - shift)) on longs and expands it to rorL_rReg_CL.
10170   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));  // the count arithmetic is an int SubI
10171 
10172   expand %{
10173     rorL_rReg_CL(dst, shift, cr);  // the zero constant is not forwarded
10174   %}
10175 %}
10176 
10177 // Rotate Right by variable
10178 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
10179 %{  // Recognizes (dst >>> shift) | (dst << (c64 - shift)) on longs and expands it to rorL_rReg_CL.
10180   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));  // c64 is an immI_64 operand (presumably the constant 64)
10181 
10182   expand %{
10183     rorL_rReg_CL(dst, shift, cr);  // the c64 constant is not forwarded
10184   %}
10185 %}
10186 
10187 // Logical Instructions
10188 
10189 // Integer Logical Instructions
10190 
10191 // And Instructions
10192 // And Register with Register
10193 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10194 %{  // In-place bitwise AND of int register $dst with int register $src.
10195   match(Set dst (AndI dst src));  // dst is both the first input and the result
10196   effect(KILL cr);  // the flags register is declared clobbered
10197 
10198   format %{ "andl    $dst, $src\t# int" %}
10199   opcode(0x23);
10200   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10201   ins_pipe(ialu_reg_reg);
10202 %}
10203 
10204 // And Register with Immediate 255
10205 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
10206 %{  // AND of int $dst with 255, implemented as a movzbl of $dst onto itself; no flags effect is declared.
10207   match(Set dst (AndI dst src));
10208 
10209   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
10210   opcode(0x0F, 0xB6);  // two-byte opcode, emitted via OpcP and OpcS below
10211   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is used as both operands; the constant is not encoded
10212   ins_pipe(ialu_reg);
10213 %}
10214 
10215 // And Register with Immediate 255 and promote to long
10216 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
10217 %{  // Folds (long)(src & 255) into one movzbl from int $src into long $dst; no flags effect is declared.
10218   match(Set dst (ConvI2L (AndI src mask)));  // the mask and the int-to-long conversion are matched together
10219 
10220   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
10221   opcode(0x0F, 0xB6);
10222   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // the mask constant is not encoded
10223   ins_pipe(ialu_reg);
10224 %}
10225 
10226 // And Register with Immediate 65535
10227 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
10228 %{  // AND of int $dst with 65535, implemented as a movzwl of $dst onto itself; no flags effect is declared.
10229   match(Set dst (AndI dst src));
10230 
10231   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
10232   opcode(0x0F, 0xB7);  // two-byte opcode, emitted via OpcP and OpcS below
10233   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is used as both operands; the constant is not encoded
10234   ins_pipe(ialu_reg);
10235 %}
10236 
10237 // And Register with Immediate 65535 and promote to long
10238 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
10239 %{  // Folds (long)(src & 65535) into one movzwl from int $src into long $dst; no flags effect is declared.
10240   match(Set dst (ConvI2L (AndI src mask)));  // the mask and the int-to-long conversion are matched together
10241 
10242   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
10243   opcode(0x0F, 0xB7);
10244   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // the mask constant is not encoded
10245   ins_pipe(ialu_reg);
10246 %}
10247 
10248 // And Register with Immediate
10249 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10250 %{  // In-place bitwise AND of int register $dst with the immediate $src.
10251   match(Set dst (AndI dst src));
10252   effect(KILL cr);  // the flags register is declared clobbered
10253 
10254   format %{ "andl    $dst, $src\t# int" %}
10255   opcode(0x81, 0x04); /* Opcode 81 /4 */
10256   ins_encode(OpcSErm(dst, src), Con8or32(src));  // NOTE(review): presumably picks the short imm8 form when the constant fits - confirm in the encode block
10257   ins_pipe(ialu_reg);
10258 %}
10259 
10260 // And Register with Memory
10261 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10262 %{  // In-place bitwise AND of int register $dst with an int loaded from memory operand $src.
10263   match(Set dst (AndI dst (LoadI src)));  // the load is folded into the AND
10264   effect(KILL cr);  // the flags register is declared clobbered
10265 
10266   ins_cost(125);
10267   format %{ "andl    $dst, $src\t# int" %}
10268   opcode(0x23);
10269   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10270   ins_pipe(ialu_reg_mem);
10271 %}
10272 
10273 // And Memory with Register
10274 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10275 %{  // Read-modify-write form: ANDs the int at memory operand $dst with int register $src.
10276   match(Set dst (StoreI dst (AndI (LoadI dst) src)));  // load and store use the same dst operand
10277   effect(KILL cr);  // the flags register is declared clobbered
10278 
10279   ins_cost(150);
10280   format %{ "andl    $dst, $src\t# int" %}
10281   opcode(0x21); /* Opcode 21 /r */
10282   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand first: src is the register, dst the memory
10283   ins_pipe(ialu_mem_reg);
10284 %}
10285 
10286 // And Memory with Immediate
10287 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
10288 %{  // Read-modify-write form: ANDs the int at memory operand $dst with the immediate $src.
10289   match(Set dst (StoreI dst (AndI (LoadI dst) src)));  // load and store use the same dst operand
10290   effect(KILL cr);  // the flags register is declared clobbered
10291 
10292   ins_cost(125);
10293   format %{ "andl    $dst, $src\t# int" %}
10294   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10295   ins_encode(REX_mem(dst), OpcSE(src),
10296              RM_opc_mem(secondary, dst), Con8or32(src));  // secondary is the 0x4 given to opcode()
10297   ins_pipe(ialu_mem_imm);
10298 %}
10299 
10300 // Or Instructions
10301 // Or Register with Register
10302 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10303 %{  // In-place bitwise OR of int register $dst with int register $src.
10304   match(Set dst (OrI dst src));  // dst is both the first input and the result
10305   effect(KILL cr);  // the flags register is declared clobbered
10306 
10307   format %{ "orl     $dst, $src\t# int" %}
10308   opcode(0x0B);
10309   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10310   ins_pipe(ialu_reg_reg);
10311 %}
10312 
10313 // Or Register with Immediate
10314 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10315 %{  // In-place bitwise OR of int register $dst with the immediate $src.
10316   match(Set dst (OrI dst src));
10317   effect(KILL cr);  // the flags register is declared clobbered
10318 
10319   format %{ "orl     $dst, $src\t# int" %}
10320   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10321   ins_encode(OpcSErm(dst, src), Con8or32(src));  // NOTE(review): presumably picks the short imm8 form when the constant fits - confirm in the encode block
10322   ins_pipe(ialu_reg);
10323 %}
10324 
10325 // Or Register with Memory
10326 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10327 %{  // In-place bitwise OR of int register $dst with an int loaded from memory operand $src.
10328   match(Set dst (OrI dst (LoadI src)));  // the load is folded into the OR
10329   effect(KILL cr);  // the flags register is declared clobbered
10330 
10331   ins_cost(125);
10332   format %{ "orl     $dst, $src\t# int" %}
10333   opcode(0x0B);
10334   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10335   ins_pipe(ialu_reg_mem);
10336 %}
10337 
10338 // Or Memory with Register
10339 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10340 %{  // Read-modify-write form: ORs the int at memory operand $dst with int register $src.
10341   match(Set dst (StoreI dst (OrI (LoadI dst) src)));  // load and store use the same dst operand
10342   effect(KILL cr);  // the flags register is declared clobbered
10343 
10344   ins_cost(150);
10345   format %{ "orl     $dst, $src\t# int" %}
10346   opcode(0x09); /* Opcode 09 /r */
10347   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand first: src is the register, dst the memory
10348   ins_pipe(ialu_mem_reg);
10349 %}
10350 
10351 // Or Memory with Immediate
10352 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10353 %{  // Read-modify-write form: ORs the int at memory operand $dst with the immediate $src.
10354   match(Set dst (StoreI dst (OrI (LoadI dst) src)));  // load and store use the same dst operand
10355   effect(KILL cr);  // the flags register is declared clobbered
10356 
10357   ins_cost(125);
10358   format %{ "orl     $dst, $src\t# int" %}
10359   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10360   ins_encode(REX_mem(dst), OpcSE(src),
10361              RM_opc_mem(secondary, dst), Con8or32(src));  // secondary is the 0x1 given to opcode()
10362   ins_pipe(ialu_mem_imm);
10363 %}
10364 
10365 // Xor Instructions
10366 // Xor Register with Register
10367 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10368 %{  // In-place bitwise XOR of int register $dst with int register $src.
10369   match(Set dst (XorI dst src));  // dst is both the first input and the result
10370   effect(KILL cr);  // the flags register is declared clobbered
10371 
10372   format %{ "xorl    $dst, $src\t# int" %}
10373   opcode(0x33);
10374   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
10375   ins_pipe(ialu_reg_reg);
10376 %}
10377 
10378 // Xor Register with Immediate -1
10379 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{  // XOR of int $dst with -1, emitted as a bitwise NOT; no flags effect is declared.
10380   match(Set dst (XorI dst imm));  
10381 
10382   format %{ "not    $dst" %}  // NOTE(review): prints "not" while the encoder below calls notl
10383   ins_encode %{
10384      __ notl($dst$$Register);  // uses the assembler call directly instead of opcode()/encoding classes
10385   %}
10386   ins_pipe(ialu_reg);
10387 %}
10388 
10389 // Xor Register with Immediate
10390 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10391 %{  // In-place bitwise XOR of int register $dst with the immediate $src.
10392   match(Set dst (XorI dst src));
10393   effect(KILL cr);  // the flags register is declared clobbered
10394 
10395   format %{ "xorl    $dst, $src\t# int" %}
10396   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10397   ins_encode(OpcSErm(dst, src), Con8or32(src));  // NOTE(review): presumably picks the short imm8 form when the constant fits - confirm in the encode block
10398   ins_pipe(ialu_reg);
10399 %}
10400 
10401 // Xor Register with Memory
10402 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10403 %{  // In-place bitwise XOR of int register $dst with an int loaded from memory operand $src.
10404   match(Set dst (XorI dst (LoadI src)));  // the load is folded into the XOR
10405   effect(KILL cr);  // the flags register is declared clobbered
10406 
10407   ins_cost(125);
10408   format %{ "xorl    $dst, $src\t# int" %}
10409   opcode(0x33);
10410   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10411   ins_pipe(ialu_reg_mem);
10412 %}
10413 
10414 // Xor Memory with Register
10415 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10416 %{  // Read-modify-write form: XORs the int at memory operand $dst with int register $src.
10417   match(Set dst (StoreI dst (XorI (LoadI dst) src)));  // load and store use the same dst operand
10418   effect(KILL cr);  // the flags register is declared clobbered
10419 
10420   ins_cost(150);
10421   format %{ "xorl    $dst, $src\t# int" %}
10422   opcode(0x31); /* Opcode 31 /r */
10423   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand first: src is the register, dst the memory
10424   ins_pipe(ialu_mem_reg);
10425 %}
10426 
10427 // Xor Memory with Immediate
10428 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10429 %{  // Read-modify-write form: XORs the int at memory operand $dst with the immediate $src.
10430   match(Set dst (StoreI dst (XorI (LoadI dst) src)));  // load and store use the same dst operand
10431   effect(KILL cr);  // the flags register is declared clobbered
10432 
10433   ins_cost(125);
10434   format %{ "xorl    $dst, $src\t# int" %}
10435   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10436   ins_encode(REX_mem(dst), OpcSE(src),
10437              RM_opc_mem(secondary, dst), Con8or32(src));  // secondary is the 0x6 given to opcode()
10438   ins_pipe(ialu_mem_imm);
10439 %}
10440 
10441 
10442 // Long Logical Instructions
10443 
10444 // And Instructions
10445 // And Register with Register
10446 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10447 %{  // In-place bitwise AND of long register $dst with long register $src.
10448   match(Set dst (AndL dst src));  // dst is both the first input and the result
10449   effect(KILL cr);  // the flags register is declared clobbered
10450 
10451   format %{ "andq    $dst, $src\t# long" %}
10452   opcode(0x23);
10453   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // same opcode as andI_rReg; only the REX class differs
10454   ins_pipe(ialu_reg_reg);
10455 %}
10456 
10457 // And Register with Immediate 255
10458 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10459 %{  // AND of long $dst with 255, implemented as a movzbq of $dst onto itself; no flags effect is declared.
10460   match(Set dst (AndL dst src));
10461 
10462   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10463   opcode(0x0F, 0xB6);  // two-byte opcode, emitted via OpcP and OpcS below
10464   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is used as both operands; the constant is not encoded
10465   ins_pipe(ialu_reg);
10466 %}
10467 
10468 // And Register with Immediate 65535
10469 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10470 %{  // AND of long $dst with 65535, implemented as a movzwq of $dst onto itself; no flags effect is declared.
10471   match(Set dst (AndL dst src));
10472 
10473   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10474   opcode(0x0F, 0xB7);  // two-byte opcode, emitted via OpcP and OpcS below
10475   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));  // dst is used as both operands; the constant is not encoded
10476   ins_pipe(ialu_reg);
10477 %}
10478 
10479 // And Register with Immediate
10480 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10481 %{  // In-place bitwise AND of long register $dst with the immL32 immediate $src.
10482   match(Set dst (AndL dst src));
10483   effect(KILL cr);  // the flags register is declared clobbered
10484 
10485   format %{ "andq    $dst, $src\t# long" %}
10486   opcode(0x81, 0x04); /* Opcode 81 /4 */
10487   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // NOTE(review): immL32 presumably limits the constant to 32 bits - confirm at the operand definition
10488   ins_pipe(ialu_reg);
10489 %}
10490 
10491 // And Register with Memory
10492 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10493 %{  // In-place bitwise AND of long register $dst with a long loaded from memory operand $src.
10494   match(Set dst (AndL dst (LoadL src)));  // the load is folded into the AND
10495   effect(KILL cr);  // the flags register is declared clobbered
10496 
10497   ins_cost(125);
10498   format %{ "andq    $dst, $src\t# long" %}
10499   opcode(0x23);
10500   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10501   ins_pipe(ialu_reg_mem);
10502 %}
10503 
10504 // And Memory with Register
10505 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10506 %{  // Read-modify-write form: ANDs the long at memory operand $dst with long register $src.
10507   match(Set dst (StoreL dst (AndL (LoadL dst) src)));  // load and store use the same dst operand
10508   effect(KILL cr);  // the flags register is declared clobbered
10509 
10510   ins_cost(150);
10511   format %{ "andq    $dst, $src\t# long" %}
10512   opcode(0x21); /* Opcode 21 /r */
10513   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand first: src is the register, dst the memory
10514   ins_pipe(ialu_mem_reg);
10515 %}
10516 
10517 // And Memory with Immediate
10518 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10519 %{  // Read-modify-write form: ANDs the long at memory operand $dst with the immL32 immediate $src.
10520   match(Set dst (StoreL dst (AndL (LoadL dst) src)));  // load and store use the same dst operand
10521   effect(KILL cr);  // the flags register is declared clobbered
10522 
10523   ins_cost(125);
10524   format %{ "andq    $dst, $src\t# long" %}
10525   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10526   ins_encode(REX_mem_wide(dst), OpcSE(src),
10527              RM_opc_mem(secondary, dst), Con8or32(src));  // secondary is the 0x4 given to opcode()
10528   ins_pipe(ialu_mem_imm);
10529 %}
10530 
10531 // Or Instructions
10532 // Or Register with Register
// Two-address long OR: dst is both an input and the result. Uses opcode 0x0B
// with a wide REX prefix encoder and declares cr killed.
10533 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10534 %{
10535   match(Set dst (OrL dst src));
10536   effect(KILL cr);
10537 
10538   format %{ "orq     $dst, $src\t# long" %}
10539   opcode(0x0B);
10540   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10541   ins_pipe(ialu_reg_reg);
10542 %}
10543 
10544 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches an OrL whose second input is a CastP2X of a pointer, so the pointer
// register is used directly as the source of the orq. Same opcode and encoders
// as the plain register-register OR.
10545 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10546   match(Set dst (OrL dst (CastP2X src)));
10547   effect(KILL cr);
10548 
10549   format %{ "orq     $dst, $src\t# long" %}
10550   opcode(0x0B);
10551   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10552   ins_pipe(ialu_reg_reg);
10553 %}
10554 
10555 
10556 // Or Register with Immediate
// Matches (OrL dst src) for an immL32 operand. Opcode 0x81 with the /1
// extension as secondary opcode; cr is declared killed.
10557 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10558 %{
10559   match(Set dst (OrL dst src));
10560   effect(KILL cr);
10561 
10562   format %{ "orq     $dst, $src\t# long" %}
10563   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10564   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10565   ins_pipe(ialu_reg);
10566 %}
10567 
10568 // Or Register with Memory
// Folds the LoadL of the memory operand into the OR (opcode 0x0B with a
// memory r/m operand). Costed at 125; cr is declared killed.
10569 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10570 %{
10571   match(Set dst (OrL dst (LoadL src)));
10572   effect(KILL cr);
10573 
10574   ins_cost(125);
10575   format %{ "orq     $dst, $src\t# long" %}
10576   opcode(0x0B);
10577   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10578   ins_pipe(ialu_reg_mem);
10579 %}
10580 
10581 // Or Memory with Register
// Read-modify-write OR: load and store must use the same memory operand dst.
// Encoders receive (src, dst): src is the register operand, dst the memory one.
10582 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10583 %{
10584   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10585   effect(KILL cr);
10586 
10587   ins_cost(150);
10588   format %{ "orq     $dst, $src\t# long" %}
10589   opcode(0x09); /* Opcode 09 /r */
10590   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10591   ins_pipe(ialu_mem_reg);
10592 %}
10593 
10594 // Or Memory with Immediate
// Read-modify-write OR of an immL32 into the long at dst. The /1 extension is
// passed to RM_opc_mem as the secondary opcode; cr is declared killed.
10595 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10596 %{
10597   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10598   effect(KILL cr);
10599 
10600   ins_cost(125);
10601   format %{ "orq     $dst, $src\t# long" %}
10602   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10603   ins_encode(REX_mem_wide(dst), OpcSE(src),
10604              RM_opc_mem(secondary, dst), Con8or32(src));
10605   ins_pipe(ialu_mem_imm);
10606 %}
10607 
10608 // Xor Instructions
10609 // Xor Register with Register
// Two-address long XOR: dst is both an input and the result. Opcode 0x33 with
// a wide REX prefix encoder; cr is declared killed.
10610 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10611 %{
10612   match(Set dst (XorL dst src));
10613   effect(KILL cr);
10614 
10615   format %{ "xorq    $dst, $src\t# long" %}
10616   opcode(0x33);
10617   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10618   ins_pipe(ialu_reg_reg);
10619 %}
10620 
10621 // Xor Register with Immediate -1
// Matches (XorL dst imm) for the immL_M1 operand and emits notq through the
// macro assembler instead of a table-driven encoding. This form takes no
// rFlagsReg operand and declares no KILL effect.
10622 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10623   match(Set dst (XorL dst imm));  
10624 
10625   format %{ "notq   $dst" %}  
10626   ins_encode %{
10627      __ notq($dst$$Register);
10628   %}
10629   ins_pipe(ialu_reg);
10630 %}
10631 
10632 // Xor Register with Immediate
// Matches (XorL dst src) for a general immL32 operand. Opcode 0x81 with the
// /6 extension as secondary opcode; cr is declared killed.
10633 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10634 %{
10635   match(Set dst (XorL dst src));
10636   effect(KILL cr);
10637 
10638   format %{ "xorq    $dst, $src\t# long" %}
10639   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10640   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10641   ins_pipe(ialu_reg);
10642 %}
10643 
10644 // Xor Register with Memory
// Folds the LoadL of the memory operand into the XOR (opcode 0x33 with a
// memory r/m operand). Costed at 125; cr is declared killed.
10645 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10646 %{
10647   match(Set dst (XorL dst (LoadL src)));
10648   effect(KILL cr);
10649 
10650   ins_cost(125);
10651   format %{ "xorq    $dst, $src\t# long" %}
10652   opcode(0x33);
10653   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10654   ins_pipe(ialu_reg_mem);
10655 %}
10656 
10657 // Xor Memory with Register
// Read-modify-write XOR: load and store must use the same memory operand dst.
// Encoders receive (src, dst): src is the register operand, dst the memory one.
10658 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10659 %{
10660   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10661   effect(KILL cr);
10662 
10663   ins_cost(150);
10664   format %{ "xorq    $dst, $src\t# long" %}
10665   opcode(0x31); /* Opcode 31 /r */
10666   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10667   ins_pipe(ialu_mem_reg);
10668 %}
10669 
10670 // Xor Memory with Immediate
// Read-modify-write XOR of an immL32 into the long at dst. The /6 extension is
// passed to RM_opc_mem as the secondary opcode; cr is declared killed.
10671 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10672 %{
10673   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10674   effect(KILL cr);
10675 
10676   ins_cost(125);
10677   format %{ "xorq    $dst, $src\t# long" %}
10678   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10679   ins_encode(REX_mem_wide(dst), OpcSE(src),
10680              RM_opc_mem(secondary, dst), Con8or32(src));
10681   ins_pipe(ialu_mem_imm);
10682 %}
10683 
10684 // Convert Int to Boolean
// Matches Conv2B on an int register. The encoding is a three-instruction
// sequence (see the format): a 32-bit test of src against itself (opcode
// 0x85), a setnz into dst, then a movzbl (0x0F 0xB6) of dst onto itself.
// cr is declared killed because the test writes the flags.
10685 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10686 %{
10687   match(Set dst (Conv2B src));
10688   effect(KILL cr);
10689 
10690   format %{ "testl   $src, $src\t# ci2b\n\t"
10691             "setnz   $dst\n\t"
10692             "movzbl  $dst, $dst" %}
10693   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10694              setNZ_reg(dst),
10695              REX_reg_breg(dst, dst), // movzbl
10696              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10697   ins_pipe(pipe_slow); // XXX
10698 %}
10699 
10700 // Convert Pointer to Boolean
// Same sequence as the int form, except the source is a pointer register and
// the test uses the wide REX encoder (printed as testq) so all 64 bits of the
// pointer are examined. The result is still an int register.
10701 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10702 %{
10703   match(Set dst (Conv2B src));
10704   effect(KILL cr);
10705 
10706   format %{ "testq   $src, $src\t# cp2b\n\t"
10707             "setnz   $dst\n\t"
10708             "movzbl  $dst, $dst" %}
10709   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10710              setNZ_reg(dst),
10711              REX_reg_breg(dst, dst), // movzbl
10712              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10713   ins_pipe(pipe_slow); // XXX
10714 %}
10715 
// Materialize CmpLTMask of two int registers into dst.
// Per the format: compare p with q, set the low byte of dst on "less than",
// zero-extend it, then negate, so dst ends up 0 or all ones (-1).
// cr is declared killed; the cost of 400 is marked XXX (provisional).
10716 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10717 %{
10718   match(Set dst (CmpLTMask p q));
10719   effect(KILL cr);
10720 
10721   ins_cost(400); // XXX
10722   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10723             "setlt   $dst\n\t"
10724             "movzbl  $dst, $dst\n\t"
10725             "negl    $dst" %}
10726   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10727              setLT_reg(dst),
10728              REX_reg_breg(dst, dst), // movzbl
10729              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10730              neg_reg(dst));
10731   ins_pipe(pipe_slow);
10732 %}
10733 
// CmpLTMask against the constant zero (immI0), done in place on dst.
// A single arithmetic right shift by 31 (C1 /7 with immediate 0x1F) replicates
// the sign bit across the register, giving -1 for negative dst and 0 otherwise.
10734 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10735 %{
10736   match(Set dst (CmpLTMask dst zero));
10737   effect(KILL cr);
10738 
10739   ins_cost(100); // XXX
10740   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10741   opcode(0xC1, 0x7);  /* C1 /7 ib */
10742   ins_encode(reg_opc_imm(dst, 0x1F));
10743   ins_pipe(ialu_reg);
10744 %}
10745 
10746 
// Fused conditional add: matches p = ((CmpLTMask p q) & y) + (p - q).
// Per the format: subtract q from p, turn the resulting borrow into a mask
// with sbbl tmp,tmp, AND the mask with y and add it back into p.
// tmp is a scratch register (TEMP) and cr is declared killed.
10747 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10748                          rRegI tmp,
10749                          rFlagsReg cr)
10750 %{
10751   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10752   effect(TEMP tmp, KILL cr);
10753 
10754   ins_cost(400); // XXX
10755   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10756             "sbbl    $tmp, $tmp\n\t"
10757             "andl    $tmp, $y\n\t"
10758             "addl    $p, $tmp" %}
10759   ins_encode(enc_cmpLTP(p, q, y, tmp));
10760   ins_pipe(pipe_cmplt);
10761 %}
10762 
10763 /* If I enable this, I encourage spilling in the inner loop of compress.
10764 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10765 %{
10766   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10767   effect( TEMP tmp, KILL cr );
10768   ins_cost(400);
10769 
10770   format %{ "SUB    $p,$q\n\t"
10771             "SBB    RCX,RCX\n\t"
10772             "AND    RCX,$y\n\t"
10773             "ADD    $p,RCX" %}
10774   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10775 %}
10776 */
10777 
10778 //---------- FP Instructions------------------------------------------------
10779 
// Float compare of two XMM registers producing an rFlagsRegU result.
// After the ucomiss (0x0F 0x2E) the cmpfp_fixup encoding is appended. The
// format describes it: when the parity flag reports an unordered (NaN)
// compare, the saved flags are masked with 0xffffff2b, which clears ZF and PF
// and leaves CF set.
10780 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10781 %{
10782   match(Set cr (CmpF src1 src2));
10783 
10784   ins_cost(145);
10785   format %{ "ucomiss $src1, $src2\n\t"
10786             "jnp,s   exit\n\t"
10787             "pushfq\t# saw NaN, set CF\n\t"
10788             "andq    [rsp], #0xffffff2b\n\t"
10789             "popfq\n"
10790     "exit:   nop\t# avoid branch to branch" %}
10791   opcode(0x0F, 0x2E);
10792   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10793              cmpfp_fixup);
10794   ins_pipe(pipe_slow);
10795 %}
10796 
// Float compare of two XMM registers producing an rFlagsRegUCF result.
// Emits only the ucomiss through the macro assembler; no NaN flag fixup
// follows. Costed at 100 like the other flags-only (CF) compare forms, since
// it is strictly shorter than the fixup variant costed at 145.
10797 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10798   match(Set cr (CmpF src1 src2));
10799 
10800   ins_cost(100);
10801   format %{ "ucomiss $src1, $src2" %}
10802   ins_encode %{
10803     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10804   %}
10805   ins_pipe(pipe_slow);
10806 %}
10807 
// Float compare of an XMM register against a float loaded from memory,
// producing an rFlagsRegU result. The LoadF is folded into the ucomiss
// (0x0F 0x2E) and the cmpfp_fixup encoding is appended; the format shows the
// fixup rewriting the flags when the compare was unordered (NaN).
10808 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10809 %{
10810   match(Set cr (CmpF src1 (LoadF src2)));
10811 
10812   ins_cost(145);
10813   format %{ "ucomiss $src1, $src2\n\t"
10814             "jnp,s   exit\n\t"
10815             "pushfq\t# saw NaN, set CF\n\t"
10816             "andq    [rsp], #0xffffff2b\n\t"
10817             "popfq\n"
10818     "exit:   nop\t# avoid branch to branch" %}
10819   opcode(0x0F, 0x2E);
10820   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10821              cmpfp_fixup);
10822   ins_pipe(pipe_slow);
10823 %}
10824 
// Float register-vs-memory compare producing an rFlagsRegUCF result.
// Only the ucomiss (0x0F 0x2E) is encoded; no cmpfp_fixup follows, and the
// cost is 100 rather than the 145 of the fixup variant.
10825 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10826   match(Set cr (CmpF src1 (LoadF src2)));
10827 
10828   ins_cost(100);
10829   format %{ "ucomiss $src1, $src2" %}
10830   opcode(0x0F, 0x2E);
10831   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10832   ins_pipe(pipe_slow);
10833 %}
10834 
// Float compare of an XMM register against a float constant, producing an
// rFlagsRegU result. The constant operand is encoded by load_immF with a
// memory-style REX encoder, so the format prints it as a memory reference
// ([$src2]) like the other FP-immediate forms. cmpfp_fixup is appended; the
// format shows it rewriting the flags when the compare was unordered (NaN).
10835 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10836 %{
10837   match(Set cr (CmpF src1 src2));
10838 
10839   ins_cost(145);
10840   format %{ "ucomiss $src1, [$src2]\n\t"
10841             "jnp,s   exit\n\t"
10842             "pushfq\t# saw NaN, set CF\n\t"
10843             "andq    [rsp], #0xffffff2b\n\t"
10844             "popfq\n"
10845     "exit:   nop\t# avoid branch to branch" %}
10846   opcode(0x0F, 0x2E);
10847   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10848              cmpfp_fixup);
10849   ins_pipe(pipe_slow);
10850 %}
10851 
// Float register-vs-constant compare producing an rFlagsRegUCF result.
// Only the ucomiss (0x0F 0x2E) is encoded, with no cmpfp_fixup. The constant
// is encoded by load_immF with a memory-style REX encoder, so the format
// prints it as a memory reference ([$src2]) like the other FP-immediate forms.
10852 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10853   match(Set cr (CmpF src1 src2));
10854 
10855   ins_cost(100);
10856   format %{ "ucomiss $src1, [$src2]" %}
10857   opcode(0x0F, 0x2E);
10858   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10859   ins_pipe(pipe_slow);
10860 %}
10861 
// Double compare of two XMM registers producing an rFlagsRegU result.
// ucomisd is the 0x66-prefixed form of 0x0F 0x2E; OpcP (the 0x66 byte) is
// listed ahead of the REX encoder in ins_encode. cmpfp_fixup is appended; the
// format shows it rewriting the flags when the compare was unordered (NaN).
10862 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10863 %{
10864   match(Set cr (CmpD src1 src2));
10865 
10866   ins_cost(145);
10867   format %{ "ucomisd $src1, $src2\n\t"
10868             "jnp,s   exit\n\t"
10869             "pushfq\t# saw NaN, set CF\n\t"
10870             "andq    [rsp], #0xffffff2b\n\t"
10871             "popfq\n"
10872     "exit:   nop\t# avoid branch to branch" %}
10873   opcode(0x66, 0x0F, 0x2E);
10874   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10875              cmpfp_fixup);
10876   ins_pipe(pipe_slow);
10877 %}
10878 
// Double compare of two XMM registers producing an rFlagsRegUCF result.
// Emits only the ucomisd through the macro assembler; no NaN flag fixup
// follows. The format text is what disassembly listings print, so it names
// just the instruction and its operands.
10879 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10880   match(Set cr (CmpD src1 src2));
10881 
10882   ins_cost(100);
10883   format %{ "ucomisd $src1, $src2" %}
10884   ins_encode %{
10885     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10886   %}
10887   ins_pipe(pipe_slow);
10888 %}
10889 
// Double compare of an XMM register against a double loaded from memory,
// producing an rFlagsRegU result. The LoadD is folded into the ucomisd
// (0x66 0x0F 0x2E) and cmpfp_fixup is appended; the format shows the fixup
// rewriting the flags when the compare was unordered (NaN).
10890 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10891 %{
10892   match(Set cr (CmpD src1 (LoadD src2)));
10893 
10894   ins_cost(145);
10895   format %{ "ucomisd $src1, $src2\n\t"
10896             "jnp,s   exit\n\t"
10897             "pushfq\t# saw NaN, set CF\n\t"
10898             "andq    [rsp], #0xffffff2b\n\t"
10899             "popfq\n"
10900     "exit:   nop\t# avoid branch to branch" %}
10901   opcode(0x66, 0x0F, 0x2E);
10902   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10903              cmpfp_fixup);
10904   ins_pipe(pipe_slow);
10905 %}
10906 
// Double register-vs-memory compare producing an rFlagsRegUCF result.
// Only the ucomisd (0x66 0x0F 0x2E) is encoded; no cmpfp_fixup follows, and
// the cost is 100 rather than the 145 of the fixup variant.
10907 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10908   match(Set cr (CmpD src1 (LoadD src2)));
10909 
10910   ins_cost(100);
10911   format %{ "ucomisd $src1, $src2" %}
10912   opcode(0x66, 0x0F, 0x2E);
10913   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10914   ins_pipe(pipe_slow);
10915 %}
10916 
// Double compare of an XMM register against a double constant, producing an
// rFlagsRegU result. The constant operand is encoded by load_immD and printed
// as a memory reference ([$src2]); cmpfp_fixup is appended for the unordered
// (NaN) case.
// NOTE(review): how load_immD materializes the constant is not visible here.
10917 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10918 %{
10919   match(Set cr (CmpD src1 src2));
10920 
10921   ins_cost(145);
10922   format %{ "ucomisd $src1, [$src2]\n\t"
10923             "jnp,s   exit\n\t"
10924             "pushfq\t# saw NaN, set CF\n\t"
10925             "andq    [rsp], #0xffffff2b\n\t"
10926             "popfq\n"
10927     "exit:   nop\t# avoid branch to branch" %}
10928   opcode(0x66, 0x0F, 0x2E);
10929   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10930              cmpfp_fixup);
10931   ins_pipe(pipe_slow);
10932 %}
10933 
// Double register-vs-constant compare producing an rFlagsRegUCF result.
// Only the ucomisd (0x66 0x0F 0x2E) is encoded, with the constant operand
// supplied by load_immD; no cmpfp_fixup follows.
10934 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10935   match(Set cr (CmpD src1 src2));
10936 
10937   ins_cost(100);
10938   format %{ "ucomisd $src1, [$src2]" %}
10939   opcode(0x66, 0x0F, 0x2E);
10940   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10941   ins_pipe(pipe_slow);
10942 %}
10943 
10944 // Compare into -1,0,1
// Three-way float compare (CmpF3) of two XMM registers into an int register.
// Per the format: dst is preset to -1 and kept when the compare is unordered
// (jp) or below (jb); otherwise setne/movzbl produce 0 for equal and 1 for
// greater. The tail after the ucomiss is emitted by cmpfp3(dst).
10945 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10946 %{
10947   match(Set dst (CmpF3 src1 src2));
10948   effect(KILL cr);
10949 
10950   ins_cost(275);
10951   format %{ "ucomiss $src1, $src2\n\t"
10952             "movl    $dst, #-1\n\t"
10953             "jp,s    done\n\t"
10954             "jb,s    done\n\t"
10955             "setne   $dst\n\t"
10956             "movzbl  $dst, $dst\n"
10957     "done:" %}
10958 
10959   opcode(0x0F, 0x2E);
10960   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10961              cmpfp3(dst));
10962   ins_pipe(pipe_slow);
10963 %}
10964 
10965 // Compare into -1,0,1
// Three-way float compare (CmpF3) with the second operand loaded from memory;
// the LoadF is folded into the ucomiss. Per the format, unordered and "below"
// both yield -1; otherwise the result is 0 or 1 via setne/movzbl.
10966 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10967 %{
10968   match(Set dst (CmpF3 src1 (LoadF src2)));
10969   effect(KILL cr);
10970 
10971   ins_cost(275);
10972   format %{ "ucomiss $src1, $src2\n\t"
10973             "movl    $dst, #-1\n\t"
10974             "jp,s    done\n\t"
10975             "jb,s    done\n\t"
10976             "setne   $dst\n\t"
10977             "movzbl  $dst, $dst\n"
10978     "done:" %}
10979 
10980   opcode(0x0F, 0x2E);
10981   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10982              cmpfp3(dst));
10983   ins_pipe(pipe_slow);
10984 %}
10985 
10986 // Compare into -1,0,1
// Three-way float compare (CmpF3) against a float constant. The constant
// operand is encoded by load_immF and printed as a memory reference
// ([$src2]). Result convention is the same as the register form: -1 for
// unordered or below, else 0 or 1.
10987 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10988 %{
10989   match(Set dst (CmpF3 src1 src2));
10990   effect(KILL cr);
10991 
10992   ins_cost(275);
10993   format %{ "ucomiss $src1, [$src2]\n\t"
10994             "movl    $dst, #-1\n\t"
10995             "jp,s    done\n\t"
10996             "jb,s    done\n\t"
10997             "setne   $dst\n\t"
10998             "movzbl  $dst, $dst\n"
10999     "done:" %}
11000 
11001   opcode(0x0F, 0x2E);
11002   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
11003              cmpfp3(dst));
11004   ins_pipe(pipe_slow);
11005 %}
11006 
11007 // Compare into -1,0,1
// Three-way double compare (CmpD3) of two XMM registers into an int register.
// Uses the 0x66-prefixed ucomisd followed by cmpfp3(dst); per the format,
// unordered and "below" both yield -1, otherwise setne/movzbl give 0 or 1.
11008 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
11009 %{
11010   match(Set dst (CmpD3 src1 src2));
11011   effect(KILL cr);
11012 
11013   ins_cost(275);
11014   format %{ "ucomisd $src1, $src2\n\t"
11015             "movl    $dst, #-1\n\t"
11016             "jp,s    done\n\t"
11017             "jb,s    done\n\t"
11018             "setne   $dst\n\t"
11019             "movzbl  $dst, $dst\n"
11020     "done:" %}
11021 
11022   opcode(0x66, 0x0F, 0x2E);
11023   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
11024              cmpfp3(dst));
11025   ins_pipe(pipe_slow);
11026 %}
11027 
11028 // Compare into -1,0,1
// Three-way double compare (CmpD3) with the second operand loaded from memory;
// the LoadD is folded into the ucomisd. Result convention per the format:
// -1 for unordered or below, else 0 or 1.
11029 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
11030 %{
11031   match(Set dst (CmpD3 src1 (LoadD src2)));
11032   effect(KILL cr);
11033 
11034   ins_cost(275);
11035   format %{ "ucomisd $src1, $src2\n\t"
11036             "movl    $dst, #-1\n\t"
11037             "jp,s    done\n\t"
11038             "jb,s    done\n\t"
11039             "setne   $dst\n\t"
11040             "movzbl  $dst, $dst\n"
11041     "done:" %}
11042 
11043   opcode(0x66, 0x0F, 0x2E);
11044   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
11045              cmpfp3(dst));
11046   ins_pipe(pipe_slow);
11047 %}
11048 
11049 // Compare into -1,0,1
// Three-way double compare (CmpD3) against a double constant. The constant
// operand is encoded by load_immD and printed as a memory reference
// ([$src2]). Result convention per the format: -1 for unordered or below,
// else 0 or 1.
11050 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
11051 %{
11052   match(Set dst (CmpD3 src1 src2));
11053   effect(KILL cr);
11054 
11055   ins_cost(275);
11056   format %{ "ucomisd $src1, [$src2]\n\t"
11057             "movl    $dst, #-1\n\t"
11058             "jp,s    done\n\t"
11059             "jb,s    done\n\t"
11060             "setne   $dst\n\t"
11061             "movzbl  $dst, $dst\n"
11062     "done:" %}
11063 
11064   opcode(0x66, 0x0F, 0x2E);
11065   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
11066              cmpfp3(dst));
11067   ins_pipe(pipe_slow);
11068 %}
11069 
// Float add, register + register (two-address: dst is input and result).
// addss is 0xF3 0x0F 0x58; OpcP (the 0xF3 byte) is listed ahead of the REX
// encoder in ins_encode. No flags operand is involved.
11070 instruct addF_reg(regF dst, regF src)
11071 %{
11072   match(Set dst (AddF dst src));
11073 
11074   format %{ "addss   $dst, $src" %}
11075   ins_cost(150); // XXX
11076   opcode(0xF3, 0x0F, 0x58);
11077   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11078   ins_pipe(pipe_slow);
11079 %}
11080 
// Float add with the second operand loaded from memory; the LoadF is folded
// into the addss (0xF3 0x0F 0x58) instead of being matched separately.
11081 instruct addF_mem(regF dst, memory src)
11082 %{
11083   match(Set dst (AddF dst (LoadF src)));
11084 
11085   format %{ "addss   $dst, $src" %}
11086   ins_cost(150); // XXX
11087   opcode(0xF3, 0x0F, 0x58);
11088   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11089   ins_pipe(pipe_slow);
11090 %}
11091 
// Float add of a constant (immF). The operand bytes come from load_immF and
// the format prints the constant as a memory reference ([$src]).
// NOTE(review): how load_immF materializes the constant is not visible here.
11092 instruct addF_imm(regF dst, immF src)
11093 %{
11094   match(Set dst (AddF dst src));
11095 
11096   format %{ "addss   $dst, [$src]" %}
11097   ins_cost(150); // XXX
11098   opcode(0xF3, 0x0F, 0x58);
11099   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11100   ins_pipe(pipe_slow);
11101 %}
11102 
// Double add, register + register (two-address: dst is input and result).
// addsd shares opcode 0x0F 0x58 with the float form; the 0xF2 prefix selects
// the double-precision variant.
11103 instruct addD_reg(regD dst, regD src)
11104 %{
11105   match(Set dst (AddD dst src));
11106 
11107   format %{ "addsd   $dst, $src" %}
11108   ins_cost(150); // XXX
11109   opcode(0xF2, 0x0F, 0x58);
11110   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11111   ins_pipe(pipe_slow);
11112 %}
11113 
// Double add with the second operand loaded from memory; the LoadD is folded
// into the addsd (0xF2 0x0F 0x58).
11114 instruct addD_mem(regD dst, memory src)
11115 %{
11116   match(Set dst (AddD dst (LoadD src)));
11117 
11118   format %{ "addsd   $dst, $src" %}
11119   ins_cost(150); // XXX
11120   opcode(0xF2, 0x0F, 0x58);
11121   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11122   ins_pipe(pipe_slow);
11123 %}
11124 
// Double add of a constant (immD). The operand bytes come from load_immD and
// the format prints the constant as a memory reference ([$src]).
11125 instruct addD_imm(regD dst, immD src)
11126 %{
11127   match(Set dst (AddD dst src));
11128 
11129   format %{ "addsd   $dst, [$src]" %}
11130   ins_cost(150); // XXX
11131   opcode(0xF2, 0x0F, 0x58);
11132   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11133   ins_pipe(pipe_slow);
11134 %}
11135 
// Float subtract, register - register: dst = dst - src.
// subss is 0xF3 0x0F 0x5C; the 0xF3 prefix byte is emitted via OpcP ahead of
// the REX encoder.
11136 instruct subF_reg(regF dst, regF src)
11137 %{
11138   match(Set dst (SubF dst src));
11139 
11140   format %{ "subss   $dst, $src" %}
11141   ins_cost(150); // XXX
11142   opcode(0xF3, 0x0F, 0x5C);
11143   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11144   ins_pipe(pipe_slow);
11145 %}
11146 
// Float subtract with the subtrahend loaded from memory; the LoadF is folded
// into the subss (0xF3 0x0F 0x5C).
11147 instruct subF_mem(regF dst, memory src)
11148 %{
11149   match(Set dst (SubF dst (LoadF src)));
11150 
11151   format %{ "subss   $dst, $src" %}
11152   ins_cost(150); // XXX
11153   opcode(0xF3, 0x0F, 0x5C);
11154   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11155   ins_pipe(pipe_slow);
11156 %}
11157 
// Float subtract of a constant (immF). The operand bytes come from load_immF
// and the format prints the constant as a memory reference ([$src]).
11158 instruct subF_imm(regF dst, immF src)
11159 %{
11160   match(Set dst (SubF dst src));
11161 
11162   format %{ "subss   $dst, [$src]" %}
11163   ins_cost(150); // XXX
11164   opcode(0xF3, 0x0F, 0x5C);
11165   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11166   ins_pipe(pipe_slow);
11167 %}
11168 
// Double subtract, register - register: dst = dst - src.
// subsd is 0xF2 0x0F 0x5C; the 0xF2 prefix selects the double-precision form.
11169 instruct subD_reg(regD dst, regD src)
11170 %{
11171   match(Set dst (SubD dst src));
11172 
11173   format %{ "subsd   $dst, $src" %}
11174   ins_cost(150); // XXX
11175   opcode(0xF2, 0x0F, 0x5C);
11176   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11177   ins_pipe(pipe_slow);
11178 %}
11179 
// Double subtract with the subtrahend loaded from memory; the LoadD is folded
// into the subsd (0xF2 0x0F 0x5C).
11180 instruct subD_mem(regD dst, memory src)
11181 %{
11182   match(Set dst (SubD dst (LoadD src)));
11183 
11184   format %{ "subsd   $dst, $src" %}
11185   ins_cost(150); // XXX
11186   opcode(0xF2, 0x0F, 0x5C);
11187   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11188   ins_pipe(pipe_slow);
11189 %}
11190 
// Double subtract of a constant (immD). The operand bytes come from load_immD
// and the format prints the constant as a memory reference ([$src]).
11191 instruct subD_imm(regD dst, immD src)
11192 %{
11193   match(Set dst (SubD dst src));
11194 
11195   format %{ "subsd   $dst, [$src]" %}
11196   ins_cost(150); // XXX
11197   opcode(0xF2, 0x0F, 0x5C);
11198   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11199   ins_pipe(pipe_slow);
11200 %}
11201 
// Float multiply, register * register (two-address: dst is input and result).
// mulss is 0xF3 0x0F 0x59; the 0xF3 prefix byte is emitted via OpcP ahead of
// the REX encoder.
11202 instruct mulF_reg(regF dst, regF src)
11203 %{
11204   match(Set dst (MulF dst src));
11205 
11206   format %{ "mulss   $dst, $src" %}
11207   ins_cost(150); // XXX
11208   opcode(0xF3, 0x0F, 0x59);
11209   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11210   ins_pipe(pipe_slow);
11211 %}
11212 
// Float multiply with the second operand loaded from memory; the LoadF is
// folded into the mulss (0xF3 0x0F 0x59).
11213 instruct mulF_mem(regF dst, memory src)
11214 %{
11215   match(Set dst (MulF dst (LoadF src)));
11216 
11217   format %{ "mulss   $dst, $src" %}
11218   ins_cost(150); // XXX
11219   opcode(0xF3, 0x0F, 0x59);
11220   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11221   ins_pipe(pipe_slow);
11222 %}
11223 
// Float multiply by a constant (immF). The operand bytes come from load_immF
// and the format prints the constant as a memory reference ([$src]).
11224 instruct mulF_imm(regF dst, immF src)
11225 %{
11226   match(Set dst (MulF dst src));
11227 
11228   format %{ "mulss   $dst, [$src]" %}
11229   ins_cost(150); // XXX
11230   opcode(0xF3, 0x0F, 0x59);
11231   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11232   ins_pipe(pipe_slow);
11233 %}
11234 
// Double multiply, register * register (two-address: dst is input and result).
// mulsd is 0xF2 0x0F 0x59; the 0xF2 prefix selects the double-precision form.
11235 instruct mulD_reg(regD dst, regD src)
11236 %{
11237   match(Set dst (MulD dst src));
11238 
11239   format %{ "mulsd   $dst, $src" %}
11240   ins_cost(150); // XXX
11241   opcode(0xF2, 0x0F, 0x59);
11242   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11243   ins_pipe(pipe_slow);
11244 %}
11245 
// Double multiply with the second operand loaded from memory; the LoadD is
// folded into the mulsd (0xF2 0x0F 0x59).
11246 instruct mulD_mem(regD dst, memory src)
11247 %{
11248   match(Set dst (MulD dst (LoadD src)));
11249 
11250   format %{ "mulsd   $dst, $src" %}
11251   ins_cost(150); // XXX
11252   opcode(0xF2, 0x0F, 0x59);
11253   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11254   ins_pipe(pipe_slow);
11255 %}
11256 
// Double multiply by a constant (immD). The operand bytes come from load_immD
// and the format prints the constant as a memory reference ([$src]).
11257 instruct mulD_imm(regD dst, immD src)
11258 %{
11259   match(Set dst (MulD dst src));
11260 
11261   format %{ "mulsd   $dst, [$src]" %}
11262   ins_cost(150); // XXX
11263   opcode(0xF2, 0x0F, 0x59);
11264   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11265   ins_pipe(pipe_slow);
11266 %}
11267 
// Float divide, register / register: dst = dst / src.
// divss is 0xF3 0x0F 0x5E; the 0xF3 prefix byte is emitted via OpcP ahead of
// the REX encoder.
11268 instruct divF_reg(regF dst, regF src)
11269 %{
11270   match(Set dst (DivF dst src));
11271 
11272   format %{ "divss   $dst, $src" %}
11273   ins_cost(150); // XXX
11274   opcode(0xF3, 0x0F, 0x5E);
11275   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11276   ins_pipe(pipe_slow);
11277 %}
11278 
// Float divide with the divisor loaded from memory; the LoadF is folded into
// the divss (0xF3 0x0F 0x5E).
11279 instruct divF_mem(regF dst, memory src)
11280 %{
11281   match(Set dst (DivF dst (LoadF src)));
11282 
11283   format %{ "divss   $dst, $src" %}
11284   ins_cost(150); // XXX
11285   opcode(0xF3, 0x0F, 0x5E);
11286   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11287   ins_pipe(pipe_slow);
11288 %}
11289 
// Float divide by a constant (immF). The operand bytes come from load_immF
// and the format prints the constant as a memory reference ([$src]).
11290 instruct divF_imm(regF dst, immF src)
11291 %{
11292   match(Set dst (DivF dst src));
11293 
11294   format %{ "divss   $dst, [$src]" %}
11295   ins_cost(150); // XXX
11296   opcode(0xF3, 0x0F, 0x5E);
11297   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11298   ins_pipe(pipe_slow);
11299 %}
11300 
// Double divide, register / register: dst = dst / src.
// divsd is 0xF2 0x0F 0x5E; the 0xF2 prefix selects the double-precision form.
11301 instruct divD_reg(regD dst, regD src)
11302 %{
11303   match(Set dst (DivD dst src));
11304 
11305   format %{ "divsd   $dst, $src" %}
11306   ins_cost(150); // XXX
11307   opcode(0xF2, 0x0F, 0x5E);
11308   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11309   ins_pipe(pipe_slow);
11310 %}
11311 
// Double divide with the divisor loaded from memory; the LoadD is folded into
// the divsd (0xF2 0x0F 0x5E).
11312 instruct divD_mem(regD dst, memory src)
11313 %{
11314   match(Set dst (DivD dst (LoadD src)));
11315 
11316   format %{ "divsd   $dst, $src" %}
11317   ins_cost(150); // XXX
11318   opcode(0xF2, 0x0F, 0x5E);
11319   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11320   ins_pipe(pipe_slow);
11321 %}
11322 
// Double divide by a constant (immD). The operand bytes come from load_immD
// and the format prints the constant as a memory reference ([$src]).
11323 instruct divD_imm(regD dst, immD src)
11324 %{
11325   match(Set dst (DivD dst src));
11326 
11327   format %{ "divsd   $dst, [$src]" %}
11328   ins_cost(150); // XXX
11329   opcode(0xF2, 0x0F, 0x5E);
11330   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11331   ins_pipe(pipe_slow);
11332 %}
11333 
// Float square root. There is no direct float sqrt pattern here: the match is
// the three-node tree float->double, SqrtD, double->float, which is collapsed
// into a single sqrtss (0xF3 0x0F 0x51). dst and src are separate operands.
11334 instruct sqrtF_reg(regF dst, regF src)
11335 %{
11336   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11337 
11338   format %{ "sqrtss  $dst, $src" %}
11339   ins_cost(150); // XXX
11340   opcode(0xF3, 0x0F, 0x51);
11341   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11342   ins_pipe(pipe_slow);
11343 %}
11344 
// Float square root of a value loaded from memory. Matches the
// ConvD2F(SqrtD(ConvF2D(LoadF))) tree and folds the load into one sqrtss
// (0xF3 0x0F 0x51).
11345 instruct sqrtF_mem(regF dst, memory src)
11346 %{
11347   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
11348 
11349   format %{ "sqrtss  $dst, $src" %}
11350   ins_cost(150); // XXX
11351   opcode(0xF3, 0x0F, 0x51);
11352   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11353   ins_pipe(pipe_slow);
11354 %}
11355 
// Float square root of a constant (immF). Matches the same
// ConvD2F(SqrtD(ConvF2D ...)) tree; the operand bytes come from load_immF and
// the format prints the constant as a memory reference ([$src]).
11356 instruct sqrtF_imm(regF dst, immF src)
11357 %{
11358   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11359 
11360   format %{ "sqrtss  $dst, [$src]" %}
11361   ins_cost(150); // XXX
11362   opcode(0xF3, 0x0F, 0x51);
11363   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
11364   ins_pipe(pipe_slow);
11365 %}
11366 
// Double square root, register to register. Unlike the arithmetic forms above
// this is not two-address: dst and src are separate operands.
// sqrtsd is 0xF2 0x0F 0x51.
11367 instruct sqrtD_reg(regD dst, regD src)
11368 %{
11369   match(Set dst (SqrtD src));
11370 
11371   format %{ "sqrtsd  $dst, $src" %}
11372   ins_cost(150); // XXX
11373   opcode(0xF2, 0x0F, 0x51);
11374   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11375   ins_pipe(pipe_slow);
11376 %}
11377 
// Double square root of a value loaded from memory; the LoadD is folded into
// the sqrtsd (0xF2 0x0F 0x51).
11378 instruct sqrtD_mem(regD dst, memory src)
11379 %{
11380   match(Set dst (SqrtD (LoadD src)));
11381 
11382   format %{ "sqrtsd  $dst, $src" %}
11383   ins_cost(150); // XXX
11384   opcode(0xF2, 0x0F, 0x51);
11385   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11386   ins_pipe(pipe_slow);
11387 %}
11388 
// Double square root of a constant (immD). The operand bytes come from
// load_immD and the format prints the constant as a memory reference ([$src]).
11389 instruct sqrtD_imm(regD dst, immD src)
11390 %{
11391   match(Set dst (SqrtD src));
11392 
11393   format %{ "sqrtsd  $dst, [$src]" %}
11394   ins_cost(150); // XXX
11395   opcode(0xF2, 0x0F, 0x51);
11396   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11397   ins_pipe(pipe_slow);
11398 %}
11399 
// Float absolute value, in place on dst. The format documents the technique:
// an andps with a 0x7fffffff mask, i.e. clearing the sign bit. The actual
// bytes are produced by absF_encoding, which is defined elsewhere.
11400 instruct absF_reg(regF dst)
11401 %{
11402   match(Set dst (AbsF dst));
11403 
11404   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
11405   ins_encode(absF_encoding(dst));
11406   ins_pipe(pipe_slow);
11407 %}
11408 
// Double absolute value, in place on dst. The format documents the technique:
// an andpd with a 0x7fffffffffffffff mask, i.e. clearing the sign bit. The
// actual bytes are produced by absD_encoding, which is defined elsewhere.
11409 instruct absD_reg(regD dst)
11410 %{
11411   match(Set dst (AbsD dst));
11412 
11413   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11414             "# abs double by sign masking" %}
11415   ins_encode(absD_encoding(dst));
11416   ins_pipe(pipe_slow);
11417 %}
11418 
// Float negate, in place on dst. The format documents the technique: an xorps
// with a 0x80000000 mask, i.e. flipping the sign bit. The actual bytes are
// produced by negF_encoding, which is defined elsewhere.
11419 instruct negF_reg(regF dst)
11420 %{
11421   match(Set dst (NegF dst));
11422 
11423   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11424   ins_encode(negF_encoding(dst));
11425   ins_pipe(pipe_slow);
11426 %}
11427 
// Double negate, in place on dst. The format documents the technique: an
// xorpd with a 0x8000000000000000 mask, i.e. flipping the sign bit. The actual
// bytes are produced by negD_encoding, which is defined elsewhere.
11428 instruct negD_reg(regD dst)
11429 %{
11430   match(Set dst (NegD dst));
11431 
11432   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11433             "# neg double by sign flipping" %}
11434   ins_encode(negD_encoding(dst));
11435   ins_pipe(pipe_slow);
11436 %}
11437 
11438 // -----------Trig and Transcendental Instructions-----------------------------
// Cosine of a double, in place on dst. The two opcode bytes 0xD9 0xFF are the
// x87 fcos instruction; they are bracketed by Push_SrcXD and Push_ResultXD.
// NOTE(review): those two encoders presumably move the value from the XMM
// register to the x87 stack and back; confirm in their definitions.
11439 instruct cosD_reg(regD dst) %{
11440   match(Set dst (CosD dst));
11441 
11442   format %{ "dcos   $dst\n\t" %}
11443   opcode(0xD9, 0xFF);
11444   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11445   ins_pipe( pipe_slow );
11446 %}
11447 
// Sine of a double, in place on dst. The two opcode bytes 0xD9 0xFE are the
// x87 fsin instruction; they are bracketed by Push_SrcXD and Push_ResultXD.
// NOTE(review): those two encoders presumably move the value from the XMM
// register to the x87 stack and back; confirm in their definitions.
11448 instruct sinD_reg(regD dst) %{
11449   match(Set dst (SinD dst));
11450 
11451   format %{ "dsin   $dst\n\t" %}
11452   opcode(0xD9, 0xFE);
11453   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11454   ins_pipe( pipe_slow );
11455 %}
11456 
// Tangent of a double, in place on dst. Emits fptan (0xD9 0xF2) followed by
// an fstp of the stack top (0xDD 0xD8). fptan pushes an extra 1.0 after the
// result, so the fstp discards that value before Push_ResultXD runs.
11457 instruct tanD_reg(regD dst) %{
11458   match(Set dst (TanD dst));
11459 
11460   format %{ "dtan   $dst\n\t" %}
11461   ins_encode( Push_SrcXD(dst),
11462               Opcode(0xD9), Opcode(0xF2),   //fptan
11463               Opcode(0xDD), Opcode(0xD8),   //fstp st
11464               Push_ResultXD(dst) );
11465   ins_pipe( pipe_slow );
11466 %}
11467 
// Base-10 logarithm of a double, in place on dst.
// Order matters: the constant is pushed by fldlg2 before the source value is
// pushed by Push_SrcXD, so fyl2x sees the constant below the operand on the
// x87 stack and computes constant * log_2(x).
11468 instruct log10D_reg(regD dst) %{
11469   // The source and result Double operands in XMM registers
11470   match(Set dst (Log10D dst));
11471   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11472   // fyl2x        ; compute log_10(2) * log_2(x)
11473   format %{ "fldlg2\t\t\t#Log10\n\t"
11474             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11475          %}
11476    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11477               Push_SrcXD(dst),
11478               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11479               Push_ResultXD(dst));
11480 
11481   ins_pipe( pipe_slow );
11482 %}
11483 
// Natural logarithm of a double, in place on dst.
// Same shape as the base-10 form: fldln2 pushes the constant first, then the
// source value is pushed by Push_SrcXD, and fyl2x computes
// constant * log_2(x).
11484 instruct logD_reg(regD dst) %{
11485   // The source and result Double operands in XMM registers
11486   match(Set dst (LogD dst));
11487   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11488   // fyl2x        ; compute log_e(2) * log_2(x)
11489   format %{ "fldln2\t\t\t#Log_e\n\t"
11490             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11491          %}
11492   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11493               Push_SrcXD(dst),
11494               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11495               Push_ResultXD(dst));
11496   ins_pipe( pipe_slow );
11497 %}
11498 
11499 
11500 
11501 //----------Arithmetic Conversion Instructions---------------------------------
11502 
// RoundFloat is matched but generates nothing: zero cost, an empty encoding
// list and the `empty` pipeline class. dst is both input and result, so the
// value simply stays where it is.
11503 instruct roundFloat_nop(regF dst)
11504 %{
11505   match(Set dst (RoundFloat dst));
11506 
11507   ins_cost(0);
11508   ins_encode();
11509   ins_pipe(empty);
11510 %}
11511 
// RoundDouble is matched but generates nothing: zero cost, an empty encoding
// list and the `empty` pipeline class. dst is both input and result, so the
// value simply stays where it is.
11512 instruct roundDouble_nop(regD dst)
11513 %{
11514   match(Set dst (RoundDouble dst));
11515 
11516   ins_cost(0);
11517   ins_encode();
11518   ins_pipe(empty);
11519 %}
11520 
// Float to double conversion, XMM register to XMM register.
// cvtss2sd is 0xF3 0x0F 0x5A; the 0xF3 prefix byte is emitted via OpcP ahead
// of the REX encoder. No explicit ins_cost is given.
11521 instruct convF2D_reg_reg(regD dst, regF src)
11522 %{
11523   match(Set dst (ConvF2D src));
11524 
11525   format %{ "cvtss2sd $dst, $src" %}
11526   opcode(0xF3, 0x0F, 0x5A);
11527   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11528   ins_pipe(pipe_slow); // XXX
11529 %}
11530 
11531 instruct convF2D_reg_mem(regD dst, memory src)
11532 %{
        // ConvF2D with the LoadF folded into the instruction's memory
        // operand.  Same opcode bytes (F3 0F 5A) as the register form, with
        // the REX_reg_mem / reg_mem operand encodings.
11533   match(Set dst (ConvF2D (LoadF src)));
11534 
11535   format %{ "cvtss2sd $dst, $src" %}
11536   opcode(0xF3, 0x0F, 0x5A);
11537   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11538   ins_pipe(pipe_slow); // XXX
11539 %}
11540 
11541 instruct convD2F_reg_reg(regF dst, regD src)
11542 %{
        // ConvD2F, register source.  Opcode bytes F2 0F 5A (printed as
        // cvtsd2ss); the REX prefix is emitted between OpcP and OpcS/OpcT.
11543   match(Set dst (ConvD2F src));
11544 
11545   format %{ "cvtsd2ss $dst, $src" %}
11546   opcode(0xF2, 0x0F, 0x5A);
11547   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11548   ins_pipe(pipe_slow); // XXX
11549 %}
11550 
11551 instruct convD2F_reg_mem(regF dst, memory src)
11552 %{
        // ConvD2F with the LoadD folded into the instruction's memory
        // operand.  Same opcode bytes (F2 0F 5A) as the register form.
11553   match(Set dst (ConvD2F (LoadD src)));
11554 
11555   format %{ "cvtsd2ss $dst, $src" %}
11556   opcode(0xF2, 0x0F, 0x5A);
11557   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11558   ins_pipe(pipe_slow); // XXX
11559 %}
11560 
11561 // XXX do mem variants
11562 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11563 %{
        // ConvF2I: opcode bytes F3 0F 2C (printed as cvttss2sil) followed by
        // the f2i_fixup encoding class (defined elsewhere).  Per the format
        // text, the fixup path is taken only when the result equals
        // 0x80000000: the source is spilled to the stack, f2i_fixup is
        // called, and the result is popped into $dst.  The compare is why
        // the flags register is declared killed.
11564   match(Set dst (ConvF2I src));
11565   effect(KILL cr);
11566 
11567   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11568             "cmpl    $dst, #0x80000000\n\t"
11569             "jne,s   done\n\t"
11570             "subq    rsp, #8\n\t"
11571             "movss   [rsp], $src\n\t"
11572             "call    f2i_fixup\n\t"
11573             "popq    $dst\n"
11574     "done:   "%}
11575   opcode(0xF3, 0x0F, 0x2C);
11576   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11577              f2i_fixup(dst, src));
11578   ins_pipe(pipe_slow);
11579 %}
11580 
11581 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11582 %{
        // ConvF2L: same opcode bytes as the int form (F3 0F 2C) but with the
        // wide REX prefix (REX_reg_reg_wide), printed as cvttss2siq, followed
        // by the f2l_fixup encoding class (defined elsewhere).  Per the
        // format text, the fixup path runs only when the result equals
        // 0x8000000000000000.  Flags are declared killed.
11583   match(Set dst (ConvF2L src));
11584   effect(KILL cr);
11585 
11586   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11587             "cmpq    $dst, [0x8000000000000000]\n\t"
11588             "jne,s   done\n\t"
11589             "subq    rsp, #8\n\t"
11590             "movss   [rsp], $src\n\t"
11591             "call    f2l_fixup\n\t"
11592             "popq    $dst\n"
11593     "done:   "%}
11594   opcode(0xF3, 0x0F, 0x2C);
11595   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11596              f2l_fixup(dst, src));
11597   ins_pipe(pipe_slow);
11598 %}
11599 
11600 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11601 %{
        // ConvD2I: opcode bytes F2 0F 2C (printed as cvttsd2sil) followed by
        // the d2i_fixup encoding class (defined elsewhere).  Per the format
        // text, the fixup path runs only when the result equals 0x80000000,
        // passing the source to d2i_fixup on the stack.  Flags are declared
        // killed.
11602   match(Set dst (ConvD2I src));
11603   effect(KILL cr);
11604 
11605   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11606             "cmpl    $dst, #0x80000000\n\t"
11607             "jne,s   done\n\t"
11608             "subq    rsp, #8\n\t"
11609             "movsd   [rsp], $src\n\t"
11610             "call    d2i_fixup\n\t"
11611             "popq    $dst\n"
11612     "done:   "%}
11613   opcode(0xF2, 0x0F, 0x2C);
11614   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11615              d2i_fixup(dst, src));
11616   ins_pipe(pipe_slow);
11617 %}
11618 
11619 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11620 %{
        // ConvD2L: opcode bytes F2 0F 2C with the wide REX prefix (printed as
        // cvttsd2siq), followed by the d2l_fixup encoding class (defined
        // elsewhere).  Per the format text, the fixup path runs only when the
        // result equals 0x8000000000000000.  Flags are declared killed.
11621   match(Set dst (ConvD2L src));
11622   effect(KILL cr);
11623 
11624   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11625             "cmpq    $dst, [0x8000000000000000]\n\t"
11626             "jne,s   done\n\t"
11627             "subq    rsp, #8\n\t"
11628             "movsd   [rsp], $src\n\t"
11629             "call    d2l_fixup\n\t"
11630             "popq    $dst\n"
11631     "done:   "%}
11632   opcode(0xF2, 0x0F, 0x2C);
11633   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11634              d2l_fixup(dst, src));
11635   ins_pipe(pipe_slow);
11636 %}
11637 
11638 instruct convI2F_reg_reg(regF dst, rRegI src)
11639 %{
        // ConvI2F from a general register, selected only when UseXmmI2F is
        // off (convXI2F_reg covers the other case).  Opcode bytes F3 0F 2A,
        // printed as cvtsi2ssl.
11640   predicate(!UseXmmI2F);
11641   match(Set dst (ConvI2F src));
11642 
11643   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11644   opcode(0xF3, 0x0F, 0x2A);
11645   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11646   ins_pipe(pipe_slow); // XXX
11647 %}
11648 
11649 instruct convI2F_reg_mem(regF dst, memory src)
11650 %{
        // ConvI2F with the LoadI folded into the memory operand.  Opcode
        // bytes F3 0F 2A.  Unlike the register form, this rule carries no
        // UseXmmI2F predicate.
11651   match(Set dst (ConvI2F (LoadI src)));
11652 
11653   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11654   opcode(0xF3, 0x0F, 0x2A);
11655   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11656   ins_pipe(pipe_slow); // XXX
11657 %}
11658 
11659 instruct convI2D_reg_reg(regD dst, rRegI src)
11660 %{
        // ConvI2D from a general register, selected only when UseXmmI2D is
        // off (convXI2D_reg covers the other case).  Opcode bytes F2 0F 2A,
        // printed as cvtsi2sdl.
11661   predicate(!UseXmmI2D);
11662   match(Set dst (ConvI2D src));
11663 
11664   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11665   opcode(0xF2, 0x0F, 0x2A);
11666   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11667   ins_pipe(pipe_slow); // XXX
11668 %}
11669 
11670 instruct convI2D_reg_mem(regD dst, memory src)
11671 %{
        // ConvI2D with the LoadI folded into the memory operand.  Opcode
        // bytes F2 0F 2A.  Unlike the register form, this rule carries no
        // UseXmmI2D predicate.
11672   match(Set dst (ConvI2D (LoadI src)));
11673 
11674   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11675   opcode(0xF2, 0x0F, 0x2A);
11676   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11677   ins_pipe(pipe_slow); // XXX
11678 %}
11679 
11680 instruct convXI2F_reg(regF dst, rRegI src)
11681 %{
        // ConvI2F variant selected when UseXmmI2F is on: the int is first
        // moved into the destination XMM register (movdl) and then converted
        // in place with cvtdq2ps.
11682   predicate(UseXmmI2F);
11683   match(Set dst (ConvI2F src));
11684 
11685   format %{ "movdl $dst, $src\n\t"
11686             "cvtdq2psl $dst, $dst\t# i2f" %}
11687   ins_encode %{
11688     __ movdl($dst$$XMMRegister, $src$$Register);
11689     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11690   %}
11691   ins_pipe(pipe_slow); // XXX
11692 %}
11693 
11694 instruct convXI2D_reg(regD dst, rRegI src)
11695 %{
        // ConvI2D variant selected when UseXmmI2D is on: the int is first
        // moved into the destination XMM register (movdl) and then converted
        // in place with cvtdq2pd.
11696   predicate(UseXmmI2D);
11697   match(Set dst (ConvI2D src));
11698 
11699   format %{ "movdl $dst, $src\n\t"
11700             "cvtdq2pdl $dst, $dst\t# i2d" %}
11701   ins_encode %{
11702     __ movdl($dst$$XMMRegister, $src$$Register);
11703     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11704   %}
11705   ins_pipe(pipe_slow); // XXX
11706 %}
11707 
11708 instruct convL2F_reg_reg(regF dst, rRegL src)
11709 %{
        // ConvL2F from a general register.  Opcode bytes F3 0F 2A with the
        // wide REX prefix (REX_reg_reg_wide), printed as cvtsi2ssq.
11710   match(Set dst (ConvL2F src));
11711 
11712   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11713   opcode(0xF3, 0x0F, 0x2A);
11714   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11715   ins_pipe(pipe_slow); // XXX
11716 %}
11717 
11718 instruct convL2F_reg_mem(regF dst, memory src)
11719 %{
        // ConvL2F with the LoadL folded into the memory operand.  Opcode
        // bytes F3 0F 2A with the wide REX prefix (REX_reg_mem_wide).
11720   match(Set dst (ConvL2F (LoadL src)));
11721 
11722   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11723   opcode(0xF3, 0x0F, 0x2A);
11724   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11725   ins_pipe(pipe_slow); // XXX
11726 %}
11727 
11728 instruct convL2D_reg_reg(regD dst, rRegL src)
11729 %{
        // ConvL2D from a general register.  Opcode bytes F2 0F 2A with the
        // wide REX prefix (REX_reg_reg_wide), printed as cvtsi2sdq.
11730   match(Set dst (ConvL2D src));
11731 
11732   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11733   opcode(0xF2, 0x0F, 0x2A);
11734   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11735   ins_pipe(pipe_slow); // XXX
11736 %}
11737 
11738 instruct convL2D_reg_mem(regD dst, memory src)
11739 %{
        // ConvL2D with the LoadL folded into the memory operand.  Opcode
        // bytes F2 0F 2A with the wide REX prefix (REX_reg_mem_wide).
11740   match(Set dst (ConvL2D (LoadL src)));
11741 
11742   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11743   opcode(0xF2, 0x0F, 0x2A);
11744   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11745   ins_pipe(pipe_slow); // XXX
11746 %}
11747 
11748 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11749 %{
        // ConvI2L (int to long) emitted through the assembler's movslq,
        // at cost 125.
11750   match(Set dst (ConvI2L src));
11751 
11752   ins_cost(125);
11753   format %{ "movslq  $dst, $src\t# i2l" %}
11754   ins_encode %{
11755     __ movslq($dst$$Register, $src$$Register);
11756   %}
11757   ins_pipe(ialu_reg_reg);
11758 %}
11759 
11760 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11761 // %{
11762 //   match(Set dst (ConvI2L src));
11763 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11764 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11765 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11766 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11767 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11768 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11769 
11770 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11771 //   ins_encode(enc_copy(dst, src));
11772 // //   opcode(0x63); // needs REX.W
11773 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11774 //   ins_pipe(ialu_reg_reg);
11775 // %}
11776 
11777 // Zero-extend convert int to long
11778 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11779 %{
        // Folds AndL(ConvI2L src, mask) into a single 32-bit register move
        // (printed as movl), where mask must match the immL_32bits operand.
        // Emitted through the enc_copy encoding class, defined elsewhere.
        // NOTE(review): the format text ends with a trailing "\n\t".
11780   match(Set dst (AndL (ConvI2L src) mask));
11781 
11782   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11783   ins_encode(enc_copy(dst, src));
11784   ins_pipe(ialu_reg_reg);
11785 %}
11786 
11787 // Zero-extend convert int to long
11788 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11789 %{
        // Folds AndL(ConvI2L(LoadI src), mask) into one load: opcode 0x8B
        // with the non-wide REX_reg_mem prefix, printed as movl.  mask must
        // match the immL_32bits operand.
        // NOTE(review): the format text ends with a trailing "\n\t".
11790   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11791 
11792   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11793   opcode(0x8B);
11794   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11795   ins_pipe(ialu_reg_mem);
11796 %}
11797 
11798 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11799 %{
        // AndL of a long register with an immL_32bits mask, printed as a
        // 32-bit movl.  Uses enc_copy_always (defined elsewhere; presumably
        // it emits the move even when dst == src -- TODO confirm).
11800   match(Set dst (AndL src mask));
11801 
11802   format %{ "movl    $dst, $src\t# zero-extend long" %}
11803   ins_encode(enc_copy_always(dst, src));
11804   ins_pipe(ialu_reg_reg);
11805 %}
11806 
11807 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11808 %{
        // ConvL2I (long to int), printed as a 32-bit movl and emitted
        // through the enc_copy_always encoding class (defined elsewhere).
11809   match(Set dst (ConvL2I src));
11810 
11811   format %{ "movl    $dst, $src\t# l2i" %}
11812   ins_encode(enc_copy_always(dst, src));
11813   ins_pipe(ialu_reg_reg);
11814 %}
11815 
11816 
11817 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot into an int register: a plain
        // 32-bit load (opcode 0x8B, printed as movl), cost 125.
11818   match(Set dst (MoveF2I src));
11819   effect(DEF dst, USE src);
11820 
11821   ins_cost(125);
11822   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11823   opcode(0x8B);
11824   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11825   ins_pipe(ialu_reg_mem);
11826 %}
11827 
11828 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot into a float register: opcode bytes
        // F3 0F 10 (printed as movss), cost 125.
11829   match(Set dst (MoveI2F src));
11830   effect(DEF dst, USE src);
11831 
11832   ins_cost(125);
11833   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11834   opcode(0xF3, 0x0F, 0x10);
11835   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11836   ins_pipe(pipe_slow);
11837 %}
11838 
11839 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot into a long register: opcode 0x8B
        // with the wide REX prefix (printed as movq), cost 125.
11840   match(Set dst (MoveD2L src));
11841   effect(DEF dst, USE src);
11842 
11843   ins_cost(125);
11844   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11845   opcode(0x8B);
11846   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11847   ins_pipe(ialu_reg_mem);
11848 %}
11849 
11850 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot, selected when
        // UseXmmLoadAndClearUpper is off: opcode bytes 66 0F 12 (printed as
        // movlpd), cost 125.  The format label omits the "_partial" suffix.
11851   predicate(!UseXmmLoadAndClearUpper);
11852   match(Set dst (MoveL2D src));
11853   effect(DEF dst, USE src);
11854 
11855   ins_cost(125);
11856   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11857   opcode(0x66, 0x0F, 0x12);
11858   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11859   ins_pipe(pipe_slow);
11860 %}
11861 
11862 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a long stack slot, selected when
        // UseXmmLoadAndClearUpper is on: opcode bytes F2 0F 10 (printed as
        // movsd), cost 125.
11863   predicate(UseXmmLoadAndClearUpper);
11864   match(Set dst (MoveL2D src));
11865   effect(DEF dst, USE src);
11866 
11867   ins_cost(125);
11868   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11869   opcode(0xF2, 0x0F, 0x10);
11870   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11871   ins_pipe(pipe_slow);
11872 %}
11873 
11874 
11875 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I from a float register into an int stack slot: opcode bytes
        // F3 0F 11 (printed as movss), cost 95.  The encoding helpers take
        // (src, dst) because the register operand comes first.
11876   match(Set dst (MoveF2I src));
11877   effect(DEF dst, USE src);
11878 
11879   ins_cost(95); // XXX
11880   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11881   opcode(0xF3, 0x0F, 0x11);
11882   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11883   ins_pipe(pipe_slow);
11884 %}
11885 
11886 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register into a float stack slot: a plain
        // 32-bit store (opcode 0x89, printed as movl), cost 100.  The
        // encoding helpers take (src, dst) because the register operand
        // comes first.
11887   match(Set dst (MoveI2F src));
11888   effect(DEF dst, USE src);
11889 
11890   ins_cost(100);
11891   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11892   opcode(0x89);
11893   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11894   ins_pipe( ialu_mem_reg );
11895 %}
11896 
11897 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L from a double register into a long stack slot: opcode bytes
        // F2 0F 11 (printed as movsd), cost 95.  The encoding helpers take
        // (src, dst) because the register operand comes first.
        // The format label names this rule (MoveD2L); it previously read
        // "MoveL2D_reg_stack", which mislabelled it in assembly listings.
11898   match(Set dst (MoveD2L src));
11899   effect(DEF dst, USE src);
11900 
11901   ins_cost(95); // XXX
11902   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11903   opcode(0xF2, 0x0F, 0x11);
11904   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11905   ins_pipe(pipe_slow);
11906 %}
11907 
11908 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D from a long register into a double stack slot: opcode 0x89
        // with the wide REX prefix (printed as movq), cost 100.  The encoding
        // helpers take (src, dst) because the register operand comes first.
11909   match(Set dst (MoveL2D src));
11910   effect(DEF dst, USE src);
11911 
11912   ins_cost(100);
11913   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11914   opcode(0x89);
11915   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11916   ins_pipe(ialu_mem_reg);
11917 %}
11918 
11919 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I register to register through the assembler's
        // movdl(Register, XMMRegister); cost 85.
11920   match(Set dst (MoveF2I src));
11921   effect(DEF dst, USE src);
11922   ins_cost(85);
11923   format %{ "movd    $dst,$src\t# MoveF2I" %}
11924   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11925   ins_pipe( pipe_slow );
11926 %}
11927 
11928 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L register to register through the assembler's
        // movdq(Register, XMMRegister); cost 85.  The format prints the
        // mnemonic as "movd".
11929   match(Set dst (MoveD2L src));
11930   effect(DEF dst, USE src);
11931   ins_cost(85);
11932   format %{ "movd    $dst,$src\t# MoveD2L" %}
11933   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11934   ins_pipe( pipe_slow );
11935 %}
11936 
11937 // The next instructions have long latency and use Int unit. Set high cost.
11938 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F register to register through the assembler's
        // movdl(XMMRegister, Register).  Cost is set to 300, well above the
        // stack-based MoveI2F rules (100 and 125).
11939   match(Set dst (MoveI2F src));
11940   effect(DEF dst, USE src);
11941   ins_cost(300);
11942   format %{ "movd    $dst,$src\t# MoveI2F" %}
11943   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11944   ins_pipe( pipe_slow );
11945 %}
11946 
11947 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D register to register through the assembler's
        // movdq(XMMRegister, Register).  Cost is set to 300, well above the
        // stack-based MoveL2D rules (100 and 125).
11948   match(Set dst (MoveL2D src));
11949   effect(DEF dst, USE src);
11950   ins_cost(300);
11951   format %{ "movd    $dst,$src\t# MoveL2D" %}
11952   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11953   ins_pipe( pipe_slow );
11954 %}
11955 
11956 // Replicate scalar to packed byte (1 byte) values in xmm
11957 instruct Repl8B_reg(regD dst, regD src) %{
        // Replicate8B with the scalar already in an XMM-class (regD)
        // register.  All code comes from the pshufd_8x8 encoding class
        // (defined elsewhere); the format text lists a three-instruction
        // MOVDQA / PUNPCKLBW / PSHUFLW sequence.
11958   match(Set dst (Replicate8B src));
11959   format %{ "MOVDQA  $dst,$src\n\t"
11960             "PUNPCKLBW $dst,$dst\n\t"
11961             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11962   ins_encode( pshufd_8x8(dst, src));
11963   ins_pipe( pipe_slow );
11964 %}
11965 
11966 // Replicate scalar to packed byte (1 byte) values in xmm
11967 instruct Repl8B_rRegI(regD dst, rRegI src) %{
        // Replicate8B with the scalar in a general register: mov_i2x moves
        // it into $dst first, then pshufd_8x8 runs in place on $dst (both
        // encoding classes are defined elsewhere).
11968   match(Set dst (Replicate8B src));
11969   format %{ "MOVD    $dst,$src\n\t"
11970             "PUNPCKLBW $dst,$dst\n\t"
11971             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11972   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11973   ins_pipe( pipe_slow );
11974 %}
11975 
11976 // Replicate scalar zero to packed byte (1 byte) values in xmm
11977 instruct Repl8B_immI0(regD dst, immI0 zero) %{
        // Replicate8B of the constant zero: clears $dst with the pxor
        // encoding class applied to ($dst, $dst).
11978   match(Set dst (Replicate8B zero));
11979   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11980   ins_encode( pxor(dst, dst));
11981   ins_pipe( fpu_reg_reg );
11982 %}
11983 
11984 // Replicate scalar to packed short (2 byte) values in xmm
11985 instruct Repl4S_reg(regD dst, regD src) %{
        // Replicate4S with the scalar already in an XMM-class (regD)
        // register, emitted through the pshufd_4x16 encoding class (defined
        // elsewhere) and printed as PSHUFLW with immediate 0x00.
11986   match(Set dst (Replicate4S src));
11987   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11988   ins_encode( pshufd_4x16(dst, src));
11989   ins_pipe( fpu_reg_reg );
11990 %}
11991 
11992 // Replicate scalar to packed short (2 byte) values in xmm
11993 instruct Repl4S_rRegI(regD dst, rRegI src) %{
        // Replicate4S with the scalar in a general register: mov_i2x moves
        // it into $dst, then pshufd_4x16 runs in place on $dst (both
        // encoding classes are defined elsewhere).
11994   match(Set dst (Replicate4S src));
11995   format %{ "MOVD    $dst,$src\n\t"
11996             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11997   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11998   ins_pipe( fpu_reg_reg );
11999 %}
12000 
12001 // Replicate scalar zero to packed short (2 byte) values in xmm
12002 instruct Repl4S_immI0(regD dst, immI0 zero) %{
        // Replicate4S of the constant zero: clears $dst with the pxor
        // encoding class applied to ($dst, $dst).
12003   match(Set dst (Replicate4S zero));
12004   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
12005   ins_encode( pxor(dst, dst));
12006   ins_pipe( fpu_reg_reg );
12007 %}
12008 
12009 // Replicate scalar to packed char (2 byte) values in xmm
12010 instruct Repl4C_reg(regD dst, regD src) %{
        // Replicate4C with the scalar already in an XMM-class (regD)
        // register.  Uses the same pshufd_4x16 encoding class as the
        // Replicate4S rule.
12011   match(Set dst (Replicate4C src));
12012   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12013   ins_encode( pshufd_4x16(dst, src));
12014   ins_pipe( fpu_reg_reg );
12015 %}
12016 
12017 // Replicate scalar to packed char (2 byte) values in xmm
12018 instruct Repl4C_rRegI(regD dst, rRegI src) %{
        // Replicate4C with the scalar in a general register: mov_i2x moves
        // it into $dst, then pshufd_4x16 runs in place on $dst (both
        // encoding classes are defined elsewhere).
12019   match(Set dst (Replicate4C src));
12020   format %{ "MOVD    $dst,$src\n\t"
12021             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12022   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12023   ins_pipe( fpu_reg_reg );
12024 %}
12025 
12026 // Replicate scalar zero to packed char (2 byte) values in xmm
12027 instruct Repl4C_immI0(regD dst, immI0 zero) %{
        // Replicate4C of the constant zero: clears $dst with the pxor
        // encoding class applied to ($dst, $dst).
12028   match(Set dst (Replicate4C zero));
12029   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
12030   ins_encode( pxor(dst, dst));
12031   ins_pipe( fpu_reg_reg );
12032 %}
12033 
12034 // Replicate scalar to packed integer (4 byte) values in xmm
12035 instruct Repl2I_reg(regD dst, regD src) %{
        // Replicate2I with the scalar already in an XMM-class (regD)
        // register, emitted through the pshufd encoding class (defined
        // elsewhere) with shuffle immediate 0x00.
12036   match(Set dst (Replicate2I src));
12037   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12038   ins_encode( pshufd(dst, src, 0x00));
12039   ins_pipe( fpu_reg_reg );
12040 %}
12041 
12042 // Replicate scalar to packed integer (4 byte) values in xmm
12043 instruct Repl2I_rRegI(regD dst, rRegI src) %{
        // Replicate2I with the scalar in a general register: mov_i2x moves
        // it into $dst, then pshufd with immediate 0x00 runs in place on
        // $dst (both encoding classes are defined elsewhere).
12044   match(Set dst (Replicate2I src));
12045   format %{ "MOVD   $dst,$src\n\t"
12046             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12047   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12048   ins_pipe( fpu_reg_reg );
12049 %}
12050 
12051 // Replicate scalar zero to packed integer (4 byte) values in xmm
12052 instruct Repl2I_immI0(regD dst, immI0 zero) %{
        // Replicate2I of the constant zero: clears $dst with the pxor
        // encoding class applied to ($dst, $dst).
12053   match(Set dst (Replicate2I zero));
12054   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
12055   ins_encode( pxor(dst, dst));
12056   ins_pipe( fpu_reg_reg );
12057 %}
12058 
12059 // Replicate scalar to packed single precision floating point values in xmm
12060 instruct Repl2F_reg(regD dst, regD src) %{
        // Replicate2F with the source in a regD operand, emitted through the
        // pshufd encoding class (defined elsewhere) with shuffle immediate
        // 0xe0.
12061   match(Set dst (Replicate2F src));
12062   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12063   ins_encode( pshufd(dst, src, 0xe0));
12064   ins_pipe( fpu_reg_reg );
12065 %}
12066 
12067 // Replicate scalar to packed single precision floating point values in xmm
12068 instruct Repl2F_regF(regD dst, regF src) %{
        // Replicate2F with the source in a regF operand.  The match rule and
        // encoding (pshufd, immediate 0xe0) are the same as Repl2F_reg; only
        // the source operand class differs.
12069   match(Set dst (Replicate2F src));
12070   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12071   ins_encode( pshufd(dst, src, 0xe0));
12072   ins_pipe( fpu_reg_reg );
12073 %}
12074 
12075 // Replicate scalar zero to packed single precision floating point values in xmm
12076 instruct Repl2F_immF0(regD dst, immF0 zero) %{
        // Replicate2F of the float constant zero (immF0): clears $dst with
        // the pxor encoding class applied to ($dst, $dst).
12077   match(Set dst (Replicate2F zero));
12078   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
12079   ins_encode( pxor(dst, dst));
12080   ins_pipe( fpu_reg_reg );
12081 %}
12082 
12083 
12084 // =======================================================================
12085 // fast clearing of an array
12086 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
12087                   rFlagsReg cr)
12088 %{
        // ClearArray(cnt, base).  The count and base operands use the
        // rcx_RegL / rdi_RegP classes and are consumed (USE_KILL); the
        // rax_RegI operand and the flags are declared killed.  The encoding
        // is an xor of RAX with itself (opcode 0x33) followed by the raw
        // bytes F3 48 AB, i.e. a REX.W-prefixed rep stos.
12089   match(Set dummy (ClearArray cnt base));
12090   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12091 
12092   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
12093             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
12094   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
12095              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
12096   ins_pipe(pipe_slow);
12097 %}
12098 
12099 instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
12100                         rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
12101 %{
        // StrComp intrinsic.  All code comes from the enc_String_Compare
        // encoding class (defined elsewhere).  Both string pointers are
        // consumed (USE_KILL), the two XMM operands are temporaries, and
        // tmp3, tmp4 and the flags are declared killed.  The result operand
        // uses the rcx_RegI class.
12102   match(Set result (StrComp str1 str2));
12103   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
12104   //ins_cost(300);
12105 
12106   format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
12107   ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12108   ins_pipe( pipe_slow );
12109 %}
12110 
12111 instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
12112                         rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
12113 %{
        // StrIndexOf intrinsic, available only when UseSSE42Intrinsics is
        // set.  All code comes from the enc_String_IndexOf encoding class
        // (defined elsewhere).  Both string pointers are consumed (USE_KILL),
        // tmp1 is an XMM temporary, and tmp2..tmp4 and the flags are declared
        // killed.  The result operand uses the rbx_RegI class.
12114   predicate(UseSSE42Intrinsics);
12115   match(Set result (StrIndexOf str1 str2));
12116   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
12117 
12118   format %{ "String IndexOf $str1,$str2 -> $result   // KILL RAX, RCX, RDX" %}
12119   ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12120   ins_pipe( pipe_slow );
12121 %}
12122 
12123 // fast string equals
12124 instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
12125                        rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
12126 %{
        // StrEquals intrinsic.  All code comes from the enc_String_Equals
        // encoding class (defined elsewhere).  Both string pointers are
        // consumed (USE_KILL), the two XMM operands are temporaries, and
        // tmp3, tmp4 and the flags are declared killed.  The result operand
        // uses the rax_RegI class.
12127   match(Set result (StrEquals str1 str2));
12128   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
12129 
12130   format %{ "String Equals $str1,$str2 -> $result    // KILL RBX, RCX" %}
12131   ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
12132   ins_pipe( pipe_slow );
12133 %}
12134 
12135 // fast array equals
12136 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
12137                       rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
12138 %{
        // AryEq intrinsic.  All code comes from the enc_Array_Equals encoding
        // class (defined elsewhere).  Both array pointers are consumed
        // (USE_KILL), the two XMM operands are temporaries, and tmp3, tmp4
        // and the flags are declared killed.  The result operand uses the
        // rcx_RegI class.
12139   match(Set result (AryEq ary1 ary2));
12140   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12141   //ins_cost(300);
12142 
12143   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
12144   ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
12145   ins_pipe( pipe_slow );
12146 %}
12147 
12148 //----------Control Flow Instructions------------------------------------------
12149 // Signed compare Instructions
12150 
12151 // XXX more variants!!
12152 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12153 %{
        // Signed int compare of two registers: opcode 0x3B, printed as cmpl.
        // The result is the flags register only.
12154   match(Set cr (CmpI op1 op2));
12155   effect(DEF cr, USE op1, USE op2);
12156 
12157   format %{ "cmpl    $op1, $op2" %}
12158   opcode(0x3B);  /* Opcode 3B /r */
12159   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12160   ins_pipe(ialu_cr_reg_reg);
12161 %}
12162 
12163 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12164 %{
        // Signed int compare of a register with an immediate: opcode 0x81
        // with /7 extension.  OpcSErm and Con8or32 are defined elsewhere
        // (presumably they pick the short sign-extended imm8 form when the
        // constant fits -- TODO confirm).
12165   match(Set cr (CmpI op1 op2));
12166 
12167   format %{ "cmpl    $op1, $op2" %}
12168   opcode(0x81, 0x07); /* Opcode 81 /7 */
12169   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12170   ins_pipe(ialu_cr_reg_imm);
12171 %}
12172 
12173 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12174 %{
        // Signed int compare of a register with a value loaded from memory;
        // the LoadI is folded into the cmp (opcode 0x3B).  Cost 500.
12175   match(Set cr (CmpI op1 (LoadI op2)));
12176 
12177   ins_cost(500); // XXX
12178   format %{ "cmpl    $op1, $op2" %}
12179   opcode(0x3B); /* Opcode 3B /r */
12180   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12181   ins_pipe(ialu_cr_reg_mem);
12182 %}
12183 
12184 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
12185 %{
        // Int compare against the constant zero, implemented as a test of
        // the register with itself (opcode 0x85, printed as testl).
12186   match(Set cr (CmpI src zero));
12187 
12188   format %{ "testl   $src, $src" %}
12189   opcode(0x85);
12190   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12191   ins_pipe(ialu_cr_reg_imm);
12192 %}
12193 
12194 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
12195 %{
        // Folds CmpI(AndI(src, con), 0) into one test of the register
        // against a 32-bit immediate: opcode 0xF7 with /0 extension, followed
        // by Con32(con).
12196   match(Set cr (CmpI (AndI src con) zero));
12197 
12198   format %{ "testl   $src, $con" %}
12199   opcode(0xF7, 0x00);
12200   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
12201   ins_pipe(ialu_cr_reg_imm);
12202 %}
12203 
12204 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
12205 %{
        // Folds CmpI(AndI(src, LoadI mem), 0) into one test of the register
        // against memory (opcode 0x85, printed as testl).
12206   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12207 
12208   format %{ "testl   $src, $mem" %}
12209   opcode(0x85);
12210   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
12211   ins_pipe(ialu_cr_reg_mem);
12212 %}
12213 
12214 // Unsigned compare Instructions; really, same as signed except they
12215 // produce an rFlagsRegU instead of rFlagsReg.
12216 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12217 %{
        // Unsigned int compare of two registers.  Same opcode (0x3B) as the
        // signed rule; the difference is that the result is an rFlagsRegU.
12218   match(Set cr (CmpU op1 op2));
12219 
12220   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12221   opcode(0x3B); /* Opcode 3B /r */
12222   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
12223   ins_pipe(ialu_cr_reg_reg);
12224 %}
12225 
12226 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12227 %{
        // Unsigned int compare of a register with an immediate: opcode 0x81
        // with /7 extension, producing an rFlagsRegU.  OpcSErm and Con8or32
        // are defined elsewhere.
12228   match(Set cr (CmpU op1 op2));
12229 
12230   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12231   opcode(0x81,0x07); /* Opcode 81 /7 */
12232   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
12233   ins_pipe(ialu_cr_reg_imm);
12234 %}
12235 
12236 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12237 %{
        // Unsigned int compare of a register with a value loaded from
        // memory; the LoadI is folded into the cmp (opcode 0x3B).  Cost 500.
12238   match(Set cr (CmpU op1 (LoadI op2)));
12239 
12240   ins_cost(500); // XXX
12241   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12242   opcode(0x3B); /* Opcode 3B /r */
12243   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
12244   ins_pipe(ialu_cr_reg_mem);
12245 %}
12246 
12247 // // // Cisc-spilled version of cmpU_rReg
12248 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12249 // //%{
12250 // //  match(Set cr (CmpU (LoadI op1) op2));
12251 // //
12252 // //  format %{ "CMPu   $op1,$op2" %}
12253 // //  ins_cost(500);
12254 // //  opcode(0x39);  /* Opcode 39 /r */
12255 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12256 // //%}
12257 
12258 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
12259 %{
        // Unsigned int compare against the constant zero, implemented as a
        // test of the register with itself (opcode 0x85), producing an
        // rFlagsRegU.
12260   match(Set cr (CmpU src zero));
12261 
12262   format %{ "testl  $src, $src\t# unsigned" %}
12263   opcode(0x85);
12264   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
12265   ins_pipe(ialu_cr_reg_imm);
12266 %}
12267 
12268 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12269 %{
        // Pointer compare of two registers: opcode 0x3B with the wide REX
        // prefix (printed as cmpq), producing unsigned flags (rFlagsRegU).
12270   match(Set cr (CmpP op1 op2));
12271 
12272   format %{ "cmpq    $op1, $op2\t# ptr" %}
12273   opcode(0x3B); /* Opcode 3B /r */
12274   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12275   ins_pipe(ialu_cr_reg_reg);
12276 %}
12277 
12278 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12279 %{
        // Pointer compare of a register with a pointer loaded from memory;
        // the LoadP is folded into the cmp (opcode 0x3B, wide REX prefix).
        // Cost 500.
12280   match(Set cr (CmpP op1 (LoadP op2)));
12281 
12282   ins_cost(500); // XXX
12283   format %{ "cmpq    $op1, $op2\t# ptr" %}
12284   opcode(0x3B); /* Opcode 3B /r */
12285   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12286   ins_pipe(ialu_cr_reg_mem);
12287 %}
12288 
12289 // // // Cisc-spilled version of cmpP_rReg
12290 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12291 // //%{
12292 // //  match(Set cr (CmpP (LoadP op1) op2));
12293 // //
12294 // //  format %{ "CMPu   $op1,$op2" %}
12295 // //  ins_cost(500);
12296 // //  opcode(0x39);  /* Opcode 39 /r */
12297 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12298 // //%}
12299 
12300 // XXX this is generalized by compP_rReg_mem???
12301 // Compare raw pointer (used in out-of-heap check).
12302 // Only works because non-oop pointers must be raw pointers
12303 // and raw pointers have no anti-dependencies.
12304 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12305 %{
        // Same match rule and encoding as compP_rReg_mem, but without the
        // explicit ins_cost(500).  It is gated by a predicate that requires
        // the node at n->in(2)->in(2) to have a bottom type that is not an
        // oop pointer.
12306   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
12307   match(Set cr (CmpP op1 (LoadP op2)));
12308 
12309   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12310   opcode(0x3B); /* Opcode 3B /r */
12311   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12312   ins_pipe(ialu_cr_reg_mem);
12313 %}
12314 
12315 // This will generate a signed flags result. This should be OK since
12316 // any compare to a zero should be eq/neq.
12317 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12318 %{
        // Pointer compare against the null constant (immP0), implemented as
        // a 64-bit test of the register with itself (opcode 0x85, wide REX
        // prefix).  Produces a signed rFlagsReg.
12319   match(Set cr (CmpP src zero));
12320 
12321   format %{ "testq   $src, $src\t# ptr" %}
12322   opcode(0x85);
12323   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12324   ins_pipe(ialu_cr_reg_imm);
12325 %}
12326 
12327 // This will generate a signed flags result. This should be OK since
12328 // any compare to a zero should be eq/neq.
12329 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12330 %{
        // Null check of a pointer held in memory, used when compressed oops
        // are off or the narrow-oop base is non-NULL (testP_mem_reg0 covers
        // the other case).  Encoded as F7 /0 with the wide REX prefix and the
        // 32-bit immediate 0xFFFFFFFF; the format text prints this as a testq
        // against all-ones.  Cost 500.
12331   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
12332   match(Set cr (CmpP (LoadP op) zero));
12333 
12334   ins_cost(500); // XXX
12335   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12336   opcode(0xF7); /* Opcode F7 /0 */
12337   ins_encode(REX_mem_wide(op),
12338              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
12339   ins_pipe(ialu_cr_reg_imm);
12340 %}
12341 
12342 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12343 %{
        // Null check of a pointer held in memory, used when compressed oops
        // are on and the narrow-oop base is NULL.  It compares R12 against
        // the memory operand; per the format text, R12 is the heap-base
        // register and holds zero in this configuration.
12344   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
12345   match(Set cr (CmpP (LoadP mem) zero));
12346 
12347   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12348   ins_encode %{
12349     __ cmpq(r12, $mem$$Address);
12350   %}
12351   ins_pipe(ialu_cr_reg_mem);
12352 %}
12353 
12354 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12355 %{
        // Compare of two compressed (narrow) pointers held in registers,
        // emitted as a 32-bit cmpl and producing unsigned flags.
12356   match(Set cr (CmpN op1 op2));
12357 
12358   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12359   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12360   ins_pipe(ialu_cr_reg_reg);
12361 %}
12362 
12363 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12364 %{
        // Compare of a compressed pointer in a register with one loaded from
        // memory; the LoadN is folded into a 32-bit cmpl.
12365   match(Set cr (CmpN src (LoadN mem)));
12366 
12367   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12368   ins_encode %{
12369     __ cmpl($src$$Register, $mem$$Address);
12370   %}
12371   ins_pipe(ialu_cr_reg_mem);
12372 %}
12373 
12374 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a compressed pointer in a register with a narrow-oop
        // constant, emitted through the assembler's cmp_narrow_oop with the
        // constant cast to jobject.
12375   match(Set cr (CmpN op1 op2));
12376 
12377   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12378   ins_encode %{
12379     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12380   %}
12381   ins_pipe(ialu_cr_reg_imm);
12382 %}
12383 
12384 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12385 %{
        // Compare of a narrow-oop constant with a compressed pointer loaded
        // from memory, emitted through cmp_narrow_oop(Address, jobject).
        // NOTE(review): the match rule is CmpN(src, LoadN mem) but the
        // emitted compare has the memory operand first, so the operand order
        // is reversed.  That is harmless for eq/ne users but would flip
        // ordered conditions -- confirm CmpN is only used for eq/ne.
12386   match(Set cr (CmpN src (LoadN mem)));
12387 
12388   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12389   ins_encode %{
12390     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12391   %}
12392   ins_pipe(ialu_cr_reg_mem);
12393 %}
12394 
12395 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a compressed pointer in a register, implemented as a
        // 32-bit test of the register with itself.
12396   match(Set cr (CmpN src zero));
12397 
12398   format %{ "testl   $src, $src\t# compressed ptr" %}
12399   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12400   ins_pipe(ialu_cr_reg_imm);
12401 %}
12402 
12403 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12404 %{
        // Null check of a compressed pointer held in memory, used when the
        // narrow-oop base is non-NULL (testN_mem_reg0 covers the other case).
        // The match rule compares the loaded narrow oop with zero, so the
        // memory word is compared against the immediate 0: ZF is set exactly
        // when the slot holds a null narrow oop.
        // The encoding previously emitted cmpl against 0xFFFFFFFF while the
        // format advertised testl; that compare sets ZF only when the slot
        // holds 0xFFFFFFFF, which is the wrong condition for a null check.
        // Cost 500.
12405   predicate(Universe::narrow_oop_base() != NULL);
12406   match(Set cr (CmpN (LoadN mem) zero));
12407 
12408   ins_cost(500); // XXX
12409   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
12410   ins_encode %{
12411     __ cmpl($mem$$Address, 0);
12412   %}
12413   ins_pipe(ialu_cr_reg_mem);
12414 %}
12415 
// Compare a compressed oop in memory with the zero constant when the
// narrow-oop base is NULL.  As the format string records, R12 (the heap
// base register) is zero in that configuration, so R12 serves as the zero
// operand of the cmpl.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  predicate(Universe::narrow_oop_base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12427 
12428 // Yanked all unsigned pointer compare operations.
12429 // Pointer compares are done with CmpP which is already unsigned.
12430 
// 64-bit compare of two long registers.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  // Opcode 3B /r, preceded by a wide REX prefix.
  opcode(0x3B);
  ins_encode(REX_reg_reg_wide(op1, op2),
             OpcP,
             reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
12440 
// 64-bit compare of a long register with an immL32 constant.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  // Opcode 81 /7
  opcode(0x81, 0x07);
  ins_encode(OpcSErm_wide(op1, op2),
             Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
12450 
// 64-bit compare of a long register with a long loaded from memory; the
// LoadL is folded into the memory operand.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  // Opcode 3B /r
  opcode(0x3B);
  ins_encode(REX_reg_mem_wide(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
12460 
// Compare a long register with the zero constant by testing the register
// against itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
12470 
// Folds (src & con) compared with zero into a single testq with an
// immediate operand.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg_wide(src),
             OpcP,
             reg_opc(src),
             Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
12480 
// Folds (src & loaded long) compared with zero into a single testq with a
// memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem),
             OpcP,
             reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
12490 
// Materialize the three-way result of a long compare (CmpL3) in an integer
// register.  This is an expensive sequence and the form of comparison to
// avoid; the flags register is clobbered as a side effect.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  // The whole sequence is produced by the cmpl3_flag encoding class.
  ins_encode(cmpl3_flag(src1, src2, dst));
  ins_pipe(pipe_slow);
%}
12508 
12509 //----------Max and Min--------------------------------------------------------
12510 // Min Instructions
12511 
// Conditional move used as the second half of minI_rReg.  It has no match
// rule of its own and is only instantiated through an expand.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  opcode(0x0F, 0x4F);
  ins_encode(REX_reg_reg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12521 
12522 
// Integer minimum, computed in place in dst: expands to a compare of dst
// with src followed by cmovI_reg_g.
instruct minI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    // Flags value that links the compare to the conditional move.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
12534 
// Conditional move used as the second half of maxI_rReg.  It has no match
// rule of its own and is only instantiated through an expand.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  opcode(0x0F, 0x4C);
  ins_encode(REX_reg_reg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12544 
12545 
// Integer maximum, computed in place in dst: expands to a compare of dst
// with src followed by cmovI_reg_l.
instruct maxI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    // Flags value that links the compare to the conditional move.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
12557 
12558 // ============================================================================
12559 // Branch Instructions
12560 
// Unconditional jump, long form (declared size: 5 bytes).  The label is a
// relative address measured from JMP+1.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode(OpcP, Lbl(labl));
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
%}
12575 
// Conditional jump on a signed-compare condition, long form (declared
// size: 6 bytes).  The label is a relative address measured from Jcc+1.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12590 
// Back-branch of a counted loop (CountedLoopEnd), long form.  Same encoding
// as jmpCon; the label is a relative address measured from Jcc+1.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12605 
// Counted-loop back-branch for conditions of class cmpOpU evaluated on an
// rFlagsRegU operand, long form.  The label is a relative address measured
// from Jcc+1.
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12619 
// Counted-loop back-branch for conditions of class cmpOpUCF evaluated on
// an rFlagsRegUCF operand, long form.  Its cost (200) is lower than that of
// jmpLoopEndU (300).
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12632 
// Conditional jump on an unsigned-compare condition (cmpOpU on an
// rFlagsRegU operand), long form.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12646 
// Conditional jump for conditions of class cmpOpUCF evaluated on an
// rFlagsRegUCF operand, long form.  Its cost (200) is lower than that of
// jmpConU (300).
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12659 
// Conditional jump for cmpOpUCF2 conditions, long form.  Two 6-byte jumps
// are emitted back to back (declared size: 12 bytes): a jump on parity,
// then a jump on the condition itself.  Only the equal and notEqual
// condition codes are handled.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u   $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u   done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* target = $labl$$label;

    // First jump: taken on parity.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int jp_disp = -1;
    switch ($cop$$cmpcode) {
    case Assembler::notEqual:
      // Parity also goes to the label.  The displacement is relative to
      // the end of the 4-byte field that is about to be written.
      jp_disp = target ? (target->loc_pos() - (cbuf.code_size() + 4)) : 0;
      break;
    case Assembler::equal:
      // Parity skips over the 6-byte conditional jump emitted next.
      jp_disp = 6;
      break;
    default:
      ShouldNotReachHere();
    }
    emit_d32(cbuf, jp_disp);

    // Second jump: taken on the condition itself, always to the label.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int jcc_disp = target ? (target->loc_pos() - (cbuf.code_size() + 4)) : 0;
    emit_d32(cbuf, jcc_disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12699 
12700 // ============================================================================
12701 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12702 // superklass array for an instance of the superklass.  Set a hidden
12703 // internal cache on a hit (cache is checked with exposed code in
12704 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12705 // encoding ALSO sets flags.
12706 
// Value-producing form of the slow-path subtype check: result (RDI) is zero
// on a hit and non-zero on a miss.  RCX and the flags are clobbered.
// opcode(0x1) selects the variant of the shared encoding that XORs RDI on a
// hit.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  // Debug listing only.  Fixed here: the "arrayOopDex" misspelling and the
  // missing comment marker on the final xorq line.
  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
    "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12728 
// Flags-only form of the slow-path subtype check: matches the check when
// its result is compared directly with NULL, so only the flags are
// produced.  RCX and RDI are clobbered.  opcode(0x0) selects the variant of
// the shared encoding that does not XOR RDI.
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  // Debug listing only.  Fixed here: the "arrayOopDex" misspelling.
  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
            "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
    "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12750 
12751 // ============================================================================
12752 // Branch Instructions -- short offset versions
12753 //
12754 // These instructions are used to replace jumps of a long offset (the default
12755 // match) with jumps of a shorter offset.  These instructions are all tagged
12756 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12757 // match rules in general matching.  Instead, the ADLC generates a conversion
12758 // method in the MachNode which can be used to do in-place replacement of the
12759 // long variant with the shorter variant.  The compiler will determine if a
12760 // branch can be taken by the is_short_branch_offset() predicate in the machine
12761 // specific code section of the file.
12762 
// Unconditional jump, short form (declared size: 2 bytes).  Tagged with
// ins_short_branch so it serves as the replacement for jmpDir.  The label
// is a relative address measured from JMP+1.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  size(2);
  opcode(0xEB);
  ins_encode(OpcP, LblShort(labl));
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12777 
// Conditional jump on a signed-compare condition, short form (declared
// size: 2 bytes).  Short-branch counterpart of jmpCon.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12792 
// Counted-loop back-branch, short form (declared size: 2 bytes).
// Short-branch counterpart of jmpLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12807 
// Counted-loop back-branch for cmpOpU conditions, short form (declared
// size: 2 bytes).  Short-branch counterpart of jmpLoopEndU.
instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12822 
// Counted-loop back-branch for cmpOpUCF conditions, short form (declared
// size: 2 bytes).  Short-branch counterpart of jmpLoopEndUCF.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12836 
// Conditional jump on an unsigned-compare condition, short form (declared
// size: 2 bytes).  Short-branch counterpart of jmpConU.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12851 
// Conditional jump for cmpOpUCF conditions, short form (declared size:
// 2 bytes).  Short-branch counterpart of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  opcode(0x70);
  ins_encode(JccShort(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12865 
// Conditional jump for cmpOpUCF2 conditions, short form.  Two 2-byte jumps
// are emitted back to back (declared size: 4 bytes): a jump on parity,
// then a jump on the condition itself.  Only the equal and notEqual
// condition codes are handled.  Short-branch counterpart of jmpConUCF2.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s   $labl\n\t"
      $$emit$$"j$cop,u,s   $labl"
    } else {
      $$emit$$"jp,u,s   done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* target = $labl$$label;

    // First jump: taken on parity.
    emit_cc(cbuf, $primary, Assembler::parity);
    int jp_disp = -1;
    if ($cop$$cmpcode == Assembler::equal) {
      // Parity skips over the 2-byte conditional jump emitted next.
      jp_disp = 2;
    } else if ($cop$$cmpcode == Assembler::notEqual) {
      // Parity also goes to the label.  The displacement is relative to
      // the end of the 1-byte field that is about to be written.
      jp_disp = target ? (target->loc_pos() - (cbuf.code_size() + 1)) : 0;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, jp_disp);

    // Second jump: taken on the condition itself, always to the label.
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int jcc_disp = target ? (target->loc_pos() - (cbuf.code_size() + 1)) : 0;
    emit_d8(cbuf, jcc_disp);

    // Both displacements must fit in a signed byte.
    assert(-128 <= jcc_disp && jcc_disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= jp_disp && jp_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12905 
12906 // ============================================================================
12907 // inlined locking and unlocking
12908 
// Inlined monitor enter (FastLock).  The outcome is delivered in the flags
// register; tmp (pinned to RAX by its operand class) and scr are scratch
// registers for the Fast_Lock encoding.
instruct cmpFastLock(rFlagsReg cr,
                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr) %{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr);

  ins_cost(300);
  format %{ "fastlock $object,$box,$tmp,$scr" %}
  ins_encode(Fast_Lock(object, box, tmp, scr));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
12921 
// Inlined monitor exit (FastUnlock).  The outcome is delivered in the flags
// register; box is pinned to RAX by its operand class and tmp is a scratch
// register for the Fast_Unlock encoding.
instruct cmpFastUnlock(rFlagsReg cr,
                       rRegP object, rax_RegP box, rRegP tmp) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp);

  ins_cost(300);
  format %{ "fastunlock $object, $box, $tmp" %}
  ins_encode(Fast_Unlock(object, box, tmp));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
12934 
12935 
12936 // ============================================================================
12937 // Safepoint Instructions
// Safepoint poll: a RIP-relative test against the polling page.  The
// instruction only clobbers the flags.
instruct safePoint_poll(rFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
            "# Safepoint: poll for GC" %}
  // Opcode + ModRM + Disp32 == 6 bytes
  size(6);
  ins_cost(125);
  ins_encode(enc_safepoint_poll);
  ins_pipe(ialu_reg_mem);
%}
12950 
12951 // ============================================================================
12952 // Procedure Call/Return Instructions
// Static Java call.
// Note: ret_addr_offset() and compute_padding() depend on the exact code
// emitted here and must be adjusted whenever this instruction changes.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  // E8 cd
  opcode(0xE8);
  ins_encode(Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}
12969 
// Dynamic Java call.  Per the format string, RAX is loaded with
// Universe::non_oop_word() ahead of the call itself.
// Note: ret_addr_offset() and compute_padding() depend on the exact code
// emitted here and must be adjusted whenever this instruction changes.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  // E8 cd
  opcode(0xE8);
  ins_encode(Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
  ins_alignment(4);
%}
12987 
// Call from compiled Java code into the runtime (CallRuntime).
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  // E8 cd
  opcode(0xE8);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
13001 
// Leaf call into the runtime: a runtime call without a safepoint.  Shares
// the Java_To_Runtime encoding with CallRuntimeDirect.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  // E8 cd
  opcode(0xE8);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
13015 
// Leaf call into the runtime for CallLeafNoFP nodes: a runtime call without
// a safepoint.  Shares the Java_To_Runtime encoding with CallLeafDirect.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  // E8 cd
  opcode(0xE8);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
  ins_pc_relative(1);
%}
13029 
// Return: pop the return address and jump to it.
// NOTE(review): this instruction has historically been documented as always
// emitting a nop after the ret, to leave room for safepoint patching.  The
// encoding below emits only the opcode byte, so confirm whether that
// padding is produced elsewhere or the note is stale.
instruct Ret() %{
  match(Return);

  format %{ "ret" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}
13043 
// Tail call, also called an 'interprocedural jump': an indirect jump from a
// runtime stub into Java code.  The return address is left in place, so the
// jump target eventually returns to our caller (contrast with TailJump,
// which removes the return address).  The method oop is passed in RBX.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop) %{
  match(TailCall jump_target method_oop);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
  // Opcode FF /4
  opcode(0xFF, 0x4);
  ins_encode(REX_reg(jump_target),
             OpcP,
             reg_opc(jump_target));
  ins_pipe(pipe_jmp);
%}
13058 
// Tail jump: pop the return address into RDX, then jump indirectly to the
// target.  TailCall, by contrast, leaves the return address on the stack.
// The exception oop operand is pinned to RAX by its operand class.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  // Opcode FF /4
  opcode(0xFF, 0x4);
  ins_encode(Opcode(0x5a), // popq rdx
             REX_reg(jump_target),
             OpcP,
             reg_opc(jump_target));
  ins_pipe(pipe_jmp);
%}
13073 
// Placeholder for the exception oop at a handler entry.  The oop is created
// by stack-crawling runtime code and set up in RAX just before control
// reaches the handler, so this instruction emits no code (size 0, empty
// encoding).
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
13087 
// Rethrow: the exception oop arrives in the first argument position and
// control is transferred to the rethrow stub with a JUMP, not a call.
instruct RethrowException() %{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}
13100 
13101 
13102 //----------PEEPHOLE RULES-----------------------------------------------------
13103 // These must follow all instruction definitions as they use the names
13104 // defined in the instructions definitions.
13105 //
13106 // peepmatch ( root_instr_name [preceding_instruction]* );
13107 //
13108 // peepconstraint %{
13109 // (instruction_number.operand_name relational_op instruction_number.operand_name
13110 //  [, ...] );
13111 // // instruction numbers are zero-based using left to right order in peepmatch
13112 //
13113 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13114 // // provide an instruction_number.operand_name for each operand that appears
13115 // // in the replacement instruction's match rule
13116 //
13117 // ---------VM FLAGS---------------------------------------------------------
13118 //
13119 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13120 //
13121 // Each peephole rule is given an identifying number starting with zero and
13122 // increasing by one in the order seen by the parser.  An individual peephole
13123 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13124 // on the command-line.
13125 //
13126 // ---------CURRENT LIMITATIONS----------------------------------------------
13127 //
13128 // Only match adjacent instructions in same basic block
13129 // Only equality constraints
13130 // Only constraints between operands, not (0.dest_reg == RAX_enc)
13131 // Only one replacement instruction
13132 //
13133 // ---------EXAMPLE----------------------------------------------------------
13134 //
13135 // // pertinent parts of existing instructions in architecture description
13136 // instruct movI(rRegI dst, rRegI src)
13137 // %{
13138 //   match(Set dst (CopyI src));
13139 // %}
13140 //
13141 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
13142 // %{
13143 //   match(Set dst (AddI dst src));
13144 //   effect(KILL cr);
13145 // %}
13146 //
13147 // // Change (inc mov) to lea
13148 // peephole %{
//   // increment preceded by register-register move
13150 //   peepmatch ( incI_rReg movI );
13151 //   // require that the destination register of the increment
13152 //   // match the destination register of the move
13153 //   peepconstraint ( 0.dst == 1.dst );
13154 //   // construct a replacement instruction that sets
13155 //   // the destination to ( move's source register + one )
13156 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13157 // %}
13158 //
13159 
13160 // Implementation no longer uses movX instructions since
13161 // machine-independent system no longer uses CopyX nodes.
13162 //
13163 // peephole
13164 // %{
13165 //   peepmatch (incI_rReg movI);
13166 //   peepconstraint (0.dst == 1.dst);
13167 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13168 // %}
13169 
13170 // peephole
13171 // %{
13172 //   peepmatch (decI_rReg movI);
13173 //   peepconstraint (0.dst == 1.dst);
13174 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13175 // %}
13176 
13177 // peephole
13178 // %{
13179 //   peepmatch (addI_rReg_imm movI);
13180 //   peepconstraint (0.dst == 1.dst);
13181 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13182 // %}
13183 
13184 // peephole
13185 // %{
13186 //   peepmatch (incL_rReg movL);
13187 //   peepconstraint (0.dst == 1.dst);
13188 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13189 // %}
13190 
13191 // peephole
13192 // %{
13193 //   peepmatch (decL_rReg movL);
13194 //   peepconstraint (0.dst == 1.dst);
13195 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13196 // %}
13197 
13198 // peephole
13199 // %{
13200 //   peepmatch (addL_rReg_imm movL);
13201 //   peepconstraint (0.dst == 1.dst);
13202 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13203 // %}
13204 
13205 // peephole
13206 // %{
13207 //   peepmatch (addP_rReg_imm movP);
13208 //   peepconstraint (0.dst == 1.dst);
13209 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13210 // %}
13211 
13212 // // Change load of spilled value to only a spill
13213 // instruct storeI(memory mem, rRegI src)
13214 // %{
13215 //   match(Set mem (StoreI mem src));
13216 // %}
13217 //
13218 // instruct loadI(rRegI dst, memory mem)
13219 // %{
13220 //   match(Set dst (LoadI mem));
13221 // %}
13222 //
13223 
// Change load of spilled int value to only a spill.
peephole %{
  // 0 is the loadI (root), 1 is the storeI that precedes it
  peepmatch (loadI storeI);
  // the load must read back the register and memory the store just used
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  // keep only the store
  peepreplace (storeI(1.mem 1.mem 1.src));
%}
13230 
// Change load of spilled long value to only a spill.
peephole %{
  // 0 is the loadL (root), 1 is the storeL that precedes it
  peepmatch (loadL storeL);
  // the load must read back the register and memory the store just used
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  // keep only the store
  peepreplace (storeL(1.mem 1.mem 1.src));
%}
13237 
13238 //----------SMARTSPILL RULES---------------------------------------------------
13239 // These must follow all instruction definitions as they use the names
13240 // defined in the instructions definitions.