1 //
   2 // Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
// Class for all pointer registers except RSP (R12 and R15 are also left out)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
// Class for all pointer registers except RBP and RSP
// (R12 and R15 are also left out of this list)
reg_class ptr_no_rbp_reg(RDX, RDX_H,
                         RAX, RAX_H,
                         RDI, RDI_H,
                         RSI, RSI_H,
                         RCX, RCX_H,
                         RBX, RBX_H,
                         R8,  R8_H,
                         R9,  R9_H,
                         R10, R10_H,
                         R11, R11_H,
                         R13, R13_H,
                         R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
// Class for all long registers except RSP (R12 and R15 are also left out)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
// Class for all int registers except RSP (R12 and R15 are also left out)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
// Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
// Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short names for two Assembler operand-format constants.  RELOC_DISP32 is
// what this file passes as the `format` argument of emit_d32_reloc().
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand for calling a member of a local assembler object named _masm:
// "__ movdqa(a, b)" expands to "_masm. movdqa(a, b)".
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
#ifndef PRODUCT
// Compiled only when PRODUCT is not defined: prints the mnemonic "INT3" for
// this node on the given stream.  The PhaseRegAlloc argument is not used.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("INT3");
}
#endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 623 {
 624   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 625   *(cbuf.code_end()) = c;
 626   cbuf.set_code_end(cbuf.code_end() + 1);
 627 }
 628 
 629 // EMIT_CC()
 630 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 631 {
 632   unsigned char c = (unsigned char) (f1 | f2);
 633   *(cbuf.code_end()) = c;
 634   cbuf.set_code_end(cbuf.code_end() + 1);
 635 }
 636 
 637 // EMIT_OPCODE()
 638 void emit_opcode(CodeBuffer &cbuf, int code)
 639 {
 640   *(cbuf.code_end()) = (unsigned char) code;
 641   cbuf.set_code_end(cbuf.code_end() + 1);
 642 }
 643 
 644 // EMIT_OPCODE() w/ relocation information
 645 void emit_opcode(CodeBuffer &cbuf,
 646                  int code, relocInfo::relocType reloc, int offset, int format)
 647 {
 648   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 649   emit_opcode(cbuf, code);
 650 }
 651 
 652 // EMIT_D8()
 653 void emit_d8(CodeBuffer &cbuf, int d8)
 654 {
 655   *(cbuf.code_end()) = (unsigned char) d8;
 656   cbuf.set_code_end(cbuf.code_end() + 1);
 657 }
 658 
 659 // EMIT_D16()
 660 void emit_d16(CodeBuffer &cbuf, int d16)
 661 {
 662   *((short *)(cbuf.code_end())) = d16;
 663   cbuf.set_code_end(cbuf.code_end() + 2);
 664 }
 665 
 666 // EMIT_D32()
 667 void emit_d32(CodeBuffer &cbuf, int d32)
 668 {
 669   *((int *)(cbuf.code_end())) = d32;
 670   cbuf.set_code_end(cbuf.code_end() + 4);
 671 }
 672 
 673 // EMIT_D64()
 674 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 675 {
 676   *((int64_t*) (cbuf.code_end())) = d64;
 677   cbuf.set_code_end(cbuf.code_end() + 8);
 678 }
 679 
 680 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 681 void emit_d32_reloc(CodeBuffer& cbuf,
 682                     int d32,
 683                     relocInfo::relocType reloc,
 684                     int format)
 685 {
 686   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 687   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 688 
 689   *((int*) (cbuf.code_end())) = d32;
 690   cbuf.set_code_end(cbuf.code_end() + 4);
 691 }
 692 
 693 // emit 32 bit value and construct relocation entry from RelocationHolder
 694 void emit_d32_reloc(CodeBuffer& cbuf,
 695                     int d32,
 696                     RelocationHolder const& rspec,
 697                     int format)
 698 {
 699 #ifdef ASSERT
 700   if (rspec.reloc()->type() == relocInfo::oop_type &&
 701       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 702     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 703   }
 704 #endif
 705   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 706 
 707   *((int* )(cbuf.code_end())) = d32;
 708   cbuf.set_code_end(cbuf.code_end() + 4);
 709 }
 710 
 711 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 712   address next_ip = cbuf.code_end() + 4;
 713   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 714                  external_word_Relocation::spec(addr),
 715                  RELOC_DISP32);
 716 }
 717 
 718 
 719 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 720 void emit_d64_reloc(CodeBuffer& cbuf,
 721                     int64_t d64,
 722                     relocInfo::relocType reloc,
 723                     int format)
 724 {
 725   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 726 
 727   *((int64_t*) (cbuf.code_end())) = d64;
 728   cbuf.set_code_end(cbuf.code_end() + 8);
 729 }
 730 
 731 // emit 64 bit value and construct relocation entry from RelocationHolder
 732 void emit_d64_reloc(CodeBuffer& cbuf,
 733                     int64_t d64,
 734                     RelocationHolder const& rspec,
 735                     int format)
 736 {
 737 #ifdef ASSERT
 738   if (rspec.reloc()->type() == relocInfo::oop_type &&
 739       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 740     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 741            "cannot embed scavengable oops in code");
 742   }
 743 #endif
 744   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 745 
 746   *((int64_t*) (cbuf.code_end())) = d64;
 747   cbuf.set_code_end(cbuf.code_end() + 8);
 748 }
 749 
 750 // Access stack slot for load or store
 751 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 752 {
 753   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 754   if (-0x80 <= disp && disp < 0x80) {
 755     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 756     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 757     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 758   } else {
 759     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 760     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 761     emit_d32(cbuf, disp);     // Displacement // R/M byte
 762   }
 763 }
 764 
 765    // rRegI ereg, memory mem) %{    // emit_reg_mem
 766 void encode_RegMem(CodeBuffer &cbuf,
 767                    int reg,
 768                    int base, int index, int scale, int disp, bool disp_is_oop)
 769 {
 770   assert(!disp_is_oop, "cannot have disp");
 771   int regenc = reg & 7;
 772   int baseenc = base & 7;
 773   int indexenc = index & 7;
 774 
 775   // There is no index & no scale, use form without SIB byte
 776   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 777     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 778     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 779       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 780     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 781       // If 8-bit displacement, mode 0x1
 782       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 783       emit_d8(cbuf, disp);
 784     } else {
 785       // If 32-bit displacement
 786       if (base == -1) { // Special flag for absolute address
 787         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 788         if (disp_is_oop) {
 789           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 790         } else {
 791           emit_d32(cbuf, disp);
 792         }
 793       } else {
 794         // Normal base + offset
 795         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 796         if (disp_is_oop) {
 797           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 798         } else {
 799           emit_d32(cbuf, disp);
 800         }
 801       }
 802     }
 803   } else {
 804     // Else, encode with the SIB byte
 805     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 806     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 807       // If no displacement
 808       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 809       emit_rm(cbuf, scale, indexenc, baseenc);
 810     } else {
 811       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 812         // If 8-bit displacement, mode 0x1
 813         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 814         emit_rm(cbuf, scale, indexenc, baseenc);
 815         emit_d8(cbuf, disp);
 816       } else {
 817         // If 32-bit displacement
 818         if (base == 0x04 ) {
 819           emit_rm(cbuf, 0x2, regenc, 0x4);
 820           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 821         } else {
 822           emit_rm(cbuf, 0x2, regenc, 0x4);
 823           emit_rm(cbuf, scale, indexenc, baseenc); // *
 824         }
 825         if (disp_is_oop) {
 826           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 827         } else {
 828           emit_d32(cbuf, disp);
 829         }
 830       }
 831     }
 832   }
 833 }
 834 
 835 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 836 {
 837   if (dstenc != srcenc) {
 838     if (dstenc < 8) {
 839       if (srcenc >= 8) {
 840         emit_opcode(cbuf, Assembler::REX_B);
 841         srcenc -= 8;
 842       }
 843     } else {
 844       if (srcenc < 8) {
 845         emit_opcode(cbuf, Assembler::REX_R);
 846       } else {
 847         emit_opcode(cbuf, Assembler::REX_RB);
 848         srcenc -= 8;
 849       }
 850       dstenc -= 8;
 851     }
 852 
 853     emit_opcode(cbuf, 0x8B);
 854     emit_rm(cbuf, 0x3, dstenc, srcenc);
 855   }
 856 }
 857 
 858 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 859   if( dst_encoding == src_encoding ) {
 860     // reg-reg copy, use an empty encoding
 861   } else {
 862     MacroAssembler _masm(&cbuf);
 863 
 864     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 865   }
 866 }
 867 
 868 
 869 //=============================================================================
 870 #ifndef PRODUCT
 871 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 872 {
 873   Compile* C = ra_->C;
 874 
 875   int framesize = C->frame_slots() << LogBytesPerInt;
 876   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 877   // Remove wordSize for return adr already pushed
 878   // and another for the RBP we are going to save
 879   framesize -= 2*wordSize;
 880   bool need_nop = true;
 881 
 882   // Calls to C2R adapters often do not accept exceptional returns.
 883   // We require that their callers must bang for them.  But be
 884   // careful, because some VM calls (such as call site linkage) can
 885   // use several kilobytes of stack.  But the stack safety zone should
 886   // account for that.  See bugs 4446381, 4468289, 4497237.
 887   if (C->need_stack_bang(framesize)) {
 888     st->print_cr("# stack bang"); st->print("\t");
 889     need_nop = false;
 890   }
 891   st->print_cr("pushq   rbp"); st->print("\t");
 892 
 893   if (VerifyStackAtCalls) {
 894     // Majik cookie to verify stack depth
 895     st->print_cr("pushq   0xffffffffbadb100d"
 896                   "\t# Majik cookie for stack depth check");
 897     st->print("\t");
 898     framesize -= wordSize; // Remove 2 for cookie
 899     need_nop = false;
 900   }
 901 
 902   if (framesize) {
 903     st->print("subq    rsp, #%d\t# Create frame", framesize);
 904     if (framesize < 0x80 && need_nop) {
 905       st->print("\n\tnop\t# nop for patch_verified_entry");
 906     }
 907   }
 908 }
 909 #endif
 910 
 911 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 912 {
 913   Compile* C = ra_->C;
 914 
 915   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 916   // NativeJump::patch_verified_entry will be able to patch out the entry
 917   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 918   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 919   // 6 bytes. So if we don't do the fldcw or the push then we must
 920   // use the 6 byte frame allocation even if we have no frame. :-(
 921   // If method sets FPU control word do it now
 922 
 923   int framesize = C->frame_slots() << LogBytesPerInt;
 924   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 925   // Remove wordSize for return adr already pushed
 926   // and another for the RBP we are going to save
 927   framesize -= 2*wordSize;
 928   bool need_nop = true;
 929 
 930   // Calls to C2R adapters often do not accept exceptional returns.
 931   // We require that their callers must bang for them.  But be
 932   // careful, because some VM calls (such as call site linkage) can
 933   // use several kilobytes of stack.  But the stack safety zone should
 934   // account for that.  See bugs 4446381, 4468289, 4497237.
 935   if (C->need_stack_bang(framesize)) {
 936     MacroAssembler masm(&cbuf);
 937     masm.generate_stack_overflow_check(framesize);
 938     need_nop = false;
 939   }
 940 
 941   // We always push rbp so that on return to interpreter rbp will be
 942   // restored correctly and we can correct the stack.
 943   emit_opcode(cbuf, 0x50 | RBP_enc);
 944 
 945   if (VerifyStackAtCalls) {
 946     // Majik cookie to verify stack depth
 947     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 948     emit_d32(cbuf, 0xbadb100d);
 949     framesize -= wordSize; // Remove 2 for cookie
 950     need_nop = false;
 951   }
 952 
 953   if (framesize) {
 954     emit_opcode(cbuf, Assembler::REX_W);
 955     if (framesize < 0x80) {
 956       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 957       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 958       emit_d8(cbuf, framesize);
 959       if (need_nop) {
 960         emit_opcode(cbuf, 0x90); // nop
 961       }
 962     } else {
 963       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 964       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 965       emit_d32(cbuf, framesize);
 966     }
 967   }
 968 
 969   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 970 
 971 #ifdef ASSERT
 972   if (VerifyStackAtCalls) {
 973     Label L;
 974     MacroAssembler masm(&cbuf);
 975     masm.push(rax);
 976     masm.mov(rax, rsp);
 977     masm.andptr(rax, StackAlignmentInBytes-1);
 978     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 979     masm.pop(rax);
 980     masm.jcc(Assembler::equal, L);
 981     masm.stop("Stack is not properly aligned!");
 982     masm.bind(L);
 983   }
 984 #endif
 985 }
 986 
 987 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 988 {
 989   return MachNode::size(ra_); // too many variables; just compute it
 990                               // the hard way
 991 }
 992 
 993 int MachPrologNode::reloc() const
 994 {
 995   return 0; // a large enough number
 996 }
 997 
 998 //=============================================================================
 999 #ifndef PRODUCT
1000 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1001 {
1002   Compile* C = ra_->C;
1003   int framesize = C->frame_slots() << LogBytesPerInt;
1004   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1005   // Remove word for return adr already pushed
1006   // and RBP
1007   framesize -= 2*wordSize;
1008 
1009   if (framesize) {
1010     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1011     st->print("\t");
1012   }
1013 
1014   st->print_cr("popq\trbp");
1015   if (do_polling() && C->is_method_compilation()) {
1016     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1017                   "# Safepoint: poll for GC");
1018     st->print("\t");
1019   }
1020 }
1021 #endif
1022 
1023 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1024 {
1025   Compile* C = ra_->C;
1026   int framesize = C->frame_slots() << LogBytesPerInt;
1027   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1028   // Remove word for return adr already pushed
1029   // and RBP
1030   framesize -= 2*wordSize;
1031 
1032   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1033 
1034   if (framesize) {
1035     emit_opcode(cbuf, Assembler::REX_W);
1036     if (framesize < 0x80) {
1037       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1038       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1039       emit_d8(cbuf, framesize);
1040     } else {
1041       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1042       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1043       emit_d32(cbuf, framesize);
1044     }
1045   }
1046 
1047   // popq rbp
1048   emit_opcode(cbuf, 0x58 | RBP_enc);
1049 
1050   if (do_polling() && C->is_method_compilation()) {
1051     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1052     // XXX reg_mem doesn't support RIP-relative addressing yet
1053     cbuf.set_inst_mark();
1054     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1055     emit_opcode(cbuf, 0x85); // testl
1056     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1057     // cbuf.inst_mark() is beginning of instruction
1058     emit_d32_reloc(cbuf, os::get_polling_page());
1059 //                    relocInfo::poll_return_type,
1060   }
1061 }
1062 
1063 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1064 {
1065   Compile* C = ra_->C;
1066   int framesize = C->frame_slots() << LogBytesPerInt;
1067   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1068   // Remove word for return adr already pushed
1069   // and RBP
1070   framesize -= 2*wordSize;
1071 
1072   uint size = 0;
1073 
1074   if (do_polling() && C->is_method_compilation()) {
1075     size += 6;
1076   }
1077 
1078   // count popq rbp
1079   size++;
1080 
1081   if (framesize) {
1082     if (framesize < 0x80) {
1083       size += 4;
1084     } else if (framesize) {
1085       size += 7;
1086     }
1087   }
1088 
1089   return size;
1090 }
1091 
1092 int MachEpilogNode::reloc() const
1093 {
1094   return 2; // a large enough number
1095 }
1096 
1097 const Pipeline* MachEpilogNode::pipeline() const
1098 {
1099   return MachNode::pipeline_class();
1100 }
1101 
1102 int MachEpilogNode::safepoint_offset() const
1103 {
1104   return 0;
1105 }
1106 
1107 //=============================================================================
1108 
// Register class of an allocator location, as computed by rc_class().
enum RC {
  rc_bad,    // location is not valid
  rc_int,    // general purpose register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1115 
1116 static enum RC rc_class(OptoReg::Name reg)
1117 {
1118   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1119 
1120   if (OptoReg::is_stack(reg)) return rc_stack;
1121 
1122   VMReg r = OptoReg::as_VMReg(reg);
1123 
1124   if (r->is_Register()) return rc_int;
1125 
1126   assert(r->is_XMMRegister(), "must be");
1127   return rc_float;
1128 }
1129 
1130 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1131                                        PhaseRegAlloc* ra_,
1132                                        bool do_size,
1133                                        outputStream* st) const
1134 {
1135 
1136   // Get registers to move
1137   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1138   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1139   OptoReg::Name dst_second = ra_->get_reg_second(this);
1140   OptoReg::Name dst_first = ra_->get_reg_first(this);
1141 
1142   enum RC src_second_rc = rc_class(src_second);
1143   enum RC src_first_rc = rc_class(src_first);
1144   enum RC dst_second_rc = rc_class(dst_second);
1145   enum RC dst_first_rc = rc_class(dst_first);
1146 
1147   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1148          "must move at least 1 register" );
1149 
1150   if (src_first == dst_first && src_second == dst_second) {
1151     // Self copy, no move
1152     return 0;
1153   } else if (src_first_rc == rc_stack) {
1154     // mem ->
1155     if (dst_first_rc == rc_stack) {
1156       // mem -> mem
1157       assert(src_second != dst_first, "overlap");
1158       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1159           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1160         // 64-bit
1161         int src_offset = ra_->reg2offset(src_first);
1162         int dst_offset = ra_->reg2offset(dst_first);
1163         if (cbuf) {
1164           emit_opcode(*cbuf, 0xFF);
1165           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1166 
1167           emit_opcode(*cbuf, 0x8F);
1168           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1169 
1170 #ifndef PRODUCT
1171         } else if (!do_size) {
1172           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1173                      "popq    [rsp + #%d]",
1174                      src_offset,
1175                      dst_offset);
1176 #endif
1177         }
1178         return
1179           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1180           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1181       } else {
1182         // 32-bit
1183         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1184         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1185         // No pushl/popl, so:
1186         int src_offset = ra_->reg2offset(src_first);
1187         int dst_offset = ra_->reg2offset(dst_first);
1188         if (cbuf) {
1189           emit_opcode(*cbuf, Assembler::REX_W);
1190           emit_opcode(*cbuf, 0x89);
1191           emit_opcode(*cbuf, 0x44);
1192           emit_opcode(*cbuf, 0x24);
1193           emit_opcode(*cbuf, 0xF8);
1194 
1195           emit_opcode(*cbuf, 0x8B);
1196           encode_RegMem(*cbuf,
1197                         RAX_enc,
1198                         RSP_enc, 0x4, 0, src_offset,
1199                         false);
1200 
1201           emit_opcode(*cbuf, 0x89);
1202           encode_RegMem(*cbuf,
1203                         RAX_enc,
1204                         RSP_enc, 0x4, 0, dst_offset,
1205                         false);
1206 
1207           emit_opcode(*cbuf, Assembler::REX_W);
1208           emit_opcode(*cbuf, 0x8B);
1209           emit_opcode(*cbuf, 0x44);
1210           emit_opcode(*cbuf, 0x24);
1211           emit_opcode(*cbuf, 0xF8);
1212 
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1216                      "movl    rax, [rsp + #%d]\n\t"
1217                      "movl    [rsp + #%d], rax\n\t"
1218                      "movq    rax, [rsp - #8]",
1219                      src_offset,
1220                      dst_offset);
1221 #endif
1222         }
1223         return
1224           5 + // movq
1225           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1226           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1227           5; // movq
1228       }
1229     } else if (dst_first_rc == rc_int) {
1230       // mem -> gpr
1231       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1232           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1233         // 64-bit
1234         int offset = ra_->reg2offset(src_first);
1235         if (cbuf) {
1236           if (Matcher::_regEncode[dst_first] < 8) {
1237             emit_opcode(*cbuf, Assembler::REX_W);
1238           } else {
1239             emit_opcode(*cbuf, Assembler::REX_WR);
1240           }
1241           emit_opcode(*cbuf, 0x8B);
1242           encode_RegMem(*cbuf,
1243                         Matcher::_regEncode[dst_first],
1244                         RSP_enc, 0x4, 0, offset,
1245                         false);
1246 #ifndef PRODUCT
1247         } else if (!do_size) {
1248           st->print("movq    %s, [rsp + #%d]\t# spill",
1249                      Matcher::regName[dst_first],
1250                      offset);
1251 #endif
1252         }
1253         return
1254           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1255       } else {
1256         // 32-bit
1257         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1258         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1259         int offset = ra_->reg2offset(src_first);
1260         if (cbuf) {
1261           if (Matcher::_regEncode[dst_first] >= 8) {
1262             emit_opcode(*cbuf, Assembler::REX_R);
1263           }
1264           emit_opcode(*cbuf, 0x8B);
1265           encode_RegMem(*cbuf,
1266                         Matcher::_regEncode[dst_first],
1267                         RSP_enc, 0x4, 0, offset,
1268                         false);
1269 #ifndef PRODUCT
1270         } else if (!do_size) {
1271           st->print("movl    %s, [rsp + #%d]\t# spill",
1272                      Matcher::regName[dst_first],
1273                      offset);
1274 #endif
1275         }
1276         return
1277           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1278           ((Matcher::_regEncode[dst_first] < 8)
1279            ? 3
1280            : 4); // REX
1281       }
1282     } else if (dst_first_rc == rc_float) {
1283       // mem-> xmm
1284       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1285           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1286         // 64-bit
1287         int offset = ra_->reg2offset(src_first);
1288         if (cbuf) {
1289           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1290           if (Matcher::_regEncode[dst_first] >= 8) {
1291             emit_opcode(*cbuf, Assembler::REX_R);
1292           }
1293           emit_opcode(*cbuf, 0x0F);
1294           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1295           encode_RegMem(*cbuf,
1296                         Matcher::_regEncode[dst_first],
1297                         RSP_enc, 0x4, 0, offset,
1298                         false);
1299 #ifndef PRODUCT
1300         } else if (!do_size) {
1301           st->print("%s  %s, [rsp + #%d]\t# spill",
1302                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1303                      Matcher::regName[dst_first],
1304                      offset);
1305 #endif
1306         }
1307         return
1308           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1309           ((Matcher::_regEncode[dst_first] < 8)
1310            ? 5
1311            : 6); // REX
1312       } else {
1313         // 32-bit
1314         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1315         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1316         int offset = ra_->reg2offset(src_first);
1317         if (cbuf) {
1318           emit_opcode(*cbuf, 0xF3);
1319           if (Matcher::_regEncode[dst_first] >= 8) {
1320             emit_opcode(*cbuf, Assembler::REX_R);
1321           }
1322           emit_opcode(*cbuf, 0x0F);
1323           emit_opcode(*cbuf, 0x10);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[dst_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movss   %s, [rsp + #%d]\t# spill",
1331                      Matcher::regName[dst_first],
1332                      offset);
1333 #endif
1334         }
1335         return
1336           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1337           ((Matcher::_regEncode[dst_first] < 8)
1338            ? 5
1339            : 6); // REX
1340       }
1341     }
1342   } else if (src_first_rc == rc_int) {
1343     // gpr ->
1344     if (dst_first_rc == rc_stack) {
1345       // gpr -> mem
1346       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1347           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1348         // 64-bit
1349         int offset = ra_->reg2offset(dst_first);
1350         if (cbuf) {
1351           if (Matcher::_regEncode[src_first] < 8) {
1352             emit_opcode(*cbuf, Assembler::REX_W);
1353           } else {
1354             emit_opcode(*cbuf, Assembler::REX_WR);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movq    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1369       } else {
1370         // 32-bit
1371         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1372         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1373         int offset = ra_->reg2offset(dst_first);
1374         if (cbuf) {
1375           if (Matcher::_regEncode[src_first] >= 8) {
1376             emit_opcode(*cbuf, Assembler::REX_R);
1377           }
1378           emit_opcode(*cbuf, 0x89);
1379           encode_RegMem(*cbuf,
1380                         Matcher::_regEncode[src_first],
1381                         RSP_enc, 0x4, 0, offset,
1382                         false);
1383 #ifndef PRODUCT
1384         } else if (!do_size) {
1385           st->print("movl    [rsp + #%d], %s\t# spill",
1386                      offset,
1387                      Matcher::regName[src_first]);
1388 #endif
1389         }
1390         return
1391           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1392           ((Matcher::_regEncode[src_first] < 8)
1393            ? 3
1394            : 4); // REX
1395       }
1396     } else if (dst_first_rc == rc_int) {
1397       // gpr -> gpr
1398       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1399           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1400         // 64-bit
1401         if (cbuf) {
1402           if (Matcher::_regEncode[dst_first] < 8) {
1403             if (Matcher::_regEncode[src_first] < 8) {
1404               emit_opcode(*cbuf, Assembler::REX_W);
1405             } else {
1406               emit_opcode(*cbuf, Assembler::REX_WB);
1407             }
1408           } else {
1409             if (Matcher::_regEncode[src_first] < 8) {
1410               emit_opcode(*cbuf, Assembler::REX_WR);
1411             } else {
1412               emit_opcode(*cbuf, Assembler::REX_WRB);
1413             }
1414           }
1415           emit_opcode(*cbuf, 0x8B);
1416           emit_rm(*cbuf, 0x3,
1417                   Matcher::_regEncode[dst_first] & 7,
1418                   Matcher::_regEncode[src_first] & 7);
1419 #ifndef PRODUCT
1420         } else if (!do_size) {
1421           st->print("movq    %s, %s\t# spill",
1422                      Matcher::regName[dst_first],
1423                      Matcher::regName[src_first]);
1424 #endif
1425         }
1426         return 3; // REX
1427       } else {
1428         // 32-bit
1429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1431         if (cbuf) {
1432           if (Matcher::_regEncode[dst_first] < 8) {
1433             if (Matcher::_regEncode[src_first] >= 8) {
1434               emit_opcode(*cbuf, Assembler::REX_B);
1435             }
1436           } else {
1437             if (Matcher::_regEncode[src_first] < 8) {
1438               emit_opcode(*cbuf, Assembler::REX_R);
1439             } else {
1440               emit_opcode(*cbuf, Assembler::REX_RB);
1441             }
1442           }
1443           emit_opcode(*cbuf, 0x8B);
1444           emit_rm(*cbuf, 0x3,
1445                   Matcher::_regEncode[dst_first] & 7,
1446                   Matcher::_regEncode[src_first] & 7);
1447 #ifndef PRODUCT
1448         } else if (!do_size) {
1449           st->print("movl    %s, %s\t# spill",
1450                      Matcher::regName[dst_first],
1451                      Matcher::regName[src_first]);
1452 #endif
1453         }
1454         return
1455           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1456           ? 2
1457           : 3; // REX
1458       }
1459     } else if (dst_first_rc == rc_float) {
1460       // gpr -> xmm
1461       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1462           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1463         // 64-bit
1464         if (cbuf) {
1465           emit_opcode(*cbuf, 0x66);
1466           if (Matcher::_regEncode[dst_first] < 8) {
1467             if (Matcher::_regEncode[src_first] < 8) {
1468               emit_opcode(*cbuf, Assembler::REX_W);
1469             } else {
1470               emit_opcode(*cbuf, Assembler::REX_WB);
1471             }
1472           } else {
1473             if (Matcher::_regEncode[src_first] < 8) {
1474               emit_opcode(*cbuf, Assembler::REX_WR);
1475             } else {
1476               emit_opcode(*cbuf, Assembler::REX_WRB);
1477             }
1478           }
1479           emit_opcode(*cbuf, 0x0F);
1480           emit_opcode(*cbuf, 0x6E);
1481           emit_rm(*cbuf, 0x3,
1482                   Matcher::_regEncode[dst_first] & 7,
1483                   Matcher::_regEncode[src_first] & 7);
1484 #ifndef PRODUCT
1485         } else if (!do_size) {
1486           st->print("movdq   %s, %s\t# spill",
1487                      Matcher::regName[dst_first],
1488                      Matcher::regName[src_first]);
1489 #endif
1490         }
1491         return 5; // REX
1492       } else {
1493         // 32-bit
1494         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1495         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1496         if (cbuf) {
1497           emit_opcode(*cbuf, 0x66);
1498           if (Matcher::_regEncode[dst_first] < 8) {
1499             if (Matcher::_regEncode[src_first] >= 8) {
1500               emit_opcode(*cbuf, Assembler::REX_B);
1501             }
1502           } else {
1503             if (Matcher::_regEncode[src_first] < 8) {
1504               emit_opcode(*cbuf, Assembler::REX_R);
1505             } else {
1506               emit_opcode(*cbuf, Assembler::REX_RB);
1507             }
1508           }
1509           emit_opcode(*cbuf, 0x0F);
1510           emit_opcode(*cbuf, 0x6E);
1511           emit_rm(*cbuf, 0x3,
1512                   Matcher::_regEncode[dst_first] & 7,
1513                   Matcher::_regEncode[src_first] & 7);
1514 #ifndef PRODUCT
1515         } else if (!do_size) {
1516           st->print("movdl   %s, %s\t# spill",
1517                      Matcher::regName[dst_first],
1518                      Matcher::regName[src_first]);
1519 #endif
1520         }
1521         return
1522           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1523           ? 4
1524           : 5; // REX
1525       }
1526     }
1527   } else if (src_first_rc == rc_float) {
1528     // xmm ->
1529     if (dst_first_rc == rc_stack) {
1530       // xmm -> mem
1531       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1532           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1533         // 64-bit
1534         int offset = ra_->reg2offset(dst_first);
1535         if (cbuf) {
1536           emit_opcode(*cbuf, 0xF2);
1537           if (Matcher::_regEncode[src_first] >= 8) {
1538               emit_opcode(*cbuf, Assembler::REX_R);
1539           }
1540           emit_opcode(*cbuf, 0x0F);
1541           emit_opcode(*cbuf, 0x11);
1542           encode_RegMem(*cbuf,
1543                         Matcher::_regEncode[src_first],
1544                         RSP_enc, 0x4, 0, offset,
1545                         false);
1546 #ifndef PRODUCT
1547         } else if (!do_size) {
1548           st->print("movsd   [rsp + #%d], %s\t# spill",
1549                      offset,
1550                      Matcher::regName[src_first]);
1551 #endif
1552         }
1553         return
1554           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1555           ((Matcher::_regEncode[src_first] < 8)
1556            ? 5
1557            : 6); // REX
1558       } else {
1559         // 32-bit
1560         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1561         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1562         int offset = ra_->reg2offset(dst_first);
1563         if (cbuf) {
1564           emit_opcode(*cbuf, 0xF3);
1565           if (Matcher::_regEncode[src_first] >= 8) {
1566               emit_opcode(*cbuf, Assembler::REX_R);
1567           }
1568           emit_opcode(*cbuf, 0x0F);
1569           emit_opcode(*cbuf, 0x11);
1570           encode_RegMem(*cbuf,
1571                         Matcher::_regEncode[src_first],
1572                         RSP_enc, 0x4, 0, offset,
1573                         false);
1574 #ifndef PRODUCT
1575         } else if (!do_size) {
1576           st->print("movss   [rsp + #%d], %s\t# spill",
1577                      offset,
1578                      Matcher::regName[src_first]);
1579 #endif
1580         }
1581         return
1582           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1583           ((Matcher::_regEncode[src_first] < 8)
1584            ? 5
1585            : 6); // REX
1586       }
1587     } else if (dst_first_rc == rc_int) {
1588       // xmm -> gpr
1589       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1590           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1591         // 64-bit
1592         if (cbuf) {
1593           emit_opcode(*cbuf, 0x66);
1594           if (Matcher::_regEncode[dst_first] < 8) {
1595             if (Matcher::_regEncode[src_first] < 8) {
1596               emit_opcode(*cbuf, Assembler::REX_W);
1597             } else {
1598               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1599             }
1600           } else {
1601             if (Matcher::_regEncode[src_first] < 8) {
1602               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1603             } else {
1604               emit_opcode(*cbuf, Assembler::REX_WRB);
1605             }
1606           }
1607           emit_opcode(*cbuf, 0x0F);
1608           emit_opcode(*cbuf, 0x7E);
1609           emit_rm(*cbuf, 0x3,
1610                   Matcher::_regEncode[dst_first] & 7,
1611                   Matcher::_regEncode[src_first] & 7);
1612 #ifndef PRODUCT
1613         } else if (!do_size) {
1614           st->print("movdq   %s, %s\t# spill",
1615                      Matcher::regName[dst_first],
1616                      Matcher::regName[src_first]);
1617 #endif
1618         }
1619         return 5; // REX
1620       } else {
1621         // 32-bit
1622         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1623         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1624         if (cbuf) {
1625           emit_opcode(*cbuf, 0x66);
1626           if (Matcher::_regEncode[dst_first] < 8) {
1627             if (Matcher::_regEncode[src_first] >= 8) {
1628               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1629             }
1630           } else {
1631             if (Matcher::_regEncode[src_first] < 8) {
1632               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1633             } else {
1634               emit_opcode(*cbuf, Assembler::REX_RB);
1635             }
1636           }
1637           emit_opcode(*cbuf, 0x0F);
1638           emit_opcode(*cbuf, 0x7E);
1639           emit_rm(*cbuf, 0x3,
1640                   Matcher::_regEncode[dst_first] & 7,
1641                   Matcher::_regEncode[src_first] & 7);
1642 #ifndef PRODUCT
1643         } else if (!do_size) {
1644           st->print("movdl   %s, %s\t# spill",
1645                      Matcher::regName[dst_first],
1646                      Matcher::regName[src_first]);
1647 #endif
1648         }
1649         return
1650           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1651           ? 4
1652           : 5; // REX
1653       }
1654     } else if (dst_first_rc == rc_float) {
1655       // xmm -> xmm
1656       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1657           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1658         // 64-bit
1659         if (cbuf) {
1660           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1661           if (Matcher::_regEncode[dst_first] < 8) {
1662             if (Matcher::_regEncode[src_first] >= 8) {
1663               emit_opcode(*cbuf, Assembler::REX_B);
1664             }
1665           } else {
1666             if (Matcher::_regEncode[src_first] < 8) {
1667               emit_opcode(*cbuf, Assembler::REX_R);
1668             } else {
1669               emit_opcode(*cbuf, Assembler::REX_RB);
1670             }
1671           }
1672           emit_opcode(*cbuf, 0x0F);
1673           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1674           emit_rm(*cbuf, 0x3,
1675                   Matcher::_regEncode[dst_first] & 7,
1676                   Matcher::_regEncode[src_first] & 7);
1677 #ifndef PRODUCT
1678         } else if (!do_size) {
1679           st->print("%s  %s, %s\t# spill",
1680                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1681                      Matcher::regName[dst_first],
1682                      Matcher::regName[src_first]);
1683 #endif
1684         }
1685         return
1686           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1687           ? 4
1688           : 5; // REX
1689       } else {
1690         // 32-bit
1691         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1692         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1693         if (cbuf) {
1694           if (!UseXmmRegToRegMoveAll)
1695             emit_opcode(*cbuf, 0xF3);
1696           if (Matcher::_regEncode[dst_first] < 8) {
1697             if (Matcher::_regEncode[src_first] >= 8) {
1698               emit_opcode(*cbuf, Assembler::REX_B);
1699             }
1700           } else {
1701             if (Matcher::_regEncode[src_first] < 8) {
1702               emit_opcode(*cbuf, Assembler::REX_R);
1703             } else {
1704               emit_opcode(*cbuf, Assembler::REX_RB);
1705             }
1706           }
1707           emit_opcode(*cbuf, 0x0F);
1708           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1709           emit_rm(*cbuf, 0x3,
1710                   Matcher::_regEncode[dst_first] & 7,
1711                   Matcher::_regEncode[src_first] & 7);
1712 #ifndef PRODUCT
1713         } else if (!do_size) {
1714           st->print("%s  %s, %s\t# spill",
1715                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1716                      Matcher::regName[dst_first],
1717                      Matcher::regName[src_first]);
1718 #endif
1719         }
1720         return
1721           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1722           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1723           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1724       }
1725     }
1726   }
1727 
1728   assert(0," foo ");
1729   Unimplemented();
1730 
1731   return 0;
1732 }
1733 
1734 #ifndef PRODUCT
1735 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1736 {
1737   implementation(NULL, ra_, false, st);
1738 }
1739 #endif
1740 
1741 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1742 {
1743   implementation(&cbuf, ra_, false, NULL);
1744 }
1745 
1746 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1747 {
1748   return implementation(NULL, ra_, true, NULL);
1749 }
1750 
1751 //=============================================================================
1752 #ifndef PRODUCT
1753 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1754 {
1755   st->print("nop \t# %d bytes pad for loops and calls", _count);
1756 }
1757 #endif
1758 
// Emits the padding for this node: builds a MacroAssembler on 'cbuf' and
// requests a nop sequence sized by _count through the '__' assembler
// shorthand.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
{
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1764 
// Reports _count as the encoded size of this node; the register allocator
// argument is not consulted.
uint MachNopNode::size(PhaseRegAlloc*) const
{
  return _count;
}
1769 
1770 
1771 //=============================================================================
1772 #ifndef PRODUCT
1773 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1774 {
1775   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1776   int reg = ra_->get_reg_first(this);
1777   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1778             Matcher::regName[reg], offset);
1779 }
1780 #endif
1781 
1782 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1783 {
1784   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1785   int reg = ra_->get_encode(this);
1786   if (offset >= 0x80) {
1787     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1788     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1789     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1790     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1791     emit_d32(cbuf, offset);
1792   } else {
1793     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1794     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1795     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1796     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1797     emit_d8(cbuf, offset);
1798   }
1799 }
1800 
1801 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1802 {
1803   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1804   return (offset < 0x80) ? 5 : 8; // REX
1805 }
1806 
1807 //=============================================================================
1808 
1809 // emit call stub, compiled java to interpreter
1810 void emit_java_to_interp(CodeBuffer& cbuf)
1811 {
1812   // Stub is fixed up when the corresponding call is converted from
1813   // calling compiled code to calling interpreted code.
1814   // movq rbx, 0
1815   // jmp -5 # to self
1816 
1817   address mark = cbuf.inst_mark();  // get mark within main instrs section
1818 
1819   // Note that the code buffer's inst_mark is always relative to insts.
1820   // That's why we must use the macroassembler to generate a stub.
1821   MacroAssembler _masm(&cbuf);
1822 
1823   address base =
1824   __ start_a_stub(Compile::MAX_stubs_size);
1825   if (base == NULL)  return;  // CodeBuffer::expand failed
1826   // static stub relocation stores the instruction address of the call
1827   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1828   // static stub relocation also tags the methodOop in the code-stream.
1829   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1830   // This is recognized as unresolved by relocs/nativeinst/ic code
1831   __ jump(RuntimeAddress(__ pc()));
1832 
1833   // Update current stubs pointer and restore code_end.
1834   __ end_a_stub();
1835 }
1836 
1837 // size of call stub, compiled java to interpretor
1838 uint size_java_to_interp()
1839 {
1840   return 15;  // movq (1+1+8); jmp (1+4)
1841 }
1842 
1843 // relocation entries for call stub, compiled java to interpretor
1844 uint reloc_java_to_interp()
1845 {
1846   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1847 }
1848 
1849 //=============================================================================
1850 #ifndef PRODUCT
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   if (UseCompressedOops) {
1854     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1855     if (Universe::narrow_oop_shift() != 0) {
1856       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1857     }
1858     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1859   } else {
1860     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1861                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1862   }
1863   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1864   st->print_cr("\tnop");
1865   if (!OptoBreakpoint) {
1866     st->print_cr("\tnop");
1867   }
1868 }
1869 #endif
1870 
1871 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1872 {
1873   MacroAssembler masm(&cbuf);
1874 #ifdef ASSERT
1875   uint code_size = cbuf.code_size();
1876 #endif
1877   if (UseCompressedOops) {
1878     masm.load_klass(rscratch1, j_rarg0);
1879     masm.cmpptr(rax, rscratch1);
1880   } else {
1881     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1882   }
1883 
1884   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1885 
1886   /* WARNING these NOPs are critical so that verified entry point is properly
1887      aligned for patching by NativeJump::patch_verified_entry() */
1888   int nops_cnt = 1;
1889   if (!OptoBreakpoint) {
1890     // Leave space for int3
1891      nops_cnt += 1;
1892   }
1893   if (UseCompressedOops) {
1894     // ??? divisible by 4 is aligned?
1895     nops_cnt += 1;
1896   }
1897   masm.nop(nops_cnt);
1898 
1899   assert(cbuf.code_size() - code_size == size(ra_),
1900          "checking code size of inline cache node");
1901 }
1902 
1903 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1904 {
1905   if (UseCompressedOops) {
1906     if (Universe::narrow_oop_shift() == 0) {
1907       return OptoBreakpoint ? 15 : 16;
1908     } else {
1909       return OptoBreakpoint ? 19 : 20;
1910     }
1911   } else {
1912     return OptoBreakpoint ? 11 : 12;
1913   }
1914 }
1915 
1916 
1917 //=============================================================================
// Size in bytes reserved for the exception handler stub, which
// emit_exception_handler() fills with a single jump.
uint size_exception_handler()
{
  // NativeCall instruction size is the same as NativeJump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1925 
1926 // Emit exception handler code.
1927 int emit_exception_handler(CodeBuffer& cbuf)
1928 {
1929 
1930   // Note that the code buffer's inst_mark is always relative to insts.
1931   // That's why we must use the macroassembler to generate a handler.
1932   MacroAssembler _masm(&cbuf);
1933   address base =
1934   __ start_a_stub(size_exception_handler());
1935   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1936   int offset = __ offset();
1937   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1938   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1939   __ end_a_stub();
1940   return offset;
1941 }
1942 
1943 uint size_deopt_handler()
1944 {
1945   // three 5 byte instructions
1946   return 15;
1947 }
1948 
1949 // Emit deopt handler code.
1950 int emit_deopt_handler(CodeBuffer& cbuf)
1951 {
1952 
1953   // Note that the code buffer's inst_mark is always relative to insts.
1954   // That's why we must use the macroassembler to generate a handler.
1955   MacroAssembler _masm(&cbuf);
1956   address base =
1957   __ start_a_stub(size_deopt_handler());
1958   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1959   int offset = __ offset();
1960   address the_pc = (address) __ pc();
1961   Label next;
1962   // push a "the_pc" on the stack without destroying any registers
1963   // as they all may be live.
1964 
1965   // push address of "next"
1966   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1967   __ bind(next);
1968   // adjust it so it matches "the_pc"
1969   __ subptr(Address(rsp, 0), __ offset() - offset);
1970   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1971   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1972   __ end_a_stub();
1973   return offset;
1974 }
1975 
1976 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1977   int mark = cbuf.insts()->mark_off();
1978   MacroAssembler _masm(&cbuf);
1979   address double_address = __ double_constant(x);
1980   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1981   emit_d32_reloc(cbuf,
1982                  (int) (double_address - cbuf.code_end() - 4),
1983                  internal_word_Relocation::spec(double_address),
1984                  RELOC_DISP32);
1985 }
1986 
1987 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1988   int mark = cbuf.insts()->mark_off();
1989   MacroAssembler _masm(&cbuf);
1990   address float_address = __ float_constant(x);
1991   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1992   emit_d32_reloc(cbuf,
1993                  (int) (float_address - cbuf.code_end() - 4),
1994                  internal_word_Relocation::spec(float_address),
1995                  RELOC_DISP32);
1996 }
1997 
1998 
1999 const bool Matcher::match_rule_supported(int opcode) {
2000   if (!has_match_rule(opcode))
2001     return false;
2002 
2003   return true;  // Per default match rules are supported.
2004 }
2005 
2006 int Matcher::regnum_to_fpu_offset(int regnum)
2007 {
2008   return regnum - 32; // The FP registers are in the second chunk
2009 }
2010 
// This query exists for UltraSparc; returning true just means that a fast
// long-to-float (l2f) conversion is available, which is always reported here.
const bool Matcher::convL2FSupported(void) {
  return true;
}
2015 
// Vector width in bytes: fixed at 8.
const uint Matcher::vector_width_in_bytes(void) {
  return 8;
}
2020 
// Ideal register type used for vectors: Op_RegD.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
2025 
2026 // Is this branch offset short enough that a short branch can be used?
2027 //
2028 // NOTE: If the platform does not provide any short branch variants, then
2029 //       this method should return false for offset 0.
2030 bool Matcher::is_short_branch_offset(int rule, int offset) {
2031   // the short version of jmpConUCF2 contains multiple branches,
2032   // making the reach slightly less
2033   if (rule == jmpConUCF2_rule)
2034     return (-126 <= offset && offset <= 125);
2035   return (-128 <= offset && offset <= 127);
2036 }
2037 
// Reports whether a 64-bit constant counts as "simple".  Every value is
// accepted; the argument is not inspected.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Stricter alternative, currently disabled:
  //return value == (int) value;  // Cf. storeImmL and immL32.

  // Probably always true, even if a temp register is required.
  return true;
}
2045 
// The count register parameter (ecx) to rep stosq for the ClearArray node is
// in words, not bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: eight longs' worth of bytes.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2069 
// Platform hook for implicit null checks.  No-op on amd64: the body is empty
// and both arguments are ignored.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2072 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Set to true for this platform.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
2076 
// Are floats converted to double when stored to stack during deoptimization?
// On x64 they are stored without conversion, so normal access can be used.
bool Matcher::float_in_double() { return false; }
2080 
// Do ints take an entire long register or just half?  Set to true here.
const bool Matcher::int_in_long = true;
2083 
2084 // Return whether or not this register is ever used as an argument.
2085 // This function is used on startup to build the trampoline stubs in
2086 // generateOptoStub.  Registers not mentioned will be killed by the VM
2087 // call in the trampoline, and arguments in those registers not be
2088 // available to the callee.
2089 bool Matcher::can_be_java_arg(int reg)
2090 {
2091   return
2092     reg ==  RDI_num || reg ==  RDI_H_num ||
2093     reg ==  RSI_num || reg ==  RSI_H_num ||
2094     reg ==  RDX_num || reg ==  RDX_H_num ||
2095     reg ==  RCX_num || reg ==  RCX_H_num ||
2096     reg ==   R8_num || reg ==   R8_H_num ||
2097     reg ==   R9_num || reg ==   R9_H_num ||
2098     reg ==  R12_num || reg ==  R12_H_num ||
2099     reg == XMM0_num || reg == XMM0_H_num ||
2100     reg == XMM1_num || reg == XMM1_H_num ||
2101     reg == XMM2_num || reg == XMM2_H_num ||
2102     reg == XMM3_num || reg == XMM3_H_num ||
2103     reg == XMM4_num || reg == XMM4_H_num ||
2104     reg == XMM5_num || reg == XMM5_H_num ||
2105     reg == XMM6_num || reg == XMM6_H_num ||
2106     reg == XMM7_num || reg == XMM7_H_num;
2107 }
2108 
2109 bool Matcher::is_spillable_arg(int reg)
2110 {
2111   return can_be_java_arg(reg);
2112 }
2113 
// Register mask for the DIVI (quotient) projection of divmodI:
// INT_RAX_REG_mask.
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask;
}
2118 
// Register mask for the MODI (remainder) projection of divmodI:
// INT_RDX_REG_mask.
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask;
}
2123 
// Register mask for the DIVL (quotient) projection of divmodL:
// LONG_RAX_REG_mask.
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask;
}
2128 
// Register mask for the MODL (remainder) projection of divmodL:
// LONG_RDX_REG_mask.
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask;
}
2133 
// Register mask used to save SP across a method handle invoke:
// PTR_RBP_REG_mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return PTR_RBP_REG_mask;
}
2137 
2138 static Address build_address(int b, int i, int s, int d) {
2139   Register index = as_Register(i);
2140   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2141   if (index == rsp) {
2142     index = noreg;
2143     scale = Address::no_scale;
2144   }
2145   Address addr(as_Register(b), index, scale, d);
2146   return addr;
2147 }
2148 
2149 %}
2150 
2151 //----------ENCODING BLOCK-----------------------------------------------------
2152 // This block specifies the encoding classes used by the compiler to
2153 // output byte streams.  Encoding classes are parameterized macros
2154 // used by Machine Instruction Nodes in order to generate the bit
2155 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2159 // which returns its register number when queried.  CONST_INTER causes
2160 // an operand to generate a function which returns the value of the
2161 // constant when queried.  MEMORY_INTER causes an operand to generate
2162 // four functions which return the Base Register, the Index Register,
2163 // the Scale Value, and the Offset Value of the operand when queried.
2164 // COND_INTER causes an operand to generate six functions which return
2165 // the encoding code (ie - encoding bits for the instruction)
2166 // associated with each basic boolean condition for a conditional
2167 // instruction.
2168 //
2169 // Instructions specify two basic values for encoding.  Again, a
2170 // function is available to check if the constant displacement is an
2171 // oop. They use the ins_encode keyword to specify their encoding
2172 // classes (which must be a sequence of enc_class names, and their
2173 // parameters, specified in the encoding block), and they use the
2174 // opcode keyword to specify, in order, their primary, secondary, and
2175 // tertiary opcode.  Only the opcode sections which a particular
2176 // instruction needs for encoding need to be specified.
2177 encode %{
2178   // Build emit functions for each basic byte or larger field in the
2179   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2180   // from C++ code in the enc_class source block.  Emit functions will
2181   // live in the main source block for now.  In future, we can
2182   // generalize this by adding a syntax that specifies the sizes of
2183   // fields in an order, so that the adlc can build the emit functions
2184   // automagically
2185 
  // Emit the instruction's primary opcode byte.
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}
2191 
  // Emit the instruction's secondary opcode byte.
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}
2197 
  // Emit the instruction's tertiary opcode byte.
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}
2203 
  // Emit an opcode byte given directly as the constant operand d8.
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}
2209 
  // Emit the size prefix byte 0x66.
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}
2215 
2216   enc_class reg(rRegI reg)
2217   %{
2218     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2219   %}
2220 
2221   enc_class reg_reg(rRegI dst, rRegI src)
2222   %{
2223     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2224   %}
2225 
2226   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2227   %{
2228     emit_opcode(cbuf, $opcode$$constant);
2229     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2230   %}
2231 
  // Flag fixup after a floating-point compare.  When the parity flag is set,
  // the flags are pushed, masked on the stack and popped back; otherwise the
  // whole sequence is skipped.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // Skips the 10 bytes of pushfq (1) + andq (8) + popfq (1) and lands on
    // the trailing nop.
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    // The mask clears bits 2, 4, 6 and 7 of the saved flags image.
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2254 
2255   enc_class cmpfp3(rRegI dst)
2256   %{
2257     int dstenc = $dst$$reg;
2258 
2259     // movl $dst, -1
2260     if (dstenc >= 8) {
2261       emit_opcode(cbuf, Assembler::REX_B);
2262     }
2263     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2264     emit_d32(cbuf, -1);
2265 
2266     // jp,s done
2267     emit_opcode(cbuf, 0x7A);
2268     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2269 
2270     // jb,s done
2271     emit_opcode(cbuf, 0x72);
2272     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2273 
2274     // setne $dst
2275     if (dstenc >= 4) {
2276       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2277     }
2278     emit_opcode(cbuf, 0x0F);
2279     emit_opcode(cbuf, 0x95);
2280     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2281 
2282     // movzbl $dst, $dst
2283     if (dstenc >= 4) {
2284       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2285     }
2286     emit_opcode(cbuf, 0x0F);
2287     emit_opcode(cbuf, 0xB6);
2288     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2289   %}
2290 
2291   enc_class cdql_enc(no_rax_rdx_RegI div)
2292   %{
2293     // Full implementation of Java idiv and irem; checks for
2294     // special case as described in JVM spec., p.243 & p.271.
2295     //
2296     //         normal case                           special case
2297     //
2298     // input : rax: dividend                         min_int
2299     //         reg: divisor                          -1
2300     //
2301     // output: rax: quotient  (= rax idiv reg)       min_int
2302     //         rdx: remainder (= rax irem reg)       0
2303     //
2304     //  Code sequnce:
2305     //
2306     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2307     //    5:   75 07/08                jne    e <normal>
2308     //    7:   33 d2                   xor    %edx,%edx
2309     //  [div >= 8 -> offset + 1]
2310     //  [REX_B]
2311     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2312     //    c:   74 03/04                je     11 <done>
2313     // 000000000000000e <normal>:
2314     //    e:   99                      cltd
2315     //  [div >= 8 -> offset + 1]
2316     //  [REX_B]
2317     //    f:   f7 f9                   idiv   $div
2318     // 0000000000000011 <done>:
2319 
2320     // cmp    $0x80000000,%eax
2321     emit_opcode(cbuf, 0x3d);
2322     emit_d8(cbuf, 0x00);
2323     emit_d8(cbuf, 0x00);
2324     emit_d8(cbuf, 0x00);
2325     emit_d8(cbuf, 0x80);
2326 
2327     // jne    e <normal>
2328     emit_opcode(cbuf, 0x75);
2329     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2330 
2331     // xor    %edx,%edx
2332     emit_opcode(cbuf, 0x33);
2333     emit_d8(cbuf, 0xD2);
2334 
2335     // cmp    $0xffffffffffffffff,%ecx
2336     if ($div$$reg >= 8) {
2337       emit_opcode(cbuf, Assembler::REX_B);
2338     }
2339     emit_opcode(cbuf, 0x83);
2340     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2341     emit_d8(cbuf, 0xFF);
2342 
2343     // je     11 <done>
2344     emit_opcode(cbuf, 0x74);
2345     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2346 
2347     // <normal>
2348     // cltd
2349     emit_opcode(cbuf, 0x99);
2350 
2351     // idivl (note: must be emitted by the user of this rule)
2352     // <done>
2353   %}
2354 
2355   enc_class cdqq_enc(no_rax_rdx_RegL div)
2356   %{
2357     // Full implementation of Java ldiv and lrem; checks for
2358     // special case as described in JVM spec., p.243 & p.271.
2359     //
2360     //         normal case                           special case
2361     //
2362     // input : rax: dividend                         min_long
2363     //         reg: divisor                          -1
2364     //
2365     // output: rax: quotient  (= rax idiv reg)       min_long
2366     //         rdx: remainder (= rax irem reg)       0
2367     //
2368     //  Code sequnce:
2369     //
2370     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2371     //    7:   00 00 80
2372     //    a:   48 39 d0                cmp    %rdx,%rax
2373     //    d:   75 08                   jne    17 <normal>
2374     //    f:   33 d2                   xor    %edx,%edx
2375     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2376     //   15:   74 05                   je     1c <done>
2377     // 0000000000000017 <normal>:
2378     //   17:   48 99                   cqto
2379     //   19:   48 f7 f9                idiv   $div
2380     // 000000000000001c <done>:
2381 
2382     // mov    $0x8000000000000000,%rdx
2383     emit_opcode(cbuf, Assembler::REX_W);
2384     emit_opcode(cbuf, 0xBA);
2385     emit_d8(cbuf, 0x00);
2386     emit_d8(cbuf, 0x00);
2387     emit_d8(cbuf, 0x00);
2388     emit_d8(cbuf, 0x00);
2389     emit_d8(cbuf, 0x00);
2390     emit_d8(cbuf, 0x00);
2391     emit_d8(cbuf, 0x00);
2392     emit_d8(cbuf, 0x80);
2393 
2394     // cmp    %rdx,%rax
2395     emit_opcode(cbuf, Assembler::REX_W);
2396     emit_opcode(cbuf, 0x39);
2397     emit_d8(cbuf, 0xD0);
2398 
2399     // jne    17 <normal>
2400     emit_opcode(cbuf, 0x75);
2401     emit_d8(cbuf, 0x08);
2402 
2403     // xor    %edx,%edx
2404     emit_opcode(cbuf, 0x33);
2405     emit_d8(cbuf, 0xD2);
2406 
2407     // cmp    $0xffffffffffffffff,$div
2408     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2409     emit_opcode(cbuf, 0x83);
2410     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2411     emit_d8(cbuf, 0xFF);
2412 
2413     // je     1e <done>
2414     emit_opcode(cbuf, 0x74);
2415     emit_d8(cbuf, 0x05);
2416 
2417     // <normal>
2418     // cqto
2419     emit_opcode(cbuf, Assembler::REX_W);
2420     emit_opcode(cbuf, 0x99);
2421 
2422     // idivq (note: must be emitted by the user of this rule)
2423     // <done>
2424   %}
2425 
2426   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2427   enc_class OpcSE(immI imm)
2428   %{
2429     // Emit primary opcode and set sign-extend bit
2430     // Check for 8-bit immediate, and set sign extend bit in opcode
2431     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2432       emit_opcode(cbuf, $primary | 0x02);
2433     } else {
2434       // 32-bit immediate
2435       emit_opcode(cbuf, $primary);
2436     }
2437   %}
2438 
2439   enc_class OpcSErm(rRegI dst, immI imm)
2440   %{
2441     // OpcSEr/m
2442     int dstenc = $dst$$reg;
2443     if (dstenc >= 8) {
2444       emit_opcode(cbuf, Assembler::REX_B);
2445       dstenc -= 8;
2446     }
2447     // Emit primary opcode and set sign-extend bit
2448     // Check for 8-bit immediate, and set sign extend bit in opcode
2449     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2450       emit_opcode(cbuf, $primary | 0x02);
2451     } else {
2452       // 32-bit immediate
2453       emit_opcode(cbuf, $primary);
2454     }
2455     // Emit r/m byte with secondary opcode, after primary opcode.
2456     emit_rm(cbuf, 0x3, $secondary, dstenc);
2457   %}
2458 
2459   enc_class OpcSErm_wide(rRegL dst, immI imm)
2460   %{
2461     // OpcSEr/m
2462     int dstenc = $dst$$reg;
2463     if (dstenc < 8) {
2464       emit_opcode(cbuf, Assembler::REX_W);
2465     } else {
2466       emit_opcode(cbuf, Assembler::REX_WB);
2467       dstenc -= 8;
2468     }
2469     // Emit primary opcode and set sign-extend bit
2470     // Check for 8-bit immediate, and set sign extend bit in opcode
2471     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2472       emit_opcode(cbuf, $primary | 0x02);
2473     } else {
2474       // 32-bit immediate
2475       emit_opcode(cbuf, $primary);
2476     }
2477     // Emit r/m byte with secondary opcode, after primary opcode.
2478     emit_rm(cbuf, 0x3, $secondary, dstenc);
2479   %}
2480 
2481   enc_class Con8or32(immI imm)
2482   %{
2483     // Check for 8-bit immediate, and set sign extend bit in opcode
2484     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2485       $$$emit8$imm$$constant;
2486     } else {
2487       // 32-bit immediate
2488       $$$emit32$imm$$constant;
2489     }
2490   %}
2491 
2492   enc_class Lbl(label labl)
2493   %{
2494     // JMP, CALL
2495     Label* l = $labl$$label;
2496     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2497   %}
2498 
2499   enc_class LblShort(label labl)
2500   %{
2501     // JMP, CALL
2502     Label* l = $labl$$label;
2503     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2504     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2505     emit_d8(cbuf, disp);
2506   %}
2507 
  // Emit the secondary opcode combined with dst's register encoding via
  // emit_cc.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
2513 
  // Emit the tertiary opcode combined with dst's register encoding via
  // emit_cc.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
2519 
2520   enc_class reg_opc(rRegI div)
2521   %{
2522     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2523     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2524   %}
2525 
2526   enc_class Jcc(cmpOp cop, label labl)
2527   %{
         // JCC, long form: primary opcode byte, then the secondary opcode
         // combined with the condition code by emit_cc, then a 32-bit
         // displacement measured from the end of the displacement field.
2528     $$$emit8$primary;
2529     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2530     Label* target = $labl$$label;
2531     int rel32 = target ? (target->loc_pos() - (cbuf.code_size() + 4)) : 0;
2532     emit_d32(cbuf, rel32);
2533   %}
2534 
2535   enc_class JccShort (cmpOp cop, label labl)
2536   %{
         // JCC, short form: the primary opcode combined with the condition code
         // by emit_cc, then an 8-bit displacement measured from the end of the
         // displacement byte.  Debug builds assert that the distance fits.
2537     emit_cc(cbuf, $primary, $cop$$cmpcode);
2538     Label* target = $labl$$label;
2539     int rel8 = 0;
2540     if (target) { rel8 = target->loc_pos() - (cbuf.code_size() + 1); }
2541     assert(rel8 >= -128 && rel8 <= 127, "Displacement too large for short jmp");
2542     emit_d8(cbuf, rel8);
2543   %}
2544 
2545   enc_class enc_cmov(cmpOp cop)
2546   %{
         // CMOV: primary opcode byte, then the secondary opcode combined with
         // the condition code by emit_cc.
2547     const int cond = $cop$$cmpcode;
2548     $$$emit8$primary;
2549     emit_cc(cbuf, $secondary, cond);
2550   %}
2551 
2552   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2553   %{
         // Conditional float move built from a short branch: a short jump with
         // the inverted condition skips over an XMM register-to-register move.
2554     // Invert sense of branch from sense of cmov
2555     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
         // The branch distance must equal the byte length of the move emitted
         // below: two opcode bytes plus ModRM, plus one byte for the 0xF3
         // prefix when UseXmmRegToRegMoveAll is off, plus one byte for a REX
         // prefix when either register encoding is 8 or above.
2556     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2557                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2558                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2559     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2560     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
         // REX prefix only when dst and/or src has an encoding of 8 or above.
2561     if ($dst$$reg < 8) {
2562       if ($src$$reg >= 8) {
2563         emit_opcode(cbuf, Assembler::REX_B);
2564       }
2565     } else {
2566       if ($src$$reg < 8) {
2567         emit_opcode(cbuf, Assembler::REX_R);
2568       } else {
2569         emit_opcode(cbuf, Assembler::REX_RB);
2570       }
2571     }
2572     emit_opcode(cbuf, 0x0F);
2573     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
         // Register-direct ModRM: dst in the reg field, src in the r/m field.
2574     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2575   %}
2576 
2577   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2578   %{
         // Conditional double move built from a short branch: a short jump with
         // the inverted condition skips over an XMM register-to-register move.
2579     // Invert sense of branch from sense of cmov
2580     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
         // Branch distance = byte length of the move emitted below.  Both
         // variants carry a one-byte prefix (0x66 or 0xF2), two opcode bytes
         // and ModRM; a REX prefix adds a fifth byte.
2581     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2582 
2583     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2584     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
         // REX prefix only when dst and/or src has an encoding of 8 or above.
2585     if ($dst$$reg < 8) {
2586       if ($src$$reg >= 8) {
2587         emit_opcode(cbuf, Assembler::REX_B);
2588       }
2589     } else {
2590       if ($src$$reg < 8) {
2591         emit_opcode(cbuf, Assembler::REX_R);
2592       } else {
2593         emit_opcode(cbuf, Assembler::REX_RB);
2594       }
2595     }
2596     emit_opcode(cbuf, 0x0F);
2597     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
         // Register-direct ModRM: dst in the reg field, src in the r/m field.
2598     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2599   %}
2600 
2601   enc_class enc_PartialSubtypeCheck()
2602   %{
         // Emits the slow-path subtype check through the macro assembler with
         // fixed registers: RSI = sub class, RAX = super class, RCX = scratch
         // (killed), RDI = result.  No success label is passed; a failed check
         // branches to the local label "miss".  When the instruction's primary
         // opcode is non-zero, RDI is cleared before "miss" is bound, so the
         // clearing is skipped on the miss branch.
2603     Register Rrdi = as_Register(RDI_enc); // result register
2604     Register Rrax = as_Register(RAX_enc); // super class
2605     Register Rrcx = as_Register(RCX_enc); // killed
2606     Register Rrsi = as_Register(RSI_enc); // sub class
2607     Label miss;
2608     const bool set_cond_codes = true;
2609 
2610     MacroAssembler _masm(&cbuf);
         // Pass the named constant instead of a bare literal; it was otherwise
         // an unused local.
2611     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2612                                      NULL, &miss,
2613                                      set_cond_codes);
2614     if ($primary) {
2615       __ xorptr(Rrdi, Rrdi);
2616     }
2617     __ bind(miss);
2618   %}
2619 
2620   enc_class Java_To_Interpreter(method meth)
2621   %{
2622     // CALL Java_To_Interpreter
2623     // Mark the start of the instruction; relocation info refers to it.
2624     cbuf.set_inst_mark();
2625     $$$emit8$primary;
2626     // CALL directly to the runtime.  The displacement is relative to the
2627     // end of the 4-byte field that is about to be emitted.
2628     const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4);
2629     emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
2630   %}
2632 
2633   enc_class preserve_SP %{
         // Saves RSP in RBP.  Debug builds record the code size before and
         // after the move and assert that the difference matches
         // preserve_SP_size() (defined outside this section).
2634     debug_only(int off0 = cbuf.code_size());
2635     MacroAssembler _masm(&cbuf);
2636     // RBP is preserved across all calls, even compiled calls.
2637     // Use it to preserve RSP in places where the callee might change the SP.
2638     __ movptr(rbp, rsp);
2639     debug_only(int off1 = cbuf.code_size());
2640     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2641   %}
2642 
2643   enc_class restore_SP %{
         // Counterpart of preserve_SP: copies RBP back into RSP.
2644     MacroAssembler _masm(&cbuf);
2645     __ movptr(rsp, rbp);
2646   %}
2647 
2648   enc_class Java_Static_Call(method meth)
2649   %{
2650     // JAVA STATIC CALL
2651     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2652     // determine who we intended to call.
2653     cbuf.set_inst_mark();
2654     $$$emit8$primary;
2655 
2656     // The displacement is relative to the end of the 4-byte field.
2657     const int rel32 = (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4);
2658     if (!_method) {
2659       // No method attached: runtime-call relocation, no interpreter stub.
2660       emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
2661     } else {
2662       if (_optimized_virtual) {
2663         emit_d32_reloc(cbuf, rel32, opt_virtual_call_Relocation::spec(), RELOC_DISP32);
2664       } else {
2665         emit_d32_reloc(cbuf, rel32, static_call_Relocation::spec(), RELOC_DISP32);
2666       }
2667       // Emit stub for static call
2668       emit_java_to_interp(cbuf);
2669     }
2670   %}
2677 
2678   enc_class Java_Dynamic_Call(method meth)
2679   %{
2680     // JAVA DYNAMIC CALL
2681     // !!!!!
2682     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2683     // emit_call_dynamic_prologue( cbuf );
         // Two instructions are emitted, each with its own instruction mark:
         // first the 64-bit immediate load into RAX, then the call itself.
2684     cbuf.set_inst_mark();
2685 
2686     // movq rax, -1
2687     emit_opcode(cbuf, Assembler::REX_W);
2688     emit_opcode(cbuf, 0xB8 | RAX_enc);
         // The immediate is Universe::non_oop_word(), recorded with an oop
         // relocation.
2689     emit_d64_reloc(cbuf,
2690                    (int64_t) Universe::non_oop_word(),
2691                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
         // Capture the mark of the load before it is overwritten below; the
         // call's relocation is created from this address.
2692     address virtual_call_oop_addr = cbuf.inst_mark();
2693     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2694     // who we intended to call.
2695     cbuf.set_inst_mark();
2696     $$$emit8$primary;
         // The displacement is relative to the end of the 4-byte field.
2697     emit_d32_reloc(cbuf,
2698                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2699                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2700                    RELOC_DISP32);
2701   %}
2702 
2703   enc_class Java_Compiled_Call(method meth)
2704   %{
2705     // JAVA COMPILED CALL
         // Indirect call through the from_compiled entry stored at a fixed
         // byte offset from RAX.  The offset is encoded as a one-byte
         // displacement when it fits in a signed byte, otherwise as 32 bits.
2706     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2707 
2708     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2709     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2710 
2711     // callq *disp(%rax)
2712     cbuf.set_inst_mark();
2713     $$$emit8$primary;
         // Check both bounds: a displacement below -0x80 does not fit in the
         // one-byte form either and would otherwise be silently truncated.
2714     if (-0x80 <= disp && disp < 0x80) {
2715       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2716       emit_d8(cbuf, disp); // Displacement
2717     } else {
2718       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2719       emit_d32(cbuf, disp); // Displacement
2720     }
2721   %}
2722 
2723   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2724   %{
         // SAL, SAR, SHR on a 32-bit register by an 8-bit immediate:
         // optional REX.B, primary opcode, register-direct ModRM with the
         // secondary opcode in the reg field, then the shift count.
2725     const int dstenc = $dst$$reg;
2726     const bool dst_hi = dstenc >= 8;
2727     if (dst_hi) {
2728       emit_opcode(cbuf, Assembler::REX_B);
2729     }
2730     $$$emit8$primary;
2731     emit_rm(cbuf, 0x3, $secondary, dst_hi ? dstenc - 8 : dstenc);
2732     $$$emit8$shift$$constant;
2733   %}
2735 
2736   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2737   %{
         // SAL, SAR, SHR on a 64-bit register by an 8-bit immediate: a REX.W
         // prefix is always emitted (REX.WB for encodings of 8 and above),
         // followed by the primary opcode, ModRM and the shift count.
2738     const int dstenc = $dst$$reg;
2739     const bool dst_hi = dstenc >= 8;
2740     emit_opcode(cbuf, dst_hi ? Assembler::REX_WB : Assembler::REX_W);
2741     $$$emit8$primary;
2742     emit_rm(cbuf, 0x3, $secondary, dst_hi ? dstenc - 8 : dstenc);
2743     $$$emit8$shift$$constant;
2744   %}
2750 
2751   enc_class load_immI(rRegI dst, immI src)
2752   %{
         // Load a 32-bit immediate: optional REX.B, opcode 0xB8 with the low
         // three bits of the register encoding or-ed in, then the immediate.
2753     const int dstenc = $dst$$reg;
2754     const bool dst_hi = dstenc >= 8;
2755     if (dst_hi) {
2756       emit_opcode(cbuf, Assembler::REX_B);
2757     }
2758     emit_opcode(cbuf, 0xB8 | (dst_hi ? dstenc - 8 : dstenc));
2759     $$$emit32$src$$constant;
2760   %}
2761 
2762   enc_class load_immL(rRegL dst, immL src)
2763   %{
         // Load a 64-bit immediate: REX.W (REX.WB for encodings of 8 and
         // above), opcode 0xB8 with the low three bits of the register
         // encoding or-ed in, then the 8-byte immediate.
2764     const int dstenc = $dst$$reg;
2765     const bool dst_hi = dstenc >= 8;
2766     emit_opcode(cbuf, dst_hi ? Assembler::REX_WB : Assembler::REX_W);
2767     emit_opcode(cbuf, 0xB8 | (dst_hi ? dstenc - 8 : dstenc));
2768     emit_d64(cbuf, $src$$constant);
2769   %}
2774 
2775   enc_class load_immUL32(rRegL dst, immUL32 src)
2776   %{
2777     // same as load_immI, but this time we care about zeroes in the high word
         // Encoding: optional REX.B, opcode 0xB8 + low register bits, imm32.
2778     const int dstenc = $dst$$reg;
2779     const bool dst_hi = dstenc >= 8;
2780     if (dst_hi) {
2781       emit_opcode(cbuf, Assembler::REX_B);
2782     }
2783     emit_opcode(cbuf, 0xB8 | (dst_hi ? dstenc - 8 : dstenc));
2784     $$$emit32$src$$constant;
2785   %}
2786 
2787   enc_class load_immL32(rRegL dst, immL32 src)
2788   %{
         // Load a 32-bit immediate into a 64-bit register: REX.W (REX.WB for
         // encodings of 8 and above), opcode 0xC7, register-direct ModRM with
         // reg field 0, then the 4-byte immediate.
2789     const int dstenc = $dst$$reg;
2790     const bool dst_hi = dstenc >= 8;
2791     emit_opcode(cbuf, dst_hi ? Assembler::REX_WB : Assembler::REX_W);
2792     emit_opcode(cbuf, 0xC7);
2793     emit_rm(cbuf, 0x03, 0x00, dst_hi ? dstenc - 8 : dstenc);
2794     $$$emit32$src$$constant;
2795   %}
2800 
2801   enc_class load_immP31(rRegP dst, immP32 src)
2802   %{
2803     // same as load_immI, but this time we care about zeroes in the high word
         // Encoding: optional REX.B, opcode 0xB8 + low register bits, imm32.
2804     const int dstenc = $dst$$reg;
2805     const bool dst_hi = dstenc >= 8;
2806     if (dst_hi) {
2807       emit_opcode(cbuf, Assembler::REX_B);
2808     }
2809     emit_opcode(cbuf, 0xB8 | (dst_hi ? dstenc - 8 : dstenc));
2810     $$$emit32$src$$constant;
2811   %}
2812 
2813   enc_class load_immP(rRegP dst, immP src)
2814   %{
         // Load a 64-bit pointer immediate: REX.W (REX.WB for encodings of 8
         // and above), opcode 0xB8 + low register bits, then the 8-byte
         // constant.  Oop constants are emitted with an oop relocation.
2815     const int dstenc = $dst$$reg;
2816     const bool dst_hi = dstenc >= 8;
2817     emit_opcode(cbuf, dst_hi ? Assembler::REX_WB : Assembler::REX_W);
2818     emit_opcode(cbuf, 0xB8 | (dst_hi ? dstenc - 8 : dstenc));
2819     // This next line should be generated from ADLC
2820     if ($src->constant_is_oop()) {
2821       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2822     } else {
2823       emit_d64(cbuf, $src$$constant);
2824     }
2825   %}
2830 
2831   enc_class load_immF(regF dst, immF con)
2832   %{
         // XXX reg_mem doesn't support RIP-relative addressing yet
         // Emits only the ModRM byte (mod 00, r/m 101) and the float constant
         // reference; prefixes and opcode are not emitted here.
2833     const int reg_field = $dst$$reg & 7;
2834     emit_rm(cbuf, 0x0, reg_field, 0x5); // 00 reg 101
2835     emit_float_constant(cbuf, $con$$constant);
2836   %}
2837 
2838   enc_class load_immD(regD dst, immD con)
2839   %{
         // XXX reg_mem doesn't support RIP-relative addressing yet
         // Emits only the ModRM byte (mod 00, r/m 101) and the double constant
         // reference; prefixes and opcode are not emitted here.
2840     const int reg_field = $dst$$reg & 7;
2841     emit_rm(cbuf, 0x0, reg_field, 0x5); // 00 reg 101
2842     emit_double_constant(cbuf, $con$$constant);
2843   %}
2844 
2845   enc_class load_conF (regF dst, immF con) %{    // Load float constant
         // Byte sequence: F3 [REX.R] 0F 10, ModRM (mod 00, r/m 101), then the
         // float constant reference.  REX.R only for encodings of 8 and above.
2846     const int dstenc = $dst$$reg;
2847     emit_opcode(cbuf, 0xF3);
2848     if (dstenc >= 8) { emit_opcode(cbuf, Assembler::REX_R); }
2849     emit_opcode(cbuf, 0x0F);
2850     emit_opcode(cbuf, 0x10);
2851     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
2852     emit_float_constant(cbuf, $con$$constant);
2853   %}
2855 
2856   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2857     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
         // Prefix and second opcode byte depend on UseXmmLoadAndClearUpper;
         // REX.R is emitted only for encodings of 8 and above.
2858     const int dstenc = $dst$$reg;
2859     const bool clear_upper = UseXmmLoadAndClearUpper;
2860     emit_opcode(cbuf, clear_upper ? 0xF2 : 0x66);
2861     if (dstenc >= 8) { emit_opcode(cbuf, Assembler::REX_R); }
2862     emit_opcode(cbuf, 0x0F);
2863     emit_opcode(cbuf, clear_upper ? 0x10 : 0x12);
2864     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
2865     emit_double_constant(cbuf, $con$$constant);
2866   %}
2867 
2868   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2869   enc_class enc_copy(rRegI dst, rRegI src)
2870   %{
         // Delegates to encode_copy (defined outside this section) with the
         // register encodings of dst and src.
2871     encode_copy(cbuf, $dst$$reg, $src$$reg);
2872   %}
2873 
2874   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2875   enc_class enc_CopyXD( RegD dst, RegD src ) %{
         // Delegates to encode_CopyXD (defined outside this section) with the
         // register encodings of dst and src.
2876     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2877   %}
2878 
2879   enc_class enc_copy_always(rRegI dst, rRegI src)
2880   %{
         // Unconditional 32-bit register copy: optional REX prefix, opcode
         // 0x8B, register-direct ModRM with dst in the reg field and src in
         // the r/m field.  Unlike enc_copy_wide, this is emitted even when
         // dst and src are the same register.
2881     const int dstenc = $dst$$reg;
2882     const int srcenc = $src$$reg;
2883     const bool dst_hi = dstenc >= 8;
2884     const bool src_hi = srcenc >= 8;
2885 
2886     if (dst_hi && src_hi) {
2887       emit_opcode(cbuf, Assembler::REX_RB);
2888     } else if (dst_hi) {
2889       emit_opcode(cbuf, Assembler::REX_R);
2890     } else if (src_hi) {
2891       emit_opcode(cbuf, Assembler::REX_B);
2892     }
2893 
2894     emit_opcode(cbuf, 0x8B);
2895     emit_rm(cbuf, 0x3, dst_hi ? dstenc - 8 : dstenc, src_hi ? srcenc - 8 : srcenc);
2896   %}
2902 
2903   enc_class enc_copy_wide(rRegL dst, rRegL src)
2904   %{
         // 64-bit register copy: REX.W-family prefix, opcode 0x8B and a
         // register-direct ModRM.  Nothing is emitted when dst and src have
         // the same encoding.
2905     const int dstenc = $dst$$reg;
2906     const int srcenc = $src$$reg;
2907     const bool dst_hi = dstenc >= 8;
2908     const bool src_hi = srcenc >= 8;
2909 
2910     if (dstenc != srcenc) {
2911       if (dst_hi) {
2912         emit_opcode(cbuf, src_hi ? Assembler::REX_WRB : Assembler::REX_WR);
2913       } else {
2914         emit_opcode(cbuf, src_hi ? Assembler::REX_WB : Assembler::REX_W);
2915       }
2916       emit_opcode(cbuf, 0x8B);
2917       emit_rm(cbuf, 0x3, dst_hi ? dstenc - 8 : dstenc, src_hi ? srcenc - 8 : srcenc);
2918     }
2919   %}
2929 
2930   enc_class Con32(immI src)
2931   %{
2932     // Output immediate
         // Uses the ADLC emit32 directive on the constant value of src.
2933     $$$emit32$src$$constant;
2934   %}
2935 
2936   enc_class Con64(immL src)
2937   %{
2938     // Output immediate
         // emit_d64 takes the code buffer as its first argument, as in
         // load_immL; the buffer was missing from this call.
2939     emit_d64(cbuf, $src$$constant);
2940   %}
2941 
2942   enc_class Con32F_as_bits(immF src)
2943   %{
2944     // Output Float immediate bits
         // The float constant is converted with jint_cast and written as a
         // 32-bit value.
2945     const jfloat value = $src$$constant;
2946     const jint bits = jint_cast(value);
2947     emit_d32(cbuf, bits);
2948   %}
2949 
2950   enc_class Con16(immI src)
2951   %{
2952     // Output immediate
         // Uses the ADLC emit16 directive on the constant value of src.
2953     $$$emit16$src$$constant;
2954   %}
2955 
2956   // How is this different from Con32??? XXX
2957   enc_class Con_d32(immI src)
2958   %{
         // Writes the constant through emit_d32 directly, whereas Con32 uses
         // the ADLC emit32 directive.
         // NOTE(review): whether the two differ in relocation handling cannot
         // be determined from this section -- confirm against ADLC.
2959     emit_d32(cbuf,$src$$constant);
2960   %}
2961 
2962   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2963     // Output immediate memory reference
         // ModRM with mod 00 and r/m 101, followed by a zero 32-bit displacement.
         // NOTE(review): the register encoding of t1 goes into the reg field
         // without masking to three bits, unlike the other encoders in this
         // section -- confirm callers only pass encodings below 8.
2964     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2965     emit_d32(cbuf, 0x00);
2966   %}
2967 
2968   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
         // Table jump: load the address of the table built from _index2label
         // into dest, then jump via the address formed from dest as base and
         // switch_val as unscaled index.
2969     MacroAssembler masm(&cbuf);
2970 
2971     Register index_reg  = as_Register($switch_val$$reg);
2972     Register base_reg   = as_Register($dest$$reg);
2973     address  jump_table = masm.address_table_constant(_index2label);
2974 
2975     // jump(ArrayAddress) is not used here: the macro assembler would need
2976     // r10 for it, and the compiler treats r10 as an allocatable register.
2977     // The dispatch is therefore built by hand.
2978     masm.lea(base_reg, InternalAddress(jump_table));
2979     masm.jmp(Address(base_reg, index_reg, Address::times_1));
2980   %}
2986 
2987   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
         // Table jump with scaled index and displacement: load the address of
         // the table built from _index2label into dest, then jump via the
         // address formed from dest, switch_val scaled by shift, and offset.
2988     MacroAssembler masm(&cbuf);
2989 
2990     Register index_reg  = as_Register($switch_val$$reg);
2991     Register base_reg   = as_Register($dest$$reg);
2992     address  jump_table = masm.address_table_constant(_index2label);
2993 
2994     // jump(ArrayAddress) is not used here: the macro assembler would need
2995     // r10 for it, and the compiler treats r10 as an allocatable register.
2996     // The dispatch is therefore built by hand.
2997     masm.lea(base_reg, InternalAddress(jump_table));
2998     masm.jmp(Address(base_reg, index_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant));
2999   %}
3005 
3006   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
         // Table jump with scaled index: load the address of the table built
         // from _index2label into dest, then jump via the address formed from
         // dest and switch_val scaled by shift.
3007     MacroAssembler masm(&cbuf);
3008 
3009     Register index_reg  = as_Register($switch_val$$reg);
3010     Register base_reg   = as_Register($dest$$reg);
3011     address  jump_table = masm.address_table_constant(_index2label);
3012 
3013     // jump(ArrayAddress) is not used here: the macro assembler would need
3014     // r10 for it, and the compiler treats r10 as an allocatable register.
3015     // The dispatch is therefore built by hand.
3016     masm.lea(base_reg, InternalAddress(jump_table));
3017     masm.jmp(Address(base_reg, index_reg, (Address::ScaleFactor)$shift$$constant));
3018   %}
3024 
3025   enc_class lock_prefix()
3026   %{
         // Emit the LOCK prefix byte (0xF0) only when os::is_MP() reports a
         // multiprocessor system.
3027     const bool need_lock = os::is_MP();
3028     if (need_lock) { emit_opcode(cbuf, 0xF0); }
3029   %}
3031 
3032   enc_class REX_mem(memory mem)
3033   %{
         // REX prefix for a memory operand: B when the base encoding is 8 or
         // above, X when the index encoding is 8 or above, nothing when
         // neither is.
3034     const bool base_hi  = $mem$$base >= 8;
3035     const bool index_hi = $mem$$index >= 8;
3036     if (base_hi && index_hi) {
3037       emit_opcode(cbuf, Assembler::REX_XB);
3038     } else if (base_hi) {
3039       emit_opcode(cbuf, Assembler::REX_B);
3040     } else if (index_hi) {
3041       emit_opcode(cbuf, Assembler::REX_X);
3042     }
3043   %}
3046 
3047   enc_class REX_mem_wide(memory mem)
3048   %{
         // REX.W prefix for a memory operand; always emitted, with B added for
         // a base encoding of 8 or above and X for an index encoding of 8 or
         // above.
3049     const bool base_hi  = $mem$$base >= 8;
3050     const bool index_hi = $mem$$index >= 8;
3051     if (base_hi) {
3052       emit_opcode(cbuf, index_hi ? Assembler::REX_WXB : Assembler::REX_WB);
3053     } else {
3054       emit_opcode(cbuf, index_hi ? Assembler::REX_WX : Assembler::REX_W);
3055     }
3056   %}
3063 
3064   // for byte regs
3065   enc_class REX_breg(rRegI reg)
3066   %{
         // Encodings 4..7 get a bare REX prefix, encodings of 8 and above get
         // REX.B, encodings 0..3 get no prefix.
3067     const int regenc = $reg$$reg;
3068     if (regenc >= 8) { emit_opcode(cbuf, Assembler::REX_B); }
3069     else if (regenc >= 4) { emit_opcode(cbuf, Assembler::REX); }
3070   %}
3071 
3072   // for byte regs
3073   enc_class REX_reg_breg(rRegI dst, rRegI src)
3074   %{
         // REX prefix for a register pair where src is used as a byte register:
         // R for a dst encoding of 8 or above, B for a src encoding of 8 or
         // above, and a bare REX when only src in 4..7 requires one.
3075     const int dstenc = $dst$$reg;
3076     const int srcenc = $src$$reg;
3077     if (dstenc >= 8) {
3078       emit_opcode(cbuf, srcenc >= 8 ? Assembler::REX_RB : Assembler::REX_R);
3079     } else if (srcenc >= 8) {
3080       emit_opcode(cbuf, Assembler::REX_B);
3081     } else if (srcenc >= 4) {
3082       emit_opcode(cbuf, Assembler::REX);
3083     }
3084   %}
3087 
3088   // for byte regs
3089   enc_class REX_breg_mem(rRegI reg, memory mem)
3090   %{
         // REX prefix for a byte register plus memory operand.  The selector
         // packs "encoding is 8 or above" as bits: 4 = reg (R), 2 = index (X),
         // 1 = base (B).
3091     const int regenc = $reg$$reg;
3092     const int sel = ((regenc >= 8) ? 4 : 0) |
3093                     (($mem$$index >= 8) ? 2 : 0) |
3094                     (($mem$$base >= 8) ? 1 : 0);
3095     switch (sel) {
3096     case 0:
3097       // No extended register; byte registers 4..7 still get a bare REX.
3098       if (regenc >= 4) {
3099         emit_opcode(cbuf, Assembler::REX);
3100       }
3101       break;
3102     case 1: emit_opcode(cbuf, Assembler::REX_B);   break;
3103     case 2: emit_opcode(cbuf, Assembler::REX_X);   break;
3104     case 3: emit_opcode(cbuf, Assembler::REX_XB);  break;
3105     case 4: emit_opcode(cbuf, Assembler::REX_R);   break;
3106     case 5: emit_opcode(cbuf, Assembler::REX_RB);  break;
3107     case 6: emit_opcode(cbuf, Assembler::REX_RX);  break;
3108     case 7: emit_opcode(cbuf, Assembler::REX_RXB); break;
3109     }
3110   %}
3121 
3122   enc_class REX_reg(rRegI reg)
3123   %{
         // REX.B for a register encoding of 8 or above; otherwise no prefix.
3124     const bool reg_hi = $reg$$reg >= 8;
3125     if (reg_hi) { emit_opcode(cbuf, Assembler::REX_B); }
3126   %}
3128 
3129   enc_class REX_reg_wide(rRegI reg)
3130   %{
         // Always emits a REX.W prefix; REX.WB for an encoding of 8 or above.
3131     const bool reg_hi = $reg$$reg >= 8;
3132     emit_opcode(cbuf, reg_hi ? Assembler::REX_WB : Assembler::REX_W);
3133   %}
3137 
3138   enc_class REX_reg_reg(rRegI dst, rRegI src)
3139   %{
         // REX prefix for a register pair: R for a dst encoding of 8 or above,
         // B for a src encoding of 8 or above, nothing when neither is.
3140     const bool dst_hi = $dst$$reg >= 8;
3141     const bool src_hi = $src$$reg >= 8;
3142     if (dst_hi && src_hi) {
3143       emit_opcode(cbuf, Assembler::REX_RB);
3144     } else if (dst_hi) {
3145       emit_opcode(cbuf, Assembler::REX_R);
3146     } else if (src_hi) {
3147       emit_opcode(cbuf, Assembler::REX_B);
3148     }
3149   %}
3152 
3153   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3154   %{
         // REX.W prefix for a register pair; always emitted, with R added for
         // a dst encoding of 8 or above and B for a src encoding of 8 or above.
3155     const bool dst_hi = $dst$$reg >= 8;
3156     const bool src_hi = $src$$reg >= 8;
3157     if (dst_hi) {
3158       emit_opcode(cbuf, src_hi ? Assembler::REX_WRB : Assembler::REX_WR);
3159     } else {
3160       emit_opcode(cbuf, src_hi ? Assembler::REX_WB : Assembler::REX_W);
3161     }
3162   %}
3169 
3170   enc_class REX_reg_mem(rRegI reg, memory mem)
3171   %{
         // REX prefix for a register plus memory operand.  The selector packs
         // "encoding is 8 or above" as bits: 4 = reg (R), 2 = index (X),
         // 1 = base (B).  Selector 0 needs no prefix.
3172     const int sel = (($reg$$reg >= 8) ? 4 : 0) |
3173                     (($mem$$index >= 8) ? 2 : 0) |
3174                     (($mem$$base >= 8) ? 1 : 0);
3175     switch (sel) {
3176     case 1: emit_opcode(cbuf, Assembler::REX_B);   break;
3177     case 2: emit_opcode(cbuf, Assembler::REX_X);   break;
3178     case 3: emit_opcode(cbuf, Assembler::REX_XB);  break;
3179     case 4: emit_opcode(cbuf, Assembler::REX_R);   break;
3180     case 5: emit_opcode(cbuf, Assembler::REX_RB);  break;
3181     case 6: emit_opcode(cbuf, Assembler::REX_RX);  break;
3182     case 7: emit_opcode(cbuf, Assembler::REX_RXB); break;
3183     default: break;
3184     }
3185   %}
3200 
3201   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3202   %{
         // REX.W prefix for a register plus memory operand; always emitted.
         // The selector packs "encoding is 8 or above" as bits: 4 = reg (R),
         // 2 = index (X), 1 = base (B).
3203     const int sel = (($reg$$reg >= 8) ? 4 : 0) |
3204                     (($mem$$index >= 8) ? 2 : 0) |
3205                     (($mem$$base >= 8) ? 1 : 0);
3206     switch (sel) {
3207     case 0: emit_opcode(cbuf, Assembler::REX_W);    break;
3208     case 1: emit_opcode(cbuf, Assembler::REX_WB);   break;
3209     case 2: emit_opcode(cbuf, Assembler::REX_WX);   break;
3210     case 3: emit_opcode(cbuf, Assembler::REX_WXB);  break;
3211     case 4: emit_opcode(cbuf, Assembler::REX_WR);   break;
3212     case 5: emit_opcode(cbuf, Assembler::REX_WRB);  break;
3213     case 6: emit_opcode(cbuf, Assembler::REX_WRX);  break;
3214     case 7: emit_opcode(cbuf, Assembler::REX_WRXB); break;
3215     }
3216   %}
3233 
3234   enc_class reg_mem(rRegI ereg, memory mem)
3235   %{
         // Gathers the register encoding and the memory operand's components
         // and passes them to encode_RegMem (defined outside this section).
3236     // High registers handle in encode_RegMem
3237     const int  reg_field   = $ereg$$reg;
3238     const int  mem_base    = $mem$$base;
3239     const int  mem_index   = $mem$$index;
3240     const int  mem_scale   = $mem$$scale;
3241     const int  mem_disp    = $mem$$disp;
3242     const bool disp_is_oop = $mem->disp_is_oop();
3243 
3244     encode_RegMem(cbuf, reg_field, mem_base, mem_index, mem_scale, mem_disp, disp_is_oop);
3245   %}
3246 
3247   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3248   %{
         // Like reg_mem, but the first value handed to encode_RegMem is an
         // opcode extension taken from the rm_opcode constant rather than a
         // register encoding.
3249     // High registers handle in encode_RegMem
3250     const int  opcode_ext  = $rm_opcode$$constant;
3251     const int  mem_base    = $mem$$base;
3252     const int  mem_index   = $mem$$index;
3253     const int  mem_scale   = $mem$$scale;
3254     const int  mem_disp    = $mem$$disp;
3255     // disp-as-oop when working with static globals
3256     const bool disp_is_oop = $mem->disp_is_oop();
3257 
3258     encode_RegMem(cbuf, opcode_ext, mem_base, mem_index, mem_scale, mem_disp, disp_is_oop);
3259   %}
3263 
3264   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3265   %{
         // Operand bytes for "dst, [src0 + src1]": src0 is the base, there is
         // no index or scale, and the displacement is never an oop.
3266     const int no_index = 0x04;              // 0x04 indicates no index
3267     const int no_scale = 0x00;              // 0x00 indicates no scale
3268     const int dst_enc  = $dst$$reg;
3269     const int base_enc = $src0$$reg;        // 0xFFFFFFFF indicates no base
3270     const int disp32   = $src1$$constant;   // 0x00 indicates no displacement
3271     encode_RegMem(cbuf, dst_enc, base_enc, no_index, no_scale, disp32, false);
3272   %}
3275 
3276   enc_class neg_reg(rRegI dst)
3277   %{
         // 32-bit NEG: optional REX.B, opcode 0xF7, register-direct ModRM
         // with 3 in the reg field.
3278     const int dstenc = $dst$$reg;
3279     const bool dst_hi = dstenc >= 8;
3280     if (dst_hi) {
3281       emit_opcode(cbuf, Assembler::REX_B);
3282     }
3283     // NEG $dst
3284     emit_opcode(cbuf, 0xF7);
3285     emit_rm(cbuf, 0x3, 0x03, dst_hi ? dstenc - 8 : dstenc);
3286   %}
3287 
3288   enc_class neg_reg_wide(rRegI dst)
3289   %{
         // 64-bit NEG: REX.W (REX.WB for encodings of 8 and above), opcode
         // 0xF7, register-direct ModRM with 3 in the reg field.
3290     const int dstenc = $dst$$reg;
3291     const bool dst_hi = dstenc >= 8;
3292     emit_opcode(cbuf, dst_hi ? Assembler::REX_WB : Assembler::REX_W);
3293     // NEG $dst
3294     emit_opcode(cbuf, 0xF7);
3295     emit_rm(cbuf, 0x3, 0x03, dst_hi ? dstenc - 8 : dstenc);
3296   %}
3301 
3302   enc_class setLT_reg(rRegI dst)
3303   %{
         // SETLT on the byte register of dst: REX.B for encodings of 8 and
         // above, a bare REX for encodings 4..7, then 0F 9C and a
         // register-direct ModRM.
3304     const int dstenc = $dst$$reg;
3305     const bool dst_hi = dstenc >= 8;
3306     if (dst_hi || dstenc >= 4) {
3307       emit_opcode(cbuf, dst_hi ? Assembler::REX_B : Assembler::REX);
3308     }
3309     // SETLT $dst
3310     emit_opcode(cbuf, 0x0F);
3311     emit_opcode(cbuf, 0x9C);
3312     emit_rm(cbuf, 0x3, 0x0, dst_hi ? dstenc - 8 : dstenc);
3313   %}
3316 
3317   enc_class setNZ_reg(rRegI dst)
3318   %{
         // SETNZ on the byte register of dst: REX.B for encodings of 8 and
         // above, a bare REX for encodings 4..7, then 0F 95 and a
         // register-direct ModRM.
3319     const int dstenc = $dst$$reg;
3320     const bool dst_hi = dstenc >= 8;
3321     if (dst_hi || dstenc >= 4) {
3322       emit_opcode(cbuf, dst_hi ? Assembler::REX_B : Assembler::REX);
3323     }
3324     // SETNZ $dst
3325     emit_opcode(cbuf, 0x0F);
3326     emit_opcode(cbuf, 0x95);
3327     emit_rm(cbuf, 0x3, 0x0, dst_hi ? dstenc - 8 : dstenc);
3328   %}
3331 
3332   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3333                        rcx_RegI tmp)
3334   %{
3335     // cadd_cmpLT
         // Hand-assembled four-instruction sequence (see the per-instruction
         // comments below): p -= q; tmp = tmp - tmp - borrow; tmp &= y;
         // p += tmp.
3336 
3337     int tmpReg = $tmp$$reg;
3338 
3339     int penc = $p$$reg;
3340     int qenc = $q$$reg;
3341     int yenc = $y$$reg;
3342 
3343     // subl $p,$q
3344     if (penc < 8) {
3345       if (qenc >= 8) {
3346         emit_opcode(cbuf, Assembler::REX_B);
3347       }
3348     } else {
3349       if (qenc < 8) {
3350         emit_opcode(cbuf, Assembler::REX_R);
3351       } else {
3352         emit_opcode(cbuf, Assembler::REX_RB);
3353       }
3354     }
3355     emit_opcode(cbuf, 0x2B);
3356     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3357 
3358     // sbbl $tmp, $tmp
         // No REX prefix and no masking of tmpReg here or below.
         // NOTE(review): this relies on the rcx_RegI operand class yielding an
         // encoding below 8 -- confirm.
3359     emit_opcode(cbuf, 0x1B);
3360     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3361 
3362     // andl $tmp, $y
3363     if (yenc >= 8) {
3364       emit_opcode(cbuf, Assembler::REX_B);
3365     }
3366     emit_opcode(cbuf, 0x23);
3367     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3368 
3369     // addl $p,$tmp
3370     if (penc >= 8) {
3371         emit_opcode(cbuf, Assembler::REX_R);
3372     }
3373     emit_opcode(cbuf, 0x03);
3374     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3375   %}
3376 
3377   // Compare the longs and set -1, 0, or 1 into dst
3378   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3379   %{
         // Hand-assembled sequence: a 64-bit compare, dst preloaded with -1, a
         // short "jl" over the rest, then setne + movzbl to produce 0 or 1.
         // The branch distance is counted by hand from the bytes emitted below.
3380     int src1enc = $src1$$reg;
3381     int src2enc = $src2$$reg;
3382     int dstenc = $dst$$reg;
3383 
3384     // cmpq $src1, $src2
3385     if (src1enc < 8) {
3386       if (src2enc < 8) {
3387         emit_opcode(cbuf, Assembler::REX_W);
3388       } else {
3389         emit_opcode(cbuf, Assembler::REX_WB);
3390       }
3391     } else {
3392       if (src2enc < 8) {
3393         emit_opcode(cbuf, Assembler::REX_WR);
3394       } else {
3395         emit_opcode(cbuf, Assembler::REX_WRB);
3396       }
3397     }
3398     emit_opcode(cbuf, 0x3B);
3399     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3400 
3401     // movl $dst, -1
3402     if (dstenc >= 8) {
3403       emit_opcode(cbuf, Assembler::REX_B);
3404     }
3405     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3406     emit_d32(cbuf, -1);
3407 
3408     // jl,s done
         // Distance skips setne and movzbl: 3 bytes each, plus one REX byte
         // each when dstenc is 4 or above.
3409     emit_opcode(cbuf, 0x7C);
3410     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3411 
3412     // setne $dst
         // Byte-register access: encodings 4..7 need a bare REX, 8 and above
         // need REX.B.
3413     if (dstenc >= 4) {
3414       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3415     }
3416     emit_opcode(cbuf, 0x0F);
3417     emit_opcode(cbuf, 0x95);
3418     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3419 
3420     // movzbl $dst, $dst
         // dst occupies both the reg and r/m fields, hence REX.RB for
         // encodings of 8 and above.
3421     if (dstenc >= 4) {
3422       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3423     }
3424     emit_opcode(cbuf, 0x0F);
3425     emit_opcode(cbuf, 0xB6);
3426     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3427   %}
3428 
3429   enc_class Push_ResultXD(regD dst) %{
         // Moves an x87 result into an XMM register through the stack: store
         // to [RSP], load dst from [RSP], then add 8 to RSP.
         // NOTE(review): presumably paired with Push_SrcXD, which subtracts 8
         // from RSP -- confirm at the call sites.
3430     int dstenc = $dst$$reg;
3431 
3432     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3433 
3434     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3435     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3436     if (dstenc >= 8) {
3437       emit_opcode(cbuf, Assembler::REX_R);
3438     }
3439     emit_opcode  (cbuf, 0x0F );
3440     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3441     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3442 
3443     // add rsp,8
3444     emit_opcode(cbuf, Assembler::REX_W);
3445     emit_opcode(cbuf,0x83);
3446     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3447     emit_d8(cbuf,0x08);
3448   %}
3449 
3450   enc_class Push_SrcXD(regD src) %{
         // Moves an XMM double onto the x87 stack through memory: subtract 8
         // from RSP, store src to [RSP], then load it from [RSP].  RSP is left
         // decremented by this encoding.
3451     int srcenc = $src$$reg;
3452 
3453     // subq rsp,#8
3454     emit_opcode(cbuf, Assembler::REX_W);
3455     emit_opcode(cbuf, 0x83);
3456     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3457     emit_d8(cbuf, 0x8);
3458 
3459     // movsd [rsp],src
3460     emit_opcode(cbuf, 0xF2);
3461     if (srcenc >= 8) {
3462       emit_opcode(cbuf, Assembler::REX_R);
3463     }
3464     emit_opcode(cbuf, 0x0F);
3465     emit_opcode(cbuf, 0x11);
3466     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3467 
3468     // fldd [rsp]
         // NOTE(review): a 0x66 prefix byte is emitted ahead of the 0xDD
         // opcode; its purpose is not evident from this section -- confirm.
3469     emit_opcode(cbuf, 0x66);
3470     emit_opcode(cbuf, 0xDD);
3471     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3472   %}
3473 
3474 
3475   enc_class movq_ld(regD dst, memory mem) %{
         // Load form: movq from the memory operand into the XMM register of
         // dst, emitted through the macro assembler.
3476     MacroAssembler _masm(&cbuf);
3477     __ movq($dst$$XMMRegister, $mem$$Address);
3478   %}
3479 
3480   enc_class movq_st(memory mem, regD src) %{
         // Store form: movq from the XMM register of src to the memory
         // operand, emitted through the macro assembler.
3481     MacroAssembler _masm(&cbuf);
3482     __ movq($mem$$Address, $src$$XMMRegister);
3483   %}
3484 
3485   enc_class pshufd_8x8(regF dst, regF src) %{
         // Copies src to dst via encode_CopyXD, then applies punpcklbw and
         // pshuflw (mode 0x00) to dst in place.
3486     MacroAssembler _masm(&cbuf);
3487     XMMRegister xmm_dst = as_XMMRegister($dst$$reg);
3488     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3489     __ punpcklbw(xmm_dst, xmm_dst);
3490     __ pshuflw(xmm_dst, xmm_dst, 0x00);
3491   %}
3492 
3493   enc_class pshufd_4x16(regF dst, regF src) %{
         // Despite the name, this emits pshuflw (mode 0x00) from src into dst.
3494     MacroAssembler _masm(&cbuf);
3495     XMMRegister xmm_dst = as_XMMRegister($dst$$reg);
3496     __ pshuflw(xmm_dst, as_XMMRegister($src$$reg), 0x00);
3497   %}
3498 
3499   enc_class pshufd(regD dst, regD src, int mode) %{
         // Emits pshufd from src into dst with the given shuffle mode.
3500     MacroAssembler _masm(&cbuf);
3501     XMMRegister xmm_dst = as_XMMRegister($dst$$reg);
3502     __ pshufd(xmm_dst, as_XMMRegister($src$$reg), $mode);
3503   %}
3504 
3505   enc_class pxor(regD dst, regD src) %{
         // Emits pxor with dst as first and src as second XMM operand.
3506     MacroAssembler _masm(&cbuf);
3507     XMMRegister xmm_dst = as_XMMRegister($dst$$reg);
3508     __ pxor(xmm_dst, as_XMMRegister($src$$reg));
3509   %}
3510 
3511   enc_class mov_i2x(regD dst, rRegI src) %{
         // Emits movdl from the general register of src into the XMM register
         // of dst.
3512     MacroAssembler _masm(&cbuf);
3513     XMMRegister xmm_dst = as_XMMRegister($dst$$reg);
3514     __ movdl(xmm_dst, as_Register($src$$reg));
3515   %}
3516 
3517   // obj: object to lock
3518   // box: box address (header location) -- killed
3519   // tmp: rax -- killed
3520   // scr: rbx -- killed
       // NOTE(review): the signature below pins only tmp (rax_RegI); scr is a
       // plain rRegP here, so the "rbx" above is presumably enforced by the
       // instruct that uses this encoding -- confirm.
3521   //
3522   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3523   // from i486.ad.  See that file for comments.
3524   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3525   // use the shorter encoding.  (Movl clears the high-order 32-bits).
       //
       // The EmitSync flag selects one of three code shapes:
       //   EmitSync & 1 : store unused_mark into the box and compare rsp with 0
       //   EmitSync & 2 : stack-lock CAS plus recursive-lock check only
       //   otherwise    : additionally handles an inflated mark word (bit 0x02)
3526 
3527 
3528   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3529   %{
3530     Register objReg = as_Register((int)$obj$$reg);
3531     Register boxReg = as_Register((int)$box$$reg);
3532     Register tmpReg = as_Register($tmp$$reg);
3533     Register scrReg = as_Register($scr$$reg);
3534     MacroAssembler masm(&cbuf);
3535 
3536     // Verify uniqueness of register assignments -- necessary but not sufficient
3537     assert (objReg != boxReg && objReg != tmpReg &&
3538             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3539 
         // Optional statistics: count every entry when a counters object is attached.
3540     if (_counters != NULL) {
3541       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3542     }
3543     if (EmitSync & 1) {
3544         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3545         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
             // NOTE(review): rsp is compared with 0, which presumably leaves ZF=0
             // so the caller always falls into the slow path -- confirm.
3546         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3547     } else
3548     if (EmitSync & 2) {
3549         Label DONE_LABEL;
3550         if (UseBiasedLocking) {
3551            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3552           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3553         }
3554         // QQQ was movl...
             // tmpReg = mark word with bit 0 set; that value is saved in the box
             // and then used as the expected value of the compare-and-exchange.
3555         masm.movptr(tmpReg, 0x1);
3556         masm.orptr(tmpReg, Address(objReg, 0));
3557         masm.movptr(Address(boxReg, 0), tmpReg);
3558         if (os::is_MP()) {
3559           masm.lock();
3560         }
3561         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3562         masm.jcc(Assembler::equal, DONE_LABEL);
3563 
3564         // Recursive locking
             // tmpReg (the mark word seen by the failed CAS) minus rsp, masked with
             // 7 - page size: for a power-of-two page size the result is zero only
             // when the difference is below one page and its low three bits are clear.
3565         masm.subptr(tmpReg, rsp);
3566         masm.andptr(tmpReg, 7 - os::vm_page_size());
3567         masm.movptr(Address(boxReg, 0), tmpReg);
3568 
3569         masm.bind(DONE_LABEL);
3570         masm.nop(); // avoid branch to branch
3571     } else {
3572         Label DONE_LABEL, IsInflated, Egress;
3573 
3574         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3575         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3576         masm.jcc   (Assembler::notZero, IsInflated) ; 
3577          
3578         // it's stack-locked, biased or neutral
3579         // TODO: optimize markword triage order to reduce the number of
3580         // conditional branches in the most common cases.
3581         // Beware -- there's a subtle invariant that fetch of the markword
3582         // at [FETCH], below, will never observe a biased encoding (*101b).
3583         // If this invariant is not held we'll suffer exclusion (safety) failure.
3584 
3585         if (UseBiasedLocking && !UseOptoBiasInlining) {
3586           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3587           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3588         }
3589 
3590         // was q will it destroy high?
3591         masm.orl   (tmpReg, 1) ; 
3592         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3593         if (os::is_MP()) { masm.lock(); } 
3594         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3595         if (_counters != NULL) {
3596            masm.cond_inc32(Assembler::equal,
3597                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3598         }
3599         masm.jcc   (Assembler::equal, DONE_LABEL);
3600 
3601         // Recursive locking
             // Same subtract-and-mask test as in the EmitSync & 2 variant above.
3602         masm.subptr(tmpReg, rsp);
3603         masm.andptr(tmpReg, 7 - os::vm_page_size());
3604         masm.movptr(Address(boxReg, 0), tmpReg);
3605         if (_counters != NULL) {
3606            masm.cond_inc32(Assembler::equal,
3607                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3608         }
3609         masm.jmp   (DONE_LABEL) ;
3610 
3611         masm.bind  (IsInflated) ;
3612         // It's inflated
3613 
3614         // TODO: someday avoid the ST-before-CAS penalty by
3615         // relocating (deferring) the following ST.
3616         // We should also think about trying a CAS without having
3617         // fetched _owner.  If the CAS is successful we may
3618         // avoid an RTO->RTS upgrade on the $line.
3619         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3620         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3621 
             // tmpReg still carries the 0x02 bit tested above, hence the "-2" on
             // every ObjectMonitor field offset.  boxReg keeps that tagged value
             // while tmpReg is reloaded with the owner field.
3622         masm.mov    (boxReg, tmpReg) ; 
3623         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3624         masm.testptr(tmpReg, tmpReg) ;   
3625         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3626 
3627         // It's inflated and appears unlocked
3628         if (os::is_MP()) { masm.lock(); } 
3629         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3630         // Intentional fall-through into DONE_LABEL ...
3631 
3632         masm.bind  (DONE_LABEL) ;
3633         masm.nop   () ;                 // avoid jmp to jmp
3634     }
3635   %}
3636 
3637   // obj: object to unlock
3638   // box: box address (displaced header location), killed
3639   // RBX: killed tmp; cannot be obj nor box
       // NOTE(review): the signature pins box to rax (rax_RegP) but declares tmp
       // as a plain rRegP; the "RBX" above is presumably enforced by the instruct
       // that uses this encoding -- confirm.
       //
       // The EmitSync flag selects the code shape:
       //   EmitSync & 4 : only compares rsp with 0
       //   EmitSync & 8 : displaced-header check plus CAS, no inflated path
       //   otherwise    : additionally handles an inflated mark word (bit 0x02);
       //                  EmitSync & 65536 drops the successor check and
       //                  EmitSync & 32768 appends a nop after DONE_LABEL.
3640   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3641   %{
3642 
3643     Register objReg = as_Register($obj$$reg);
3644     Register boxReg = as_Register($box$$reg);
3645     Register tmpReg = as_Register($tmp$$reg);
3646     MacroAssembler masm(&cbuf);
3647 
3648     if (EmitSync & 4) { 
3649        masm.cmpptr(rsp, 0) ; 
3650     } else
3651     if (EmitSync & 8) {
3652        Label DONE_LABEL;
3653        if (UseBiasedLocking) {
3654          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3655        }
3656 
3657        // Check whether the displaced header is 0
3658        //(=> recursive unlock)
3659        masm.movptr(tmpReg, Address(boxReg, 0));
3660        masm.testptr(tmpReg, tmpReg);
3661        masm.jcc(Assembler::zero, DONE_LABEL);
3662 
3663        // If not recursive lock, reset the header to displaced header
3664        if (os::is_MP()) {
3665          masm.lock();
3666        }
3667        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3668        masm.bind(DONE_LABEL);
3669        masm.nop(); // avoid branch to branch
3670     } else {
3671        Label DONE_LABEL, Stacked, CheckSucc ;
3672 
3673        if (UseBiasedLocking && !UseOptoBiasInlining) {
3674          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3675        }
3676         
            // tmpReg = current mark word.  A zero word in the box ends the sequence
            // immediately; otherwise bit 0x02 of the mark word picks the inflated
            // path (set) or the Stacked path (clear).
3677        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3678        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3679        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3680        masm.testl (tmpReg, 0x02) ; 
3681        masm.jcc   (Assembler::zero, Stacked) ; 
3682         
3683        // It's inflated
            // tmpReg carries the 0x02 bit, hence the "-2" on every field offset.
            // boxReg = (owner ^ current thread) | recursions: non-zero leaves via
            // DONE_LABEL with ZF=0.  Then boxReg = cxq | EntryList: non-zero goes
            // to CheckSucc, zero clears the owner field and leaves with the flags
            // of that last or still in place (mov and jmp do not write flags).
3684        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3685        masm.xorptr(boxReg, r15_thread) ; 
3686        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3687        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
3688        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3689        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3690        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3691        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3692        masm.jmp   (DONE_LABEL) ; 
3693         
3694        if ((EmitSync & 65536) == 0) { 
3695          Label LSuccess, LGoSlowPath ;
3696          masm.bind  (CheckSucc) ;
3697          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3698          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3699 
3700          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3701          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3702          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3703          // are all faster when the write buffer is populated.
3704          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3705          if (os::is_MP()) {
3706             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3707          }
3708          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3709          masm.jcc   (Assembler::notZero, LSuccess) ;
3710 
              // The successor field read back as NULL: try to install the current
              // thread as owner again (expected value 0 in boxReg).  A failed
              // exchange counts as success for this sequence.
3711          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3712          if (os::is_MP()) { masm.lock(); }
3713          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3714          masm.jcc   (Assembler::notEqual, LSuccess) ;
3715          // Intentional fall-through into slow-path
3716 
3717          masm.bind  (LGoSlowPath) ;
3718          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3719          masm.jmp   (DONE_LABEL) ;
3720 
3721          masm.bind  (LSuccess) ;
3722          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3723          masm.jmp   (DONE_LABEL) ;
3724        }
3725 
3726        masm.bind  (Stacked) ; 
3727        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3728        if (os::is_MP()) { masm.lock(); } 
3729        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3730 
            // With the successor check compiled out, CheckSucc still has to be
            // bound somewhere: it simply aliases DONE_LABEL.
3731        if (EmitSync & 65536) {
3732           masm.bind (CheckSucc) ;
3733        }
3734        masm.bind(DONE_LABEL);
3735        if (EmitSync & 32768) {
3736           masm.nop();                      // avoid branch to branch
3737        }
3738     }
3739   %}
3740 
3741 
3742   enc_class enc_rethrow()
3743   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub().
         // code_end() is read after the opcode byte has been emitted, so it is the
         // address of the displacement field; subtracting a further 4 makes the
         // displacement relative to the end of that field.  The displacement is
         // recorded with a runtime_call relocation in RELOC_DISP32 format.
3744     cbuf.set_inst_mark();
3745     emit_opcode(cbuf, 0xE9); // jmp entry
3746     emit_d32_reloc(cbuf,
3747                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3748                    runtime_call_Relocation::spec(),
3749                    RELOC_DISP32);
3750   %}
3751 
3752   enc_class absF_encoding(regF dst)
3753   %{
3754     // Emits "andps dst, [disp32]" (0F 54 /r) against the float sign-mask stub constant.
3755     const int  xmm_enc    = $dst$$reg;
3756     address    mask_addr  = (address) StubRoutines::x86::float_sign_mask();
3757     const bool upper_bank = (xmm_enc >= 8);   // encodings 8 and above need REX.R
3758     cbuf.set_inst_mark();
3759     if (upper_bank) {
3760       emit_opcode(cbuf, Assembler::REX_R);
3761     }
3762     // XXX reg_mem doesn't support RIP-relative addressing yet, so ModRM is built by hand
3763     emit_opcode(cbuf, 0x0F);
3764     emit_opcode(cbuf, 0x54);
3765     emit_rm(cbuf, 0x0, upper_bank ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
3766     emit_d32_reloc(cbuf, mask_addr);
3767   %}
3768 
3769   enc_class absD_encoding(regD dst)
3770   %{
3771     // Emits "andpd dst, [disp32]" (66 0F 54 /r) against the double sign-mask stub constant.
3772     const int  xmm_enc    = $dst$$reg;
3773     address    mask_addr  = (address) StubRoutines::x86::double_sign_mask();
3774     const bool upper_bank = (xmm_enc >= 8);   // encodings 8 and above need REX.R
3775     cbuf.set_inst_mark();
3776     emit_opcode(cbuf, 0x66);                  // operand-size prefix precedes any REX byte
3777     if (upper_bank) {
3778       emit_opcode(cbuf, Assembler::REX_R);
3779     }
3780     // XXX reg_mem doesn't support RIP-relative addressing yet, so ModRM is built by hand
3781     emit_opcode(cbuf, 0x0F);
3782     emit_opcode(cbuf, 0x54);
3783     emit_rm(cbuf, 0x0, upper_bank ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
3784     emit_d32_reloc(cbuf, mask_addr);
3785   %}
3786 
3787   enc_class negF_encoding(regF dst)
3788   %{
3789     // Emits "xorps dst, [disp32]" (0F 57 /r) against the float sign-flip stub constant.
3790     const int  xmm_enc    = $dst$$reg;
3791     address    flip_addr  = (address) StubRoutines::x86::float_sign_flip();
3792     const bool upper_bank = (xmm_enc >= 8);   // encodings 8 and above need REX.R
3793     cbuf.set_inst_mark();
3794     if (upper_bank) {
3795       emit_opcode(cbuf, Assembler::REX_R);
3796     }
3797     // XXX reg_mem doesn't support RIP-relative addressing yet, so ModRM is built by hand
3798     emit_opcode(cbuf, 0x0F);
3799     emit_opcode(cbuf, 0x57);
3800     emit_rm(cbuf, 0x0, upper_bank ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
3801     emit_d32_reloc(cbuf, flip_addr);
3802   %}
3803 
3804   enc_class negD_encoding(regD dst)
3805   %{
3806     // Emits "xorpd dst, [disp32]" (66 0F 57 /r) against the double sign-flip stub constant.
3807     const int  xmm_enc    = $dst$$reg;
3808     address    flip_addr  = (address) StubRoutines::x86::double_sign_flip();
3809     const bool upper_bank = (xmm_enc >= 8);   // encodings 8 and above need REX.R
3810     cbuf.set_inst_mark();
3811     emit_opcode(cbuf, 0x66);                  // operand-size prefix precedes any REX byte
3812     if (upper_bank) {
3813       emit_opcode(cbuf, Assembler::REX_R);
3814     }
3815     // XXX reg_mem doesn't support RIP-relative addressing yet, so ModRM is built by hand
3816     emit_opcode(cbuf, 0x0F);
3817     emit_opcode(cbuf, 0x57);
3818     emit_rm(cbuf, 0x0, upper_bank ? xmm_enc - 8 : xmm_enc, 0x5);  // 00 reg 101
3819     emit_d32_reloc(cbuf, flip_addr);
3820   %}
3821 
3822   enc_class f2i_fixup(rRegI dst, regF src)
3823   %{
3824     // Guard emitted behind a float->int conversion.  Only when dst equals
3825     // 0x80000000 is the slow path taken: src is spilled into a fresh 8-byte
3826     // stack slot, the f2i_fixup stub is called, and the slot is popped
3827     // into dst.
3828     const int  gpr     = $dst$$reg;
3829     const int  xmm     = $src$$reg;
3830     const bool gpr_ext = (gpr >= 8);   // dst encoding needs a REX prefix
3831     const bool xmm_ext = (xmm >= 8);   // src encoding needs a REX prefix
3832 
3833     // cmpl dst, #0x80000000
3834     if (gpr_ext) {
3835       emit_opcode(cbuf, Assembler::REX_B);
3836     }
3837     emit_opcode(cbuf, 0x81);
3838     emit_rm(cbuf, 0x3, 0x7, gpr & 7);
3839     emit_d32(cbuf, 0x80000000);
3840 
3841     // jne,s done
3842     // The displacement is the byte length of the slow path emitted below:
3843     // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
3844     // each REX prefix (extended src on the movss, extended dst on the popq).
3845     emit_opcode(cbuf, 0x75);
3846     emit_d8(cbuf, 0xF + (xmm_ext ? 1 : 0) + (gpr_ext ? 1 : 0));
3847 
3848     // subq rsp, #8
3849     emit_opcode(cbuf, Assembler::REX_W);
3850     emit_opcode(cbuf, 0x83);
3851     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3852     emit_d8(cbuf, 8);
3853 
3854     // movss [rsp], src
3855     emit_opcode(cbuf, 0xF3);
3856     if (xmm_ext) {
3857       emit_opcode(cbuf, Assembler::REX_R);
3858     }
3859     emit_opcode(cbuf, 0x0F);
3860     emit_opcode(cbuf, 0x11);
3861     encode_RegMem(cbuf, xmm, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3862 
3863     // call f2i_fixup; code_end() is read after the opcode byte is emitted
3864     cbuf.set_inst_mark();
3865     emit_opcode(cbuf, 0xE8);
3866     const int call_disp = (int) (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4);
3867     emit_d32_reloc(cbuf, call_disp, runtime_call_Relocation::spec(), RELOC_DISP32);
3868 
3869     // popq dst
3870     if (gpr_ext) {
3871       emit_opcode(cbuf, Assembler::REX_B);
3872     }
3873     emit_opcode(cbuf, 0x58 | (gpr & 7));
3874 
3875     // done:
3876   %}
3877 
3878   enc_class f2l_fixup(rRegL dst, regF src)
3879   %{
3880     // Guard emitted behind a float->long conversion.  Only when dst equals
3881     // the 64-bit constant at double_sign_flip() is the slow path taken: src
3882     // is spilled into a fresh 8-byte stack slot, the f2l_fixup stub is
3883     // called, and the slot is popped into dst.
3884     const int  gpr     = $dst$$reg;
3885     const int  xmm     = $src$$reg;
3886     const bool gpr_ext = (gpr >= 8);   // dst encoding needs a REX prefix
3887     const bool xmm_ext = (xmm >= 8);   // src encoding needs a REX prefix
3888     address sentinel_addr = (address) StubRoutines::x86::double_sign_flip();
3889 
3890     // cmpq dst, [0x8000000000000000]
3891     // XXX reg_mem doesn't support RIP-relative addressing yet
3892     cbuf.set_inst_mark();
3893     emit_opcode(cbuf, gpr_ext ? Assembler::REX_WR : Assembler::REX_W);
3894     emit_opcode(cbuf, 0x39);
3895     emit_rm(cbuf, 0x0, gpr & 7, 0x5); // 00 reg 101
3896     emit_d32_reloc(cbuf, sentinel_addr);
3897 
3898     // jne,s done
3899     // The displacement is the byte length of the slow path emitted below:
3900     // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
3901     // each REX prefix (extended src on the movss, extended dst on the popq).
3902     emit_opcode(cbuf, 0x75);
3903     emit_d8(cbuf, 0xF + (xmm_ext ? 1 : 0) + (gpr_ext ? 1 : 0));
3904 
3905     // subq rsp, #8
3906     emit_opcode(cbuf, Assembler::REX_W);
3907     emit_opcode(cbuf, 0x83);
3908     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3909     emit_d8(cbuf, 8);
3910 
3911     // movss [rsp], src
3912     emit_opcode(cbuf, 0xF3);
3913     if (xmm_ext) {
3914       emit_opcode(cbuf, Assembler::REX_R);
3915     }
3916     emit_opcode(cbuf, 0x0F);
3917     emit_opcode(cbuf, 0x11);
3918     encode_RegMem(cbuf, xmm, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3919 
3920     // call f2l_fixup; code_end() is read after the opcode byte is emitted
3921     cbuf.set_inst_mark();
3922     emit_opcode(cbuf, 0xE8);
3923     const int call_disp = (int) (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4);
3924     emit_d32_reloc(cbuf, call_disp, runtime_call_Relocation::spec(), RELOC_DISP32);
3925 
3926     // popq dst
3927     if (gpr_ext) {
3928       emit_opcode(cbuf, Assembler::REX_B);
3929     }
3930     emit_opcode(cbuf, 0x58 | (gpr & 7));
3931 
3932     // done:
3933   %}
3935 
3936   enc_class d2i_fixup(rRegI dst, regD src)
3937   %{
3938     // Guard emitted behind a double->int conversion.  Only when dst equals
3939     // 0x80000000 is the slow path taken: src is spilled into a fresh 8-byte
3940     // stack slot, the d2i_fixup stub is called, and the slot is popped
3941     // into dst.
3942     const int  gpr     = $dst$$reg;
3943     const int  xmm     = $src$$reg;
3944     const bool gpr_ext = (gpr >= 8);   // dst encoding needs a REX prefix
3945     const bool xmm_ext = (xmm >= 8);   // src encoding needs a REX prefix
3946 
3947     // cmpl dst, #0x80000000
3948     if (gpr_ext) {
3949       emit_opcode(cbuf, Assembler::REX_B);
3950     }
3951     emit_opcode(cbuf, 0x81);
3952     emit_rm(cbuf, 0x3, 0x7, gpr & 7);
3953     emit_d32(cbuf, 0x80000000);
3954 
3955     // jne,s done
3956     // The displacement is the byte length of the slow path emitted below:
3957     // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
3958     // each REX prefix (extended src on the movsd, extended dst on the popq).
3959     emit_opcode(cbuf, 0x75);
3960     emit_d8(cbuf, 0xF + (xmm_ext ? 1 : 0) + (gpr_ext ? 1 : 0));
3961 
3962     // subq rsp, #8
3963     emit_opcode(cbuf, Assembler::REX_W);
3964     emit_opcode(cbuf, 0x83);
3965     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3966     emit_d8(cbuf, 8);
3967 
3968     // movsd [rsp], src
3969     emit_opcode(cbuf, 0xF2);
3970     if (xmm_ext) {
3971       emit_opcode(cbuf, Assembler::REX_R);
3972     }
3973     emit_opcode(cbuf, 0x0F);
3974     emit_opcode(cbuf, 0x11);
3975     encode_RegMem(cbuf, xmm, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3976 
3977     // call d2i_fixup; code_end() is read after the opcode byte is emitted
3978     cbuf.set_inst_mark();
3979     emit_opcode(cbuf, 0xE8);
3980     const int call_disp = (int) (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4);
3981     emit_d32_reloc(cbuf, call_disp, runtime_call_Relocation::spec(), RELOC_DISP32);
3982 
3983     // popq dst
3984     if (gpr_ext) {
3985       emit_opcode(cbuf, Assembler::REX_B);
3986     }
3987     emit_opcode(cbuf, 0x58 | (gpr & 7));
3988 
3989     // done:
3990   %}
3991 
3992   enc_class d2l_fixup(rRegL dst, regD src)
3993   %{
3994     // Guard emitted behind a double->long conversion.  Only when dst equals
3995     // the 64-bit constant at double_sign_flip() is the slow path taken: src
3996     // is spilled into a fresh 8-byte stack slot, the d2l_fixup stub is
3997     // called, and the slot is popped into dst.
3998     const int  gpr     = $dst$$reg;
3999     const int  xmm     = $src$$reg;
4000     const bool gpr_ext = (gpr >= 8);   // dst encoding needs a REX prefix
4001     const bool xmm_ext = (xmm >= 8);   // src encoding needs a REX prefix
4002     address sentinel_addr = (address) StubRoutines::x86::double_sign_flip();
4003 
4004     // cmpq dst, [0x8000000000000000]
4005     // XXX reg_mem doesn't support RIP-relative addressing yet
4006     cbuf.set_inst_mark();
4007     emit_opcode(cbuf, gpr_ext ? Assembler::REX_WR : Assembler::REX_W);
4008     emit_opcode(cbuf, 0x39);
4009     emit_rm(cbuf, 0x0, gpr & 7, 0x5); // 00 reg 101
4010     emit_d32_reloc(cbuf, sentinel_addr);
4011 
4012     // jne,s done
4013     // The displacement is the byte length of the slow path emitted below:
4014     // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
4015     // each REX prefix (extended src on the movsd, extended dst on the popq).
4016     emit_opcode(cbuf, 0x75);
4017     emit_d8(cbuf, 0xF + (xmm_ext ? 1 : 0) + (gpr_ext ? 1 : 0));
4018 
4019     // subq rsp, #8
4020     emit_opcode(cbuf, Assembler::REX_W);
4021     emit_opcode(cbuf, 0x83);
4022     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4023     emit_d8(cbuf, 8);
4024 
4025     // movsd [rsp], src
4026     emit_opcode(cbuf, 0xF2);
4027     if (xmm_ext) {
4028       emit_opcode(cbuf, Assembler::REX_R);
4029     }
4030     emit_opcode(cbuf, 0x0F);
4031     emit_opcode(cbuf, 0x11);
4032     encode_RegMem(cbuf, xmm, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4033 
4034     // call d2l_fixup; code_end() is read after the opcode byte is emitted
4035     cbuf.set_inst_mark();
4036     emit_opcode(cbuf, 0xE8);
4037     const int call_disp = (int) (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4);
4038     emit_d32_reloc(cbuf, call_disp, runtime_call_Relocation::spec(), RELOC_DISP32);
4039 
4040     // popq dst
4041     if (gpr_ext) {
4042       emit_opcode(cbuf, Assembler::REX_B);
4043     }
4044     emit_opcode(cbuf, 0x58 | (gpr & 7));
4045 
4046     // done:
4047   %}
4049 
4050   // Safepoint Poll.  This polls the safepoint page, and causes an
4051   // exception if it is not readable. Unfortunately, it kills
4052   // RFLAGS in the process.
4053   enc_class enc_safepoint_poll
4054   %{
4055     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4056     // XXX reg_mem doesn't support RIP-relative addressing yet
         // The poll_type relocation is attached to the instruction start
         // (inst_mark) before any byte of the instruction is emitted.
4057     cbuf.set_inst_mark();
4058     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4059     emit_opcode(cbuf, 0x85); // testl
4060     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4061     // cbuf.inst_mark() is beginning of instruction
         // The 32-bit displacement targets the address from os::get_polling_page().
4062     emit_d32_reloc(cbuf, os::get_polling_page());
4063 //                    relocInfo::poll_type,
4064   %}
4065 %}
4066 
4067 
4068 
4069 //----------FRAME--------------------------------------------------------------
4070 // Definition of frame structure and management information.
4071 //
4072 //  S T A C K   L A Y O U T    Allocators stack-slot number
4073 //                             |   (to get allocators register number
4074 //  G  Owned by    |        |  v    add OptoReg::stack0())
4075 //  r   CALLER     |        |
4076 //  o     |        +--------+      pad to even-align allocators stack-slot
4077 //  w     V        |  pad0  |        numbers; owned by CALLER
4078 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4079 //  h     ^        |   in   |  5
4080 //        |        |  args  |  4   Holes in incoming args owned by SELF
4081 //  |     |        |        |  3
4082 //  |     |        +--------+
4083 //  V     |        | old out|      Empty on Intel, window on Sparc
4084 //        |    old |preserve|      Must be even aligned.
4085 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4086 //        |        |   in   |  3   area for Intel ret address
4087 //     Owned by    |preserve|      Empty on Sparc.
4088 //       SELF      +--------+
4089 //        |        |  pad2  |  2   pad to align old SP
4090 //        |        +--------+  1
4091 //        |        | locks  |  0
4092 //        |        +--------+----> OptoReg::stack0(), even aligned
4093 //        |        |  pad1  | 11   pad to align new SP
4094 //        |        +--------+
4095 //        |        |        | 10
4096 //        |        | spills |  9   spills
4097 //        V        |        |  8   (pad0 slot for callee)
4098 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4099 //        ^        |  out   |  7
4100 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4101 //     Owned by    +--------+
4102 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4103 //        |    new |preserve|      Must be even-aligned.
4104 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4105 //        |        |        |
4106 //
4107 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4108 //         known from SELF's arguments and the Java calling convention.
4109 //         Region 6-7 is determined per call site.
4110 // Note 2: If the calling convention leaves holes in the incoming argument
4111 //         area, those holes are owned by SELF.  Holes in the outgoing area
4112 //         are owned by the CALLEE.  Holes should not be necessary in the
4113 //         incoming area, as the Java calling convention is completely under
4114 //         the control of the AD file.  Doubles can be sorted and packed to
4115 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4116 //         varargs C calling conventions.
4117 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4118 //         even aligned with pad0 as needed.
4119 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4120 //         region 6-11 is even aligned; it may be padded out more so that
4121 //         the region from SP to FP meets the minimum stack alignment.
4122 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4123 //         alignment.  Region 11, pad1, may be dynamically extended so that
4124 //         SP meets the minimum alignment.
4125 
4126 frame
4127 %{
4128   // What direction does stack grow in (assumed to be same for C & Java)
4129   stack_direction(TOWARDS_LOW);
4130 
4131   // These three registers define part of the calling convention
4132   // between compiled code and the interpreter.
       // NOTE(review): only two registers are named here (RAX and RBX); the
       // "three" above looks stale -- confirm.
4133   inline_cache_reg(RAX);                // Inline Cache Register
4134   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4135                                         // calling interpreter
4136 
4137   // Optional: name the operand used by cisc-spilling to access
4138   // [stack_pointer + offset]
4139   cisc_spilling_operand_name(indOffset32);
4140 
4141   // Number of stack slots consumed by locking an object
4142   sync_stack_slots(2);
4143 
4144   // Compiled code's Frame Pointer
4145   frame_pointer(RSP);
4146 
4147   // Interpreter stores its frame pointer in a register which is
4148   // stored to the stack by I2CAdaptors.
4149   // I2CAdaptors convert from interpreted java to compiled java.
4150   interpreter_frame_pointer(RBP);
4151 
4152   // Stack alignment requirement
4153   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4154 
4155   // Number of stack slots between incoming argument block and the start of
4156   // a new frame.  The PROLOG must add this many slots to the stack.  The
4157   // EPILOG must remove this many slots.  amd64 needs two slots for
4158   // return address.
       // NOTE(review): the value below is 4 slots (plus 2 more when
       // VerifyStackAtCalls is set), not the two mentioned above; the other
       // two are presumably for a saved frame pointer -- confirm.
4159   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4160 
4161   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4162   // for calls to C.  Supports the var-args backing area for register parms.
4163   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4164 
4165   // The after-PROLOG location of the return address.  Location of
4166   // return address specifies a type (REG or STACK) and a number
4167   // representing the register number (i.e. - use a register name) or
4168   // stack slot.
4169   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4170   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + the compile's fixed slots) is
       // rounded up to a multiple of WordsPerLong * 2 before the 2 is taken off.
4171   return_addr(STACK - 2 +
4172               round_to(2 + 2 * VerifyStackAtCalls +
4173                        Compile::current()->fixed_slots(),
4174                        WordsPerLong * 2));
4175 
4176   // Body of function which returns an integer array locating
4177   // arguments either in registers or in stack slots.  Passed an array
4178   // of ideal registers called "sig" and a "length" count.  Stack-slot
4179   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4180   // arguments for a CALLEE.  Incoming stack arguments are
4181   // automatically biased by the preserve_stack_slots field above.
4182 
4183   calling_convention
4184   %{
4185     // No difference between ingoing/outgoing just pass false
4186     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4187   %}
4188 
4189   c_calling_convention
4190   %{
4191     // This is obviously always outgoing
4192     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4193   %}
4194 
4195   // Location of compiled Java return values.  Same as C for now.
       // Both tables are indexed by ideal_reg; lo gives the register holding the
       // low half of the value, hi the register for the high half (OptoReg::Bad
       // when there is none).  The first two entries are unused placeholders.
       // NOTE(review): per the entry comments Op_RegN sits below Op_RegI, so the
       // first assert rejects it although both tables carry an entry -- confirm
       // that this is intended.
4196   return_value
4197   %{
4198     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4199            "only return normal values");
4200 
4201     static const int lo[Op_RegL + 1] = {
4202       0,
4203       0,
4204       RAX_num,  // Op_RegN
4205       RAX_num,  // Op_RegI
4206       RAX_num,  // Op_RegP
4207       XMM0_num, // Op_RegF
4208       XMM0_num, // Op_RegD
4209       RAX_num   // Op_RegL
4210     };
4211     static const int hi[Op_RegL + 1] = {
4212       0,
4213       0,
4214       OptoReg::Bad, // Op_RegN
4215       OptoReg::Bad, // Op_RegI
4216       RAX_H_num,    // Op_RegP
4217       OptoReg::Bad, // Op_RegF
4218       XMM0_H_num,   // Op_RegD
4219       RAX_H_num     // Op_RegL
4220     };
         // Guards against a new ideal register type being added without a table
         // entry; only the size of hi is checked.
4221     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4222     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4223   %}
4224 %}
4225 
4226 //----------ATTRIBUTES---------------------------------------------------------
4227 //----------Operand Attributes-------------------------------------------------
4228 op_attrib op_cost(0);        // Required cost attribute; 0 is the value used when an operand sets none
4229 
4230 //----------Instruction Attributes---------------------------------------------
     // Each ins_attrib line names an instruction attribute together with the
     // value written in parentheses.
4231 ins_attrib ins_cost(100);       // Required cost attribute
4232 ins_attrib ins_size(8);         // Required size attribute (in bits)
4233 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4234 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4235                                 // a non-matching short branch variant
4236                                 // of some long branch?
4237 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4238                                 // be a power of 2) specifies the
4239                                 // alignment that some part of the
4240                                 // instruction (not necessarily the
4241                                 // start) requires.  If > 1, a
4242                                 // compute_padding() function must be
4243                                 // provided for the instruction
4244 
4245 //----------OPERANDS-----------------------------------------------------------
4246 // Operand definitions must precede instruction definitions for correct parsing
4247 // in the ADLC because operands constitute user defined types which are used in
4248 // instruction definitions.
4249 
4250 //----------Simple Operands----------------------------------------------------
4251 // Immediate Operands
4252 // Integer Immediate: matches every ConI node (no predicate).
4253 operand immI()
4254 %{
4255   match(ConI);
4256 
4257   op_cost(10);
4258   format %{ %}
4259   interface(CONST_INTER);
4260 %}
4261 
4262 // Constant for test vs zero: a ConI whose value is exactly 0.
4263 operand immI0()
4264 %{
4265   predicate(n->get_int() == 0);
4266   match(ConI);
4267 
4268   op_cost(0);
4269   format %{ %}
4270   interface(CONST_INTER);
4271 %}
4272 
4273 // Constant for increment: a ConI whose value is exactly 1.
4274 operand immI1()
4275 %{
4276   predicate(n->get_int() == 1);
4277   match(ConI);
4278 
4279   op_cost(0);
4280   format %{ %}
4281   interface(CONST_INTER);
4282 %}
4283 
4284 // Constant for decrement: a ConI whose value is exactly -1.
4285 operand immI_M1()
4286 %{
4287   predicate(n->get_int() == -1);
4288   match(ConI);
4289 
4290   op_cost(0);
4291   format %{ %}
4292   interface(CONST_INTER);
4293 %}
4294 
4295 // Valid scale values for addressing modes: a ConI in the range 0..3 inclusive.
4296 operand immI2()
4297 %{
4298   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4299   match(ConI);
4300 
4301   format %{ %}                // no op_cost line here, unlike the neighbouring operands
4302   interface(CONST_INTER);
4303 %}
4304 
4305 operand immI8()               // ConI that fits a signed 8-bit immediate
4306 %{
4307   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));  // -128 .. 127
4308   match(ConI);
4309 
4310   op_cost(5);
4311   format %{ %}
4312   interface(CONST_INTER);
4313 %}
4314 
4315 operand immI16()              // ConI that fits a signed 16-bit immediate
4316 %{
4317   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4318   match(ConI);
4319 
4320   op_cost(10);
4321   format %{ %}
4322   interface(CONST_INTER);
4323 %}
4324 
4325 // Constant for long shifts: a ConI whose value is exactly 32.
4326 operand immI_32()
4327 %{
4328   predicate( n->get_int() == 32 );
4329   match(ConI);
4330 
4331   op_cost(0);
4332   format %{ %}
4333   interface(CONST_INTER);
4334 %}
4335 
4336 // Constant for long shifts: a ConI whose value is exactly 64.
4337 operand immI_64()
4338 %{
4339   predicate( n->get_int() == 64 );
4340   match(ConI);
4341 
4342   op_cost(0);
4343   format %{ %}
4344   interface(CONST_INTER);
4345 %}
4346 
4347 // Pointer Immediate: matches every ConP node (no predicate).
4348 operand immP()
4349 %{
4350   match(ConP);
4351 
4352   op_cost(10);
4353   format %{ %}
4354   interface(CONST_INTER);
4355 %}
4356 
4357 // NULL Pointer Immediate: a ConP whose pointer value is 0.
4358 operand immP0()
4359 %{
4360   predicate(n->get_ptr() == 0);
4361   match(ConP);
4362 
4363   op_cost(5);
4364   format %{ %}
4365   interface(CONST_INTER);
4366 %}
4367 
4368 // Narrow (compressed) Pointer Immediate
4369 operand immN() %{
4370   match(ConN);
4371 
4372   op_cost(10);
4373   format %{ %}
4374   interface(CONST_INTER);
4375 %}
4376 
4377 // NULL Narrow (compressed) Pointer Immediate
4378 operand immN0() %{
4379   predicate(n->get_narrowcon() == 0);
4380   match(ConN);
4381 
4382   op_cost(5);
4383   format %{ %}
4384   interface(CONST_INTER);
4385 %}
4386 
4387 operand immP31()
4388 %{
4389   predicate(!n->as_Type()->type()->isa_oopptr()
4390             && (n->get_ptr() >> 31) == 0);  // non-oop pointer constant with no bits set at or above bit 31
4391   match(ConP);
4392 
4393   op_cost(5);
4394   format %{ %}
4395   interface(CONST_INTER);
4396 %}
4397 
4398 
4399 // Long Immediate
4400 operand immL()
4401 %{
4402   match(ConL);
4403 
4404   op_cost(20);
4405   format %{ %}
4406   interface(CONST_INTER);
4407 %}
4408 
4409 // Long Immediate 8-bit
4410 operand immL8()
4411 %{
4412   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4413   match(ConL);
4414 
4415   op_cost(5);
4416   format %{ %}
4417   interface(CONST_INTER);
4418 %}
4419 
4420 // Long Immediate 32-bit unsigned
4421 operand immUL32()
4422 %{
4423   predicate(n->get_long() == (unsigned int) (n->get_long()));
4424   match(ConL);
4425 
4426   op_cost(10);
4427   format %{ %}
4428   interface(CONST_INTER);
4429 %}
4430 
4431 // Long Immediate 32-bit signed
4432 operand immL32()
4433 %{
4434   predicate(n->get_long() == (int) (n->get_long()));
4435   match(ConL);
4436 
4437   op_cost(15);
4438   format %{ %}
4439   interface(CONST_INTER);
4440 %}
4441 
4442 // Long Immediate zero
4443 operand immL0()
4444 %{
4445   predicate(n->get_long() == 0L);
4446   match(ConL);
4447 
4448   op_cost(10);
4449   format %{ %}
4450   interface(CONST_INTER);
4451 %}
4452 
4453 // Constant for increment
4454 operand immL1()
4455 %{
4456   predicate(n->get_long() == 1);
4457   match(ConL);
4458 
4459   format %{ %}
4460   interface(CONST_INTER);
4461 %}
4462 
4463 // Constant for decrement
4464 operand immL_M1()
4465 %{
4466   predicate(n->get_long() == -1);
4467   match(ConL);
4468 
4469   format %{ %}
4470   interface(CONST_INTER);
4471 %}
4472 
4473 // Long Immediate: the value 10
4474 operand immL10()
4475 %{
4476   predicate(n->get_long() == 10);
4477   match(ConL);
4478 
4479   format %{ %}
4480   interface(CONST_INTER);
4481 %}
4482 
4483 // Long immediate from 0 to 127.
4484 // Used for a shorter form of long mul by 10.
4485 operand immL_127()
4486 %{
4487   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4488   match(ConL);
4489 
4490   op_cost(10);
4491   format %{ %}
4492   interface(CONST_INTER);
4493 %}
4494 
4495 // Long Immediate: low 32-bit mask
4496 operand immL_32bits()
4497 %{
4498   predicate(n->get_long() == 0xFFFFFFFFL);
4499   match(ConL);
4500   op_cost(20);
4501 
4502   format %{ %}
4503   interface(CONST_INTER);
4504 %}
4505 
4506 // Float Immediate zero
4507 operand immF0()
4508 %{
4509   predicate(jint_cast(n->getf()) == 0);  // NOTE(review): jint_cast presumably yields the raw bits, so only +0.0f (not -0.0f) would match -- confirm
4510   match(ConF);
4511 
4512   op_cost(5);
4513   format %{ %}
4514   interface(CONST_INTER);
4515 %}
4516 
4517 // Float Immediate
4518 operand immF()
4519 %{
4520   match(ConF);
4521 
4522   op_cost(15);
4523   format %{ %}
4524   interface(CONST_INTER);
4525 %}
4526 
4527 // Double Immediate zero
4528 operand immD0()
4529 %{
4530   predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): jlong_cast presumably yields the raw bits, so only +0.0 (not -0.0) would match -- confirm
4531   match(ConD);
4532 
4533   op_cost(5);
4534   format %{ %}
4535   interface(CONST_INTER);
4536 %}
4537 
4538 // Double Immediate
4539 operand immD()
4540 %{
4541   match(ConD);
4542 
4543   op_cost(15);
4544   format %{ %}
4545   interface(CONST_INTER);
4546 %}
4547 
4548 // Immediates for special shifts (sign extend)
4549 
4550 // Shift-count constants (16 and 24)
4551 operand immI_16()
4552 %{
4553   predicate(n->get_int() == 16);
4554   match(ConI);
4555 
4556   format %{ %}
4557   interface(CONST_INTER);
4558 %}
4559 
4560 operand immI_24()
4561 %{
4562   predicate(n->get_int() == 24);
4563   match(ConI);
4564 
4565   format %{ %}
4566   interface(CONST_INTER);
4567 %}
4568 
4569 // Constant for byte-wide masking
4570 operand immI_255()
4571 %{
4572   predicate(n->get_int() == 255);
4573   match(ConI);
4574 
4575   format %{ %}
4576   interface(CONST_INTER);
4577 %}
4578 
4579 // Constant for short-wide masking
4580 operand immI_65535()
4581 %{
4582   predicate(n->get_int() == 65535);
4583   match(ConI);
4584 
4585   format %{ %}
4586   interface(CONST_INTER);
4587 %}
4588 
4589 // Constant for byte-wide masking
4590 operand immL_255()
4591 %{
4592   predicate(n->get_long() == 255);
4593   match(ConL);
4594 
4595   format %{ %}
4596   interface(CONST_INTER);
4597 %}
4598 
4599 // Constant for short-wide masking
4600 operand immL_65535()
4601 %{
4602   predicate(n->get_long() == 65535);
4603   match(ConL);
4604 
4605   format %{ %}
4606   interface(CONST_INTER);
4607 %}
4608 
4609 // Register Operands
4610 // Integer Register
4611 operand rRegI()
4612 %{
4613   constraint(ALLOC_IN_RC(int_reg));
4614   match(RegI);
4615 
4616   match(rax_RegI);
4617   match(rbx_RegI);
4618   match(rcx_RegI);
4619   match(rdx_RegI);
4620   match(rdi_RegI);
4621 
4622   format %{ %}
4623   interface(REG_INTER);
4624 %}
4625 
4626 // Special Registers
4627 operand rax_RegI()
4628 %{
4629   constraint(ALLOC_IN_RC(int_rax_reg));
4630   match(RegI);
4631   match(rRegI);
4632 
4633   format %{ "RAX" %}
4634   interface(REG_INTER);
4635 %}
4636 
4637 // Special Registers
4638 operand rbx_RegI()
4639 %{
4640   constraint(ALLOC_IN_RC(int_rbx_reg));
4641   match(RegI);
4642   match(rRegI);
4643 
4644   format %{ "RBX" %}
4645   interface(REG_INTER);
4646 %}
4647 
4648 operand rcx_RegI()
4649 %{
4650   constraint(ALLOC_IN_RC(int_rcx_reg));
4651   match(RegI);
4652   match(rRegI);
4653 
4654   format %{ "RCX" %}
4655   interface(REG_INTER);
4656 %}
4657 
4658 operand rdx_RegI()
4659 %{
4660   constraint(ALLOC_IN_RC(int_rdx_reg));
4661   match(RegI);
4662   match(rRegI);
4663 
4664   format %{ "RDX" %}
4665   interface(REG_INTER);
4666 %}
4667 
4668 operand rdi_RegI()
4669 %{
4670   constraint(ALLOC_IN_RC(int_rdi_reg));
4671   match(RegI);
4672   match(rRegI);
4673 
4674   format %{ "RDI" %}
4675   interface(REG_INTER);
4676 %}
4677 
4678 operand no_rcx_RegI()
4679 %{
4680   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4681   match(RegI);
4682   match(rax_RegI);
4683   match(rbx_RegI);
4684   match(rdx_RegI);
4685   match(rdi_RegI);
4686 
4687   format %{ %}
4688   interface(REG_INTER);
4689 %}
4690 
4691 operand no_rax_rdx_RegI()
4692 %{
4693   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4694   match(RegI);
4695   match(rbx_RegI);
4696   match(rcx_RegI);
4697   match(rdi_RegI);
4698 
4699   format %{ %}
4700   interface(REG_INTER);
4701 %}
4702 
4703 // Pointer Register
4704 operand any_RegP()
4705 %{
4706   constraint(ALLOC_IN_RC(any_reg));
4707   match(RegP);
4708   match(rax_RegP);
4709   match(rbx_RegP);
4710   match(rdi_RegP);
4711   match(rsi_RegP);
4712   match(rbp_RegP);
4713   match(r15_RegP);
4714   match(rRegP);
4715 
4716   format %{ %}
4717   interface(REG_INTER);
4718 %}
4719 
4720 operand rRegP()
4721 %{
4722   constraint(ALLOC_IN_RC(ptr_reg));
4723   match(RegP);
4724   match(rax_RegP);
4725   match(rbx_RegP);
4726   match(rdi_RegP);
4727   match(rsi_RegP);
4728   match(rbp_RegP);
4729   match(r15_RegP);  // See Q&A below about r15_RegP.
4730 
4731   format %{ %}
4732   interface(REG_INTER);
4733 %}
4734 
4735 operand rRegN() %{
4736   constraint(ALLOC_IN_RC(int_reg));
4737   match(RegN);
4738 
4739   format %{ %}
4740   interface(REG_INTER);
4741 %}
4742 
4743 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4744 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4745 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4746 // The output of an instruction is controlled by the allocator, which respects
4747 // register class masks, not match rules.  Unless an instruction mentions
4748 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4749 // by the allocator as an input.
4750 
4751 operand no_rax_RegP()
4752 %{
4753   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4754   match(RegP);
4755   match(rbx_RegP);
4756   match(rsi_RegP);
4757   match(rdi_RegP);
4758 
4759   format %{ %}
4760   interface(REG_INTER);
4761 %}
4762 
4763 operand no_rbp_RegP()
4764 %{
4765   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4766   match(RegP);
4767   match(rbx_RegP);
4768   match(rsi_RegP);
4769   match(rdi_RegP);
4770 
4771   format %{ %}
4772   interface(REG_INTER);
4773 %}
4774 
4775 operand no_rax_rbx_RegP()
4776 %{
4777   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4778   match(RegP);
4779   match(rsi_RegP);
4780   match(rdi_RegP);
4781 
4782   format %{ %}
4783   interface(REG_INTER);
4784 %}
4785 
4786 // Special Registers
4787 // Return a pointer value
4788 operand rax_RegP()
4789 %{
4790   constraint(ALLOC_IN_RC(ptr_rax_reg));
4791   match(RegP);
4792   match(rRegP);
4793 
4794   format %{ %}
4795   interface(REG_INTER);
4796 %}
4797 
4798 // Special Registers
4799 // Return a compressed pointer value
4800 operand rax_RegN()
4801 %{
4802   constraint(ALLOC_IN_RC(int_rax_reg));
4803   match(RegN);
4804   match(rRegN);
4805 
4806   format %{ %}
4807   interface(REG_INTER);
4808 %}
4809 
4810 // Used in AtomicAdd
4811 operand rbx_RegP()
4812 %{
4813   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4814   match(RegP);
4815   match(rRegP);
4816 
4817   format %{ %}
4818   interface(REG_INTER);
4819 %}
4820 
4821 operand rsi_RegP()
4822 %{
4823   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4824   match(RegP);
4825   match(rRegP);
4826 
4827   format %{ %}
4828   interface(REG_INTER);
4829 %}
4830 
4831 // Used in rep stosq
4832 operand rdi_RegP()
4833 %{
4834   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4835   match(RegP);
4836   match(rRegP);
4837 
4838   format %{ %}
4839   interface(REG_INTER);
4840 %}
4841 
4842 operand rbp_RegP()
4843 %{
4844   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4845   match(RegP);
4846   match(rRegP);
4847 
4848   format %{ %}
4849   interface(REG_INTER);
4850 %}
4851 
4852 operand r15_RegP()
4853 %{
4854   constraint(ALLOC_IN_RC(ptr_r15_reg));
4855   match(RegP);
4856   match(rRegP);
4857 
4858   format %{ %}
4859   interface(REG_INTER);
4860 %}
4861 
4862 operand rRegL()
4863 %{
4864   constraint(ALLOC_IN_RC(long_reg));
4865   match(RegL);
4866   match(rax_RegL);
4867   match(rdx_RegL);
4868 
4869   format %{ %}
4870   interface(REG_INTER);
4871 %}
4872 
4873 // Special Registers
4874 operand no_rax_rdx_RegL()
4875 %{
4876   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4877   match(RegL);
4878   match(rRegL);
4879 
4880   format %{ %}
4881   interface(REG_INTER);
4882 %}
4883 
4884 operand no_rax_RegL()
4885 %{
4886   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same register class as no_rax_rdx_RegL, yet rdx_RegL is matched below -- confirm this is intended
4887   match(RegL);
4888   match(rRegL);
4889   match(rdx_RegL);
4890 
4891   format %{ %}
4892   interface(REG_INTER);
4893 %}
4894 
4895 operand no_rcx_RegL()
4896 %{
4897   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4898   match(RegL);
4899   match(rRegL);
4900 
4901   format %{ %}
4902   interface(REG_INTER);
4903 %}
4904 
4905 operand rax_RegL()
4906 %{
4907   constraint(ALLOC_IN_RC(long_rax_reg));
4908   match(RegL);
4909   match(rRegL);
4910 
4911   format %{ "RAX" %}
4912   interface(REG_INTER);
4913 %}
4914 
4915 operand rcx_RegL()
4916 %{
4917   constraint(ALLOC_IN_RC(long_rcx_reg));
4918   match(RegL);
4919   match(rRegL);
4920 
4921   format %{ %}
4922   interface(REG_INTER);
4923 %}
4924 
4925 operand rdx_RegL()
4926 %{
4927   constraint(ALLOC_IN_RC(long_rdx_reg));
4928   match(RegL);
4929   match(rRegL);
4930 
4931   format %{ %}
4932   interface(REG_INTER);
4933 %}
4934 
4935 // Flags register, used as output of compare instructions
4936 operand rFlagsReg()
4937 %{
4938   constraint(ALLOC_IN_RC(int_flags));
4939   match(RegFlags);
4940 
4941   format %{ "RFLAGS" %}
4942   interface(REG_INTER);
4943 %}
4944 
4945 // Flags register, used as output of FLOATING POINT compare instructions
4946 operand rFlagsRegU()
4947 %{
4948   constraint(ALLOC_IN_RC(int_flags));
4949   match(RegFlags);
4950 
4951   format %{ "RFLAGS_U" %}
4952   interface(REG_INTER);
4953 %}
4954 
4955 operand rFlagsRegUCF() %{
4956   constraint(ALLOC_IN_RC(int_flags));
4957   match(RegFlags);
4958   predicate(false);  // always-false predicate; NOTE(review): presumably keeps the matcher from selecting this operand on its own -- confirm
4959 
4960   format %{ "RFLAGS_U_CF" %}
4961   interface(REG_INTER);
4962 %}
4963 
4964 // Float register operands
4965 operand regF()
4966 %{
4967   constraint(ALLOC_IN_RC(float_reg));
4968   match(RegF);
4969 
4970   format %{ %}
4971   interface(REG_INTER);
4972 %}
4973 
4974 // Double register operands
4975 operand regD() 
4976 %{
4977   constraint(ALLOC_IN_RC(double_reg));
4978   match(RegD);
4979 
4980   format %{ %}
4981   interface(REG_INTER);
4982 %}
4983 
4984 
4985 //----------Memory Operands----------------------------------------------------
4986 // Direct Memory Operand
4987 // operand direct(immP addr)
4988 // %{
4989 //   match(addr);
4990 
4991 //   format %{ "[$addr]" %}
4992 //   interface(MEMORY_INTER) %{
4993 //     base(0xFFFFFFFF);
4994 //     index(0x4);
4995 //     scale(0x0);
4996 //     disp($addr);
4997 //   %}
4998 // %}
4999 
5000 // Indirect Memory Operand: the address is the base register alone
5001 operand indirect(any_RegP reg)
5002 %{
5003   constraint(ALLOC_IN_RC(ptr_reg));
5004   match(reg);
5005 
5006   format %{ "[$reg]" %}
5007   interface(MEMORY_INTER) %{
5008     base($reg);
5009     index(0x4);  // No Index
5010     scale(0x0);  // No Scale
5011     disp(0x0);   // No Displacement
5012   %}
5013 %}
5014 
5015 // Indirect Memory Plus Short Offset Operand (offset must satisfy immL8)
5016 operand indOffset8(any_RegP reg, immL8 off)
5017 %{
5018   constraint(ALLOC_IN_RC(ptr_reg));
5019   match(AddP reg off);
5020 
5021   format %{ "[$reg + $off (8-bit)]" %}
5022   interface(MEMORY_INTER) %{
5023     base($reg);
5024     index(0x4);  // No Index
5025     scale(0x0);  // No Scale
5026     disp($off);  // Constant offset
5027   %}
5028 %}
5029 
5030 // Indirect Memory Plus Long Offset Operand (offset must satisfy immL32)
5031 operand indOffset32(any_RegP reg, immL32 off)
5032 %{
5033   constraint(ALLOC_IN_RC(ptr_reg));
5034   match(AddP reg off);
5035 
5036   format %{ "[$reg + $off (32-bit)]" %}
5037   interface(MEMORY_INTER) %{
5038     base($reg);
5039     index(0x4);  // No Index
5040     scale(0x0);  // No Scale
5041     disp($off);  // Constant offset
5042   %}
5043 %}
5044 
5045 // Indirect Memory Plus Index Register Plus Offset Operand
5046 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5047 %{
5048   constraint(ALLOC_IN_RC(ptr_reg));
5049   match(AddP (AddP reg lreg) off);
5050 
5051   op_cost(10);
5052   format %{"[$reg + $off + $lreg]" %}
5053   interface(MEMORY_INTER) %{
5054     base($reg);
5055     index($lreg);  // Unscaled long index register
5056     scale(0x0);    // No Scale
5057     disp($off);    // Constant offset
5058   %}
5059 %}
5060 
5061 // Indirect Memory Plus Index Register Operand (no offset)
5062 operand indIndex(any_RegP reg, rRegL lreg)
5063 %{
5064   constraint(ALLOC_IN_RC(ptr_reg));
5065   match(AddP reg lreg);
5066 
5067   op_cost(10);
5068   format %{"[$reg + $lreg]" %}
5069   interface(MEMORY_INTER) %{
5070     base($reg);
5071     index($lreg);  // Unscaled long index register
5072     scale(0x0);    // No Scale
5073     disp(0x0);     // No Displacement
5074   %}
5075 %}
5076 
5077 // Indirect Memory Times Scale Plus Index Register
5078 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5079 %{
5080   constraint(ALLOC_IN_RC(ptr_reg));
5081   match(AddP reg (LShiftL lreg scale));
5082 
5083   op_cost(10);
5084   format %{"[$reg + $lreg << $scale]" %}
5085   interface(MEMORY_INTER) %{
5086     base($reg);
5087     index($lreg);
5088     scale($scale);
5089     disp(0x0);
5090   %}
5091 %}
5092 
5093 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5094 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5095 %{
5096   constraint(ALLOC_IN_RC(ptr_reg));
5097   match(AddP (AddP reg (LShiftL lreg scale)) off);
5098 
5099   op_cost(10);
5100   format %{"[$reg + $off + $lreg << $scale]" %}
5101   interface(MEMORY_INTER) %{
5102     base($reg);
5103     index($lreg);
5104     scale($scale);
5105     disp($off);
5106   %}
5107 %}
5108 
5109 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5110 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5111 %{
5112   constraint(ALLOC_IN_RC(ptr_reg));
5113   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // follows n (outer AddP) -> inner AddP -> LShiftL -> ConvI2L of the match below; that node's long type lower bound (_lo) must be >= 0
5114   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5115 
5116   op_cost(10);
5117   format %{"[$reg + $off + $idx << $scale]" %}
5118   interface(MEMORY_INTER) %{
5119     base($reg);
5120     index($idx);    // The int register itself is used as the index
5121     scale($scale);
5122     disp($off);
5123   %}
5124 %}
5125 
5126 // Indirect Narrow Oop Plus Offset Operand
5127 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5128 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
5129 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5130   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5131   constraint(ALLOC_IN_RC(ptr_reg));
5132   match(AddP (DecodeN reg) off);
5133 
5134   op_cost(10);
5135   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5136   interface(MEMORY_INTER) %{
5137     base(0xc); // R12
5138     index($reg);
5139     scale(0x3);
5140     disp($off);
5141   %}
5142 %}
5143 
5144 // Indirect Narrow Oop Operand (decoded narrow oop as base; only when the narrow-oop shift is zero)
5145 operand indirectNarrow(rRegN reg)
5146 %{
5147   predicate(Universe::narrow_oop_shift() == 0);
5148   constraint(ALLOC_IN_RC(ptr_reg));
5149   match(DecodeN reg);
5150 
5151   format %{ "[$reg]" %}
5152   interface(MEMORY_INTER) %{
5153     base($reg);
5154     index(0x4);  // No Index
5155     scale(0x0);  // No Scale
5156     disp(0x0);   // No Displacement
5157   %}
5158 %}
5159 
5160 // Indirect Narrow Oop Plus Short Offset Operand (only when the narrow-oop shift is zero)
5161 operand indOffset8Narrow(rRegN reg, immL8 off)
5162 %{
5163   predicate(Universe::narrow_oop_shift() == 0);
5164   constraint(ALLOC_IN_RC(ptr_reg));
5165   match(AddP (DecodeN reg) off);
5166 
5167   format %{ "[$reg + $off (8-bit)]" %}
5168   interface(MEMORY_INTER) %{
5169     base($reg);
5170     index(0x4);  // No Index
5171     scale(0x0);  // No Scale
5172     disp($off);  // Constant offset
5173   %}
5174 %}
5175 
5176 // Indirect Narrow Oop Plus Long Offset Operand (only when the narrow-oop shift is zero)
5177 operand indOffset32Narrow(rRegN reg, immL32 off)
5178 %{
5179   predicate(Universe::narrow_oop_shift() == 0);
5180   constraint(ALLOC_IN_RC(ptr_reg));
5181   match(AddP (DecodeN reg) off);
5182 
5183   format %{ "[$reg + $off (32-bit)]" %}
5184   interface(MEMORY_INTER) %{
5185     base($reg);
5186     index(0x4);  // No Index
5187     scale(0x0);  // No Scale
5188     disp($off);  // Constant offset
5189   %}
5190 %}
5191 
5192 // Indirect Narrow Oop Plus Index Register Plus Offset Operand (only when the narrow-oop shift is zero)
5193 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5194 %{
5195   predicate(Universe::narrow_oop_shift() == 0);
5196   constraint(ALLOC_IN_RC(ptr_reg));
5197   match(AddP (AddP (DecodeN reg) lreg) off);
5198 
5199   op_cost(10);
5200   format %{"[$reg + $off + $lreg]" %}
5201   interface(MEMORY_INTER) %{
5202     base($reg);
5203     index($lreg);
5204     scale(0x0);
5205     disp($off);
5206   %}
5207 %}
5208 
5209 // Indirect Narrow Oop Plus Index Register Operand (no offset; only when the narrow-oop shift is zero)
5210 operand indIndexNarrow(rRegN reg, rRegL lreg)
5211 %{
5212   predicate(Universe::narrow_oop_shift() == 0);
5213   constraint(ALLOC_IN_RC(ptr_reg));
5214   match(AddP (DecodeN reg) lreg);
5215 
5216   op_cost(10);
5217   format %{"[$reg + $lreg]" %}
5218   interface(MEMORY_INTER) %{
5219     base($reg);
5220     index($lreg);  // Unscaled long index register
5221     scale(0x0);    // No Scale
5222     disp(0x0);     // No Displacement
5223   %}
5224 %}
5225 
5226 // Indirect Narrow Oop Times Scale Plus Index Register (only when the narrow-oop shift is zero)
5227 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5228 %{
5229   predicate(Universe::narrow_oop_shift() == 0);
5230   constraint(ALLOC_IN_RC(ptr_reg));
5231   match(AddP (DecodeN reg) (LShiftL lreg scale));
5232 
5233   op_cost(10);
5234   format %{"[$reg + $lreg << $scale]" %}
5235   interface(MEMORY_INTER) %{
5236     base($reg);
5237     index($lreg);
5238     scale($scale);
5239     disp(0x0);
5240   %}
5241 %}
5242 
5243 // Indirect Narrow Oop Times Scale Plus Index Register Plus Offset Operand (only when the narrow-oop shift is zero)
5244 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5245 %{
5246   predicate(Universe::narrow_oop_shift() == 0);
5247   constraint(ALLOC_IN_RC(ptr_reg));
5248   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5249 
5250   op_cost(10);
5251   format %{"[$reg + $off + $lreg << $scale]" %}
5252   interface(MEMORY_INTER) %{
5253     base($reg);
5254     index($lreg);
5255     scale($scale);
5256     disp($off);
5257   %}
5258 %}
5259 
5260 // Indirect Narrow Oop Times Scale Plus Positive Index Register Plus Offset Operand
5261 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5262 %{
5263   constraint(ALLOC_IN_RC(ptr_reg));
5264   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero narrow-oop shift, and the ConvI2L reached from n (outer AddP -> inner AddP -> LShiftL -> ConvI2L) has a long type with lower bound (_lo) >= 0
5265   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5266 
5267   op_cost(10);
5268   format %{"[$reg + $off + $idx << $scale]" %}
5269   interface(MEMORY_INTER) %{
5270     base($reg);
5271     index($idx);    // The int register itself is used as the index
5272     scale($scale);
5273     disp($off);
5274   %}
5275 %}
5276 
5277 
5278 //----------Special Memory Operands--------------------------------------------
5279 // Stack Slot Operand - This operand is used for loading and storing temporary
5280 //                      values on the stack where a match requires a value to
5281 //                      flow through memory.
5282 operand stackSlotP(sRegP reg)
5283 %{
5284   constraint(ALLOC_IN_RC(stack_slots));
5285   // No match rule because this operand is only generated in matching
5286 
5287   format %{ "[$reg]" %}
5288   interface(MEMORY_INTER) %{
5289     base(0x4);   // RSP
5290     index(0x4);  // No Index
5291     scale(0x0);  // No Scale
5292     disp($reg);  // Stack Offset
5293   %}
5294 %}
5295 
5296 operand stackSlotI(sRegI reg)
5297 %{
5298   constraint(ALLOC_IN_RC(stack_slots));
5299   // No match rule because this operand is only generated in matching
5300 
5301   format %{ "[$reg]" %}
5302   interface(MEMORY_INTER) %{
5303     base(0x4);   // RSP
5304     index(0x4);  // No Index
5305     scale(0x0);  // No Scale
5306     disp($reg);  // Stack Offset
5307   %}
5308 %}
5309 
5310 operand stackSlotF(sRegF reg)
5311 %{
5312   constraint(ALLOC_IN_RC(stack_slots));
5313   // No match rule because this operand is only generated in matching
5314 
5315   format %{ "[$reg]" %}
5316   interface(MEMORY_INTER) %{
5317     base(0x4);   // RSP
5318     index(0x4);  // No Index
5319     scale(0x0);  // No Scale
5320     disp($reg);  // Stack Offset
5321   %}
5322 %}
5323 
5324 operand stackSlotD(sRegD reg)
5325 %{
5326   constraint(ALLOC_IN_RC(stack_slots));
5327   // No match rule because this operand is only generated in matching
5328 
5329   format %{ "[$reg]" %}
5330   interface(MEMORY_INTER) %{
5331     base(0x4);   // RSP
5332     index(0x4);  // No Index
5333     scale(0x0);  // No Scale
5334     disp($reg);  // Stack Offset
5335   %}
5336 %}
5337 operand stackSlotL(sRegL reg)
5338 %{
5339   constraint(ALLOC_IN_RC(stack_slots));
5340   // No match rule because this operand is only generated in matching
5341 
5342   format %{ "[$reg]" %}
5343   interface(MEMORY_INTER) %{
5344     base(0x4);   // RSP
5345     index(0x4);  // No Index
5346     scale(0x0);  // No Scale
5347     disp($reg);  // Stack Offset
5348   %}
5349 %}
5350 
5351 //----------Conditional Branch Operands----------------------------------------
5352 // Comparison Op  - This is the operation of the comparison, and is limited to
5353 //                  the following set of codes:
5354 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5355 //
5356 // Other attributes of the comparison, such as unsignedness, are specified
5357 // by the comparison instruction that sets a condition code flags register.
5358 // That result is represented by a flags operand whose subtype is appropriate
5359 // to the unsignedness (etc.) of the comparison.
5360 //
5361 // Later, the instruction which matches both the Comparison Op (a Bool) and
5362 // the flags (produced by the Cmp) specifies the coding of the comparison op
5363 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5364 
5365 // Comparison Code
5366 operand cmpOp()
5367 %{
5368   match(Bool);
5369 
5370   format %{ "" %}
5371   interface(COND_INTER) %{
5372     equal(0x4, "e");
5373     not_equal(0x5, "ne");
5374     less(0xC, "l");
5375     greater_equal(0xD, "ge");
5376     less_equal(0xE, "le");
5377     greater(0xF, "g");
5378   %}
5379 %}
5380 
5381 // Comparison Code, unsigned compare.  Used by FP also, with
5382 // C2 (unordered) turned into GT or LT already.  The other bits
5383 // C0 and C3 are turned into Carry & Zero flags.
5384 operand cmpOpU()
5385 %{
5386   match(Bool);
5387 
5388   format %{ "" %}
5389   interface(COND_INTER) %{
5390     equal(0x4, "e");
5391     not_equal(0x5, "ne");
5392     less(0x2, "b");
5393     greater_equal(0x3, "nb");
5394     less_equal(0x6, "be");
5395     greater(0x7, "nbe");
5396   %}
5397 %}
5398 
5399 
5400 // Floating comparisons that don't require any fixup for the unordered case
5401 operand cmpOpUCF() %{
5402   match(Bool);
5403   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5404             n->as_Bool()->_test._test == BoolTest::ge ||
5405             n->as_Bool()->_test._test == BoolTest::le ||
5406             n->as_Bool()->_test._test == BoolTest::gt);
5407   format %{ "" %}
5408   interface(COND_INTER) %{
5409     equal(0x4, "e");
5410     not_equal(0x5, "ne");
5411     less(0x2, "b");
5412     greater_equal(0x3, "nb");
5413     less_equal(0x6, "be");
5414     greater(0x7, "nbe");
5415   %}
5416 %}
5417 
5418 
5419 // Floating comparisons that can be fixed up with extra conditional jumps
5420 operand cmpOpUCF2() %{
5421   match(Bool);
5422   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5423             n->as_Bool()->_test._test == BoolTest::eq);
5424   format %{ "" %}
5425   interface(COND_INTER) %{
5426     equal(0x4, "e");
5427     not_equal(0x5, "ne");
5428     less(0x2, "b");
5429     greater_equal(0x3, "nb");
5430     less_equal(0x6, "be");
5431     greater(0x7, "nbe");
5432   %}
5433 %}
5434 
5435 
5436 //----------OPERAND CLASSES----------------------------------------------------
5437 // Operand Classes are groups of operands that are used to simplify
5438 // instruction definitions by not requiring the AD writer to specify separate
5439 // instructions for every form of operand when the instruction accepts
5440 // multiple operand types with the same basic encoding and format.  The classic
5441 // case of this is memory operands.
5442 
5443 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5444                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5445                indCompressedOopOffset,
5446                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5447                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5448                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5449 
5450 //----------PIPELINE-----------------------------------------------------------
5451 // Rules which define the behavior of the target architecture's pipeline.
5452 pipeline %{
5453 
5454 //----------ATTRIBUTES---------------------------------------------------------
5455 attributes %{
5456   variable_size_instructions;        // Variable size instructions
5457   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5458   instruction_unit_size = 1;         // Instruction size unit is 1 byte
5459   instruction_fetch_unit_size = 16;  // The processor fetches one line
5460   instruction_fetch_units = 1;       // of 16 bytes
5461 
5462   // List of nop instructions
5463   nops( MachNop );
5464 %}
5465 
5466 //----------RESOURCES----------------------------------------------------------
5467 // Resources are the functional units available to the machine
5468 
5469 // Generic P2/P3 pipeline
5470 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5471 // 3 instructions decoded per cycle.
5472 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below -- confirm), 1 branch, 1 FPU,
5473 // 3 ALU op, only ALU0 handles mul instructions.
5474 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5475            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5476            BR, FPU,
5477            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5478 
5479 //----------PIPELINE DESCRIPTION-----------------------------------------------
5480 // Pipeline Description specifies the stages in the machine's pipeline
5481 
5482 // Generic P2/P3 pipeline
5483 pipe_desc(S0, S1, S2, S3, S4, S5);
5484 
5485 //----------PIPELINE CLASSES---------------------------------------------------
5486 // Pipeline Classes describe the stages in which input and output are
5487 // referenced by the hardware pipeline.
5488 
5489 // Naming convention: ialu or fpu
5490 // Then: _reg
5491 // Then: _reg if there is a 2nd register
5492 // Then: _long if it's a pair of instructions implementing a long
5493 // Then: _fat if it requires the big decoder
5494 //   Or: _mem if it requires the big decoder and a memory unit.
5495 
5496 // Integer ALU reg operation
5497 pipe_class ialu_reg(rRegI dst)
5498 %{
5499     single_instruction;
5500     dst    : S4(write);
5501     dst    : S3(read);
5502     DECODE : S0;        // any decoder
5503     ALU    : S3;        // any alu
5504 %}
5505 
5506 // Long ALU reg operation
5507 pipe_class ialu_reg_long(rRegL dst)
5508 %{
5509     instruction_count(2);
5510     dst    : S4(write);
5511     dst    : S3(read);
5512     DECODE : S0(2);     // any 2 decoders
5513     ALU    : S3(2);     // both alus
5514 %}
5515 
5516 // Integer ALU reg operation using big decoder
5517 pipe_class ialu_reg_fat(rRegI dst)
5518 %{
5519     single_instruction;
5520     dst    : S4(write);
5521     dst    : S3(read);
5522     D0     : S0;        // big decoder only
5523     ALU    : S3;        // any alu
5524 %}
5525 
5526 // Long ALU reg operation using big decoder
5527 pipe_class ialu_reg_long_fat(rRegL dst)
5528 %{
5529     instruction_count(2);
5530     dst    : S4(write);
5531     dst    : S3(read);
5532     D0     : S0(2);     // big decoder only; twice
5533     ALU    : S3(2);     // any 2 alus
5534 %}
5535 
5536 // Integer ALU reg-reg operation
5537 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5538 %{
5539     single_instruction;
5540     dst    : S4(write);
5541     src    : S3(read);
5542     DECODE : S0;        // any decoder
5543     ALU    : S3;        // any alu
5544 %}
5545 
5546 // Long ALU reg-reg operation
5547 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5548 %{
5549     instruction_count(2);
5550     dst    : S4(write);
5551     src    : S3(read);
5552     DECODE : S0(2);     // any 2 decoders
5553     ALU    : S3(2);     // both alus
5554 %}
5555 
5556 // Integer ALU reg-reg operation using big decoder
5557 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5558 %{
5559     single_instruction;
5560     dst    : S4(write);
5561     src    : S3(read);
5562     D0     : S0;        // big decoder only
5563     ALU    : S3;        // any alu
5564 %}
5565 
5566 // Long ALU reg-reg operation using big decoder
5567 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5568 %{
5569     instruction_count(2);
5570     dst    : S4(write);
5571     src    : S3(read);
5572     D0     : S0(2);     // big decoder only; twice
5573     ALU    : S3(2);     // both alus
5574 %}
5575 
5576 // Integer ALU reg-mem operation
5577 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5578 %{
5579     single_instruction;
5580     dst    : S5(write);
5581     mem    : S3(read);
5582     D0     : S0;        // big decoder only
5583     ALU    : S4;        // any alu
5584     MEM    : S3;        // any mem
5585 %}
5586 
5587 // Integer mem operation (prefetch)
// Single instruction with no register result and no ALU claim.
// Timing: mem read at S3.  Resources: D0 at S0, MEM at S3.
5588 pipe_class ialu_mem(memory mem)
5589 %{
5590     single_instruction;
5591     mem    : S3(read);
5592     D0     : S0;        // big decoder only
5593     MEM    : S3;        // any mem
5594 %}
5595 
5596 // Integer Store to Memory
// Single instruction; both operands are declared as reads (no write entry).
// Timing: mem read at S3, src read at S5.
// Resources: D0 at S0, MEM at S3, ALU at S4.
5597 pipe_class ialu_mem_reg(memory mem, rRegI src)
5598 %{
5599     single_instruction;
5600     mem    : S3(read);
5601     src    : S5(read);
5602     D0     : S0;        // big decoder only
5603     ALU    : S4;        // any alu
5604     MEM    : S3;
5605 %}
5606 
5607 // // Long Store to Memory
5608 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5609 // %{
5610 //     instruction_count(2);
5611 //     mem    : S3(read);
5612 //     src    : S5(read);
5613 //     D0     : S0(2);          // big decoder only; twice
5614 //     ALU    : S4(2);     // any 2 alus
5615 //     MEM    : S3(2);  // Both mems
5616 // %}
5617 
5618 // Integer Store to Memory with no register source (immediate form, per the class name)
// Single instruction.  Timing: mem read at S3.
// Resources: D0 at S0, MEM at S3, ALU at S4.
5619 pipe_class ialu_mem_imm(memory mem)
5620 %{
5621     single_instruction;
5622     mem    : S3(read);
5623     D0     : S0;        // big decoder only
5624     ALU    : S4;        // any alu
5625     MEM    : S3;
5626 %}
5627 
5628 // Integer ALU0 reg-reg operation
// Like a reg-reg ALU op, but pinned to the D0 decoder and the ALU0 unit.
// Timing: src read at S3, dst written at S4.  Resources: D0 at S0, ALU0 at S3.
5629 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5630 %{
5631     single_instruction;
5632     dst    : S4(write);
5633     src    : S3(read);
5634     D0     : S0;        // Big decoder only
5635     ALU0   : S3;        // only alu0
5636 %}
5637 
5638 // Integer ALU0 reg-mem operation
// Reg-mem op pinned to the ALU0 unit.
// Timing: mem read at S3, dst written at S5.
// Resources: D0 at S0, MEM at S3, ALU0 at S4.
5639 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5640 %{
5641     single_instruction;
5642     dst    : S5(write);
5643     mem    : S3(read);
5644     D0     : S0;        // big decoder only
5645     ALU0   : S4;        // ALU0 only
5646     MEM    : S3;        // any mem
5647 %}
5648 
5649 // Integer ALU reg-reg operation whose result is the flags register
// Single instruction.  Timing: src1 and src2 read at S3, cr written at S4.
// Resources: any DECODE slot at S0, any ALU at S3.
5650 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5651 %{
5652     single_instruction;
5653     cr     : S4(write);
5654     src1   : S3(read);
5655     src2   : S3(read);
5656     DECODE : S0;        // any decoder
5657     ALU    : S3;        // any alu
5658 %}
5659 
5660 // Integer ALU reg-imm operation whose result is the flags register
// Single instruction; the immediate is not a pipeline operand.
// Timing: src1 read at S3, cr written at S4.
// Resources: any DECODE slot at S0, any ALU at S3.
5661 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5662 %{
5663     single_instruction;
5664     cr     : S4(write);
5665     src1   : S3(read);
5666     DECODE : S0;        // any decoder
5667     ALU    : S3;        // any alu
5668 %}
5669 
5670 // Integer ALU reg-mem operation whose result is the flags register
// Single instruction.  Timing: src1 and src2 (memory) read at S3, cr written
// at S4.  Resources: D0 at S0, MEM at S3, ALU at S4.
// NOTE(review): cr is written at S4, the same stage the ALU is claimed,
// whereas ialu_reg_mem writes its result at S5 -- confirm this is intended.
5671 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5672 %{
5673     single_instruction;
5674     cr     : S4(write);
5675     src1   : S3(read);
5676     src2   : S3(read);
5677     D0     : S0;        // big decoder only
5678     ALU    : S4;        // any alu
5679     MEM    : S3;
5680 %}
5681 
5682 // Conditional move reg-reg
// Counted as four instructions.  All three operands are declared as reads
// (p and q at S3, y at S4); no write entry and no ALU claim are declared.
// Resources: DECODE at S0 with a count of 4.
5683 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5684 %{
5685     instruction_count(4);
5686     y      : S4(read);
5687     q      : S3(read);
5688     p      : S3(read);
5689     DECODE : S0(4);     // any decoder
5690 %}
5691 
5692 // Conditional move reg-reg
// Single instruction.  Timing: src and cr read at S3, dst written at S4.
// Resources: any DECODE slot at S0; no ALU claim is declared.
// Parameter order here is (dst, src, cr), unlike the other cmov classes,
// which take cr first.
5693 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5694 %{
5695     single_instruction;
5696     dst    : S4(write);
5697     src    : S3(read);
5698     cr     : S3(read);
5699     DECODE : S0;        // any decoder
5700 %}
5701 
5702 // Conditional move reg-mem
// Single instruction.  Timing: src (memory) and cr read at S3, dst written
// at S4.  Resources: any DECODE slot at S0, MEM at S3.
5703 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5704 %{
5705     single_instruction;
5706     dst    : S4(write);
5707     src    : S3(read);
5708     cr     : S3(read);
5709     DECODE : S0;        // any decoder
5710     MEM    : S3;
5711 %}
5712 
5713 // Conditional move reg-reg long
// Timing: src and cr read at S3, dst written at S4.
// Resources: DECODE at S0 with a count of 2.
// NOTE(review): declared single_instruction while claiming two decoders;
// the other long classes use instruction_count(2) -- confirm intent.
5714 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5715 %{
5716     single_instruction;
5717     dst    : S4(write);
5718     src    : S3(read);
5719     cr     : S3(read);
5720     DECODE : S0(2);     // any 2 decoders
5721 %}
5722 
5723 // XXX
5724 // // Conditional move double reg-reg
5725 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5726 // %{
5727 //     single_instruction;
5728 //     dst    : S4(write);
5729 //     src    : S3(read);
5730 //     cr     : S3(read);
5731 //     DECODE : S0;     // any decoder
5732 // %}
5733 
5734 // Float single-register operation
// Counted as two instructions.  dst is declared only as a read, at S3.
// Resources: DECODE at S0 with a count of 2, FPU at S3.
5735 pipe_class fpu_reg(regD dst)
5736 %{
5737     instruction_count(2);
5738     dst    : S3(read);
5739     DECODE : S0(2);     // any 2 decoders
5740     FPU    : S3;
5741 %}
5742 
5743 // Float reg-reg operation
// Counted as two instructions.  Timing: src read at S3, dst written at S4.
// Resources: DECODE at S0 with a count of 2, FPU at S3.
5744 pipe_class fpu_reg_reg(regD dst, regD src)
5745 %{
5746     instruction_count(2);
5747     dst    : S4(write);
5748     src    : S3(read);
5749     DECODE : S0(2);     // any 2 decoders
5750     FPU    : S3;
5751 %}
5752 
5753 // Float reg-reg-reg operation (two register sources)
// Counted as three instructions.  Timing: src1 and src2 read at S3, dst
// written at S4.  Resources: DECODE at S0 (count 3), FPU at S3 (count 2).
5754 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5755 %{
5756     instruction_count(3);
5757     dst    : S4(write);
5758     src1   : S3(read);
5759     src2   : S3(read);
5760     DECODE : S0(3);     // any 3 decoders
5761     FPU    : S3(2);
5762 %}
5763 
5764 // Float reg-reg-reg-reg operation (three register sources)
// Counted as four instructions.  Timing: src1..src3 read at S3, dst written
// at S4.  Resources: DECODE at S0 (count 4), FPU at S3 (count 2).
5765 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5766 %{
5767     instruction_count(4);
5768     dst    : S4(write);
5769     src1   : S3(read);
5770     src2   : S3(read);
5771     src3   : S3(read);
5772     DECODE : S0(4);     // any 4 decoders
5773     FPU    : S3(2);
5774 %}
5775 
5776 // Float operation with one memory source and two register sources
// Counted as four instructions.  Timing: src1 (memory), src2 and src3 read
// at S3, dst written at S4.  Resources: D0 at S0, then DECODE at S1 (count
// 3), FPU at S3 (count 2), MEM at S3.
5777 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5778 %{
5779     instruction_count(4);
5780     dst    : S4(write);
5781     src1   : S3(read);
5782     src2   : S3(read);
5783     src3   : S3(read);
5784     DECODE : S1(3);     // any 3 decoders
5785     D0     : S0;        // Big decoder only
5786     FPU    : S3(2);
5787     MEM    : S3;
5788 %}
5789 
5790 // Float reg-mem operation
// Counted as two instructions.  Timing: mem read at S3, dst written at S5.
// Resources: D0 at S0, DECODE at S1, MEM at S3, FPU at S4.
5791 pipe_class fpu_reg_mem(regD dst, memory mem)
5792 %{
5793     instruction_count(2);
5794     dst    : S5(write);
5795     mem    : S3(read);
5796     D0     : S0;        // big decoder only
5797     DECODE : S1;        // any decoder for FPU POP
5798     FPU    : S4;
5799     MEM    : S3;        // any mem
5800 %}
5801 
5802 // Float reg-mem operation
// Variant with an extra register source.  Counted as three instructions.
// Timing: src1 and mem read at S3, dst written at S5.
// Resources: D0 at S0, DECODE at S1 (count 2), MEM at S3, FPU at S4.
5803 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5804 %{
5805     instruction_count(3);
5806     dst    : S5(write);
5807     src1   : S3(read);
5808     mem    : S3(read);
5809     D0     : S0;        // big decoder only
5810     DECODE : S1(2);     // any decoder for FPU POP
5811     FPU    : S4;
5812     MEM    : S3;        // any mem
5813 %}
5814 
5815 // Float mem-reg operation
// Counted as two instructions; both operands are declared as reads.
// Timing: mem read at S3, src read at S5.
// Resources: DECODE at S0, D0 at S1, MEM at S3, FPU at S4.
5816 pipe_class fpu_mem_reg(memory mem, regD src)
5817 %{
5818     instruction_count(2);
5819     src    : S5(read);
5820     mem    : S3(read);
5821     DECODE : S0;        // any decoder for FPU PUSH
5822     D0     : S1;        // big decoder only
5823     FPU    : S4;
5824     MEM    : S3;        // any mem
5825 %}
5826 
// Float mem-reg-reg operation (memory operand plus two register sources).
// Counted as three instructions; all operands are declared as reads at S3.
// Resources: DECODE at S0 (count 2), D0 at S1, MEM at S3, FPU at S4.
5827 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5828 %{
5829     instruction_count(3);
5830     src1   : S3(read);
5831     src2   : S3(read);
5832     mem    : S3(read);
5833     DECODE : S0(2);     // any decoder for FPU PUSH
5834     D0     : S1;        // big decoder only
5835     FPU    : S4;
5836     MEM    : S3;        // any mem
5837 %}
5838 
// Float operation with a memory operand, a register source and a second
// memory source.  Counted as three instructions; all operands are reads
// (src1 and src2 at S3, mem at S4).
// Resources: DECODE at S0, D0 at S0 (count 2), MEM at S3 (count 2), FPU at S4.
5839 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5840 %{
5841     instruction_count(3);
5842     src1   : S3(read);
5843     src2   : S3(read);
5844     mem    : S4(read);
5845     DECODE : S0;        // any decoder for FPU PUSH
5846     D0     : S0(2);     // big decoder only
5847     FPU    : S4;
5848     MEM    : S3(2);     // any mem
5849 %}
5850 
// Float mem-mem operation.  Counted as two instructions; no FPU resource is
// claimed.  Both operands are declared as reads (src1 at S3, dst at S4).
// Resources: D0 at S0 (count 2), MEM at S3 (count 2).
5851 pipe_class fpu_mem_mem(memory dst, memory src1)
5852 %{
5853     instruction_count(2);
5854     src1   : S3(read);
5855     dst    : S4(read);
5856     D0     : S0(2);     // big decoder only
5857     MEM    : S3(2);     // any mem
5858 %}
5859 
// Float mem-mem-mem operation.  Counted as three instructions; all operands
// are declared as reads (src1 and src2 at S3, dst at S4).
// Resources: D0 at S0 (count 3), MEM at S3 (count 3), FPU at S4.
5860 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5861 %{
5862     instruction_count(3);
5863     src1   : S3(read);
5864     src2   : S3(read);
5865     dst    : S4(read);
5866     D0     : S0(3);     // big decoder only
5867     FPU    : S4;
5868     MEM    : S3(3);     // any mem
5869 %}
5870 
// Float operation with a memory operand, a register source and (per the
// class name) a constant.  Counted as three instructions; src1 and mem are
// both declared as reads at S4.
// Resources: DECODE at S0, D0 at S0 (count 2), MEM at S3 (count 2), FPU at S4.
5871 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5872 %{
5873     instruction_count(3);
5874     src1   : S4(read);
5875     mem    : S4(read);
5876     DECODE : S0;        // any decoder for FPU PUSH
5877     D0     : S0(2);     // big decoder only
5878     FPU    : S4;
5879     MEM    : S3(2);     // any mem
5880 %}
5881 
5882 // Float load constant
// Counted as two instructions; claims MEM even though no memory operand is
// listed.  Timing: dst written at S5.
// Resources: D0 at S0, DECODE at S1, MEM at S3, FPU at S4.
5883 pipe_class fpu_reg_con(regD dst)
5884 %{
5885     instruction_count(2);
5886     dst    : S5(write);
5887     D0     : S0;        // big decoder only for the load
5888     DECODE : S1;        // any decoder for FPU POP
5889     FPU    : S4;
5890     MEM    : S3;        // any mem
5891 %}
5892 
5893 // Float reg-reg operation combined with a constant load
// Counted as three instructions.  Timing: src read at S3, dst written at S5.
// Resources: D0 at S0, DECODE at S1 (count 2), MEM at S3, FPU at S4.
5894 pipe_class fpu_reg_reg_con(regD dst, regD src)
5895 %{
5896     instruction_count(3);
5897     dst    : S5(write);
5898     src    : S3(read);
5899     D0     : S0;        // big decoder only for the load
5900     DECODE : S1(2);     // any decoder for FPU POP
5901     FPU    : S4;
5902     MEM    : S3;        // any mem
5903 %}
5904 
5905 // UnConditional branch
// Single instruction; the label has no timing entry.  Resources: BR at S3
// only (no decoder claim is declared).
5906 pipe_class pipe_jmp(label labl)
5907 %{
5908     single_instruction;
5909     BR   : S3;
5910 %}
5911 
5912 // Conditional branch
// Single instruction.  Timing: cr read at S1 (earlier than the S3 reads used
// by the ALU classes).  Resources: BR at S3.
5913 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5914 %{
5915     single_instruction;
5916     cr    : S1(read);
5917     BR    : S3;
5918 %}
5919 
5920 // Allocation idiom
// One instruction, marked force_serialization, with a fixed latency of 6.
// Timing: heap_ptr read at S3, dst written at S5.
// Resources: DECODE at S0 (count 3), D0 at S2, MEM at S3, ALU at S3
// (count 2), BR at S5.
5921 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5922 %{
5923     instruction_count(1); force_serialization;
5924     fixed_latency(6);
5925     heap_ptr : S3(read);
5926     DECODE   : S0(3);
5927     D0       : S2;
5928     MEM      : S3;
5929     ALU      : S3(2);
5930     dst      : S5(write);
5931     BR       : S5;
5932 %}
5933 
5934 // Generic big/slow expanded idiom
// Catch-all class: counted as 10 instructions spanning multiple bundles,
// force-serialized, with a fixed latency of 100.  It takes no operands, so
// no per-operand timing is modelled.  The FP/XMM load instructs use it.
// Resources: D0 at S0 (count 2), MEM at S3 (count 2).
5935 pipe_class pipe_slow()
5936 %{
5937     instruction_count(10); multiple_bundles; force_serialization;
5938     fixed_latency(100);
5939     D0  : S0(2);
5940     MEM : S3(2);
5941 %}
5942 
5943 // The real do-nothing guy
// Zero instructions, no operands, no resources.  The define block that
// follows binds MachNop to this class.
5944 pipe_class empty()
5945 %{
5946     instruction_count(0);
5947 %}
5948 
5949 // Define the class for the Nop node
// Binds MachNop to the zero-cost `empty` pipe class.
5950 define
5951 %{
5952    MachNop = empty;
5953 %}
5954 
5955 %}
5956 
5957 //----------INSTRUCTIONS-------------------------------------------------------
5958 //
5959 // match      -- States which machine-independent subtree may be replaced
5960 //               by this instruction.
5961 // ins_cost   -- The estimated cost of this instruction is used by instruction
5962 //               selection to identify a minimum cost tree of machine
5963 //               instructions that matches a tree of machine-independent
5964 //               instructions.
5965 // format     -- A string providing the disassembly for this instruction.
5966 //               The value of an instruction's operand may be inserted
5967 //               by referring to it with a '$' prefix.
5968 // opcode     -- Three instruction opcodes may be provided.  These are referred
5969 //               to within an encode class as $primary, $secondary, and $tertiary
5970 //               respectively.  The primary opcode is commonly used to
5971 //               indicate the type of machine instruction, while secondary
5972 //               and tertiary are often used for prefix options or addressing
5973 //               modes.
5974 // ins_encode -- A list of encode classes with parameters. The encode class
5975 //               name must have been defined in an 'enc_class' specification
5976 //               in the encode section of the architecture description.
5977 
5978 
5979 //----------Load/Store/Move Instructions---------------------------------------
5980 //----------Load Instructions--------------------------------------------------
5981 
5982 // Load Byte (8 bit signed)
// Matches (LoadB mem) into an int register and emits a sign-extending
// movsbl.  Cost 125; scheduled as ialu_reg_mem.
5983 instruct loadB(rRegI dst, memory mem)
5984 %{
5985   match(Set dst (LoadB mem));
5986 
5987   ins_cost(125);
5988   format %{ "movsbl  $dst, $mem\t# byte" %}
5989 
5990   ins_encode %{
5991     __ movsbl($dst$$Register, $mem$$Address);
5992   %}
5993 
5994   ins_pipe(ialu_reg_mem);
5995 %}
5996 
5997 // Load Byte (8 bit signed) into Long Register
// Folds ConvI2L over LoadB into a single movsbq (sign-extend byte to 64
// bits), so no separate int-to-long conversion is emitted.
5998 instruct loadB2L(rRegL dst, memory mem)
5999 %{
6000   match(Set dst (ConvI2L (LoadB mem)));
6001 
6002   ins_cost(125);
6003   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6004 
6005   ins_encode %{
6006     __ movsbq($dst$$Register, $mem$$Address);
6007   %}
6008 
6009   ins_pipe(ialu_reg_mem);
6010 %}
6011 
6012 // Load Unsigned Byte (8 bit UNsigned)
// Matches (LoadUB mem) into an int register and emits a zero-extending
// movzbl.  Cost 125; scheduled as ialu_reg_mem.
6013 instruct loadUB(rRegI dst, memory mem)
6014 %{
6015   match(Set dst (LoadUB mem));
6016 
6017   ins_cost(125);
6018   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6019 
6020   ins_encode %{
6021     __ movzbl($dst$$Register, $mem$$Address);
6022   %}
6023 
6024   ins_pipe(ialu_reg_mem);
6025 %}
6026 
6027 // Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L over LoadUB into a single zero-extending movzbq.
6028 instruct loadUB2L(rRegL dst, memory mem)
6029 %{
6030   match(Set dst (ConvI2L (LoadUB mem)));
6031 
6032   ins_cost(125);
6033   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6034 
6035   ins_encode %{
6036     __ movzbq($dst$$Register, $mem$$Address);
6037   %}
6038 
6039   ins_pipe(ialu_reg_mem);
6040 %}
6041 
6042 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
// Matches ConvI2L(AndI(LoadUB mem, mask)) and emits two instructions:
// movzbq to load and zero-extend, then a 32-bit andl with the mask.
// The flags register is declared killed (the and instruction writes flags).
// No ins_cost is given, so the ADLC default cost applies.
6043 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6044   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6045   effect(KILL cr);
6046 
6047   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6048             "andl    $dst, $mask" %}
6049   ins_encode %{
6050     Register Rdst = $dst$$Register;
6051     __ movzbq(Rdst, $mem$$Address);
6052     __ andl(Rdst, $mask$$constant);
6053   %}
6054   ins_pipe(ialu_reg_mem);
6055 %}
6056 
6057 // Load Short (16 bit signed)
// Matches (LoadS mem) into an int register and emits a sign-extending
// movswl.  Cost 125; scheduled as ialu_reg_mem.
6058 instruct loadS(rRegI dst, memory mem)
6059 %{
6060   match(Set dst (LoadS mem));
6061 
6062   ins_cost(125);
6063   format %{ "movswl $dst, $mem\t# short" %}
6064 
6065   ins_encode %{
6066     __ movswl($dst$$Register, $mem$$Address);
6067   %}
6068 
6069   ins_pipe(ialu_reg_mem);
6070 %}
6071 
6072 // Load Short (16 bit signed) to Byte (8 bit signed)
// Matches a LoadS that is shifted left and then arithmetically right by the
// same immI_24 operand, and replaces all three nodes with one
// sign-extending byte load (movsbl) from the same address.
6073 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6074   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6075 
6076   ins_cost(125);
6077   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6078   ins_encode %{
6079     __ movsbl($dst$$Register, $mem$$Address);
6080   %}
6081   ins_pipe(ialu_reg_mem);
6082 %}
6083 
6084 // Load Short (16 bit signed) into Long Register
// Folds ConvI2L over LoadS into a single movswq (sign-extend 16 -> 64 bits).
6085 instruct loadS2L(rRegL dst, memory mem)
6086 %{
6087   match(Set dst (ConvI2L (LoadS mem)));
6088 
6089   ins_cost(125);
6090   format %{ "movswq $dst, $mem\t# short -> long" %}
6091 
6092   ins_encode %{
6093     __ movswq($dst$$Register, $mem$$Address);
6094   %}
6095 
6096   ins_pipe(ialu_reg_mem);
6097 %}
6098 
6099 // Load Unsigned Short/Char (16 bit UNsigned)
// Matches (LoadUS mem) into an int register and emits a zero-extending
// movzwl.  Cost 125; scheduled as ialu_reg_mem.
6100 instruct loadUS(rRegI dst, memory mem)
6101 %{
6102   match(Set dst (LoadUS mem));
6103 
6104   ins_cost(125);
6105   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6106 
6107   ins_encode %{
6108     __ movzwl($dst$$Register, $mem$$Address);
6109   %}
6110 
6111   ins_pipe(ialu_reg_mem);
6112 %}
6113 
6114 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches a LoadUS shifted left then arithmetically right by the same
// immI_24 operand and replaces it with one sign-extending byte load
// (movsbl) from the same address.
6115 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6116   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6117 
6118   ins_cost(125);
6119   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6120   ins_encode %{
6121     __ movsbl($dst$$Register, $mem$$Address);
6122   %}
6123   ins_pipe(ialu_reg_mem);
6124 %}
6125 
6126 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L over LoadUS into a single zero-extending movzwq.
6127 instruct loadUS2L(rRegL dst, memory mem)
6128 %{
6129   match(Set dst (ConvI2L (LoadUS mem)));
6130 
6131   ins_cost(125);
6132   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6133 
6134   ins_encode %{
6135     __ movzwq($dst$$Register, $mem$$Address);
6136   %}
6137 
6138   ins_pipe(ialu_reg_mem);
6139 %}
6140 
6141 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Matches ConvI2L(AndI(LoadUS mem, mask)) where mask is the immI_255
// operand, and emits a single zero-extending byte load (movzbq) instead of
// a load plus an and.  No ins_cost or flags effect is declared.
6142 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6143   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6144 
6145   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6146   ins_encode %{
6147     __ movzbq($dst$$Register, $mem$$Address);
6148   %}
6149   ins_pipe(ialu_reg_mem);
6150 %}
6151 
6152 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
// Matches ConvI2L(AndI(LoadUS mem, mask)) for an immI16 mask and emits two
// instructions: movzwq to load and zero-extend, then a 32-bit andl with the
// mask.  The flags register is declared killed.  No ins_cost is given.
6153 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6154   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6155   effect(KILL cr);
6156 
6157   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6158             "andl    $dst, $mask" %}
6159   ins_encode %{
6160     Register Rdst = $dst$$Register;
6161     __ movzwq(Rdst, $mem$$Address);
6162     __ andl(Rdst, $mask$$constant);
6163   %}
6164   ins_pipe(ialu_reg_mem);
6165 %}
6166 
6167 // Load Integer
// Matches (LoadI mem) into an int register and emits a 32-bit movl.
// Cost 125; scheduled as ialu_reg_mem.
6168 instruct loadI(rRegI dst, memory mem)
6169 %{
6170   match(Set dst (LoadI mem));
6171 
6172   ins_cost(125);
6173   format %{ "movl    $dst, $mem\t# int" %}
6174 
6175   ins_encode %{
6176     __ movl($dst$$Register, $mem$$Address);
6177   %}
6178 
6179   ins_pipe(ialu_reg_mem);
6180 %}
6181 
6182 // Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches a LoadI shifted left then arithmetically right by the same
// immI_24 operand and replaces it with one sign-extending byte load
// (movsbl) from the same address.
6183 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6184   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6185 
6186   ins_cost(125);
6187   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6188   ins_encode %{
6189     __ movsbl($dst$$Register, $mem$$Address);
6190   %}
6191   ins_pipe(ialu_reg_mem);
6192 %}
6193 
6194 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches AndI(LoadI mem, mask) where mask is the immI_255 operand and
// replaces the load-and-mask with one zero-extending byte load (movzbl).
6195 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6196   match(Set dst (AndI (LoadI mem) mask));
6197 
6198   ins_cost(125);
6199   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6200   ins_encode %{
6201     __ movzbl($dst$$Register, $mem$$Address);
6202   %}
6203   ins_pipe(ialu_reg_mem);
6204 %}
6205 
6206 // Load Integer (32 bit signed) to Short (16 bit signed)
// Matches a LoadI shifted left then arithmetically right by the same
// immI_16 operand and replaces it with one sign-extending 16-bit load
// (movswl) from the same address.
6207 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6208   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6209 
6210   ins_cost(125);
6211   format %{ "movswl  $dst, $mem\t# int -> short" %}
6212   ins_encode %{
6213     __ movswl($dst$$Register, $mem$$Address);
6214   %}
6215   ins_pipe(ialu_reg_mem);
6216 %}
6217 
6218 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches AndI(LoadI mem, mask) where mask is the immI_65535 operand and
// replaces the load-and-mask with one zero-extending 16-bit load (movzwl).
6219 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6220   match(Set dst (AndI (LoadI mem) mask));
6221 
6222   ins_cost(125);
6223   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6224   ins_encode %{
6225     __ movzwl($dst$$Register, $mem$$Address);
6226   %}
6227   ins_pipe(ialu_reg_mem);
6228 %}
6229 
6230 // Load Integer into Long Register
// Folds ConvI2L over LoadI into a single movslq (sign-extend 32 -> 64 bits).
6231 instruct loadI2L(rRegL dst, memory mem)
6232 %{
6233   match(Set dst (ConvI2L (LoadI mem)));
6234 
6235   ins_cost(125);
6236   format %{ "movslq  $dst, $mem\t# int -> long" %}
6237 
6238   ins_encode %{
6239     __ movslq($dst$$Register, $mem$$Address);
6240   %}
6241 
6242   ins_pipe(ialu_reg_mem);
6243 %}
6244 
6245 // Load Integer with mask 0xFF into Long Register
// Matches ConvI2L(AndI(LoadI mem, mask)) where mask is the immI_255 operand
// and emits a single zero-extending byte load (movzbq).  No ins_cost or
// flags effect is declared.
6246 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6247   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6248 
6249   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6250   ins_encode %{
6251     __ movzbq($dst$$Register, $mem$$Address);
6252   %}
6253   ins_pipe(ialu_reg_mem);
6254 %}
6255 
6256 // Load Integer with mask 0xFFFF into Long Register
// Matches ConvI2L(AndI(LoadI mem, mask)) where mask is the immI_65535
// operand and emits a single zero-extending 16-bit load (movzwq).  No
// ins_cost or flags effect is declared.
6257 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6258   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6259 
6260   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6261   ins_encode %{
6262     __ movzwq($dst$$Register, $mem$$Address);
6263   %}
6264   ins_pipe(ialu_reg_mem);
6265 %}
6266 
6267 // Load Integer with a 32-bit mask into Long Register
// General form of the masked int -> long load: emits movl followed by a
// 32-bit andl with the mask.  The flags register is declared killed.
// NOTE(review): the result is produced by 32-bit operations only; this
// relies on ConvI2L of the masked value not needing sign extension for the
// masks that reach this rule -- confirm against the immI operand/matcher.
6268 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6269   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6270   effect(KILL cr);
6271 
6272   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6273             "andl    $dst, $mask" %}
6274   ins_encode %{
6275     Register Rdst = $dst$$Register;
6276     __ movl(Rdst, $mem$$Address);
6277     __ andl(Rdst, $mask$$constant);
6278   %}
6279   ins_pipe(ialu_reg_mem);
6280 %}
6281 
6282 // Load Unsigned Integer into Long Register
// Matches the dedicated LoadUI2L node and emits a plain 32-bit movl into
// the long register (on x86-64 a 32-bit register write zero-extends into
// the upper half).
6283 instruct loadUI2L(rRegL dst, memory mem)
6284 %{
6285   match(Set dst (LoadUI2L mem));
6286 
6287   ins_cost(125);
6288   format %{ "movl    $dst, $mem\t# uint -> long" %}
6289 
6290   ins_encode %{
6291     __ movl($dst$$Register, $mem$$Address);
6292   %}
6293 
6294   ins_pipe(ialu_reg_mem);
6295 %}
6296 
6297 // Load Long
// Matches (LoadL mem) and emits a 64-bit movq.  Scheduled with the integer
// reg-mem class; the XXX marks that choice as unreviewed by the original
// author.
6298 instruct loadL(rRegL dst, memory mem)
6299 %{
6300   match(Set dst (LoadL mem));
6301 
6302   ins_cost(125);
6303   format %{ "movq    $dst, $mem\t# long" %}
6304 
6305   ins_encode %{
6306     __ movq($dst$$Register, $mem$$Address);
6307   %}
6308 
6309   ins_pipe(ialu_reg_mem); // XXX
6310 %}
6311 
6312 // Load Range
// Matches (LoadRange mem) into an int register.  Unlike the assembler-based
// loads above, this one is encoded from enc classes defined elsewhere in
// the file: REX_reg_mem, then OpcP (presumably the primary opcode, 0x8B
// here), then reg_mem for the operand bytes.
6313 instruct loadRange(rRegI dst, memory mem)
6314 %{
6315   match(Set dst (LoadRange mem));
6316 
6317   ins_cost(125); // XXX
6318   format %{ "movl    $dst, $mem\t# range" %}
6319   opcode(0x8B);
6320   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6321   ins_pipe(ialu_reg_mem);
6322 %}
6323 
6324 // Load Pointer
// Matches (LoadP mem) into a pointer register.  Same 0x8B opcode as
// loadRange, but with the REX_reg_mem_wide enc class, matching the 64-bit
// movq shown in the format string.
6325 instruct loadP(rRegP dst, memory mem)
6326 %{
6327   match(Set dst (LoadP mem));
6328 
6329   ins_cost(125); // XXX
6330   format %{ "movq    $dst, $mem\t# ptr" %}
6331   opcode(0x8B);
6332   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6333   ins_pipe(ialu_reg_mem); // XXX
6334 %}
6335 
6336 // Load Compressed Pointer
// Matches (LoadN mem) into a narrow-oop register and emits a 32-bit movl;
// no decoding of the compressed value happens here.
6337 instruct loadN(rRegN dst, memory mem)
6338 %{
6339    match(Set dst (LoadN mem));
6340 
6341    ins_cost(125); // XXX
6342    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6343    ins_encode %{
6344      __ movl($dst$$Register, $mem$$Address);
6345    %}
6346    ins_pipe(ialu_reg_mem); // XXX
6347 %}
6348 
6349 
6350 // Load Klass Pointer
// Matches (LoadKlass mem) into a pointer register.  Encoded exactly like
// loadP: REX_reg_mem_wide, opcode 0x8B, reg_mem operand bytes.
6351 instruct loadKlass(rRegP dst, memory mem)
6352 %{
6353   match(Set dst (LoadKlass mem));
6354 
6355   ins_cost(125); // XXX
6356   format %{ "movq    $dst, $mem\t# class" %}
6357   opcode(0x8B);
6358   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6359   ins_pipe(ialu_reg_mem); // XXX
6360 %}
6361 
6362 // Load narrow Klass Pointer
// Matches (LoadNKlass mem) into a narrow register and emits a 32-bit movl,
// mirroring loadN.
6363 instruct loadNKlass(rRegN dst, memory mem)
6364 %{
6365   match(Set dst (LoadNKlass mem));
6366 
6367   ins_cost(125); // XXX
6368   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6369   ins_encode %{
6370     __ movl($dst$$Register, $mem$$Address);
6371   %}
6372   ins_pipe(ialu_reg_mem); // XXX
6373 %}
6374 
6375 // Load Float
// Matches (LoadF mem) into a regF register.  The three opcode bytes are
// emitted around the REX prefix in this order: OpcP (0xF3), REX_reg_mem,
// OpcS (0x0F), OpcT (0x10), then the operand bytes -- i.e. the mandatory
// prefix precedes REX.  Cost 145; scheduled as pipe_slow.
6376 instruct loadF(regF dst, memory mem)
6377 %{
6378   match(Set dst (LoadF mem));
6379 
6380   ins_cost(145); // XXX
6381   format %{ "movss   $dst, $mem\t# float" %}
6382   opcode(0xF3, 0x0F, 0x10);
6383   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6384   ins_pipe(pipe_slow); // XXX
6385 %}
6386 
6387 // Load Double
// Variant selected when UseXmmLoadAndClearUpper is false: emits movlpd
// (66 0F 12).  Exactly one of loadD_partial / loadD is enabled at a time,
// since their predicates are complementary.
6388 instruct loadD_partial(regD dst, memory mem)
6389 %{
6390   predicate(!UseXmmLoadAndClearUpper);
6391   match(Set dst (LoadD mem));
6392 
6393   ins_cost(145); // XXX
6394   format %{ "movlpd  $dst, $mem\t# double" %}
6395   opcode(0x66, 0x0F, 0x12);
6396   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6397   ins_pipe(pipe_slow); // XXX
6398 %}
6399 
// Load Double, variant selected when UseXmmLoadAndClearUpper is true: emits
// movsd (F2 0F 10).  Complements loadD_partial, which has the negated
// predicate and the same match rule and cost.
6400 instruct loadD(regD dst, memory mem)
6401 %{
6402   predicate(UseXmmLoadAndClearUpper);
6403   match(Set dst (LoadD mem));
6404 
6405   ins_cost(145); // XXX
6406   format %{ "movsd   $dst, $mem\t# double" %}
6407   opcode(0xF2, 0x0F, 0x10);
6408   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6409   ins_pipe(pipe_slow); // XXX
6410 %}
6411 
6412 // Load Aligned Packed Byte to XMM register
// Matches (Load8B mem) into a regD register.  All five packed-load instructs
// share the movq_ld enc class (defined elsewhere) and differ only in the
// matched node and format comment.
6413 instruct loadA8B(regD dst, memory mem) %{
6414   match(Set dst (Load8B mem));
6415   ins_cost(125);
6416   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6417   ins_encode( movq_ld(dst, mem));
6418   ins_pipe( pipe_slow );
6419 %}
6420 
6421 // Load Aligned Packed Short to XMM register
// Matches (Load4S mem) into a regD register via the shared movq_ld enc
// class.  Cost 125; scheduled as pipe_slow.
6422 instruct loadA4S(regD dst, memory mem) %{
6423   match(Set dst (Load4S mem));
6424   ins_cost(125);
6425   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6426   ins_encode( movq_ld(dst, mem));
6427   ins_pipe( pipe_slow );
6428 %}
6429 
6430 // Load Aligned Packed Char to XMM register
// Matches (Load4C mem) into a regD register via the shared movq_ld enc
// class.  Cost 125; scheduled as pipe_slow.
6431 instruct loadA4C(regD dst, memory mem) %{
6432   match(Set dst (Load4C mem));
6433   ins_cost(125);
6434   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6435   ins_encode( movq_ld(dst, mem));
6436   ins_pipe( pipe_slow );
6437 %}
6438 
6439 // Load Aligned Packed Integer to XMM register
// Matches (Load2I mem) into a regD register via the shared movq_ld enc
// class.  Note the name does not follow the loadA<n><type> pattern of its
// siblings.
6440 instruct load2IU(regD dst, memory mem) %{
6441   match(Set dst (Load2I mem));
6442   ins_cost(125);
6443   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6444   ins_encode( movq_ld(dst, mem));
6445   ins_pipe( pipe_slow );
6446 %}
6447 
6448 // Load Aligned Packed Single to XMM
// Matches (Load2F mem) into a regD register via the shared movq_ld enc
// class.  Cost is 145 here, versus 125 for the packed integer loads.
6449 instruct loadA2F(regD dst, memory mem) %{
6450   match(Set dst (Load2F mem));
6451   ins_cost(145);
6452   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6453   ins_encode( movq_ld(dst, mem));
6454   ins_pipe( pipe_slow );
6455 %}
6456 
6457 // Load Effective Address
// The match rule is the address operand itself (indOffset8), so this rule
// materialises that address expression into a pointer register with leaq
// (opcode 0x8D, REX_reg_mem_wide).  Cost 110; scheduled as ialu_reg_reg_fat.
6458 instruct leaP8(rRegP dst, indOffset8 mem)
6459 %{
6460   match(Set dst mem);
6461 
6462   ins_cost(110); // XXX
6463   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6464   opcode(0x8D);
6465   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6466   ins_pipe(ialu_reg_reg_fat);
6467 %}
6468 
// Load Effective Address for the indOffset32 addressing operand.
// Same leaq encoding (0x8D, REX_reg_mem_wide) and cost as leaP8.
6469 instruct leaP32(rRegP dst, indOffset32 mem)
6470 %{
6471   match(Set dst mem);
6472 
6473   ins_cost(110);
6474   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6475   opcode(0x8D);
6476   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6477   ins_pipe(ialu_reg_reg_fat);
6478 %}
6479 
6480 // instruct leaPIdx(rRegP dst, indIndex mem)
6481 // %{
6482 //   match(Set dst mem);
6483 
6484 //   ins_cost(110);
6485 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6486 //   opcode(0x8D);
6487 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6488 //   ins_pipe(ialu_reg_reg_fat);
6489 // %}
6490 
// Load Effective Address for the indIndexOffset addressing operand.
// Same leaq encoding (0x8D, REX_reg_mem_wide) and cost as leaP8.
6491 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6492 %{
6493   match(Set dst mem);
6494 
6495   ins_cost(110);
6496   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6497   opcode(0x8D);
6498   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6499   ins_pipe(ialu_reg_reg_fat);
6500 %}
6501 
// Load Effective Address for the indIndexScale addressing operand.
// Same leaq encoding (0x8D, REX_reg_mem_wide) and cost as leaP8.
6502 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6503 %{
6504   match(Set dst mem);
6505 
6506   ins_cost(110);
6507   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6508   opcode(0x8D);
6509   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6510   ins_pipe(ialu_reg_reg_fat);
6511 %}
6512 
// Load Effective Address for the indIndexScaleOffset addressing operand.
// Same leaq encoding (0x8D, REX_reg_mem_wide) and cost as leaP8.
6513 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6514 %{
6515   match(Set dst mem);
6516 
6517   ins_cost(110);
6518   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6519   opcode(0x8D);
6520   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6521   ins_pipe(ialu_reg_reg_fat);
6522 %}
6523 
// Load Effective Address for the indPosIndexScaleOffset addressing operand.
// Same leaq encoding (0x8D, REX_reg_mem_wide) and cost as leaP8.
6524 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6525 %{
6526   match(Set dst mem);
6527 
6528   ins_cost(110);
6529   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6530   opcode(0x8D);
6531   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6532   ins_pipe(ialu_reg_reg_fat);
6533 %}
6534 
6535 // Load Effective Address which uses Narrow (32-bits) oop
// Only enabled when compressed oops are in use with a non-zero shift; the
// *Narrow lea rules are the ones guarded by a zero shift.
// Same leaq encoding (0x8D, REX_reg_mem_wide) as the other lea rules.
6536 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6537 %{
6538   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6539   match(Set dst mem);
6540 
6541   ins_cost(110);
6542   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6543   opcode(0x8D);
6544   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6545   ins_pipe(ialu_reg_reg_fat);
6546 %}
6547 
// Load Effective Address for the indOffset8Narrow operand.  Enabled only
// when the narrow-oop shift is zero.  Same leaq encoding as leaP8.
// NOTE(review): unlike leaPCompressedOopOffset, the predicate does not test
// UseCompressedOops -- presumably the operand itself implies it; confirm.
6548 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6549 %{
6550   predicate(Universe::narrow_oop_shift() == 0);
6551   match(Set dst mem);
6552 
6553   ins_cost(110); // XXX
6554   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6555   opcode(0x8D);
6556   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6557   ins_pipe(ialu_reg_reg_fat);
6558 %}
6559 
// Load Effective Address for the indOffset32Narrow operand.  Enabled only
// when the narrow-oop shift is zero.  Same leaq encoding as leaP8.
6560 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6561 %{
6562   predicate(Universe::narrow_oop_shift() == 0);
6563   match(Set dst mem);
6564 
6565   ins_cost(110);
6566   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6567   opcode(0x8D);
6568   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6569   ins_pipe(ialu_reg_reg_fat);
6570 %}
6571 
// Load Effective Address for the indIndexOffsetNarrow operand.  Enabled
// only when the narrow-oop shift is zero.  Same leaq encoding as leaP8.
6572 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6573 %{
6574   predicate(Universe::narrow_oop_shift() == 0);
6575   match(Set dst mem);
6576 
6577   ins_cost(110);
6578   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6579   opcode(0x8D);
6580   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6581   ins_pipe(ialu_reg_reg_fat);
6582 %}
6583 
// Load Effective Address for the indIndexScaleNarrow operand.  Enabled only
// when the narrow-oop shift is zero.  Same leaq encoding as leaP8.
6584 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6585 %{
6586   predicate(Universe::narrow_oop_shift() == 0);
6587   match(Set dst mem);
6588 
6589   ins_cost(110);
6590   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6591   opcode(0x8D);
6592   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6593   ins_pipe(ialu_reg_reg_fat);
6594 %}
6595 
// Load Effective Address for the indIndexScaleOffsetNarrow operand.  Enabled
// only when the narrow-oop shift is zero.  Same leaq encoding as leaP8.
6596 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6597 %{
6598   predicate(Universe::narrow_oop_shift() == 0);
6599   match(Set dst mem);
6600 
6601   ins_cost(110);
6602   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6603   opcode(0x8D);
6604   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6605   ins_pipe(ialu_reg_reg_fat);
6606 %}
6607 
// Load Effective Address for the indPosIndexScaleOffsetNarrow operand.
// Enabled only when the narrow-oop shift is zero.  Same leaq encoding as
// leaP8.
6608 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6609 %{
6610   predicate(Universe::narrow_oop_shift() == 0);
6611   match(Set dst mem);
6612 
6613   ins_cost(110);
6614   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6615   opcode(0x8D);
6616   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6617   ins_pipe(ialu_reg_reg_fat);
6618 %}
6619 
// Load an arbitrary int constant into an int register (movl dst, imm).
// Encoded by the load_immI enc class defined elsewhere.  No ins_cost is
// given, so the ADLC default applies; loadConI0 (cost 50) handles the
// zero constant.
6620 instruct loadConI(rRegI dst, immI src)
6621 %{
6622   match(Set dst src);
6623 
6624   format %{ "movl    $dst, $src\t# int" %}
6625   ins_encode(load_immI(dst, src));
6626   ins_pipe(ialu_reg_fat); // XXX
6627 %}
6628 
// Load the int constant zero by xor-ing the register with itself (opcode
// 0x33 with dst as both operands).  Because xor writes the flags, cr is
// declared killed.  Cost 50.
6629 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6630 %{
6631   match(Set dst src);
6632   effect(KILL cr);
6633 
6634   ins_cost(50);
6635   format %{ "xorl    $dst, $dst\t# int" %}
6636   opcode(0x33); /* + rd */
6637   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6638   ins_pipe(ialu_reg);
6639 %}
6640 
// Load an arbitrary long constant (movq dst, imm) via the load_immL enc
// class defined elsewhere.  At cost 150 this is the most expensive long
// constant rule; loadConL0 (50), loadConUL32 (60) and loadConL32 (70) cover
// the cheaper special cases.
6641 instruct loadConL(rRegL dst, immL src)
6642 %{
6643   match(Set dst src);
6644 
6645   ins_cost(150);
6646   format %{ "movq    $dst, $src\t# long" %}
6647   ins_encode(load_immL(dst, src));
6648   ins_pipe(ialu_reg);
6649 %}
6650 
// Load the long constant zero with a 32-bit xor of the register with itself
// (REX_reg_reg, not a _wide variant; on x86-64 a 32-bit register write
// clears the upper half as well).  cr is declared killed.  Cost 50.
6651 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6652 %{
6653   match(Set dst src);
6654   effect(KILL cr);
6655 
6656   ins_cost(50);
6657   format %{ "xorl    $dst, $dst\t# long" %}
6658   opcode(0x33); /* + rd */
6659   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6660   ins_pipe(ialu_reg); // XXX
6661 %}
6662 
// Materialize a long constant that matches the immUL32 operand (described by
// the format string as an unsigned 32-bit value).  Printed as a 32-bit movl;
// cost 60 makes it preferred over the generic loadConL (150).
// The byte encoding is produced by load_immUL32 (defined elsewhere).
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode(load_immUL32(dst, src));
  ins_pipe(ialu_reg);
%}
6672 
// Materialize a long constant that matches the immL32 operand (described by
// the format string as a 32-bit value).  Printed as movq with an immediate;
// cost 70 places it between loadConUL32 (60) and the generic loadConL (150).
// The byte encoding is produced by load_immL32 (defined elsewhere).
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode(load_immL32(dst, src));
  ins_pipe(ialu_reg);
%}
6682 
// Materialize an arbitrary pointer constant (immP) in a pointer register.
// Printed as movq; the byte encoding is produced by load_immP (defined
// elsewhere).
instruct loadConP(rRegP dst, immP src)
%{
  match(Set dst src);

  format %{ "movq    $dst, $src\t# ptr" %}
  ins_encode(load_immP(dst, src));
  ins_pipe(ialu_reg_fat); // XXX
%}
6691 
// Materialize the NULL pointer constant (immP0) with "xorl dst, dst"
// (opcode 0x33).  The 32-bit write zero-extends to the full 64-bit register
// on x86-64.  XOR modifies the condition codes, hence KILL cr.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  opcode(0x33); /* + rd */
  ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
6703 
// Materialize a pointer constant that matches the immP31 operand (described
// by the format string as a positive 32-bit value) using a 32-bit movl.
// The byte encoding is produced by load_immP31 (defined elsewhere).
// NOTE(review): a plain movl does not modify the condition codes; whether
// the KILL cr effect is actually required depends on what load_immP31
// emits -- confirm.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode(load_immP31(dst, src));
  ins_pipe(ialu_reg);
%}
6714 
// Load an arbitrary float constant (immF) into an XMM float register.
// The format string shows a movss from memory ("[$src]"); the byte encoding
// is produced by load_conF (defined elsewhere).
instruct loadConF(regF dst, immF src)
%{
  match(Set dst src);
  ins_cost(125);

  format %{ "movss   $dst, [$src]" %}
  ins_encode(load_conF(dst, src));
  ins_pipe(pipe_slow);
%}
6724 
// Materialize the compressed NULL pointer constant (immN0) by XOR-ing the
// destination register with itself.  XOR modifies the condition codes, so
// the flags register is declared killed.
// The format string prints "$dst, $dst" so that the disassembly listing
// matches the instruction that the encoding below actually emits.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6734 
// Materialize a compressed-oop constant (immN) in a narrow-oop register via
// MacroAssembler::set_narrow_oop.  A NULL constant is treated as a fatal
// error in this rule (ShouldNotReachHere).
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != NULL) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      // NULL must never be encoded through this rule.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
6750 
// Materialize float 0.0 (immF0) by XOR-ing the XMM register with itself
// (opcode 0F 57 = xorps).  Cost 100 makes it cheaper than the generic
// loadConF (125).
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  opcode(0x0F, 0x57);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(pipe_slow);
%}
6761 
6762 // Use the same format since predicate() can not be used here.
// Load an arbitrary double constant (immD) into an XMM double register.
// The format string shows a movsd from memory ("[$src]"); the byte encoding
// is produced by load_conD (defined elsewhere).
instruct loadConD(regD dst, immD src)
%{
  match(Set dst src);
  ins_cost(125);

  format %{ "movsd   $dst, [$src]" %}
  ins_encode(load_conD(dst, src));
  ins_pipe(pipe_slow);
%}
6772 
// Materialize double 0.0 (immD0) by XOR-ing the XMM register with itself
// (opcode 66 0F 57 = xorpd).  The 0x66 prefix (OpcP) is listed before the
// REX helper, and the 0F 57 opcode bytes follow it.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  opcode(0x66, 0x0F, 0x57);
  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
  ins_pipe(pipe_slow);
%}
6783 
// Reload an int from a stack slot into an int register.
// Opcode 0x8B is MOV reg, r/m; printed as movl.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6794 
// Reload a long from a stack slot into a long register.
// Opcode 0x8B is MOV reg, r/m; printed as the 64-bit movq form.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6805 
// Reload a pointer from a stack slot into a pointer register.
// Opcode 0x8B is MOV reg, r/m; printed as the 64-bit movq form.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
6816 
// Reload a float from a stack slot into an XMM float register.
// Opcode F3 0F 10 is MOVSS xmm, m32; the F3 prefix (OpcP) is listed before
// the REX helper.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  opcode(0xF3, 0x0F, 0x10);
  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
  ins_pipe(pipe_slow); // XXX
%}
6827 
6828 // Use the same format since predicate() can not be used here.
// Reload a double from a stack slot into an XMM double register.
// Unlike the sibling stack-slot loads, this uses the MacroAssembler:
// movdbl picks the actual load instruction, and the slot is addressed as
// rsp + the operand's displacement.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
6840 
6841 // Prefetch instructions.
6842 // Must be safe to execute with invalid address (cannot fault).
6843 
// Read prefetch, selected when ReadPrefetchInstr == 3.
// Encodes opcode 0F 0D with ModRM reg field /0 (printed as PREFETCHR).
instruct prefetchr( memory mem ) %{
  predicate(ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6854 
// Read prefetch, selected when ReadPrefetchInstr == 0.
// Encodes opcode 0F 18 with ModRM reg field /0 (PREFETCHNTA).
instruct prefetchrNTA( memory mem ) %{
  predicate(ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6865 
// Read prefetch, selected when ReadPrefetchInstr == 1.
// Encodes opcode 0F 18 with ModRM reg field /1 (PREFETCHT0).
instruct prefetchrT0( memory mem ) %{
  predicate(ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6876 
// Read prefetch, selected when ReadPrefetchInstr == 2.
// Encodes opcode 0F 18 with ModRM reg field /3 (PREFETCHT2).
instruct prefetchrT2( memory mem ) %{
  predicate(ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
  ins_pipe(ialu_mem);
%}
6887 
// Write (allocation) prefetch, selected when AllocatePrefetchInstr == 3.
// Encodes opcode 0F 0D with ModRM reg field /1 (PREFETCHW).
instruct prefetchw( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6898 
// Write (allocation) prefetch, selected when AllocatePrefetchInstr == 0.
// Encodes opcode 0F 18 with ModRM reg field /0 (PREFETCHNTA).
instruct prefetchwNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
  ins_pipe(ialu_mem);
%}
6909 
// Write (allocation) prefetch, selected when AllocatePrefetchInstr == 1.
// Encodes opcode 0F 18 with ModRM reg field /1 (PREFETCHT0).
instruct prefetchwT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
  ins_pipe(ialu_mem);
%}
6920 
// Write (allocation) prefetch, selected when AllocatePrefetchInstr == 2.
// Encodes opcode 0F 18 with ModRM reg field /3 (PREFETCHT2).
instruct prefetchwT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
  ins_pipe(ialu_mem);
%}
6931 
6932 //----------Store Instructions-------------------------------------------------
6933 
6934 // Store Byte
// Store the low byte of an int register to memory (StoreB).
// Opcode 0x88 is MOV r/m8, r8.  A byte-register-specific REX helper
// (REX_breg_mem) is used here instead of the REX_reg_mem helper used by the
// wider stores.
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  opcode(0x88);
  ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6945 
6946 // Store Char/Short
// Store the low 16 bits of an int register to memory (StoreC).
// Opcode 0x89 is MOV r/m, reg; SizePrefix is listed first to select the
// 16-bit operand size (printed as movw).
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  opcode(0x89);
  ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6957 
6958 // Store Integer
// Store an int register to memory (StoreI).
// Opcode 0x89 is MOV r/m, reg; printed as movl.
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6969 
6970 // Store Long
// Store a long register to memory (StoreL).
// Opcode 0x89 is MOV r/m, reg; printed as the 64-bit movq form.
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg); // XXX
%}
6981 
6982 // Store Pointer
// Store a pointer register to memory (StoreP).  The source operand class is
// any_RegP rather than rRegP.
// Opcode 0x89 is MOV r/m, reg; printed as the 64-bit movq form.
instruct storeP(memory mem, any_RegP src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_mem_reg);
%}
6993 
// Store a NULL pointer by writing r12 to memory instead of an immediate.
// Only eligible when compressed oops are enabled and the narrow-oop base is
// NULL; per the format string, r12 (the heap-base register) then holds 0.
// Cost 125 makes it cheaper than the immediate form storeImmP (150).
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7006 
7007 // Store NULL Pointer, mark word, or other simple pointer constant.
// Store a pointer constant matching immP31 directly to memory.
// Opcode C7 /0 is MOV r/m, imm32; the constant is emitted as a 32-bit
// immediate (Con32) and printed as a 64-bit movq.
instruct storeImmP(memory mem, immP31 src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7018 
7019 // Store Compressed Pointer
// Store a compressed pointer (narrow oop) register to memory (StoreN) with
// a 32-bit movl emitted through the MacroAssembler.
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7031 
// Store a compressed NULL pointer by writing the low 32 bits of r12 to
// memory.  Only eligible when the narrow-oop base is NULL; per the format
// string, r12 (the heap-base register) then holds 0.
// Cost 125 makes it cheaper than the immediate form storeImmN (150).
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(Universe::narrow_oop_base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7044 
// Store a compressed-oop constant (immN) directly to memory.
// A non-NULL constant goes through MacroAssembler::set_narrow_oop; a NULL
// constant is written as a plain 32-bit zero.
instruct storeImmN(memory mem, immN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != NULL) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      // NULL: store an immediate 32-bit zero.
      __ movl($mem$$Address, (int32_t)0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
7061 
7062 // Store Integer Immediate
// Store int 0 by writing the low 32 bits of r12 to memory instead of an
// immediate.  Only eligible when compressed oops are enabled and the
// narrow-oop base is NULL; per the format string, r12 then holds 0.
// Cost 125 makes it cheaper than the immediate form storeImmI (150).
instruct storeImmI0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7075 
// Store an int constant (immI) directly to memory.
// Opcode C7 /0 is MOV r/m32, imm32; the constant is emitted as a 32-bit
// immediate (Con32).
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7086 
7087 // Store Long Immediate
// Store long 0 by writing r12 to memory instead of an immediate.
// Only eligible when compressed oops are enabled and the narrow-oop base is
// NULL; per the format string, r12 then holds 0.
// Cost 125 makes it cheaper than the immediate form storeImmL (150).
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7100 
// Store a long constant matching immL32 directly to memory.
// Opcode C7 /0 is MOV r/m, imm32; the constant is emitted as a 32-bit
// immediate (Con32) and printed as a 64-bit movq.
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}
7111 
7112 // Store Short/Char Immediate
// Store short/char 0 by writing the low 16 bits of r12 to memory instead of
// an immediate.  Only eligible when compressed oops are enabled and the
// narrow-oop base is NULL; per the format string, r12 then holds 0.
instruct storeImmC0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7125 
// Store a 16-bit constant (immI16) directly to memory for StoreC.
// Only eligible when the UseStoreImmI16 flag is set.  Encoded as C7 /0 with
// SizePrefix listed first and a 16-bit immediate (Con16).
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
  ins_pipe(ialu_mem_imm);
%}
7137 
7138 // Store Byte Immediate
// Store byte 0 by writing the low byte of r12 to memory instead of an
// immediate.  Only eligible when compressed oops are enabled and the
// narrow-oop base is NULL; per the format string, r12 then holds 0.
// The format annotation says "byte" because this rule matches StoreB and
// emits movb.
instruct storeImmB0(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7151 
// Store an 8-bit constant (immI8) directly to memory for StoreB.
// Opcode C6 /0 is MOV r/m8, imm8; the immediate is emitted by Con8or32
// (defined elsewhere).
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7162 
7163 // Store Aligned Packed Byte XMM register to memory
// Matches Store8B: writes a packed value held in a regD (XMM) register to
// memory.  Printed as MOVQ; the encoding is produced by movq_st (defined
// elsewhere).
instruct storeA8B(memory mem, regD src) %{
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7171 
7172 // Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C: writes a packed value held in a regD (XMM) register to
// memory.  Printed as MOVQ; shares the movq_st encoding with the other
// packed stores.
instruct storeA4C(memory mem, regD src) %{
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7180 
7181 // Store Aligned Packed Integer XMM register to memory
// Matches Store2I: writes a packed value held in a regD (XMM) register to
// memory.  Printed as MOVQ; shares the movq_st encoding with the other
// packed stores.
instruct storeA2I(memory mem, regD src) %{
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7189 
7190 // Store CMS card-mark Immediate
// Card-mark store of 0 (StoreCM) done by writing the low byte of r12 instead
// of an immediate.  Only eligible when compressed oops are enabled and the
// narrow-oop base is NULL; per the format string, r12 then holds 0.
// Cost 125 makes it cheaper than the immediate form storeImmCM0 (150).
instruct storeImmCM0_reg(memory mem, immI0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7203 
// Card-mark store of 0 (StoreCM) using an immediate byte.
// Opcode C6 /0 is MOV r/m8, imm8; this form has no predicate.
instruct storeImmCM0(memory mem, immI0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7214 
7215 // Store Aligned Packed Single Float XMM register to memory
// Matches Store2F: writes a packed value held in a regD (XMM) register to
// memory.  Printed as MOVQ; shares the movq_st encoding with the other
// packed stores.
instruct storeA2F(memory mem, regD src) %{
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7223 
7224 // Store Float
// Store a float from an XMM register to memory (StoreF).
// Opcode F3 0F 11 is MOVSS m32, xmm; the F3 prefix (OpcP) is listed before
// the REX helper.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
  ins_pipe(pipe_slow); // XXX
%}
7235 
7236 // Store immediate Float value (it is faster than store from XMM register)
// Store float 0.0 by writing the low 32 bits of r12 to memory.
// Only eligible when compressed oops are enabled and the narrow-oop base is
// NULL; per the format string, r12 then holds 0.  Cost 25 makes it the
// cheapest StoreF form in this group.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7249 
// Store a float constant (immF) to memory with an integer move.
// Opcode C7 /0 is MOV r/m32, imm32; Con32F_as_bits supplies the immediate
// (by its name, the float's raw bit pattern -- defined elsewhere).
// Cost 50 makes it cheaper than the XMM store storeF (95).
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}
7260 
7261 // Store Double
// Store a double from an XMM register to memory (StoreD).
// Opcode F2 0F 11 is MOVSD m64, xmm; the F2 prefix (OpcP) is listed before
// the REX helper.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
  ins_pipe(pipe_slow); // XXX
%}
7272 
7273 // Store immediate double 0.0 (it is faster than store from XMM register)
// Store double 0.0 to memory with an integer move of an immediate
// (C7 /0, printed as movq).  Its predicate is the exact complement of
// storeD0's: it applies when compressed oops are off or the narrow-oop base
// is non-NULL, i.e. when the r12-based form is not eligible.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  opcode(0xC7); /* C7 /0 */
  ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}
7285 
// Store double 0.0 by writing r12 to memory.
// Only eligible when compressed oops are enabled and the narrow-oop base is
// NULL; per the format string, r12 then holds 0.  Its predicate is the
// complement of storeD0_imm's.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
7298 
// Store an int register into a stack slot.
// Opcode 0x89 is MOV r/m, reg; printed as movl.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
7309 
// Store a long register into a stack slot.
// Opcode 0x89 is MOV r/m, reg; printed as the 64-bit movq form.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7320 
// Store a pointer register into a stack slot.
// Opcode 0x89 is MOV r/m, reg; printed as the 64-bit movq form.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7331 
// Store a float from an XMM register into a stack slot.
// Opcode F3 0F 11 is MOVSS m32, xmm.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  opcode(0xF3, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow); // XXX
%}
7342 
// Store a double from an XMM register into a stack slot.
// Opcode F2 0F 11 is MOVSD m64, xmm.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  opcode(0xF2, 0x0F, 0x11);
  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
  ins_pipe(pipe_slow); // XXX
%}
7353 
7354 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int register in place (ReverseBytesI); the
// match rule uses dst as both input and output.
// BSWAP is encoded as 0F followed by C8 + register number.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
  ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
  ins_pipe( ialu_reg );
%}
7363 
// Reverse the byte order of a long register in place (ReverseBytesL); the
// match rule uses dst as both input and output.
// Same BSWAP opcode as the int form, with the wide REX helper for the
// 64-bit operand size.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}

  opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
  ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
  ins_pipe( ialu_reg);
%}
7373 
// Fused int load + byte reversal: matches ReverseBytesI applied directly to
// a LoadI.  The encoding lists two instructions back to back: a MOV load
// (primary opcode 0x8B) into dst, then a BSWAP of dst (secondary 0x0F plus
// tertiary 0xC8, presumably combined with the register by opc3_reg).
instruct loadI_reversed(rRegI dst, memory src) %{
  match(Set dst (ReverseBytesI (LoadI src)));

  format %{ "bswap_movl $dst, $src" %}
  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
  ins_pipe( ialu_reg_mem );
%}
7382 
// Fused long load + byte reversal: matches ReverseBytesL applied directly to
// a LoadL.  The encoding lists a 64-bit MOV load (primary opcode 0x8B) into
// dst, then a 64-bit BSWAP of dst (secondary 0x0F plus tertiary 0xC8,
// presumably combined with the register by opc3_reg).
instruct loadL_reversed(rRegL dst, memory src) %{
  match(Set dst (ReverseBytesL (LoadL src)));

  format %{ "bswap_movq $dst, $src" %}
  opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
  ins_pipe( ialu_reg_mem );
%}
7391 
// Fused byte reversal + int store: matches StoreI of a ReverseBytesI value.
// The encoding lists a BSWAP of src (0F, C8 + reg) followed by a MOV store
// (tertiary opcode 0x89) of src to dst.
// NOTE(review): the BSWAP is applied to the src register in place, yet no
// effect() declares src as modified -- confirm that src cannot be live
// after this instruction, otherwise its value is left byte-swapped.
instruct storeI_reversed(memory dst, rRegI src) %{
  match(Set dst (StoreI dst (ReverseBytesI  src)));

  format %{ "movl_bswap $dst, $src" %}
  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
  ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
  ins_pipe( ialu_mem_reg );
%}
7400 
// Fused byte reversal + long store: matches StoreL of a ReverseBytesL value.
// The encoding lists a 64-bit BSWAP of src (0F, C8 + reg) followed by a
// 64-bit MOV store (tertiary opcode 0x89) of src to dst.
// NOTE(review): the BSWAP is applied to the src register in place, yet no
// effect() declares src as modified -- confirm that src cannot be live
// after this instruction, otherwise its value is left byte-swapped.
instruct storeL_reversed(memory dst, rRegL src) %{
  match(Set dst (StoreL dst (ReverseBytesL  src)));

  format %{ "movq_bswap $dst, $src" %}
  opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
  ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
  ins_pipe( ialu_mem_reg );
%}
7409 
7410 
7411 //---------- Zeros Count Instructions ------------------------------------------
7412 
// Count leading zeros of an int with a single lzcntl instruction.
// Only eligible when UseCountLeadingZerosInstruction is set; otherwise the
// _bsr variant is used.  The flags register is declared killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7424 
// Count leading zeros of an int without lzcnt (used when
// UseCountLeadingZerosInstruction is off).
// bsrl yields the index of the highest set bit and sets ZF when the source
// is zero.  In the zero case the index is forced to -1.  The result is then
// computed as (BitsPerInt - 1) - index via negl + addl, which gives 32 for
// a zero input.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 31" %}
  ins_encode %{
    Label have_index;
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    // result = index of the highest set bit; ZF set if value == 0
    __ bsrl(result, value);
    __ jccb(Assembler::notZero, have_index);
    // value was zero: pretend the highest set bit is at index -1
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
7449 
// Count leading zeros of a long with a single lzcntq instruction; the result
// operand is an int register.  Only eligible when
// UseCountLeadingZerosInstruction is set.  The flags register is declared
// killed.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7461 
// Count leading zeros of a long without lzcnt (used when
// UseCountLeadingZerosInstruction is off).
// bsrq yields the index of the highest set bit and sets ZF when the source
// is zero.  In the zero case the index is forced to -1.  The result is then
// computed as (BitsPerLong - 1) - index via negl + addl, which gives 64 for
// a zero input.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 63" %}
  ins_encode %{
    Label have_index;
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    // result = index of the highest set bit; ZF set if value == 0
    __ bsrq(result, value);
    __ jccb(Assembler::notZero, have_index);
    // value was zero: pretend the highest set bit is at index -1
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerLong - 1) - index
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
7486 
// Count trailing zeros of an int.
// bsfl yields the index of the lowest set bit, which is the answer for a
// non-zero input.  It sets ZF when the source is zero, in which case the
// result is replaced by BitsPerInt.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Label finished;
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    __ bsfl(result, value);
    __ jccb(Assembler::notZero, finished);
    // value was zero: every bit counts as a trailing zero
    __ movl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
7505 
// Count trailing zeros of a long; the result operand is an int register.
// bsfq yields the index of the lowest set bit, which is the answer for a
// non-zero input.  It sets ZF when the source is zero, in which case the
// result is replaced by BitsPerLong.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 64\n"
      "done:" %}
  ins_encode %{
    Label finished;
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    __ bsfq(result, value);
    __ jccb(Assembler::notZero, finished);
    // value was zero: every bit counts as a trailing zero
    __ movl(result, BitsPerLong);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
7524 
7525 
7526 //---------- Population Count Instructions -------------------------------------
7527 
// Population count of an int register (PopCountI) using popcntl.
// Only eligible when UsePopCountInstruction is set.
// NOTE(review): no flags effect is declared here, unlike the zero-count
// rules above -- confirm that is intended for POPCNT.
instruct popCountI(rRegI dst, rRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7538 
// Population count of an int loaded from memory: folds the LoadI into the
// popcntl memory-operand form.  Only eligible when UsePopCountInstruction is
// set.
instruct popCountI_mem(rRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
7549 
7550 // Note: Long.bitCount(long) returns an int.
// Population count of a long register (PopCountL) using popcntq; the result
// operand is an int register.  Only eligible when UsePopCountInstruction is
// set.
instruct popCountL(rRegI dst, rRegL src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7561 
7562 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: folds the LoadL into the
// popcntq memory-operand form; the result operand is an int register.
// Only eligible when UsePopCountInstruction is set.
instruct popCountL_mem(rRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
7573 
7574 
7575 //----------MemBar Instructions-----------------------------------------------
7576 // Memory barrier flavors
7577 
// Acquire barrier (MemBarAcquire).  Emits no machine code: the encoding is
// empty and the declared size is 0.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7588 
// Acquire barrier selected when Matcher::prior_fast_lock(n) holds.  Per the
// format string, the CMPXCHG in the preceding FastLock already provides the
// ordering, so nothing is emitted (empty encoding, size 0).
instruct membar_acquire_lock()
%{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7600 
// Release barrier (MemBarRelease).  Emits no machine code: the encoding is
// empty and the declared size is 0.
instruct membar_release()
%{
  match(MemBarRelease);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7611 
// Release barrier selected when Matcher::post_fast_unlock(n) holds.  Per the
// format string, a FastUnlock follows, so nothing is emitted (empty
// encoding, size 0).
instruct membar_release_lock()
%{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7623 
// Volatile (StoreLoad) barrier.  The encoding delegates to
// MacroAssembler::membar(Assembler::StoreLoad).  The format template prints
// "lock addl [rsp + #0], 0" on multiprocessor systems (os::is_MP()) and an
// empty encoding otherwise; the actual emission is decided inside membar(),
// which is not visible in this section.  The flags register is declared
// killed (the printed addl writes flags).
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{ 
    $$template
    if (os::is_MP()) {
      $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
7642 
// Volatile barrier that can be elided: selected when
// Matcher::post_store_load_barrier(n) holds.  Cost 0 (versus 400 for
// membar_volatile) and an empty encoding of size 0.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
7654 
7655 //----------Move Instructions--------------------------------------------------
7656 
// Reinterpret a long as a pointer (CastX2P).  Printed as a 64-bit
// register-to-register movq; the encoding is produced by enc_copy_wide
// (defined elsewhere).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode(enc_copy_wide(dst, src));
  ins_pipe(ialu_reg_reg); // XXX
%}
7665 
// Reinterpret a pointer as a long (CastP2X).  Printed as a 64-bit
// register-to-register movq; the encoding is produced by enc_copy_wide
// (defined elsewhere).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode(enc_copy_wide(dst, src));
  ins_pipe(ialu_reg_reg); // XXX
%}
7674 
7675 
7676 // Convert oop pointer into compressed form
// Compress an oop whose type is not known to be non-null (EncodeP).
// The value is first copied into dst when src and dst are different
// registers, then compressed in place by MacroAssembler::encode_heap_oop.
// The flags register is declared killed.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register oop_reg    = $src$$Register;
    Register narrow_reg = $dst$$Register;
    // encode_heap_oop works in place, so the oop has to be in dst first.
    if (narrow_reg != oop_reg) {
      __ movq(narrow_reg, oop_reg);
    }
    __ encode_heap_oop(narrow_reg);
  %}
  ins_pipe(ialu_reg_long);
%}
7692 
// Compress an oop whose type is known to be non-null (EncodeP with a
// TypePtr::NotNull bottom type).  Uses the two-register form of
// MacroAssembler::encode_heap_oop_not_null, so no explicit copy is needed.
// The flags register is declared killed.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}
7703 
// Decompress a narrow oop whose type is neither NotNull nor Constant
// (DecodeN).  The value is first copied into dst when src and dst are
// different registers, then decompressed in place by
// MacroAssembler::decode_heap_oop.  The flags register is declared killed.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register narrow_reg = $src$$Register;
    Register oop_reg    = $dst$$Register;
    // decode_heap_oop works in place, so the narrow oop has to be in dst.
    if (oop_reg != narrow_reg) {
      __ movq(oop_reg, narrow_reg);
    }
    __ decode_heap_oop(oop_reg);
  %}
  ins_pipe(ialu_reg_long);
%}
7720 
7721 instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{  // DecodeN rule for NotNull or Constant results; declares no flags operand or KILL
7722   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7723             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7724   match(Set dst (DecodeN src));
7725   format %{ "decode_heap_oop_not_null $dst,$src" %}
7726   ins_encode %{
7727     Register s = $src$$Register;
7728     Register d = $dst$$Register;
7729     if (s != d) {
7730       __ decode_heap_oop_not_null(d, s);  // distinct registers: use the two-operand form
7731     } else {
7732       __ decode_heap_oop_not_null(d);  // same register: decode in place
7733     }
7734   %}
7735   ins_pipe(ialu_reg_long);
7736 %}
7737 
7738 
7739 //----------Conditional Move---------------------------------------------------
7740 // Jump: table jump where the index is the switch value shifted left by a small constant
7741 // dummy instruction for generating temp registers
7742 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7743   match(Jump (LShiftL switch_val shift));
7744   ins_cost(350);
7745   predicate(false);  // constant-false predicate: this rule is never selected
7746   effect(TEMP dest);  // dest is a scratch register, not an input or result
7747 
7748   format %{ "leaq    $dest, table_base\n\t"
7749             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7750   ins_encode(jump_enc_offset(switch_val, shift, dest));
7751   ins_pipe(pipe_jmp);
7752   ins_pc_relative(1);
7753 %}
7754 
7755 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{  // table jump indexed by (switch_val << shift) + offset
7756   match(Jump (AddL (LShiftL switch_val shift) offset));
7757   ins_cost(350);
7758   effect(TEMP dest);  // dest is a scratch register, not an input or result
7759 
7760   format %{ "leaq    $dest, table_base\n\t"
7761             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7762   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7763   ins_pipe(pipe_jmp);
7764   ins_pc_relative(1);
7765 %}
7766 
7767 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{  // table jump indexed directly by switch_val
7768   match(Jump switch_val);
7769   ins_cost(350);
7770   effect(TEMP dest);  // dest is a scratch register, not an input or result
7771 
7772   format %{ "leaq    $dest, table_base\n\t"
7773             "jmp     [$dest + $switch_val]\n\t" %}
7774   ins_encode(jump_enc(switch_val, dest));
7775   ins_pipe(pipe_jmp);
7776   ins_pc_relative(1);
7777 %}
7778 
7779 // Conditional move: int, register source, condition from cmpOp / rFlagsReg (printed as "signed")
7780 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7781 %{
7782   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7783 
7784   ins_cost(200); // XXX
7785   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7786   opcode(0x0F, 0x40);
7787   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // prefix, then the cop-dependent cmov opcode, then the register pair
7788   ins_pipe(pipe_cmov_reg);
7789 %}
7790 
7791 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{  // int conditional move on cmpOpU / rFlagsRegU (printed as "unsigned")
7792   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7793 
7794   ins_cost(200); // XXX
7795   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7796   opcode(0x0F, 0x40);
7797   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovI_reg
7798   ins_pipe(pipe_cmov_reg);
7799 %}
7800 
7801 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7802   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7803   ins_cost(200);
7804   expand %{
7805     cmovI_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7806   %}
7807 %}
7808 
7809 // Conditional move: int, source is a LoadI folded into the instruction as a memory operand
7810 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7811   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7812 
7813   ins_cost(250); // XXX
7814   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7815   opcode(0x0F, 0x40);
7816   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));  // register/memory counterpart of the cmovI_reg sequence
7817   ins_pipe(pipe_cmov_mem);
7818 %}
7819 
7820 // Conditional move: int, folded LoadI source, condition from cmpOpU / rFlagsRegU (printed as "unsigned")
7821 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7822 %{
7823   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7824 
7825   ins_cost(250); // XXX
7826   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7827   opcode(0x0F, 0x40);
7828   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));  // same encoding sequence as cmovI_mem
7829   ins_pipe(pipe_cmov_mem);
7830 %}
7831 
7832 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7833   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7834   ins_cost(250);
7835   expand %{
7836     cmovI_memU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7837   %}
7838 %}
7839 
7840 // Conditional move: compressed pointer (rRegN), register source; printed as cmovl and encoded without the wide REX helper
7841 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7842 %{
7843   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7844 
7845   ins_cost(200); // XXX
7846   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7847   opcode(0x0F, 0x40);
7848   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7849   ins_pipe(pipe_cmov_reg);
7850 %}
7851 
7852 // Conditional move: compressed pointer (rRegN), register source, condition from cmpOpU / rFlagsRegU
7853 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7854 %{
7855   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7856 
7857   ins_cost(200); // XXX
7858   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7859   opcode(0x0F, 0x40);
7860   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovN_reg
7861   ins_pipe(pipe_cmov_reg);
7862 %}
7863 
7864 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7865   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7866   ins_cost(200);
7867   expand %{
7868     cmovN_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7869   %}
7870 %}
7871 
7872 // Conditional move: pointer (rRegP), register source; printed as cmovq and encoded with the wide REX helper
7873 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7874 %{
7875   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7876 
7877   ins_cost(200); // XXX
7878   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7879   opcode(0x0F, 0x40);
7880   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7881   ins_pipe(pipe_cmov_reg);  // XXX
7882 %}
7883 
7884 // Conditional move: pointer (rRegP), register source, condition from cmpOpU / rFlagsRegU
7885 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7886 %{
7887   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7888 
7889   ins_cost(200); // XXX
7890   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7891   opcode(0x0F, 0x40);
7892   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovP_reg
7893   ins_pipe(pipe_cmov_reg); // XXX
7894 %}
7895 
7896 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7897   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7898   ins_cost(200);
7899   expand %{
7900     cmovP_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7901   %}
7902 %}
7903 
7904 // DISABLED: Requires the ADLC to emit a bottom_type call that
7905 // correctly meets the two pointer arguments; one is an incoming
7906 // register but the other is a memory operand.  ALSO appears to
7907 // be buggy with implicit null checks.
7908 //
7909 //// Conditional move
7910 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7911 //%{
7912 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7913 //  ins_cost(250);
7914 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7915 //  opcode(0x0F,0x40);
7916 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7917 //  ins_pipe( pipe_cmov_mem );
7918 //%}
7919 //
7920 //// Conditional move
7921 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7922 //%{
7923 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7924 //  ins_cost(250);
7925 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7926 //  opcode(0x0F,0x40);
7927 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7928 //  ins_pipe( pipe_cmov_mem );
7929 //%}
7930 
7931 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)  // long conditional move, register source; printed as cmovq
7932 %{
7933   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7934 
7935   ins_cost(200); // XXX
7936   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7937   opcode(0x0F, 0x40);
7938   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // wide REX helper, cop-dependent cmov opcode, register pair
7939   ins_pipe(pipe_cmov_reg);  // XXX
7940 %}
7941 
7942 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)  // long conditional move whose source is a folded LoadL
7943 %{
7944   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7945 
7946   ins_cost(200); // XXX
7947   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7948   opcode(0x0F, 0x40);
7949   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));  // register/memory counterpart of the cmovL_reg sequence
7950   ins_pipe(pipe_cmov_mem);  // XXX
7951 %}
7952 
7953 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)  // long conditional move, register source, cmpOpU / rFlagsRegU
7954 %{
7955   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7956 
7957   ins_cost(200); // XXX
7958   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7959   opcode(0x0F, 0x40);
7960   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));  // same encoding sequence as cmovL_reg
7961   ins_pipe(pipe_cmov_reg); // XXX
7962 %}
7963 
7964 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7965   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7966   ins_cost(200);
7967   expand %{
7968     cmovL_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7969   %}
7970 %}
7971 
7972 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)  // long conditional move, folded LoadL source, cmpOpU / rFlagsRegU
7973 %{
7974   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7975 
7976   ins_cost(200); // XXX
7977   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7978   opcode(0x0F, 0x40);
7979   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));  // same encoding sequence as cmovL_mem
7980   ins_pipe(pipe_cmov_mem); // XXX
7981 %}
7982 
7983 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
7984   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7985   ins_cost(200);
7986   expand %{
7987     cmovL_memU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
7988   %}
7989 %}
7990 
7991 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)  // float conditional move; printed as a branch around a movss rather than a cmov
7992 %{
7993   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
7994 
7995   ins_cost(200); // XXX
7996   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7997             "movss     $dst, $src\n"
7998     "skip:" %}
7999   ins_encode(enc_cmovf_branch(cop, dst, src));  // encoding class defined elsewhere in this file
8000   ins_pipe(pipe_slow);
8001 %}
8002 
8003 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8004 // %{
8005 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
8006 
8007 //   ins_cost(200); // XXX
8008 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8009 //             "movss     $dst, $src\n"
8010 //     "skip:" %}
8011 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8012 //   ins_pipe(pipe_slow);
8013 // %}
8014 
8015 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)  // float conditional move on cmpOpU / rFlagsRegU; printed as a branch around a movss
8016 %{
8017   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
8018 
8019   ins_cost(200); // XXX
8020   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8021             "movss     $dst, $src\n"
8022     "skip:" %}
8023   ins_encode(enc_cmovf_branch(cop, dst, src));  // same encoding class as cmovF_reg
8024   ins_pipe(pipe_slow);
8025 %}
8026 
8027 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
8028   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8029   ins_cost(200);
8030   expand %{
8031     cmovF_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
8032   %}
8033 %}
8034 
8035 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)  // double conditional move; printed as a branch around a movsd rather than a cmov
8036 %{
8037   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
8038 
8039   ins_cost(200); // XXX
8040   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8041             "movsd     $dst, $src\n"
8042     "skip:" %}
8043   ins_encode(enc_cmovd_branch(cop, dst, src));  // encoding class defined elsewhere in this file
8044   ins_pipe(pipe_slow);
8045 %}
8046 
8047 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)  // double conditional move on cmpOpU / rFlagsRegU; printed as a branch around a movsd
8048 %{
8049   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));  // dst is both an input and the result
8050 
8051   ins_cost(200); // XXX
8052   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8053             "movsd     $dst, $src\n"
8054     "skip:" %}
8055   ins_encode(enc_cmovd_branch(cop, dst, src));  // same encoding class as cmovD_reg
8056   ins_pipe(pipe_slow);
8057 %}
8058 
8059 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{  // cmpOpUCF / rFlagsRegUCF variant with no encoding of its own
8060   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8061   ins_cost(200);
8062   expand %{
8063     cmovD_regU(cop, cr, dst, src);  // delegates to the cmpOpU rule with the same operands
8064   %}
8065 %}
8066 
8067 //----------Arithmetic Instructions--------------------------------------------
8068 //----------Addition Instructions----------------------------------------------
8069 
8070 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)  // int add, register += register
8071 %{
8072   match(Set dst (AddI dst src));  // two-address form: dst is both left input and result
8073   effect(KILL cr);  // flags are declared clobbered
8074 
8075   format %{ "addl    $dst, $src\t# int" %}
8076   opcode(0x03);
8077   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));  // prefix, primary opcode, register pair
8078   ins_pipe(ialu_reg_reg);
8079 %}
8080 
8081 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)  // int add, register += immediate
8082 %{
8083   match(Set dst (AddI dst src));  // two-address form: dst is both left input and result
8084   effect(KILL cr);  // flags are declared clobbered
8085 
8086   format %{ "addl    $dst, $src\t# int" %}
8087   opcode(0x81, 0x00); /* /0 id */
8088   ins_encode(OpcSErm(dst, src), Con8or32(src));  // opcode + register byte, then the constant via Con8or32
8089   ins_pipe( ialu_reg );
8090 %}
8091 
8092 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)  // int add, register += value loaded from memory (LoadI folded in)
8093 %{
8094   match(Set dst (AddI dst (LoadI src)));
8095   effect(KILL cr);  // flags are declared clobbered
8096 
8097   ins_cost(125); // XXX
8098   format %{ "addl    $dst, $src\t# int" %}
8099   opcode(0x03);
8100   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));  // prefix, primary opcode, register + memory operand
8101   ins_pipe(ialu_reg_mem);
8102 %}
8103 
8104 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)  // int add directly to memory: [dst] += register
8105 %{
8106   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load, add and store on the same address folded into one instruction
8107   effect(KILL cr);  // flags are declared clobbered
8108 
8109   ins_cost(150); // XXX
8110   format %{ "addl    $dst, $src\t# int" %}
8111   opcode(0x01); /* Opcode 01 /r */
8112   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note argument order: register src first, memory dst second
8113   ins_pipe(ialu_mem_reg);
8114 %}
8115 
8116 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)  // int add directly to memory: [dst] += immediate
8117 %{
8118   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load, add and store on the same address folded into one instruction
8119   effect(KILL cr);  // flags are declared clobbered
8120 
8121   ins_cost(125); // XXX
8122   format %{ "addl    $dst, $src\t# int" %}
8123   opcode(0x81); /* Opcode 81 /0 id */
8124   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // the /0 extension is passed explicitly to RM_opc_mem
8125   ins_pipe(ialu_mem_imm);
8126 %}
8127 
8128 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)  // int increment of a register; src (immI1) is matched but not encoded
8129 %{
8130   predicate(UseIncDec);  // only selectable when UseIncDec is set
8131   match(Set dst (AddI dst src));
8132   effect(KILL cr);  // flags are declared clobbered
8133 
8134   format %{ "incl    $dst\t# int" %}
8135   opcode(0xFF, 0x00); // FF /0
8136   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8137   ins_pipe(ialu_reg);
8138 %}
8139 
8140 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)  // int increment directly in memory; src (immI1) is matched but not encoded
8141 %{
8142   predicate(UseIncDec);  // only selectable when UseIncDec is set
8143   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load, add and store on the same address folded into one instruction
8144   effect(KILL cr);  // flags are declared clobbered
8145 
8146   ins_cost(125); // XXX
8147   format %{ "incl    $dst\t# int" %}
8148   opcode(0xFF); /* Opcode FF /0 */
8149   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));  // the /0 extension is passed explicitly to RM_opc_mem
8150   ins_pipe(ialu_mem_imm);
8151 %}
8152 
8153 // XXX why does that use AddI -- NOTE(review): presumably because src is the immI_M1 (minus one) constant, i.e. an add of -1; confirm against the operand definition
8154 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8155 %{
8156   predicate(UseIncDec);  // only selectable when UseIncDec is set
8157   match(Set dst (AddI dst src));
8158   effect(KILL cr);  // flags are declared clobbered
8159 
8160   format %{ "decl    $dst\t# int" %}
8161   opcode(0xFF, 0x01); // FF /1
8162   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // src is matched but not encoded
8163   ins_pipe(ialu_reg);
8164 %}
8165 
8166 // XXX why does that use AddI -- NOTE(review): presumably because src is the immI_M1 (minus one) constant, i.e. an add of -1; confirm against the operand definition
8167 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8168 %{
8169   predicate(UseIncDec);  // only selectable when UseIncDec is set
8170   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load, add and store on the same address folded into one instruction
8171   effect(KILL cr);  // flags are declared clobbered
8172 
8173   ins_cost(125); // XXX
8174   format %{ "decl    $dst\t# int" %}
8175   opcode(0xFF); /* Opcode FF /1 */
8176   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));  // the /1 extension is passed explicitly to RM_opc_mem
8177   ins_pipe(ialu_mem_imm);
8178 %}
8179 
8180 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)  // int add of register + immediate into a separate dst; no flags operand or KILL is declared
8181 %{
8182   match(Set dst (AddI src0 src1));  // three-address form: dst is not an input
8183 
8184   ins_cost(110);
8185   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8186   opcode(0x8D); /* 0x8D /r */
8187   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  (the leading 0x67 byte is what the format prints as "addr32")
8188   ins_pipe(ialu_reg_reg);
8189 %}
8190 
8191 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)  // long add, register += register
8192 %{
8193   match(Set dst (AddL dst src));  // two-address form: dst is both left input and result
8194   effect(KILL cr);  // flags are declared clobbered
8195 
8196   format %{ "addq    $dst, $src\t# long" %}
8197   opcode(0x03);
8198   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // wide REX helper, primary opcode, register pair
8199   ins_pipe(ialu_reg_reg);
8200 %}
8201 
8202 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)  // long add, register += immL32 constant
8203 %{
8204   match(Set dst (AddL dst src));  // two-address form: dst is both left input and result
8205   effect(KILL cr);  // flags are declared clobbered
8206 
8207   format %{ "addq    $dst, $src\t# long" %}
8208   opcode(0x81, 0x00); /* /0 id */
8209   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // wide opcode + register byte, then the constant via Con8or32
8210   ins_pipe( ialu_reg );
8211 %}
8212 
8213 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)  // long add, register += value loaded from memory (LoadL folded in)
8214 %{
8215   match(Set dst (AddL dst (LoadL src)));
8216   effect(KILL cr);  // flags are declared clobbered
8217 
8218   ins_cost(125); // XXX
8219   format %{ "addq    $dst, $src\t# long" %}
8220   opcode(0x03);
8221   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));  // wide REX helper, primary opcode, register + memory operand
8222   ins_pipe(ialu_reg_mem);
8223 %}
8224 
8225 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)  // long add directly to memory: [dst] += register
8226 %{
8227   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load, add and store on the same address folded into one instruction
8228   effect(KILL cr);  // flags are declared clobbered
8229 
8230   ins_cost(150); // XXX
8231   format %{ "addq    $dst, $src\t# long" %}
8232   opcode(0x01); /* Opcode 01 /r */
8233   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note argument order: register src first, memory dst second
8234   ins_pipe(ialu_mem_reg);
8235 %}
8236 
8237 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)  // long add directly to memory: [dst] += immL32 constant
8238 %{
8239   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load, add and store on the same address folded into one instruction
8240   effect(KILL cr);  // flags are declared clobbered
8241 
8242   ins_cost(125); // XXX
8243   format %{ "addq    $dst, $src\t# long" %}
8244   opcode(0x81); /* Opcode 81 /0 id */
8245   ins_encode(REX_mem_wide(dst),
8246              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));  // the /0 extension is passed explicitly to RM_opc_mem
8247   ins_pipe(ialu_mem_imm);
8248 %}
8249 
8250 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)  // long increment of a register; dst is a long operand because the rule matches AddL and encodes with REX_reg_wide
8251 %{
8252   predicate(UseIncDec);  // only selectable when UseIncDec is set
8253   match(Set dst (AddL dst src));  // two-address form: dst is both left input and result
8254   effect(KILL cr);  // flags are declared clobbered
8255 
8256   format %{ "incq    $dst\t# long" %}
8257   opcode(0xFF, 0x00); // FF /0
8258   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // src (immL1) is matched but not encoded
8259   ins_pipe(ialu_reg);
8260 %}
8261 
8262 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)  // long increment directly in memory; src (immL1) is matched but not encoded
8263 %{
8264   predicate(UseIncDec);  // only selectable when UseIncDec is set
8265   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load, add and store on the same address folded into one instruction
8266   effect(KILL cr);  // flags are declared clobbered
8267 
8268   ins_cost(125); // XXX
8269   format %{ "incq    $dst\t# long" %}
8270   opcode(0xFF); /* Opcode FF /0 */
8271   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));  // the /0 extension is passed explicitly to RM_opc_mem
8272   ins_pipe(ialu_mem_imm);
8273 %}
8274 
8275 // XXX why does that use AddL -- NOTE(review): presumably because src is the immL_M1 (minus one) constant, i.e. an add of -1; confirm against the operand definition
8276 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8277 %{
8278   predicate(UseIncDec);  // only selectable when UseIncDec is set
8279   match(Set dst (AddL dst src));
8280   effect(KILL cr);  // flags are declared clobbered
8281 
8282   format %{ "decq    $dst\t# long" %}
8283   opcode(0xFF, 0x01); // FF /1
8284   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // src is matched but not encoded
8285   ins_pipe(ialu_reg);
8286 %}
8287 
8288 // XXX why does that use AddL -- NOTE(review): presumably because src is the immL_M1 (minus one) constant, i.e. an add of -1; confirm against the operand definition
8289 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8290 %{
8291   predicate(UseIncDec);  // only selectable when UseIncDec is set
8292   match(Set dst (StoreL dst (AddL (LoadL dst) src)));  // load, add and store on the same address folded into one instruction
8293   effect(KILL cr);  // flags are declared clobbered
8294 
8295   ins_cost(125); // XXX
8296   format %{ "decq    $dst\t# long" %}
8297   opcode(0xFF); /* Opcode FF /1 */
8298   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));  // the /1 extension is passed explicitly to RM_opc_mem
8299   ins_pipe(ialu_mem_imm);
8300 %}
8301 
8302 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)  // long add of register + immL32 into a separate dst; no flags operand or KILL is declared
8303 %{
8304   match(Set dst (AddL src0 src1));  // three-address form: dst is not an input
8305 
8306   ins_cost(110);
8307   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8308   opcode(0x8D); /* 0x8D /r */
8309   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8310   ins_pipe(ialu_reg_reg);
8311 %}
8312 
8313 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)  // pointer add: pointer register += long register
8314 %{
8315   match(Set dst (AddP dst src));  // two-address form: dst is both the base pointer and the result
8316   effect(KILL cr);  // flags are declared clobbered
8317 
8318   format %{ "addq    $dst, $src\t# ptr" %}
8319   opcode(0x03);
8320   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // same encoding sequence as addL_rReg
8321   ins_pipe(ialu_reg_reg);
8322 %}
8323 
8324 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)  // pointer add: pointer register += immL32 constant
8325 %{
8326   match(Set dst (AddP dst src));  // two-address form: dst is both the base pointer and the result
8327   effect(KILL cr);  // flags are declared clobbered
8328 
8329   format %{ "addq    $dst, $src\t# ptr" %}
8330   opcode(0x81, 0x00); /* /0 id */
8331   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // same encoding sequence as addL_rReg_imm
8332   ins_pipe( ialu_reg );
8333 %}
8334 
8335 // XXX addP mem ops ????
8336 
8337 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)  // pointer + immL32 into a separate dst; no flags operand or KILL is declared
8338 %{
8339   match(Set dst (AddP src0 src1));  // three-address form: dst is not an input
8340 
8341   ins_cost(110);
8342   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8343   opcode(0x8D); /* 0x8D /r */
8344   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8345   ins_pipe(ialu_reg_reg);
8346 %}
8347 
8348 instruct checkCastPP(rRegP dst)  // CheckCastPP on a pointer register: matched in place, emits no code
8349 %{
8350   match(Set dst (CheckCastPP dst));  // input and result share the same register
8351 
8352   size(0);  // contributes zero bytes to the code stream
8353   format %{ "# checkcastPP of $dst" %}
8354   ins_encode(/* empty encoding */);
8355   ins_pipe(empty);
8356 %}
8357 
8358 instruct castPP(rRegP dst)  // CastPP on a pointer register: matched in place, emits no code
8359 %{
8360   match(Set dst (CastPP dst));  // input and result share the same register
8361 
8362   size(0);  // contributes zero bytes to the code stream
8363   format %{ "# castPP of $dst" %}
8364   ins_encode(/* empty encoding */);
8365   ins_pipe(empty);
8366 %}
8367 
8368 instruct castII(rRegI dst)  // CastII on an int register: matched in place, emits no code
8369 %{
8370   match(Set dst (CastII dst));  // input and result share the same register
8371 
8372   size(0);  // contributes zero bytes to the code stream
8373   format %{ "# castII of $dst" %}
8374   ins_encode(/* empty encoding */);
8375   ins_cost(0);  // explicitly free, unlike checkCastPP/castPP which set no cost
8376   ins_pipe(empty);
8377 %}
8378 
8379 // LoadP-locked same as a regular LoadP when used with compare-swap; encoded here as a plain 64-bit load
8380 instruct loadPLocked(rRegP dst, memory mem)
8381 %{
8382   match(Set dst (LoadPLocked mem));
8383 
8384   ins_cost(125); // XXX
8385   format %{ "movq    $dst, $mem\t# ptr locked" %}
8386   opcode(0x8B);
8387   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));  // no lock prefix is emitted by this rule
8388   ins_pipe(ialu_reg_mem); // XXX
8389 %}
8390 
8391 // LoadL-locked - same as a regular LoadL when used with compare-swap; encoded here as a plain 64-bit load
8392 instruct loadLLocked(rRegL dst, memory mem)
8393 %{
8394   match(Set dst (LoadLLocked mem));
8395 
8396   ins_cost(125); // XXX
8397   format %{ "movq    $dst, $mem\t# long locked" %}
8398   opcode(0x8B);
8399   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));  // no lock prefix is emitted by this rule
8400   ins_pipe(ialu_reg_mem); // XXX
8401 %}
8402 
8403 // Conditional-store of the updated heap-top.
8404 // Used during allocation of the shared heap.
8405 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8406 
8407 instruct storePConditional(memory heap_top_ptr,
8408                            rax_RegP oldval, rRegP newval,
8409                            rFlagsReg cr)  // NOTE(review): unlike storeIConditional/storeLConditional, no KILL oldval is declared here -- confirm this is intended
8410 %{
8411   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));  // the result of this rule is the flags register
8412  
8413   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8414             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8415   opcode(0x0F, 0xB1);
8416   ins_encode(lock_prefix,
8417              REX_reg_mem_wide(newval, heap_top_ptr),
8418              OpcP, OpcS,
8419              reg_mem(newval, heap_top_ptr));  // lock prefix, wide REX helper, two opcode bytes, then newval + memory operand
8420   ins_pipe(pipe_cmpxchg);
8421 %}
8422 
8423 // Conditional-store of an int value.
8424 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8425 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8426 %{
8427   match(Set cr (StoreIConditional mem (Binary oldval newval)));  // the result of this rule is the flags register
8428   effect(KILL oldval);  // the rax-class oldval operand is declared clobbered
8429 
8430   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8431   opcode(0x0F, 0xB1);
8432   ins_encode(lock_prefix,
8433              REX_reg_mem(newval, mem),
8434              OpcP, OpcS,
8435              reg_mem(newval, mem));  // lock prefix, REX helper, two opcode bytes, then newval + memory operand
8436   ins_pipe(pipe_cmpxchg);
8437 %}
8438 
8439 // Conditional-store of a long value.
8440 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8441 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8442 %{
8443   match(Set cr (StoreLConditional mem (Binary oldval newval)));  // the result of this rule is the flags register
8444   effect(KILL oldval);  // the rax-class oldval operand is declared clobbered
8445 
8446   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8447   opcode(0x0F, 0xB1);
8448   ins_encode(lock_prefix,
8449              REX_reg_mem_wide(newval, mem),
8450              OpcP, OpcS,
8451              reg_mem(newval, mem));  // lock prefix, wide REX helper, two opcode bytes, then newval + memory operand
8452   ins_pipe(pipe_cmpxchg);
8453 %}
8454 
8455 
8456 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8457 instruct compareAndSwapP(rRegI res,
8458                          memory mem_ptr,
8459                          rax_RegP oldval, rRegP newval,
8460                          rFlagsReg cr)  // pointer compare-and-swap producing an int result in res
8461 %{
8462   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8463   effect(KILL cr, KILL oldval);  // both the flags and the rax-class oldval operand are declared clobbered
8464 
8465   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8466             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8467             "sete    $res\n\t"
8468             "movzbl  $res, $res" %}
8469   opcode(0x0F, 0xB1);
8470   ins_encode(lock_prefix,
8471              REX_reg_mem_wide(newval, mem_ptr),
8472              OpcP, OpcS,
8473              reg_mem(newval, mem_ptr),
8474              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8475              REX_reg_breg(res, res), // movzbl
8476              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));  // three instructions are emitted in sequence: cmpxchg, sete, movzbl
8477   ins_pipe( pipe_cmpxchg );
8478 %}
8479 
8480 instruct compareAndSwapL(rRegI res,
8481                          memory mem_ptr,
8482                          rax_RegL oldval, rRegL newval,
8483                          rFlagsReg cr)  // long compare-and-swap producing an int result in res
8484 %{
8485   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8486   effect(KILL cr, KILL oldval);  // both the flags and the rax-class oldval operand are declared clobbered
8487 
8488   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8489             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8490             "sete    $res\n\t"
8491             "movzbl  $res, $res" %}
8492   opcode(0x0F, 0xB1);
8493   ins_encode(lock_prefix,
8494              REX_reg_mem_wide(newval, mem_ptr),
8495              OpcP, OpcS,
8496              reg_mem(newval, mem_ptr),
8497              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8498              REX_reg_breg(res, res), // movzbl
8499              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));  // three instructions are emitted in sequence: cmpxchg, sete, movzbl
8500   ins_pipe( pipe_cmpxchg );
8501 %}
8502 
8503 instruct compareAndSwapI(rRegI res,
8504                          memory mem_ptr,
8505                          rax_RegI oldval, rRegI newval,
8506                          rFlagsReg cr)  // int compare-and-swap producing an int result in res
8507 %{
8508   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8509   effect(KILL cr, KILL oldval);  // both the flags and the rax-class oldval operand are declared clobbered
8510 
8511   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8512             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8513             "sete    $res\n\t"
8514             "movzbl  $res, $res" %}
8515   opcode(0x0F, 0xB1);
8516   ins_encode(lock_prefix,
8517              REX_reg_mem(newval, mem_ptr),
8518              OpcP, OpcS,
8519              reg_mem(newval, mem_ptr),
8520              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8521              REX_reg_breg(res, res), // movzbl
8522              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));  // three instructions are emitted in sequence: cmpxchg, sete, movzbl
8523   ins_pipe( pipe_cmpxchg );
8524 %}
8525 
8526 
8527 instruct compareAndSwapN(rRegI res,
8528                           memory mem_ptr,
8529                           rax_RegN oldval, rRegN newval,
8530                           rFlagsReg cr) %{  // compressed-pointer compare-and-swap producing an int result in res; printed as cmpxchgl
8531   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8532   effect(KILL cr, KILL oldval);  // both the flags and the rax-class oldval operand are declared clobbered
8533 
8534   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8535             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8536             "sete    $res\n\t"
8537             "movzbl  $res, $res" %}
8538   opcode(0x0F, 0xB1);
8539   ins_encode(lock_prefix,
8540              REX_reg_mem(newval, mem_ptr),
8541              OpcP, OpcS,
8542              reg_mem(newval, mem_ptr),
8543              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8544              REX_reg_breg(res, res), // movzbl
8545              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));  // three instructions are emitted in sequence: cmpxchg, sete, movzbl
8546   ins_pipe( pipe_cmpxchg );
8547 %}
8548 
8549 //----------Subtraction Instructions-------------------------------------------
8550 
8551 // Integer Subtraction Instructions: register, immediate and memory forms of SubI follow
8552 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8553 %{
8554   match(Set dst (SubI dst src));  // two-address form: dst is both left input and result
8555   effect(KILL cr);  // flags are declared clobbered
8556 
8557   format %{ "subl    $dst, $src\t# int" %}
8558   opcode(0x2B);
8559   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));  // prefix, primary opcode, register pair
8560   ins_pipe(ialu_reg_reg);
8561 %}
8562 
8563 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)  // int subtract, register -= immediate
8564 %{
8565   match(Set dst (SubI dst src));  // two-address form: dst is both left input and result
8566   effect(KILL cr);  // flags are declared clobbered
8567 
8568   format %{ "subl    $dst, $src\t# int" %}
8569   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8570   ins_encode(OpcSErm(dst, src), Con8or32(src));  // opcode + register byte, then the constant via Con8or32
8571   ins_pipe(ialu_reg);
8572 %}
8573 
8574 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)  // int subtract, register -= value loaded from memory (LoadI folded in)
8575 %{
8576   match(Set dst (SubI dst (LoadI src)));
8577   effect(KILL cr);  // flags are declared clobbered
8578 
8579   ins_cost(125);
8580   format %{ "subl    $dst, $src\t# int" %}
8581   opcode(0x2B);
8582   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));  // prefix, primary opcode, register + memory operand
8583   ins_pipe(ialu_reg_mem);
8584 %}
8585 
8586 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)  // int subtract directly in memory: [dst] -= register
8587 %{
8588   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load, subtract and store on the same address folded into one instruction
8589   effect(KILL cr);  // flags are declared clobbered
8590 
8591   ins_cost(150);
8592   format %{ "subl    $dst, $src\t# int" %}
8593   opcode(0x29); /* Opcode 29 /r */
8594   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note argument order: register src first, memory dst second
8595   ins_pipe(ialu_mem_reg);
8596 %}
8597 
8598 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)  // int subtract directly in memory: [dst] -= immediate
8599 %{
8600   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load, subtract and store on the same address folded into one instruction
8601   effect(KILL cr);  // flags are declared clobbered
8602 
8603   ins_cost(125); // XXX
8604   format %{ "subl    $dst, $src\t# int" %}
8605   opcode(0x81); /* Opcode 81 /5 id */
8606   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // the /5 extension is passed explicitly to RM_opc_mem
8607   ins_pipe(ialu_mem_imm);
8608 %}
8609 
8610 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)  // long subtract, register -= register
8611 %{
8612   match(Set dst (SubL dst src));  // two-address form: dst is both left input and result
8613   effect(KILL cr);  // flags are declared clobbered
8614 
8615   format %{ "subq    $dst, $src\t# long" %}
8616   opcode(0x2B);
8617   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // wide REX helper, primary opcode, register pair
8618   ins_pipe(ialu_reg_reg);
8619 %}
8620 
8621 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)  // long subtract, register -= immL32 constant; dst is a long operand because the rule matches SubL and encodes with OpcSErm_wide
8622 %{
8623   match(Set dst (SubL dst src));  // two-address form: dst is both left input and result
8624   effect(KILL cr);  // flags are declared clobbered
8625 
8626   format %{ "subq    $dst, $src\t# long" %}
8627   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8628   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // wide opcode + register byte, then the constant via Con8or32
8629   ins_pipe(ialu_reg);
8630 %}
8631 
8632 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)  // long subtract, register -= value loaded from memory (LoadL folded in)
8633 %{
8634   match(Set dst (SubL dst (LoadL src)));
8635   effect(KILL cr);  // flags are declared clobbered
8636 
8637   ins_cost(125);
8638   format %{ "subq    $dst, $src\t# long" %}
8639   opcode(0x2B);
8640   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));  // wide REX helper, primary opcode, register + memory operand
8641   ins_pipe(ialu_reg_mem);
8642 %}
8643 
8644 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)  // long subtract directly in memory: [dst] -= register
8645 %{
8646   match(Set dst (StoreL dst (SubL (LoadL dst) src)));  // load, subtract and store on the same address folded into one instruction
8647   effect(KILL cr);  // flags are declared clobbered
8648 
8649   ins_cost(150);
8650   format %{ "subq    $dst, $src\t# long" %}
8651   opcode(0x29); /* Opcode 29 /r */
8652   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note argument order: register src first, memory dst second
8653   ins_pipe(ialu_mem_reg);
8654 %}
8655 
8656 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8657 %{
       // In-place 64-bit subtract of an immL32 constant from memory: matches a
       // LoadL / SubL / StoreL chain on the same address.
8658   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8659   effect(KILL cr);
8660 
8661   ins_cost(125); // XXX
8662   format %{ "subq    $dst, $src\t# long" %}
8663   opcode(0x81); /* Opcode 81 /5 id */
       // NOTE(review): OpcSE/Con8or32 presumably pick the short imm8 form when
       // the constant fits -- confirm against the encode block.
8664   ins_encode(REX_mem_wide(dst),
8665              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8666   ins_pipe(ialu_mem_imm);
8667 %}
8668 
8669 // Subtract from a pointer: matches AddP(dst, 0 - src) and emits a single
     // 64-bit subq instead of a negate followed by an add.
8670 // XXX hmpf???
     // NOTE(review): src is a 32-bit int register (rRegI) but the encoding is the
     // wide (64-bit) subq -- confirm that the upper half of src is suitably
     // extended wherever this rule can match.
8671 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8672 %{
8673   match(Set dst (AddP dst (SubI zero src)));
8674   effect(KILL cr);
8675 
8676   format %{ "subq    $dst, $src\t# ptr - int" %}
8677   opcode(0x2B);
8678   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8679   ins_pipe(ialu_reg_reg);
8680 %}
8681 
8682 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8683 %{
       // 32-bit negate of a register: matches (0 - dst) and emits negl.
       // The flags register is declared killed.
8684   match(Set dst (SubI zero dst));
8685   effect(KILL cr);
8686 
8687   format %{ "negl    $dst\t# int" %}
8688   opcode(0xF7, 0x03);  // Opcode F7 /3
8689   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8690   ins_pipe(ialu_reg);
8691 %}
8692 
8693 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8694 %{
       // In-place 32-bit negate of a memory word: matches a
       // LoadI / (0 - x) / StoreI chain on the same address.
8695   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8696   effect(KILL cr);
8697 
8698   format %{ "negl    $dst\t# int" %}
8699   opcode(0xF7, 0x03);  // Opcode F7 /3
8700   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
       // NOTE(review): a register pipe class is used for a memory operand here;
       // confirm whether a memory pipe class was intended.
8701   ins_pipe(ialu_reg);
8702 %}
8703 
8704 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8705 %{
       // 64-bit negate of a register: matches (0L - dst) and emits negq.
       // The flags register is declared killed.
8706   match(Set dst (SubL zero dst));
8707   effect(KILL cr);
8708 
8709   format %{ "negq    $dst\t# long" %}
8710   opcode(0xF7, 0x03);  // Opcode F7 /3
8711   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8712   ins_pipe(ialu_reg);
8713 %}
8714 
8715 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8716 %{
       // In-place 64-bit negate of a memory quadword: matches a
       // LoadL / (0L - x) / StoreL chain on the same address.
8717   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8718   effect(KILL cr);
8719 
8720   format %{ "negq    $dst\t# long" %}
8721   opcode(0xF7, 0x03);  // Opcode F7 /3
8722   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
       // NOTE(review): a register pipe class is used for a memory operand here;
       // confirm whether a memory pipe class was intended.
8723   ins_pipe(ialu_reg);
8724 %}
8725 
8726 
8727 //----------Multiplication/Division Instructions-------------------------------
8728 // Integer Multiplication Instructions
8729 // Multiply Register
8730 
8731 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8732 %{
       // 32-bit register-register multiply: dst = dst * src (MulI), using the
       // two-byte opcode 0F AF (imull). The flags register is declared killed.
8733   match(Set dst (MulI dst src));
8734   effect(KILL cr);
8735 
8736   ins_cost(300);
8737   format %{ "imull   $dst, $src\t# int" %}
8738   opcode(0x0F, 0xAF);
8739   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8740   ins_pipe(ialu_reg_reg_alu0);
8741 %}
8742 
8743 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8744 %{
       // 32-bit multiply of a register by an immediate, three-operand form:
       // dst = src * imm, so dst need not be the same register as src.
8745   match(Set dst (MulI src imm));
8746   effect(KILL cr);
8747 
8748   ins_cost(300);
8749   format %{ "imull   $dst, $src, $imm\t# int" %}
8750   opcode(0x69); /* 69 /r id */
       // NOTE(review): OpcSE/Con8or32 presumably pick the short imm8 form when
       // the constant fits -- confirm against the encode block.
8751   ins_encode(REX_reg_reg(dst, src),
8752              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8753   ins_pipe(ialu_reg_reg_alu0);
8754 %}
8755 
8756 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8757 %{
       // 32-bit multiply with the second factor loaded from memory:
       // dst = dst * LoadI(src). The load is folded into the imull.
8758   match(Set dst (MulI dst (LoadI src)));
8759   effect(KILL cr);
8760 
8761   ins_cost(350);
8762   format %{ "imull   $dst, $src\t# int" %}
8763   opcode(0x0F, 0xAF);
8764   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8765   ins_pipe(ialu_reg_mem_alu0);
8766 %}
8767 
8768 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8769 %{
       // 32-bit multiply of a memory operand by an immediate, three-operand
       // form: dst = LoadI(src) * imm.
8770   match(Set dst (MulI (LoadI src) imm));
8771   effect(KILL cr);
8772 
8773   ins_cost(300);
8774   format %{ "imull   $dst, $src, $imm\t# int" %}
8775   opcode(0x69); /* 69 /r id */
8776   ins_encode(REX_reg_mem(dst, src),
8777              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8778   ins_pipe(ialu_reg_mem_alu0);
8779 %}
8780 
8781 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8782 %{
       // 64-bit register-register multiply: dst = dst * src (MulL), using
       // opcode 0F AF with a wide REX prefix (imulq). Flags are declared killed.
8783   match(Set dst (MulL dst src));
8784   effect(KILL cr);
8785 
8786   ins_cost(300);
8787   format %{ "imulq   $dst, $src\t# long" %}
8788   opcode(0x0F, 0xAF);
8789   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8790   ins_pipe(ialu_reg_reg_alu0);
8791 %}
8792 
8793 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8794 %{
       // 64-bit multiply of a register by an immL32 constant, three-operand
       // form: dst = src * imm.
8795   match(Set dst (MulL src imm));
8796   effect(KILL cr);
8797 
8798   ins_cost(300);
8799   format %{ "imulq   $dst, $src, $imm\t# long" %}
8800   opcode(0x69); /* 69 /r id */
8801   ins_encode(REX_reg_reg_wide(dst, src),
8802              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8803   ins_pipe(ialu_reg_reg_alu0);
8804 %}
8805 
8806 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8807 %{
       // 64-bit multiply with the second factor loaded from memory:
       // dst = dst * LoadL(src). The load is folded into the imulq.
8808   match(Set dst (MulL dst (LoadL src)));
8809   effect(KILL cr);
8810 
8811   ins_cost(350);
8812   format %{ "imulq   $dst, $src\t# long" %}
8813   opcode(0x0F, 0xAF);
8814   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8815   ins_pipe(ialu_reg_mem_alu0);
8816 %}
8817 
8818 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8819 %{
       // 64-bit multiply of a memory operand by an immL32 constant,
       // three-operand form: dst = LoadL(src) * imm.
8820   match(Set dst (MulL (LoadL src) imm));
8821   effect(KILL cr);
8822 
8823   ins_cost(300);
8824   format %{ "imulq   $dst, $src, $imm\t# long" %}
8825   opcode(0x69); /* 69 /r id */
8826   ins_encode(REX_reg_mem_wide(dst, src),
8827              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8828   ins_pipe(ialu_reg_mem_alu0);
8829 %}
8830 
8831 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8832 %{
       // High half of a 64x64-bit signed multiply (MulHiL). One factor must be
       // in rax, which is consumed and clobbered (USE_KILL); the result operand
       // is constrained to rdx. Only $src is encoded explicitly (F7 /5).
8833   match(Set dst (MulHiL src rax));
8834   effect(USE_KILL rax, KILL cr);
8835 
8836   ins_cost(300);
8837   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8838   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8839   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8840   ins_pipe(ialu_reg_reg_alu0);
8841 %}
8842 
8843 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8844                    rFlagsReg cr)
8845 %{
       // 32-bit signed divide: rax = rax / div (DivI). rdx and the flags are
       // declared killed; the divisor may live in neither rax nor rdx.
8846   match(Set rax (DivI rax div));
8847   effect(KILL rdx, KILL cr);
8848 
8849   ins_cost(30*100+10*100); // XXX
       // The format documents a guard around idivl: when rax is 0x80000000 and
       // $div is -1 the divide is skipped, leaving rax unchanged and rdx zeroed.
8850   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8851             "jne,s   normal\n\t"
8852             "xorl    rdx, rdx\n\t"
8853             "cmpl    $div, -1\n\t"
8854             "je,s    done\n"
8855     "normal: cdql\n\t"
8856             "idivl   $div\n"
8857     "done:"        %}
8858   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdql_enc presumably emits the guard and cdql shown above;
       // the remaining pieces encode the idivl itself -- confirm in the encode block.
8859   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8860   ins_pipe(ialu_reg_reg_alu0);
8861 %}
8862 
8863 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8864                    rFlagsReg cr)
8865 %{
       // 64-bit signed divide: rax = rax / div (DivL). rdx and the flags are
       // declared killed; the divisor may live in neither rax nor rdx.
8866   match(Set rax (DivL rax div));
8867   effect(KILL rdx, KILL cr);
8868 
8869   ins_cost(30*100+10*100); // XXX
       // The format documents a guard around idivq: when rax is
       // 0x8000000000000000 and $div is -1 the divide is skipped, leaving rax
       // unchanged and rdx zeroed.
8870   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8871             "cmpq    rax, rdx\n\t"
8872             "jne,s   normal\n\t"
8873             "xorl    rdx, rdx\n\t"
8874             "cmpq    $div, -1\n\t"
8875             "je,s    done\n"
8876     "normal: cdqq\n\t"
8877             "idivq   $div\n"
8878     "done:"        %}
8879   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
       // NOTE(review): cdqq_enc presumably emits the guard and cdqq shown above;
       // the remaining pieces encode the idivq itself -- confirm in the encode block.
8880   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8881   ins_pipe(ialu_reg_reg_alu0);
8882 %}
8883 
8884 // Integer DIVMOD with Register, both quotient and mod results.
     // Matches a DivModI node; unlike divI_rReg/modI_rReg, neither rax nor rdx is
     // declared killed here (presumably because both carry results -- confirm).
8885 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8886                              rFlagsReg cr)
8887 %{
8888   match(DivModI rax div);
8889   effect(KILL cr);
8890 
8891   ins_cost(30*100+10*100); // XXX
       // Same guarded idivl sequence as documented for the plain 32-bit divide:
       // the 0x80000000 / -1 case bypasses the idivl.
8892   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8893             "jne,s   normal\n\t"
8894             "xorl    rdx, rdx\n\t"
8895             "cmpl    $div, -1\n\t"
8896             "je,s    done\n"
8897     "normal: cdql\n\t"
8898             "idivl   $div\n"
8899     "done:"        %}
8900   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8901   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8902   ins_pipe(pipe_slow);
8903 %}
8904 
8905 // Long DIVMOD with Register, both quotient and mod results.
     // Matches a DivModL node; unlike divL_rReg/modL_rReg, neither rax nor rdx is
     // declared killed here (presumably because both carry results -- confirm).
8906 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8907                              rFlagsReg cr)
8908 %{
8909   match(DivModL rax div);
8910   effect(KILL cr);
8911 
8912   ins_cost(30*100+10*100); // XXX
       // Same guarded idivq sequence as documented for the plain 64-bit divide:
       // the 0x8000000000000000 / -1 case bypasses the idivq.
8913   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8914             "cmpq    rax, rdx\n\t"
8915             "jne,s   normal\n\t"
8916             "xorl    rdx, rdx\n\t"
8917             "cmpq    $div, -1\n\t"
8918             "je,s    done\n"
8919     "normal: cdqq\n\t"
8920             "idivq   $div\n"
8921     "done:"        %}
8922   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8923   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8924   ins_pipe(pipe_slow);
8925 %}
8926 
8927 //----------- DivL-By-Constant-Expansions--------------------------------------
8928 // DivI cases are handled by the compiler
8929 
8930 // Magic constant, reciprocal of 10: 0x6666666666666667 is 2^66 / 10 rounded
     // up, so the high 64 bits of (x * constant) shifted right by 2 give x / 10.
8931 instruct loadConL_0x6666666666666667(rRegL dst)
8932 %{
       // Expand-only helper (no match rule): defines dst with the constant.
8933   effect(DEF dst);
8934 
       // The printed constant has all 16 hex digits, matching the encoded value.
8935   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8936   ins_encode(load_immL(dst, 0x6666666666666667));
8937   ins_pipe(ialu_reg);
8938 %}
8939 
8940 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8941 %{
       // Expand-only helper (no match rule) for div-by-10: one-operand signed
       // multiply F7 /5 of rax by $src. dst (rdx) is defined, rax is consumed
       // and clobbered, and the flags are killed.
8942   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8943 
8944   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8945   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8946   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8947   ins_pipe(ialu_reg_reg_alu0);
8948 %}
8949 
8950 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8951 %{
       // Expand-only helper (no match rule) for div-by-10: arithmetic right
       // shift of dst by the fixed count 63 (0x3F), in place (USE_DEF).
8952   effect(USE_DEF dst, KILL cr);
8953 
8954   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8955   opcode(0xC1, 0x7); /* C1 /7 ib */
8956   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8957   ins_pipe(ialu_reg);
8958 %}
8959 
8960 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8961 %{
       // Expand-only helper (no match rule) for div-by-10: arithmetic right
       // shift of dst by the fixed count 2, in place (USE_DEF).
8962   effect(USE_DEF dst, KILL cr);
8963 
8964   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8965   opcode(0xC1, 0x7); /* C1 /7 ib */
8966   ins_encode(reg_opc_imm_wide(dst, 0x2));
8967   ins_pipe(ialu_reg);
8968 %}
8969 
8970 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8971 %{
       // Long division by the constant 10 without idivq: multiply by the magic
       // reciprocal, keep the high 64 bits, shift right by 2, then subtract the
       // sign of src (src >> 63 is -1 for negative inputs, so this adds 1).
8972   match(Set dst (DivL src div));
8973 
8974   ins_cost((5+8)*100);
8975   expand %{
8976     rax_RegL rax;                     // Killed temp
8977     rFlagsReg cr;                     // Killed
8978     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8979     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
         // NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so the next
         // step overwrites src in place -- confirm src is treated as clobbered.
8980     sarL_rReg_63(src, cr);            // sarq  src, 63
8981     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8982     subL_rReg(dst, src, cr);          // subq  rdx, src
8983   %}
8984 %}
8985 
8986 //-----------------------------------------------------------------------------
8987 
8988 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8989                    rFlagsReg cr)
8990 %{
       // 32-bit signed remainder: rdx = rax % div (ModI). rax and the flags are
       // declared killed; the divisor may live in neither rax nor rdx.
8991   match(Set rdx (ModI rax div));
8992   effect(KILL rax, KILL cr);
8993 
8994   ins_cost(300); // XXX
       // The format documents the same guard as the 32-bit divide: for
       // 0x80000000 % -1 the idivl is skipped and rdx is zeroed.
8995   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8996             "jne,s   normal\n\t"
8997             "xorl    rdx, rdx\n\t"
8998             "cmpl    $div, -1\n\t"
8999             "je,s    done\n"
9000     "normal: cdql\n\t"
9001             "idivl   $div\n"
9002     "done:"        %}
9003   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9004   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9005   ins_pipe(ialu_reg_reg_alu0);
9006 %}
9007 
9008 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9009                    rFlagsReg cr)
9010 %{
       // 64-bit signed remainder: rdx = rax % div (ModL). rax and the flags are
       // declared killed; the divisor may live in neither rax nor rdx.
9011   match(Set rdx (ModL rax div));
9012   effect(KILL rax, KILL cr);
9013 
9014   ins_cost(300); // XXX
       // The format documents the same guard as the 64-bit divide: for
       // 0x8000000000000000 % -1 the idivq is skipped and rdx is zeroed.
9015   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9016             "cmpq    rax, rdx\n\t"
9017             "jne,s   normal\n\t"
9018             "xorl    rdx, rdx\n\t"
9019             "cmpq    $div, -1\n\t"
9020             "je,s    done\n"
9021     "normal: cdqq\n\t"
9022             "idivq   $div\n"
9023     "done:"        %}
9024   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9025   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9026   ins_pipe(ialu_reg_reg_alu0);
9027 %}
9028 
9029 // Integer Shift Instructions
9030 // Shift Left by one: 32-bit register form. Uses the D1 /4 encoding, which
     // carries no immediate byte, so $shift is matched but never encoded.
9031 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9032 %{
9033   match(Set dst (LShiftI dst shift));
9034   effect(KILL cr);
9035 
9036   format %{ "sall    $dst, $shift" %}
9037   opcode(0xD1, 0x4); /* D1 /4 */
9038   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9039   ins_pipe(ialu_reg);
9040 %}
9041 
9042 // Shift Left by one: in-place 32-bit memory form. Matches a
     // LoadI / LShiftI / StoreI chain on the same address (D1 /4, no immediate).
9043 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9044 %{
9045   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9046   effect(KILL cr);
9047 
9048   format %{ "sall    $dst, $shift\t" %}
9049   opcode(0xD1, 0x4); /* D1 /4 */
9050   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9051   ins_pipe(ialu_mem_imm);
9052 %}
9053 
9054 // Shift Left by 8-bit immediate: 32-bit register form (C1 /4 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm.
9055 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9056 %{
9057   match(Set dst (LShiftI dst shift));
9058   effect(KILL cr);
9059 
9060   format %{ "sall    $dst, $shift" %}
9061   opcode(0xC1, 0x4); /* C1 /4 ib */
9062   ins_encode(reg_opc_imm(dst, shift));
9063   ins_pipe(ialu_reg);
9064 %}
9065 
9066 // Shift Left by 8-bit immediate: in-place 32-bit memory form. Matches a
     // LoadI / LShiftI / StoreI chain on the same address (C1 /4 ib).
9067 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9068 %{
9069   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9070   effect(KILL cr);
9071 
9072   format %{ "sall    $dst, $shift" %}
9073   opcode(0xC1, 0x4); /* C1 /4 ib */
9074   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9075   ins_pipe(ialu_mem_imm);
9076 %}
9077 
9078 // Shift Left by variable: 32-bit register form (D3 /4). The count operand is
     // constrained to the rcx register class and is not encoded explicitly.
9079 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9080 %{
9081   match(Set dst (LShiftI dst shift));
9082   effect(KILL cr);
9083 
9084   format %{ "sall    $dst, $shift" %}
9085   opcode(0xD3, 0x4); /* D3 /4 */
9086   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9087   ins_pipe(ialu_reg_reg);
9088 %}
9089 
9090 // Shift Left by variable: in-place 32-bit memory form (D3 /4). Matches a
     // LoadI / LShiftI / StoreI chain; the count is constrained to rcx.
9091 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9092 %{
9093   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9094   effect(KILL cr);
9095 
9096   format %{ "sall    $dst, $shift" %}
9097   opcode(0xD3, 0x4); /* D3 /4 */
9098   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9099   ins_pipe(ialu_mem_reg);
9100 %}
9101 
9102 // Arithmetic shift right by one: 32-bit register form. Uses the D1 /7
     // encoding, which carries no immediate byte, so $shift is never encoded.
9103 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9104 %{
9105   match(Set dst (RShiftI dst shift));
9106   effect(KILL cr);
9107 
9108   format %{ "sarl    $dst, $shift" %}
9109   opcode(0xD1, 0x7); /* D1 /7 */
9110   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9111   ins_pipe(ialu_reg);
9112 %}
9113 
9114 // Arithmetic shift right by one: in-place 32-bit memory form. Matches a
     // LoadI / RShiftI / StoreI chain on the same address (D1 /7, no immediate).
9115 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9116 %{
9117   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9118   effect(KILL cr);
9119 
9120   format %{ "sarl    $dst, $shift" %}
9121   opcode(0xD1, 0x7); /* D1 /7 */
9122   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9123   ins_pipe(ialu_mem_imm);
9124 %}
9125 
9126 // Arithmetic Shift Right by 8-bit immediate: 32-bit register form (C1 /7 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm.
9127 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9128 %{
9129   match(Set dst (RShiftI dst shift));
9130   effect(KILL cr);
9131 
9132   format %{ "sarl    $dst, $shift" %}
9133   opcode(0xC1, 0x7); /* C1 /7 ib */
9134   ins_encode(reg_opc_imm(dst, shift));
       // Register-only operation: use the register pipe class, as the sibling
       // salI_rReg_imm and shrI_rReg_imm rules do, not the memory one.
9135   ins_pipe(ialu_reg);
9136 %}
9137 
9138 // Arithmetic Shift Right by 8-bit immediate: in-place 32-bit memory form.
     // Matches a LoadI / RShiftI / StoreI chain on the same address (C1 /7 ib).
9139 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9140 %{
9141   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9142   effect(KILL cr);
9143 
9144   format %{ "sarl    $dst, $shift" %}
9145   opcode(0xC1, 0x7); /* C1 /7 ib */
9146   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9147   ins_pipe(ialu_mem_imm);
9148 %}
9149 
9150 // Arithmetic Shift Right by variable: 32-bit register form (D3 /7). The count
     // operand is constrained to the rcx register class and is not encoded.
9151 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9152 %{
9153   match(Set dst (RShiftI dst shift));
9154   effect(KILL cr);
9155 
9156   format %{ "sarl    $dst, $shift" %}
9157   opcode(0xD3, 0x7); /* D3 /7 */
9158   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9159   ins_pipe(ialu_reg_reg);
9160 %}
9161 
9162 // Arithmetic Shift Right by variable: in-place 32-bit memory form (D3 /7).
     // Matches a LoadI / RShiftI / StoreI chain; the count is constrained to rcx.
9163 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9164 %{
9165   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9166   effect(KILL cr);
9167 
9168   format %{ "sarl    $dst, $shift" %}
9169   opcode(0xD3, 0x7); /* D3 /7 */
9170   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9171   ins_pipe(ialu_mem_reg);
9172 %}
9173 
9174 // Logical shift right by one: 32-bit register form. Uses the D1 /5
     // encoding, which carries no immediate byte, so $shift is never encoded.
9175 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9176 %{
9177   match(Set dst (URShiftI dst shift));
9178   effect(KILL cr);
9179 
9180   format %{ "shrl    $dst, $shift" %}
9181   opcode(0xD1, 0x5); /* D1 /5 */
9182   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9183   ins_pipe(ialu_reg);
9184 %}
9185 
9186 // Logical shift right by one: in-place 32-bit memory form. Matches a
     // LoadI / URShiftI / StoreI chain on the same address (D1 /5, no immediate).
9187 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9188 %{
9189   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9190   effect(KILL cr);
9191 
9192   format %{ "shrl    $dst, $shift" %}
9193   opcode(0xD1, 0x5); /* D1 /5 */
9194   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9195   ins_pipe(ialu_mem_imm);
9196 %}
9197 
9198 // Logical Shift Right by 8-bit immediate: 32-bit register form (C1 /5 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm.
9199 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9200 %{
9201   match(Set dst (URShiftI dst shift));
9202   effect(KILL cr);
9203 
9204   format %{ "shrl    $dst, $shift" %}
9205   opcode(0xC1, 0x5); /* C1 /5 ib */
9206   ins_encode(reg_opc_imm(dst, shift));
9207   ins_pipe(ialu_reg);
9208 %}
9209 
9210 // Logical Shift Right by 8-bit immediate: in-place 32-bit memory form.
     // Matches a LoadI / URShiftI / StoreI chain on the same address (C1 /5 ib).
9211 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9212 %{
9213   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9214   effect(KILL cr);
9215 
9216   format %{ "shrl    $dst, $shift" %}
9217   opcode(0xC1, 0x5); /* C1 /5 ib */
9218   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9219   ins_pipe(ialu_mem_imm);
9220 %}
9221 
9222 // Logical Shift Right by variable: 32-bit register form (D3 /5). The count
     // operand is constrained to the rcx register class and is not encoded.
9223 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9224 %{
9225   match(Set dst (URShiftI dst shift));
9226   effect(KILL cr);
9227 
9228   format %{ "shrl    $dst, $shift" %}
9229   opcode(0xD3, 0x5); /* D3 /5 */
9230   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9231   ins_pipe(ialu_reg_reg);
9232 %}
9233 
9234 // Logical Shift Right by variable: in-place 32-bit memory form (D3 /5).
     // Matches a LoadI / URShiftI / StoreI chain; the count is constrained to rcx.
9235 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9236 %{
9237   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9238   effect(KILL cr);
9239 
9240   format %{ "shrl    $dst, $shift" %}
9241   opcode(0xD3, 0x5); /* D3 /5 */
9242   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9243   ins_pipe(ialu_mem_reg);
9244 %}
9245 
9246 // Long Shift Instructions
9247 // Shift Left by one: 64-bit register form. Same D1 /4 opcode as the int
     // rule, with a wide REX prefix; the count is an int immediate (immI1).
9248 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9249 %{
9250   match(Set dst (LShiftL dst shift));
9251   effect(KILL cr);
9252 
9253   format %{ "salq    $dst, $shift" %}
9254   opcode(0xD1, 0x4); /* D1 /4 */
9255   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9256   ins_pipe(ialu_reg);
9257 %}
9258 
9259 // Shift Left by one: in-place 64-bit memory form. Matches a
     // LoadL / LShiftL / StoreL chain on the same address (D1 /4, wide REX).
9260 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9261 %{
9262   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9263   effect(KILL cr);
9264 
9265   format %{ "salq    $dst, $shift" %}
9266   opcode(0xD1, 0x4); /* D1 /4 */
9267   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9268   ins_pipe(ialu_mem_imm);
9269 %}
9270 
9271 // Shift Left by 8-bit immediate: 64-bit register form (C1 /4 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm_wide.
9272 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9273 %{
9274   match(Set dst (LShiftL dst shift));
9275   effect(KILL cr);
9276 
9277   format %{ "salq    $dst, $shift" %}
9278   opcode(0xC1, 0x4); /* C1 /4 ib */
9279   ins_encode(reg_opc_imm_wide(dst, shift));
9280   ins_pipe(ialu_reg);
9281 %}
9282 
9283 // Shift Left by 8-bit immediate: in-place 64-bit memory form. Matches a
     // LoadL / LShiftL / StoreL chain on the same address (C1 /4 ib, wide REX).
9284 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9285 %{
9286   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9287   effect(KILL cr);
9288 
9289   format %{ "salq    $dst, $shift" %}
9290   opcode(0xC1, 0x4); /* C1 /4 ib */
9291   ins_encode(REX_mem_wide(dst), OpcP,
9292              RM_opc_mem(secondary, dst), Con8or32(shift));
9293   ins_pipe(ialu_mem_imm);
9294 %}
9295 
9296 // Shift Left by variable: 64-bit register form (D3 /4, wide REX). The count
     // is an int operand constrained to the rcx register class.
9297 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9298 %{
9299   match(Set dst (LShiftL dst shift));
9300   effect(KILL cr);
9301 
9302   format %{ "salq    $dst, $shift" %}
9303   opcode(0xD3, 0x4); /* D3 /4 */
9304   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9305   ins_pipe(ialu_reg_reg);
9306 %}
9307 
9308 // Shift Left by variable: in-place 64-bit memory form (D3 /4, wide REX).
     // Matches a LoadL / LShiftL / StoreL chain; the count is constrained to rcx.
9309 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9310 %{
9311   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9312   effect(KILL cr);
9313 
9314   format %{ "salq    $dst, $shift" %}
9315   opcode(0xD3, 0x4); /* D3 /4 */
9316   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9317   ins_pipe(ialu_mem_reg);
9318 %}
9319 
9320 // Arithmetic shift right by one: 64-bit register form (D1 /7, wide REX).
     // The encoding carries no immediate byte, so $shift is never encoded.
9321 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9322 %{
9323   match(Set dst (RShiftL dst shift));
9324   effect(KILL cr);
9325 
9326   format %{ "sarq    $dst, $shift" %}
9327   opcode(0xD1, 0x7); /* D1 /7 */
9328   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9329   ins_pipe(ialu_reg);
9330 %}
9331 
9332 // Arithmetic shift right by one: in-place 64-bit memory form. Matches a
     // LoadL / RShiftL / StoreL chain on the same address (D1 /7, wide REX).
9333 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9334 %{
9335   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9336   effect(KILL cr);
9337 
9338   format %{ "sarq    $dst, $shift" %}
9339   opcode(0xD1, 0x7); /* D1 /7 */
9340   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9341   ins_pipe(ialu_mem_imm);
9342 %}
9343 
9344 // Arithmetic Shift Right by 8-bit immediate: 64-bit register form (C1 /7 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm_wide.
9345 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9346 %{
9347   match(Set dst (RShiftL dst shift));
9348   effect(KILL cr);
9349 
9350   format %{ "sarq    $dst, $shift" %}
9351   opcode(0xC1, 0x7); /* C1 /7 ib */
9352   ins_encode(reg_opc_imm_wide(dst, shift));
       // Register-only operation: use the register pipe class, as the sibling
       // salL_rReg_imm and shrL_rReg_imm rules do, not the memory one.
9353   ins_pipe(ialu_reg);
9354 %}
9355 
9356 // Arithmetic Shift Right by 8-bit immediate: in-place 64-bit memory form.
     // Matches a LoadL / RShiftL / StoreL chain on the same address (C1 /7 ib).
9357 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9358 %{
9359   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9360   effect(KILL cr);
9361 
9362   format %{ "sarq    $dst, $shift" %}
9363   opcode(0xC1, 0x7); /* C1 /7 ib */
9364   ins_encode(REX_mem_wide(dst), OpcP,
9365              RM_opc_mem(secondary, dst), Con8or32(shift));
9366   ins_pipe(ialu_mem_imm);
9367 %}
9368 
9369 // Arithmetic Shift Right by variable: 64-bit register form (D3 /7, wide REX).
     // The count is an int operand constrained to the rcx register class.
9370 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9371 %{
9372   match(Set dst (RShiftL dst shift));
9373   effect(KILL cr);
9374 
9375   format %{ "sarq    $dst, $shift" %}
9376   opcode(0xD3, 0x7); /* D3 /7 */
9377   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9378   ins_pipe(ialu_reg_reg);
9379 %}
9380 
9381 // Arithmetic Shift Right by variable: in-place 64-bit memory form (D3 /7).
     // Matches a LoadL / RShiftL / StoreL chain; the count is constrained to rcx.
9382 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9383 %{
9384   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9385   effect(KILL cr);
9386 
9387   format %{ "sarq    $dst, $shift" %}
9388   opcode(0xD3, 0x7); /* D3 /7 */
9389   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9390   ins_pipe(ialu_mem_reg);
9391 %}
9392 
9393 // Logical shift right by one: 64-bit register form (D1 /5, wide REX).
     // The encoding carries no immediate byte, so $shift is never encoded.
9394 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9395 %{
9396   match(Set dst (URShiftL dst shift));
9397   effect(KILL cr);
9398 
9399   format %{ "shrq    $dst, $shift" %}
9400   opcode(0xD1, 0x5); /* D1 /5 */
9401   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9402   ins_pipe(ialu_reg);
9403 %}
9404 
9405 // Logical shift right by one: in-place 64-bit memory form. Matches a
     // LoadL / URShiftL / StoreL chain on the same address (D1 /5, wide REX).
9406 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9407 %{
9408   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9409   effect(KILL cr);
9410 
9411   format %{ "shrq    $dst, $shift" %}
9412   opcode(0xD1, 0x5); /* D1 /5 */
9413   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9414   ins_pipe(ialu_mem_imm);
9415 %}
9416 
9417 // Logical Shift Right by 8-bit immediate: 64-bit register form (C1 /5 ib).
     // The whole instruction, immediate included, comes from reg_opc_imm_wide.
9418 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9419 %{
9420   match(Set dst (URShiftL dst shift));
9421   effect(KILL cr);
9422 
9423   format %{ "shrq    $dst, $shift" %}
9424   opcode(0xC1, 0x5); /* C1 /5 ib */
9425   ins_encode(reg_opc_imm_wide(dst, shift));
9426   ins_pipe(ialu_reg);
9427 %}
9428 
9429 
9430 // Logical Shift Right by 8-bit immediate: in-place 64-bit memory form.
     // Matches a LoadL / URShiftL / StoreL chain on the same address (C1 /5 ib).
9431 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9432 %{
9433   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9434   effect(KILL cr);
9435 
9436   format %{ "shrq    $dst, $shift" %}
9437   opcode(0xC1, 0x5); /* C1 /5 ib */
9438   ins_encode(REX_mem_wide(dst), OpcP,
9439              RM_opc_mem(secondary, dst), Con8or32(shift));
9440   ins_pipe(ialu_mem_imm);
9441 %}
9442 
9443 // Logical Shift Right by variable: 64-bit register form (D3 /5, wide REX).
     // The count is an int operand constrained to the rcx register class.
9444 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9445 %{
9446   match(Set dst (URShiftL dst shift));
9447   effect(KILL cr);
9448 
9449   format %{ "shrq    $dst, $shift" %}
9450   opcode(0xD3, 0x5); /* D3 /5 */
9451   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9452   ins_pipe(ialu_reg_reg);
9453 %}
9454 
9455 // Logical Shift Right by variable: in-place 64-bit memory form (D3 /5).
     // Matches a LoadL / URShiftL / StoreL chain; the count is constrained to rcx.
9456 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9457 %{
9458   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9459   effect(KILL cr);
9460 
9461   format %{ "shrq    $dst, $shift" %}
9462   opcode(0xD3, 0x5); /* D3 /5 */
9463   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9464   ins_pipe(ialu_mem_reg);
9465 %}
9466 
9467 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9468 // This idiom is used by the compiler for the i2b bytecode.
     // The shift pair is replaced by a single sign-extending byte move (movsbl);
     // no flags effect is declared.
9469 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9470 %{
9471   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9472 
9473   format %{ "movsbl  $dst, $src\t# i2b" %}
9474   opcode(0x0F, 0xBE);
       // NOTE(review): REX_reg_breg (rather than REX_reg_reg) presumably handles
       // the byte-register form of src -- confirm in the encode block.
9475   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9476   ins_pipe(ialu_reg_reg);
9477 %}
9478 
9479 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9480 // This idiom is used by the compiler for the i2s bytecode.
     // The shift pair is replaced by a single sign-extending word move (movswl);
     // no flags effect is declared.
9481 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9482 %{
9483   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9484 
9485   format %{ "movswl  $dst, $src\t# i2s" %}
9486   opcode(0x0F, 0xBF);
9487   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9488   ins_pipe(ialu_reg_reg);
9489 %}
9490 
9491 // ROL/ROR instructions
9492 
9493 // ROL expand
9494 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): 32-bit rotate left by one, in
       // place (USE_DEF dst), using the D1 /0 form; flags are killed.
9495   effect(KILL cr, USE_DEF dst);
9496 
9497   format %{ "roll    $dst" %}
9498   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9499   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9500   ins_pipe(ialu_reg);
9501 %}
9502 
9503 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): 32-bit rotate left by an 8-bit
       // immediate, in place (USE_DEF dst), using the C1 /0 ib form.
9504   effect(USE_DEF dst, USE shift, KILL cr);
9505 
9506   format %{ "roll    $dst, $shift" %}
9507   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9508   ins_encode( reg_opc_imm(dst, shift) );
9509   ins_pipe(ialu_reg);
9510 %}
9511 
9512 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9513 %{
       // Expand-only helper (no match rule): 32-bit rotate left by a variable
       // count (D3 /0). The count is constrained to rcx and dst to a class that
       // excludes rcx, so the two operands cannot share a register.
9514   effect(USE_DEF dst, USE shift, KILL cr);
9515 
9516   format %{ "roll    $dst, $shift" %}
9517   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9518   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9519   ins_pipe(ialu_reg_reg);
9520 %}
9521 // end of ROL expand
9522 
9523 // Rotate Left by one: recognizes (dst << lshift) | (dst >>> rshift) with
     // lshift of type immI1 and rshift of type immI_M1, and expands to a single roll.
     // NOTE(review): immI_M1 is presumably the constant -1, which acts as a
     // shift count of 31 after masking -- confirm against the operand definition.
9524 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9525 %{
9526   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9527 
9528   expand %{
9529     rolI_rReg_imm1(dst, cr);
9530   %}
9531 %}
9532 
9533 // Rotate Left by 8-bit immediate: recognizes (dst << lshift) | (dst >>> rshift)
     // and expands to a roll by lshift.
9534 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9535 %{
       // Only a rotate when the two constant counts sum to a multiple of 32.
9536   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9537   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9538 
9539   expand %{
9540     rolI_rReg_imm8(dst, lshift, cr);
9541   %}
9542 %}
9543 
9544 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (0 - shift))
     // and expands to a roll by the count register.
9545 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9546 %{
9547   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9548 
9549   expand %{
9550     rolI_rReg_CL(dst, shift, cr);
9551   %}
9552 %}
9553 
9554 // Rotate Left by variable: recognizes (dst << shift) | (dst >>> (32 - shift))
     // and expands to a roll by the count register.
9555 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9556 %{
9557   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9558 
9559   expand %{
9560     rolI_rReg_CL(dst, shift, cr);
9561   %}
9562 %}
9563 
9564 // ROR expand
9565 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9566 %{
       // Expand-only helper (no match rule): 32-bit rotate right by one, in
       // place (USE_DEF dst), using the D1 /1 form; flags are killed.
9567   effect(USE_DEF dst, KILL cr);
9568 
9569   format %{ "rorl    $dst" %}
9570   opcode(0xD1, 0x1); /* D1 /1 */
9571   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9572   ins_pipe(ialu_reg);
9573 %}
9574 
9575 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9576 %{
       // Expand-only helper (no match rule): 32-bit rotate right by an 8-bit
       // immediate, in place (USE_DEF dst), using the C1 /1 ib form.
9577   effect(USE_DEF dst, USE shift, KILL cr);
9578 
9579   format %{ "rorl    $dst, $shift" %}
9580   opcode(0xC1, 0x1); /* C1 /1 ib */
9581   ins_encode(reg_opc_imm(dst, shift));
9582   ins_pipe(ialu_reg);
9583 %}
9584 
9585 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9586 %{
       // Expand-only helper (no match rule): 32-bit rotate right by a variable
       // count (D3 /1). The count is constrained to rcx and dst to a class that
       // excludes rcx, so the two operands cannot share a register.
9587   effect(USE_DEF dst, USE shift, KILL cr);
9588 
9589   format %{ "rorl    $dst, $shift" %}
9590   opcode(0xD3, 0x1); /* D3 /1 */
9591   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9592   ins_pipe(ialu_reg_reg);
9593 %}
9594 // end of ROR expand
9595 
9596 // Rotate Right by one: recognizes (dst >>> rshift) | (dst << lshift) with
     // rshift of type immI1 and lshift of type immI_M1, and expands to a single rorl.
     // NOTE(review): immI_M1 is presumably the constant -1, which acts as a
     // shift count of 31 after masking -- confirm against the operand definition.
9597 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9598 %{
9599   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9600 
9601   expand %{
9602     rorI_rReg_imm1(dst, cr);
9603   %}
9604 %}
9605 
9606 // Rotate Right by 8-bit immediate: recognizes (dst >>> rshift) | (dst << lshift)
     // and expands to a rorl by rshift.
9607 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9608 %{
       // Only a rotate when the two constant counts sum to a multiple of 32.
9609   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9610   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9611 
9612   expand %{
9613     rorI_rReg_imm8(dst, rshift, cr);
9614   %}
9615 %}
9616 
9617 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (0 - shift))
     // and expands to a rorl by the count register.
9618 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9619 %{
9620   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9621 
9622   expand %{
9623     rorI_rReg_CL(dst, shift, cr);
9624   %}
9625 %}
9626 
9627 // Rotate Right by variable: recognizes (dst >>> shift) | (dst << (32 - shift))
     // and expands to a rorl by the count register.
9628 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9629 %{
9630   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9631 
9632   expand %{
9633     rorI_rReg_CL(dst, shift, cr);
9634   %}
9635 %}
9636 
9637 // for long rotate
9638 // ROL expand
9639 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): 64-bit rotate left by one, in
       // place (USE_DEF dst), using D1 /0 with a wide REX prefix.
9640   effect(USE_DEF dst, KILL cr);
9641 
9642   format %{ "rolq    $dst" %}
9643   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9644   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9645   ins_pipe(ialu_reg);
9646 %}
9647 
// Expand-only helper (it has no match rule): rotate a 64-bit register left
// by an 8-bit immediate count.  dst is read and written, shift is only read,
// and the flags are clobbered.  Referenced from rolL_rReg_i8.
9648 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9649   effect(USE_DEF dst, USE shift, KILL cr);
9650 
9651   format %{ "rolq    $dst, $shift" %}
9652   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9653   ins_encode( reg_opc_imm_wide(dst, shift) );
9654   ins_pipe(ialu_reg);
9655 %}
9656 
// Expand-only helper (it has no match rule): rotate a 64-bit register left
// by a variable count.  The count is an int operand restricted to rcx_RegI,
// while dst is restricted to no_rcx_RegL so the two never share a register.
// Only dst is handed to the encoders.
9657 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9658 %{
9659   effect(USE_DEF dst, USE shift, KILL cr);
9660 
9661   format %{ "rolq    $dst, $shift" %}
9662   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9663   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9664   ins_pipe(ialu_reg_reg);
9665 %}
9666 // end of ROL expand
9667 
9668 // Rotate Left by one
// Matches (dst << lshift) | (dst >>> rshift) on longs with lshift restricted
// to immI1 and rshift to immI_M1, and expands to rolL_rReg_imm1.  Neither
// shift operand is forwarded to the helper.
// NOTE(review): presumably immI_M1 is the constant -1, which behaves as a
// right-shift count of 63 once the count is reduced modulo 64 -- confirm.
9669 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9670 %{
9671   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9672 
9673   expand %{
9674     rolL_rReg_imm1(dst, cr);
9675   %}
9676 %}
9677 
9678 // Rotate Left by 8-bit immediate
// Matches (dst << lshift) | (dst >>> rshift) on longs for constant counts
// and expands to rolL_rReg_imm8.  The predicate accepts the pattern only
// when the two counts sum to a multiple of 64, i.e. when the pair of shifts
// forms a 64-bit rotate.  Only lshift is forwarded as the rotate count.
9679 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9680 %{
  // n is the matched OrL; in(2) of each of its two shift inputs is read as
  // that shift's constant count.
9681   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9682   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9683 
9684   expand %{
9685     rolL_rReg_imm8(dst, lshift, cr);
9686   %}
9687 %}
9688 
9689 // Rotate Left by variable
// Matches (dst << shift) | (dst >>> (0 - shift)) on longs with a variable
// int count and expands to rolL_rReg_CL.  The zero operand only pins the
// constant in the SubI; it is not forwarded.
// NOTE(review): treating (0 - shift) as the complementary rotate count
// presumably relies on long shift counts being reduced modulo 64 -- confirm.
9690 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9691 %{
9692   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9693 
9694   expand %{
9695     rolL_rReg_CL(dst, shift, cr);
9696   %}
9697 %}
9698 
9699 // Rotate Left by variable
// Matches (dst << shift) | (dst >>> (c64 - shift)) on longs with a variable
// int count and expands to rolL_rReg_CL.  Same rotate as the C0 form, with
// the operand class immI_64 in place of immI0; the constant operand is not
// forwarded to the helper.
9700 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9701 %{
9702   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9703 
9704   expand %{
9705     rolL_rReg_CL(dst, shift, cr);
9706   %}
9707 %}
9708 
9709 // ROR expand
// Expand-only helper (it has no match rule): rotate a 64-bit register right
// by one bit.  dst is read and written and the flags are clobbered.
// Referenced from the expand block of rorL_rReg_i1.
9710 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9711 %{
9712   effect(USE_DEF dst, KILL cr);
9713 
9714   format %{ "rorq    $dst" %}
9715   opcode(0xD1, 0x1); /* D1 /1 */
9716   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9717   ins_pipe(ialu_reg);
9718 %}
9719 
// Expand-only helper (it has no match rule): rotate a 64-bit register right
// by an 8-bit immediate count.  dst is read and written, shift is only read,
// and the flags are clobbered.  Referenced from rorL_rReg_i8.
9720 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9721 %{
9722   effect(USE_DEF dst, USE shift, KILL cr);
9723 
9724   format %{ "rorq    $dst, $shift" %}
9725   opcode(0xC1, 0x1); /* C1 /1 ib */
9726   ins_encode(reg_opc_imm_wide(dst, shift));
9727   ins_pipe(ialu_reg);
9728 %}
9729 
// Expand-only helper (it has no match rule): rotate a 64-bit register right
// by a variable count.  The count is an int operand restricted to rcx_RegI,
// while dst is restricted to no_rcx_RegL so the two never share a register.
// Only dst is handed to the encoders.
9730 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9731 %{
9732   effect(USE_DEF dst, USE shift, KILL cr);
9733 
9734   format %{ "rorq    $dst, $shift" %}
9735   opcode(0xD3, 0x1); /* D3 /1 */
9736   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9737   ins_pipe(ialu_reg_reg);
9738 %}
9739 // end of ROR expand
9740 
9741 // Rotate Right by one
// Matches (dst >>> rshift) | (dst << lshift) on longs with rshift restricted
// to immI1 and lshift to immI_M1, and expands to rorL_rReg_imm1.  Neither
// shift operand is forwarded to the helper.
// NOTE(review): presumably immI_M1 is the constant -1, which behaves as a
// left-shift count of 63 once the count is reduced modulo 64 -- confirm.
9742 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9743 %{
9744   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9745 
9746   expand %{
9747     rorL_rReg_imm1(dst, cr);
9748   %}
9749 %}
9750 
9751 // Rotate Right by 8-bit immediate
// Matches (dst >>> rshift) | (dst << lshift) on longs for constant counts
// and expands to rorL_rReg_imm8.  The predicate accepts the pattern only
// when the two counts sum to a multiple of 64, i.e. when the pair of shifts
// forms a 64-bit rotate.  Only rshift is forwarded as the rotate count.
9752 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9753 %{
  // n is the matched OrL; in(2) of each of its two shift inputs is read as
  // that shift's constant count.
9754   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9755   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9756 
9757   expand %{
9758     rorL_rReg_imm8(dst, rshift, cr);
9759   %}
9760 %}
9761 
9762 // Rotate Right by variable
// Matches (dst >>> shift) | (dst << (0 - shift)) on longs with a variable
// int count and expands to rorL_rReg_CL.  The zero operand only pins the
// constant in the SubI; it is not forwarded.
// NOTE(review): treating (0 - shift) as the complementary rotate count
// presumably relies on long shift counts being reduced modulo 64 -- confirm.
9763 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9764 %{
9765   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9766 
9767   expand %{
9768     rorL_rReg_CL(dst, shift, cr);
9769   %}
9770 %}
9771 
9772 // Rotate Right by variable
// Matches (dst >>> shift) | (dst << (c64 - shift)) on longs with a variable
// int count and expands to rorL_rReg_CL.  Same rotate as the C0 form, with
// the operand class immI_64 in place of immI0; the constant operand is not
// forwarded to the helper.
9773 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9774 %{
9775   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9776 
9777   expand %{
9778     rorL_rReg_CL(dst, shift, cr);
9779   %}
9780 %}
9781 
9782 // Logical Instructions
9783 
9784 // Integer Logical Instructions
9785 
9786 // And Instructions
9787 // And Register with Register
// Matches dst = dst & src for 32-bit ints with both operands in registers.
// Two-address form: dst is an input and the result.  Emitted with primary
// opcode 0x23; the flags register is clobbered.
9788 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9789 %{
9790   match(Set dst (AndI dst src));
9791   effect(KILL cr);
9792 
9793   format %{ "andl    $dst, $src\t# int" %}
9794   opcode(0x23);
9795   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9796   ins_pipe(ialu_reg_reg);
9797 %}
9798 
9799 // And Register with Immediate 255
// Matches dst = dst & 0xFF (mask operand class immI_255) and implements it
// as a zero-extending byte move of dst onto itself (opcode 0F B6) instead of
// an and.  No flags effect is declared for this form.
// NOTE(review): REX_reg_breg is used here where the 65535 variant uses
// REX_reg_reg, presumably because the source is addressed as a byte
// register -- confirm against the enc_class.
9800 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9801 %{
9802   match(Set dst (AndI dst src));
9803 
9804   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9805   opcode(0x0F, 0xB6);
9806   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9807   ins_pipe(ialu_reg);
9808 %}
9809 
9810 // And Register with Immediate 255 and promote to long
// Matches the combined pattern ConvI2L(src & 0xFF) and implements both the
// mask and the int-to-long widening with one zero-extending byte move from
// src into dst (opcode 0F B6).  Three-address form: src is not modified.
// No flags effect is declared.
9811 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9812 %{
9813   match(Set dst (ConvI2L (AndI src mask)));
9814 
9815   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9816   opcode(0x0F, 0xB6);
9817   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9818   ins_pipe(ialu_reg);
9819 %}
9820 
9821 // And Register with Immediate 65535
// Matches dst = dst & 0xFFFF (mask operand class immI_65535) and implements
// it as a zero-extending 16-bit move of dst onto itself (opcode 0F B7)
// instead of an and.  No flags effect is declared for this form.
9822 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9823 %{
9824   match(Set dst (AndI dst src));
9825 
9826   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9827   opcode(0x0F, 0xB7);
9828   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9829   ins_pipe(ialu_reg);
9830 %}
9831 
9832 // And Register with Immediate 65535 and promote to long
// Matches the combined pattern ConvI2L(src & 0xFFFF) and implements both the
// mask and the int-to-long widening with one zero-extending 16-bit move from
// src into dst (opcode 0F B7).  Three-address form: src is not modified.
// No flags effect is declared.
9833 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9834 %{
9835   match(Set dst (ConvI2L (AndI src mask)));
9836 
9837   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9838   opcode(0x0F, 0xB7);
9839   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9840   ins_pipe(ialu_reg);
9841 %}
9842 
9843 // And Register with Immediate
// Matches dst = dst & src for a 32-bit register and an arbitrary int
// immediate.  Uses the group opcode 81 with /4 selecting AND; the flags
// register is clobbered.
// NOTE(review): OpcSErm / Con8or32 presumably choose between the
// sign-extended 8-bit and the full 32-bit immediate encoding depending on
// the constant's value -- confirm against the enc_classes.
9844 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9845 %{
9846   match(Set dst (AndI dst src));
9847   effect(KILL cr);
9848 
9849   format %{ "andl    $dst, $src\t# int" %}
9850   opcode(0x81, 0x04); /* Opcode 81 /4 */
9851   ins_encode(OpcSErm(dst, src), Con8or32(src));
9852   ins_pipe(ialu_reg);
9853 %}
9854 
9855 // And Register with Memory
// Matches dst = dst & LoadI(src), folding the int load into the and as a
// memory operand.  Same primary opcode (0x23) as the register form but with
// an explicit cost of 125; the flags register is clobbered.
9856 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9857 %{
9858   match(Set dst (AndI dst (LoadI src)));
9859   effect(KILL cr);
9860 
9861   ins_cost(125);
9862   format %{ "andl    $dst, $src\t# int" %}
9863   opcode(0x23);
9864   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9865   ins_pipe(ialu_reg_mem);
9866 %}
9867 
9868 // And Memory with Register
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) & src), where
// the load and the store use the same memory operand, and emits a single and
// with a memory destination (opcode 0x21).  Note the encoder argument order:
// the register operand src comes first and the memory operand dst second.
// The flags register is clobbered.
9869 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9870 %{
9871   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9872   effect(KILL cr);
9873 
9874   ins_cost(150);
9875   format %{ "andl    $dst, $src\t# int" %}
9876   opcode(0x21); /* Opcode 21 /r */
9877   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9878   ins_pipe(ialu_mem_reg);
9879 %}
9880 
9881 // And Memory with Immediate
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) & src) with
// an int immediate and emits a single and on memory (group opcode 81, /4).
// The /4 extension is passed to RM_opc_mem as the secondary opcode.  The
// flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
9882 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9883 %{
9884   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9885   effect(KILL cr);
9886 
9887   ins_cost(125);
9888   format %{ "andl    $dst, $src\t# int" %}
9889   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9890   ins_encode(REX_mem(dst), OpcSE(src),
9891              RM_opc_mem(secondary, dst), Con8or32(src));
9892   ins_pipe(ialu_mem_imm);
9893 %}
9894 
9895 // Or Instructions
9896 // Or Register with Register
// Matches dst = dst | src for 32-bit ints with both operands in registers.
// Two-address form: dst is an input and the result.  Emitted with primary
// opcode 0x0B; the flags register is clobbered.
9897 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9898 %{
9899   match(Set dst (OrI dst src));
9900   effect(KILL cr);
9901 
9902   format %{ "orl     $dst, $src\t# int" %}
9903   opcode(0x0B);
9904   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9905   ins_pipe(ialu_reg_reg);
9906 %}
9907 
9908 // Or Register with Immediate
// Matches dst = dst | src for a 32-bit register and an int immediate.
// Uses the group opcode 81 with /1 selecting OR; the flags register is
// clobbered.
// NOTE(review): OpcSErm / Con8or32 presumably choose between the
// sign-extended 8-bit and the full 32-bit immediate encoding -- confirm.
9909 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9910 %{
9911   match(Set dst (OrI dst src));
9912   effect(KILL cr);
9913 
9914   format %{ "orl     $dst, $src\t# int" %}
9915   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9916   ins_encode(OpcSErm(dst, src), Con8or32(src));
9917   ins_pipe(ialu_reg);
9918 %}
9919 
9920 // Or Register with Memory
// Matches dst = dst | LoadI(src), folding the int load into the or as a
// memory operand.  Same primary opcode (0x0B) as the register form but with
// an explicit cost of 125; the flags register is clobbered.
9921 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9922 %{
9923   match(Set dst (OrI dst (LoadI src)));
9924   effect(KILL cr);
9925 
9926   ins_cost(125);
9927   format %{ "orl     $dst, $src\t# int" %}
9928   opcode(0x0B);
9929   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9930   ins_pipe(ialu_reg_mem);
9931 %}
9932 
9933 // Or Memory with Register
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) | src), where
// the load and the store use the same memory operand, and emits a single or
// with a memory destination (opcode 0x09).  The encoders take the register
// operand src first and the memory operand dst second.  The flags register
// is clobbered.
9934 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9935 %{
9936   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9937   effect(KILL cr);
9938 
9939   ins_cost(150);
9940   format %{ "orl     $dst, $src\t# int" %}
9941   opcode(0x09); /* Opcode 09 /r */
9942   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9943   ins_pipe(ialu_mem_reg);
9944 %}
9945 
9946 // Or Memory with Immediate
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) | src) with
// an int immediate and emits a single or on memory (group opcode 81, /1).
// The /1 extension is passed to RM_opc_mem as the secondary opcode.  The
// flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
9947 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9948 %{
9949   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9950   effect(KILL cr);
9951 
9952   ins_cost(125);
9953   format %{ "orl     $dst, $src\t# int" %}
9954   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9955   ins_encode(REX_mem(dst), OpcSE(src),
9956              RM_opc_mem(secondary, dst), Con8or32(src));
9957   ins_pipe(ialu_mem_imm);
9958 %}
9959 
9960 // Xor Instructions
9961 // Xor Register with Register
// Matches dst = dst ^ src for 32-bit ints with both operands in registers.
// Two-address form: dst is an input and the result.  Emitted with primary
// opcode 0x33; the flags register is clobbered.
9962 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9963 %{
9964   match(Set dst (XorI dst src));
9965   effect(KILL cr);
9966 
9967   format %{ "xorl    $dst, $src\t# int" %}
9968   opcode(0x33);
9969   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9970   ins_pipe(ialu_reg_reg);
9971 %}
9972 
9973 // Xor Register with Immediate -1
// Matches dst = dst ^ -1, which is a bitwise complement, and emits a single
// notl through the macro assembler.  Unlike the xorl forms, no flags effect
// is declared for this instruction.  The format text prints "notl" so that
// the listing names the instruction actually emitted and is consistent with
// the "notq" printed by the long variant.
9974 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9975   match(Set dst (XorI dst imm));
9976 
9977   format %{ "notl   $dst" %}
9978   ins_encode %{
9979      __ notl($dst$$Register);
9980   %}
9981   ins_pipe(ialu_reg);
9982 %}
9983 
9984 // Xor Register with Immediate
// Matches dst = dst ^ src for a 32-bit register and an int immediate.
// Uses the group opcode 81 with /6 selecting XOR; the flags register is
// clobbered.
// NOTE(review): OpcSErm / Con8or32 presumably choose between the
// sign-extended 8-bit and the full 32-bit immediate encoding -- confirm.
9985 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9986 %{
9987   match(Set dst (XorI dst src));
9988   effect(KILL cr);
9989 
9990   format %{ "xorl    $dst, $src\t# int" %}
9991   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9992   ins_encode(OpcSErm(dst, src), Con8or32(src));
9993   ins_pipe(ialu_reg);
9994 %}
9995 
9996 // Xor Register with Memory
// Matches dst = dst ^ LoadI(src), folding the int load into the xor as a
// memory operand.  Same primary opcode (0x33) as the register form but with
// an explicit cost of 125; the flags register is clobbered.
9997 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9998 %{
9999   match(Set dst (XorI dst (LoadI src)));
10000   effect(KILL cr);
10001 
10002   ins_cost(125);
10003   format %{ "xorl    $dst, $src\t# int" %}
10004   opcode(0x33);
10005   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10006   ins_pipe(ialu_reg_mem);
10007 %}
10008 
10009 // Xor Memory with Register
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) ^ src), where
// the load and the store use the same memory operand, and emits a single xor
// with a memory destination (opcode 0x31).  The encoders take the register
// operand src first and the memory operand dst second.  The flags register
// is clobbered.
10010 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10011 %{
10012   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10013   effect(KILL cr);
10014 
10015   ins_cost(150);
10016   format %{ "xorl    $dst, $src\t# int" %}
10017   opcode(0x31); /* Opcode 31 /r */
10018   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
10019   ins_pipe(ialu_mem_reg);
10020 %}
10021 
10022 // Xor Memory with Immediate
// Matches the read-modify-write pattern StoreI(dst, LoadI(dst) ^ src) with
// an int immediate and emits a single xor on memory (group opcode 81, /6).
// The /6 extension is passed to RM_opc_mem as the secondary opcode.  The
// flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
10023 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10024 %{
10025   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10026   effect(KILL cr);
10027 
10028   ins_cost(125);
10029   format %{ "xorl    $dst, $src\t# int" %}
10030   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10031   ins_encode(REX_mem(dst), OpcSE(src),
10032              RM_opc_mem(secondary, dst), Con8or32(src));
10033   ins_pipe(ialu_mem_imm);
10034 %}
10035 
10036 
10037 // Long Logical Instructions
10038 
10039 // And Instructions
10040 // And Register with Register
// Matches dst = dst & src for 64-bit longs with both operands in registers.
// Same primary opcode (0x23) as the int form, but the REX prefix comes from
// the _wide encoder.  The flags register is clobbered.
10041 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10042 %{
10043   match(Set dst (AndL dst src));
10044   effect(KILL cr);
10045 
10046   format %{ "andq    $dst, $src\t# long" %}
10047   opcode(0x23);
10048   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10049   ins_pipe(ialu_reg_reg);
10050 %}
10051 
10052 // And Register with Immediate 255
// Matches dst = dst & 0xFF on a long (mask operand class immL_255) and
// implements it as a zero-extending byte move of dst onto itself (opcode
// 0F B6 behind the _wide REX encoder) instead of an and.  No flags effect
// is declared for this form.
10053 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10054 %{
10055   match(Set dst (AndL dst src));
10056 
10057   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10058   opcode(0x0F, 0xB6);
10059   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10060   ins_pipe(ialu_reg);
10061 %}
10062 
10063 // And Register with Immediate 65535
// Matches dst = dst & 0xFFFF on a long (mask operand class immL_65535) and
// implements it as a zero-extending 16-bit move of dst onto itself (opcode
// 0F B7 behind the _wide REX encoder) instead of an and.  No flags effect
// is declared for this form.
10064 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10065 %{
10066   match(Set dst (AndL dst src));
10067 
10068   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10069   opcode(0x0F, 0xB7);
10070   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10071   ins_pipe(ialu_reg);
10072 %}
10073 
10074 // And Register with Immediate
// Matches dst = dst & src for a 64-bit register and a long constant of
// operand class immL32.  Uses the group opcode 81 with /4 selecting AND,
// emitted through the _wide encoder; the flags register is clobbered.
// NOTE(review): immL32 is presumably a long constant representable as a
// sign-extended 32-bit immediate -- confirm against the operand definition.
10075 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10076 %{
10077   match(Set dst (AndL dst src));
10078   effect(KILL cr);
10079 
10080   format %{ "andq    $dst, $src\t# long" %}
10081   opcode(0x81, 0x04); /* Opcode 81 /4 */
10082   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10083   ins_pipe(ialu_reg);
10084 %}
10085 
10086 // And Register with Memory
// Matches dst = dst & LoadL(src), folding the long load into the and as a
// memory operand.  Primary opcode 0x23 behind the _wide REX encoder, with an
// explicit cost of 125; the flags register is clobbered.
10087 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10088 %{
10089   match(Set dst (AndL dst (LoadL src)));
10090   effect(KILL cr);
10091 
10092   ins_cost(125);
10093   format %{ "andq    $dst, $src\t# long" %}
10094   opcode(0x23);
10095   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10096   ins_pipe(ialu_reg_mem);
10097 %}
10098 
10099 // And Memory with Register
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) & src), where
// the load and the store use the same memory operand, and emits a single
// 64-bit and with a memory destination (opcode 0x21).  The encoders take the
// register operand src first and the memory operand dst second.  The flags
// register is clobbered.
10100 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10101 %{
10102   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10103   effect(KILL cr);
10104 
10105   ins_cost(150);
10106   format %{ "andq    $dst, $src\t# long" %}
10107   opcode(0x21); /* Opcode 21 /r */
10108   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10109   ins_pipe(ialu_mem_reg);
10110 %}
10111 
10112 // And Memory with Immediate
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) & src) with
// an immL32 constant and emits a single 64-bit and on memory (group opcode
// 81, /4).  The /4 extension is passed to RM_opc_mem as the secondary
// opcode.  The flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
10113 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10114 %{
10115   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10116   effect(KILL cr);
10117 
10118   ins_cost(125);
10119   format %{ "andq    $dst, $src\t# long" %}
10120   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10121   ins_encode(REX_mem_wide(dst), OpcSE(src),
10122              RM_opc_mem(secondary, dst), Con8or32(src));
10123   ins_pipe(ialu_mem_imm);
10124 %}
10125 
10126 // Or Instructions
10127 // Or Register with Register
// Matches dst = dst | src for 64-bit longs with both operands in registers.
// Same primary opcode (0x0B) as the int form, but the REX prefix comes from
// the _wide encoder.  The flags register is clobbered.
10128 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10129 %{
10130   match(Set dst (OrL dst src));
10131   effect(KILL cr);
10132 
10133   format %{ "orq     $dst, $src\t# long" %}
10134   opcode(0x0B);
10135   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10136   ins_pipe(ialu_reg_reg);
10137 %}
10138 
10139 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches dst = dst | CastP2X(src): or a pointer, reinterpreted as an
// integer, into a long register.  The encoding is identical to orL_rReg
// (opcode 0x0B, _wide REX); only the operand class of src differs, so the
// pointer can be used directly from whatever register any_RegP allows.
// The flags register is clobbered.
10140 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10141   match(Set dst (OrL dst (CastP2X src)));
10142   effect(KILL cr);
10143 
10144   format %{ "orq     $dst, $src\t# long" %}
10145   opcode(0x0B);
10146   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10147   ins_pipe(ialu_reg_reg);
10148 %}
10149 
10150 
10151 // Or Register with Immediate
// Matches dst = dst | src for a 64-bit register and a long constant of
// operand class immL32.  Uses the group opcode 81 with /1 selecting OR,
// emitted through the _wide encoder; the flags register is clobbered.
// NOTE(review): immL32 is presumably a long constant representable as a
// sign-extended 32-bit immediate -- confirm against the operand definition.
10152 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10153 %{
10154   match(Set dst (OrL dst src));
10155   effect(KILL cr);
10156 
10157   format %{ "orq     $dst, $src\t# long" %}
10158   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10159   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10160   ins_pipe(ialu_reg);
10161 %}
10162 
10163 // Or Register with Memory
// Matches dst = dst | LoadL(src), folding the long load into the or as a
// memory operand.  Primary opcode 0x0B behind the _wide REX encoder, with an
// explicit cost of 125; the flags register is clobbered.
10164 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10165 %{
10166   match(Set dst (OrL dst (LoadL src)));
10167   effect(KILL cr);
10168 
10169   ins_cost(125);
10170   format %{ "orq     $dst, $src\t# long" %}
10171   opcode(0x0B);
10172   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10173   ins_pipe(ialu_reg_mem);
10174 %}
10175 
10176 // Or Memory with Register
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) | src), where
// the load and the store use the same memory operand, and emits a single
// 64-bit or with a memory destination (opcode 0x09).  The encoders take the
// register operand src first and the memory operand dst second.  The flags
// register is clobbered.
10177 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10178 %{
10179   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10180   effect(KILL cr);
10181 
10182   ins_cost(150);
10183   format %{ "orq     $dst, $src\t# long" %}
10184   opcode(0x09); /* Opcode 09 /r */
10185   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10186   ins_pipe(ialu_mem_reg);
10187 %}
10188 
10189 // Or Memory with Immediate
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) | src) with
// an immL32 constant and emits a single 64-bit or on memory (group opcode
// 81, /1).  The /1 extension is passed to RM_opc_mem as the secondary
// opcode.  The flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
10190 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10191 %{
10192   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10193   effect(KILL cr);
10194 
10195   ins_cost(125);
10196   format %{ "orq     $dst, $src\t# long" %}
10197   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10198   ins_encode(REX_mem_wide(dst), OpcSE(src),
10199              RM_opc_mem(secondary, dst), Con8or32(src));
10200   ins_pipe(ialu_mem_imm);
10201 %}
10202 
10203 // Xor Instructions
10204 // Xor Register with Register
// Matches dst = dst ^ src for 64-bit longs with both operands in registers.
// Same primary opcode (0x33) as the int form, but the REX prefix comes from
// the _wide encoder.  The flags register is clobbered.
10205 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10206 %{
10207   match(Set dst (XorL dst src));
10208   effect(KILL cr);
10209 
10210   format %{ "xorq    $dst, $src\t# long" %}
10211   opcode(0x33);
10212   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10213   ins_pipe(ialu_reg_reg);
10214 %}
10215 
10216 // Xor Register with Immediate -1
// Matches dst = dst ^ -1 on a long, which is a bitwise complement, and emits
// a single notq through the macro assembler.  Unlike the xorq forms, no
// flags effect is declared for this instruction.
10217 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10218   match(Set dst (XorL dst imm));  
10219 
10220   format %{ "notq   $dst" %}  
10221   ins_encode %{
10222      __ notq($dst$$Register);
10223   %}
10224   ins_pipe(ialu_reg);
10225 %}
10226 
10227 // Xor Register with Immediate
// Matches dst = dst ^ src for a 64-bit register and a long constant of
// operand class immL32.  Uses the group opcode 81 with /6 selecting XOR,
// emitted through the _wide encoder; the flags register is clobbered.
// NOTE(review): immL32 is presumably a long constant representable as a
// sign-extended 32-bit immediate -- confirm against the operand definition.
10228 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10229 %{
10230   match(Set dst (XorL dst src));
10231   effect(KILL cr);
10232 
10233   format %{ "xorq    $dst, $src\t# long" %}
10234   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10235   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10236   ins_pipe(ialu_reg);
10237 %}
10238 
10239 // Xor Register with Memory
// Matches dst = dst ^ LoadL(src), folding the long load into the xor as a
// memory operand.  Primary opcode 0x33 behind the _wide REX encoder, with an
// explicit cost of 125; the flags register is clobbered.
10240 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10241 %{
10242   match(Set dst (XorL dst (LoadL src)));
10243   effect(KILL cr);
10244 
10245   ins_cost(125);
10246   format %{ "xorq    $dst, $src\t# long" %}
10247   opcode(0x33);
10248   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10249   ins_pipe(ialu_reg_mem);
10250 %}
10251 
10252 // Xor Memory with Register
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) ^ src), where
// the load and the store use the same memory operand, and emits a single
// 64-bit xor with a memory destination (opcode 0x31).  The encoders take the
// register operand src first and the memory operand dst second.  The flags
// register is clobbered.
10253 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10254 %{
10255   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10256   effect(KILL cr);
10257 
10258   ins_cost(150);
10259   format %{ "xorq    $dst, $src\t# long" %}
10260   opcode(0x31); /* Opcode 31 /r */
10261   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10262   ins_pipe(ialu_mem_reg);
10263 %}
10264 
10265 // Xor Memory with Immediate
// Matches the read-modify-write pattern StoreL(dst, LoadL(dst) ^ src) with
// an immL32 constant and emits a single 64-bit xor on memory (group opcode
// 81, /6).  The /6 extension is passed to RM_opc_mem as the secondary
// opcode.  The flags register is clobbered.
// NOTE(review): OpcSE / Con8or32 presumably pick the sign-extended 8-bit or
// the 32-bit immediate form based on the constant -- confirm.
10266 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10267 %{
10268   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10269   effect(KILL cr);
10270 
10271   ins_cost(125);
10272   format %{ "xorq    $dst, $src\t# long" %}
10273   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10274   ins_encode(REX_mem_wide(dst), OpcSE(src),
10275              RM_opc_mem(secondary, dst), Con8or32(src));
10276   ins_pipe(ialu_mem_imm);
10277 %}
10278 
10279 // Convert Int to Boolean
// Matches Conv2B of an int: dst becomes 1 when src is non-zero and 0
// otherwise.  Implemented as the three-instruction sequence shown in the
// format: test src against itself, set the low byte of dst on "not zero",
// then zero-extend that byte to the full register.  No opcode() clause is
// used; every opcode byte is given inline in ins_encode.  The flags register
// is clobbered.
10280 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10281 %{
10282   match(Set dst (Conv2B src));
10283   effect(KILL cr);
10284 
10285   format %{ "testl   $src, $src\t# ci2b\n\t"
10286             "setnz   $dst\n\t"
10287             "movzbl  $dst, $dst" %}
10288   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10289              setNZ_reg(dst),
10290              REX_reg_breg(dst, dst), // movzbl
10291              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10292   ins_pipe(pipe_slow); // XXX
10293 %}
10294 
10295 // Convert Pointer to Boolean
// Matches Conv2B of a pointer: dst becomes 1 when src is non-null (non-zero)
// and 0 otherwise.  Same sequence as convI2B except that the test is the
// 64-bit form (testq, _wide REX encoder) because src is a pointer register.
// The result is still an int register.  The flags register is clobbered.
10296 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10297 %{
10298   match(Set dst (Conv2B src));
10299   effect(KILL cr);
10300 
10301   format %{ "testq   $src, $src\t# cp2b\n\t"
10302             "setnz   $dst\n\t"
10303             "movzbl  $dst, $dst" %}
10304   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10305              setNZ_reg(dst),
10306              REX_reg_breg(dst, dst), // movzbl
10307              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10308   ins_pipe(pipe_slow); // XXX
10309 %}
10310 
// Matches CmpLTMask(p, q): dst becomes an all-ones mask (-1) when p < q as
// signed ints and 0 otherwise.  Per the format, the sequence compares p with
// q, materialises the "less than" condition as 0/1 in the low byte of dst,
// zero-extends it, and negates it to turn 1 into -1.  Three-address form:
// p and q are only read.  The flags register is clobbered.
10311 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10312 %{
10313   match(Set dst (CmpLTMask p q));
10314   effect(KILL cr);
10315 
10316   ins_cost(400); // XXX
10317   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10318             "setlt   $dst\n\t"
10319             "movzbl  $dst, $dst\n\t"
10320             "negl    $dst" %}
10321   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10322              setLT_reg(dst),
10323              REX_reg_breg(dst, dst), // movzbl
10324              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10325              neg_reg(dst));
10326   ins_pipe(pipe_slow);
10327 %}
10328 
// Matches CmpLTMask(dst, 0) and replaces the general four-instruction
// sequence with one arithmetic right shift by 31 (the 0x1F passed to the
// encoder), which spreads the sign bit of dst across the whole register:
// -1 for negative inputs, 0 otherwise.  Much cheaper (cost 100) than the
// general cmpLTMask (cost 400).  The flags register is clobbered.
10329 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10330 %{
10331   match(Set dst (CmpLTMask dst zero));
10332   effect(KILL cr);
10333 
10334   ins_cost(100); // XXX
10335   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10336   opcode(0xC1, 0x7);  /* C1 /7 ib */
10337   ins_encode(reg_opc_imm(dst, 0x1F));
10338   ins_pipe(ialu_reg);
10339 %}
10340 
10341 
// Matches the conditional-add idiom p = ((p < q ? -1 : 0) & y) + (p - q),
// i.e. subtract q from p and add y back only when p was less than q.
// Per the format, the subtraction's borrow is turned into a 0/-1 mask in the
// scratch register tmp (sbbl tmp, tmp), masked with y, and added to p.
// tmp is declared TEMP so the allocator reserves a register for it; the
// flags register is clobbered.
// NOTE(review): the actual bytes come from enc_cmpLTP, defined elsewhere --
// the format text is taken as an accurate description of it.
10342 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10343                          rRegI tmp,
10344                          rFlagsReg cr)
10345 %{
10346   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10347   effect(TEMP tmp, KILL cr);
10348 
10349   ins_cost(400); // XXX
10350   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10351             "sbbl    $tmp, $tmp\n\t"
10352             "andl    $tmp, $y\n\t"
10353             "addl    $p, $tmp" %}
10354   ins_encode(enc_cmpLTP(p, q, y, tmp));
10355   ins_pipe(pipe_cmplt);
10356 %}
10357 
10358 /* If I enable this, I encourage spilling in the inner loop of compress.
10359 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10360 %{
10361   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10362   effect( TEMP tmp, KILL cr );
10363   ins_cost(400);
10364 
10365   format %{ "SUB    $p,$q\n\t"
10366             "SBB    RCX,RCX\n\t"
10367             "AND    RCX,$y\n\t"
10368             "ADD    $p,RCX" %}
10369   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10370 %}
10371 */
10372 
10373 //---------- FP Instructions------------------------------------------------
10374 
// Matches CmpF(src1, src2) with both floats in registers and produces the
// result in the flags register (operand class rFlagsRegU).  After the
// ucomiss (opcode 0F 2E) the cmpfp_fixup encoding handles the unordered
// (NaN) case: per the format, when the parity flag is set the flags are
// pushed, masked with 0xffffff2b and popped again, which leaves the carry
// flag set while clearing the zero and parity flags.
// NOTE(review): cmpfp_fixup is defined elsewhere; the format text is taken
// as an accurate description of it.
10375 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10376 %{
10377   match(Set cr (CmpF src1 src2));
10378 
10379   ins_cost(145);
10380   format %{ "ucomiss $src1, $src2\n\t"
10381             "jnp,s   exit\n\t"
10382             "pushfq\t# saw NaN, set CF\n\t"
10383             "andq    [rsp], #0xffffff2b\n\t"
10384             "popfq\n"
10385     "exit:   nop\t# avoid branch to branch" %}
10386   opcode(0x0F, 0x2E);
10387   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10388              cmpfp_fixup);
10389   ins_pipe(pipe_slow);
10390 %}
10391 
// Matches CmpF(src1, src2) with both floats in registers, producing flags of
// operand class rFlagsRegUCF.  This variant emits only the bare ucomiss via
// the macro assembler, with no NaN fix-up sequence.
// The cost is 100, the same as every other *CF compare variant in this
// group (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_reg_CF, ...), so that the
// matcher sees the fix-up-free form as strictly cheaper than cmpF_cc_reg
// (cost 145) instead of tying with it.
// NOTE(review): consumers of rFlagsRegUCF presumably cope with the unordered
// result themselves -- confirm against the users of that operand class.
10392 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10393   match(Set cr (CmpF src1 src2));
10394 
10395   ins_cost(100);
10396   format %{ "ucomiss $src1, $src2" %}
10397   ins_encode %{
10398     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10399   %}
10400   ins_pipe(pipe_slow);
10401 %}
10402 
// Matches CmpF(src1, LoadF(src2)), folding the float load into the compare
// as a memory operand, and produces flags of operand class rFlagsRegU.
// Same ucomiss opcode (0F 2E) and the same cmpfp_fixup NaN handling as the
// register form: per the format, on an unordered result the flags are
// pushed, masked with 0xffffff2b and popped so that only the carry flag of
// the compare result remains set.
10403 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10404 %{
10405   match(Set cr (CmpF src1 (LoadF src2)));
10406 
10407   ins_cost(145);
10408   format %{ "ucomiss $src1, $src2\n\t"
10409             "jnp,s   exit\n\t"
10410             "pushfq\t# saw NaN, set CF\n\t"
10411             "andq    [rsp], #0xffffff2b\n\t"
10412             "popfq\n"
10413     "exit:   nop\t# avoid branch to branch" %}
10414   opcode(0x0F, 0x2E);
10415   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10416              cmpfp_fixup);
10417   ins_pipe(pipe_slow);
10418 %}
10419 
// Matches CmpF(src1, LoadF(src2)) producing flags of operand class
// rFlagsRegUCF.  Emits only the ucomiss with a memory operand (opcode
// 0F 2E); unlike cmpF_cc_mem there is no cmpfp_fixup step, and the cost is
// correspondingly lower (100 instead of 145).
10420 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10421   match(Set cr (CmpF src1 (LoadF src2)));
10422 
10423   ins_cost(100);
10424   format %{ "ucomiss $src1, $src2" %}
10425   opcode(0x0F, 0x2E);
10426   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10427   ins_pipe(pipe_slow);
10428 %}
10429 
// Matches CmpF(src1, src2) where src2 is a float constant, and produces
// flags of operand class rFlagsRegU.  The constant is not an inline
// immediate: it is passed to the load_immF encoding and compared as a
// memory operand (note REX_reg_mem), which is why the format prints it in
// brackets, consistent with cmpF_imm and cmpD_cc_imm.
// After the ucomiss (opcode 0F 2E) the cmpfp_fixup encoding handles the
// unordered (NaN) case as described by the remaining format lines.
10430 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10431 %{
10432   match(Set cr (CmpF src1 src2));
10433 
10434   ins_cost(145);
10435   format %{ "ucomiss $src1, [$src2]\n\t"
10436             "jnp,s   exit\n\t"
10437             "pushfq\t# saw NaN, set CF\n\t"
10438             "andq    [rsp], #0xffffff2b\n\t"
10439             "popfq\n"
10440     "exit:   nop\t# avoid branch to branch" %}
10441   opcode(0x0F, 0x2E);
10442   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10443              cmpfp_fixup);
10444   ins_pipe(pipe_slow);
10445 %}
10446 
// Matches CmpF(src1, src2) where src2 is a float constant, producing flags
// of operand class rFlagsRegUCF.  Emits only the ucomiss (opcode 0F 2E) with
// the constant supplied through the load_immF encoding as a memory operand;
// there is no cmpfp_fixup step, and the cost is 100 instead of 145.  The
// format prints the constant in brackets, consistent with cmpD_cc_immCF.
10447 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10448   match(Set cr (CmpF src1 src2));
10449 
10450   ins_cost(100);
10451   format %{ "ucomiss $src1, [$src2]" %}
10452   opcode(0x0F, 0x2E);
10453   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10454   ins_pipe(pipe_slow);
10455 %}
10456 
// Matches CmpD(src1, src2) with both doubles in registers and produces the
// result in the flags register (operand class rFlagsRegU).  The opcode is
// the three-byte sequence 66 0F 2E (ucomisd); note that the 0x66 byte (OpcP)
// is emitted before the REX prefix and the remaining two bytes after it.
// cmpfp_fixup then handles the unordered (NaN) case as described by the
// format: flags are pushed, masked with 0xffffff2b and popped.
10457 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10458 %{
10459   match(Set cr (CmpD src1 src2));
10460 
10461   ins_cost(145);
10462   format %{ "ucomisd $src1, $src2\n\t"
10463             "jnp,s   exit\n\t"
10464             "pushfq\t# saw NaN, set CF\n\t"
10465             "andq    [rsp], #0xffffff2b\n\t"
10466             "popfq\n"
10467     "exit:   nop\t# avoid branch to branch" %}
10468   opcode(0x66, 0x0F, 0x2E);
10469   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10470              cmpfp_fixup);
10471   ins_pipe(pipe_slow);
10472 %}
10473 
// Matches CmpD(src1, src2) with both doubles in registers, producing flags
// of operand class rFlagsRegUCF.  This variant emits only the bare ucomisd
// via the macro assembler, with no NaN fix-up sequence, at cost 100.
// The format text prints exactly the emitted instruction (a stray trailing
// word was removed from it).
10474 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10475   match(Set cr (CmpD src1 src2));
10476 
10477   ins_cost(100);
10478   format %{ "ucomisd $src1, $src2" %}
10479   ins_encode %{
10480     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10481   %}
10482   ins_pipe(pipe_slow);
10483 %}
10484 
// Matches CmpD(src1, LoadD(src2)), folding the double load into the compare
// as a memory operand, and produces flags of operand class rFlagsRegU.
// Opcode bytes 66 0F 2E (ucomisd) with the 0x66 byte emitted ahead of the
// REX prefix, followed by the cmpfp_fixup NaN handling described by the
// format.
10485 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10486 %{
10487   match(Set cr (CmpD src1 (LoadD src2)));
10488 
10489   ins_cost(145);
10490   format %{ "ucomisd $src1, $src2\n\t"
10491             "jnp,s   exit\n\t"
10492             "pushfq\t# saw NaN, set CF\n\t"
10493             "andq    [rsp], #0xffffff2b\n\t"
10494             "popfq\n"
10495     "exit:   nop\t# avoid branch to branch" %}
10496   opcode(0x66, 0x0F, 0x2E);
10497   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10498              cmpfp_fixup);
10499   ins_pipe(pipe_slow);
10500 %}
10501 
// Matches CmpD(src1, LoadD(src2)) producing flags of operand class
// rFlagsRegUCF.  Emits only the ucomisd with a memory operand (opcode bytes
// 66 0F 2E, the 0x66 ahead of the REX prefix); unlike cmpD_cc_mem there is
// no cmpfp_fixup step, and the cost is 100 instead of 145.
10502 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10503   match(Set cr (CmpD src1 (LoadD src2)));
10504 
10505   ins_cost(100);
10506   format %{ "ucomisd $src1, $src2" %}
10507   opcode(0x66, 0x0F, 0x2E);
10508   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10509   ins_pipe(pipe_slow);
10510 %}
10511 
// Matches CmpD(src1, src2) where src2 is a double constant, and produces
// flags of operand class rFlagsRegU.  The constant is not an inline
// immediate: it is passed to the load_immD encoding and compared as a
// memory operand (note REX_reg_mem and the brackets in the format).
// Opcode bytes 66 0F 2E (ucomisd), followed by the cmpfp_fixup NaN handling
// described by the format.
10512 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10513 %{
10514   match(Set cr (CmpD src1 src2));
10515 
10516   ins_cost(145);
10517   format %{ "ucomisd $src1, [$src2]\n\t"
10518             "jnp,s   exit\n\t"
10519             "pushfq\t# saw NaN, set CF\n\t"
10520             "andq    [rsp], #0xffffff2b\n\t"
10521             "popfq\n"
10522     "exit:   nop\t# avoid branch to branch" %}
10523   opcode(0x66, 0x0F, 0x2E);
10524   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10525              cmpfp_fixup);
10526   ins_pipe(pipe_slow);
10527 %}
10528 
// Matches CmpD(src1, src2) where src2 is a double constant, producing flags
// of operand class rFlagsRegUCF.  Emits only the ucomisd (opcode bytes
// 66 0F 2E) with the constant supplied through the load_immD encoding as a
// memory operand; there is no cmpfp_fixup step, and the cost is 100 instead
// of 145.
10529 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10530   match(Set cr (CmpD src1 src2));
10531 
10532   ins_cost(100);
10533   format %{ "ucomisd $src1, [$src2]" %}
10534   opcode(0x66, 0x0F, 0x2E);
10535   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10536   ins_pipe(pipe_slow);
10537 %}
10538 
10539 // Compare into -1,0,1
// Matches CmpF3(src1, src2): a three-way float compare whose result (-1, 0
// or 1) is materialised in the int register dst rather than in the flags.
// Per the format: after the ucomiss, dst is preloaded with -1 and left that
// way when the result is unordered (jp) or "below" (jb); otherwise
// setne/movzbl produce 0 for equal and 1 for greater.  So a NaN operand
// yields -1.  The flags register is clobbered.
// NOTE(review): the tail of the sequence comes from the cmpfp3 encoding,
// defined elsewhere; the format text is taken as describing it accurately.
10540 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10541 %{
10542   match(Set dst (CmpF3 src1 src2));
10543   effect(KILL cr);
10544 
10545   ins_cost(275);
10546   format %{ "ucomiss $src1, $src2\n\t"
10547             "movl    $dst, #-1\n\t"
10548             "jp,s    done\n\t"
10549             "jb,s    done\n\t"
10550             "setne   $dst\n\t"
10551             "movzbl  $dst, $dst\n"
10552     "done:" %}
10553 
10554   opcode(0x0F, 0x2E);
10555   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10556              cmpfp3(dst));
10557   ins_pipe(pipe_slow);
10558 %}
10559 
10560 // Compare into -1,0,1
// Matches CmpF3(src1, LoadF(src2)): the three-way float compare with the
// second operand folded in as a memory operand.  Same ucomiss opcode
// (0F 2E) and the same cmpfp3 tail as cmpF_reg: per the format, dst is -1
// for unordered or "below", 0 for equal and 1 for greater.  The flags
// register is clobbered.
10561 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10562 %{
10563   match(Set dst (CmpF3 src1 (LoadF src2)));
10564   effect(KILL cr);
10565 
10566   ins_cost(275);
10567   format %{ "ucomiss $src1, $src2\n\t"
10568             "movl    $dst, #-1\n\t"
10569             "jp,s    done\n\t"
10570             "jb,s    done\n\t"
10571             "setne   $dst\n\t"
10572             "movzbl  $dst, $dst\n"
10573     "done:" %}
10574 
10575   opcode(0x0F, 0x2E);
10576   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10577              cmpfp3(dst));
10578   ins_pipe(pipe_slow);
10579 %}
10580 
10581 // Compare into -1,0,1
// Matches CmpF3(src1, src2) where src2 is a float constant.  The constant is
// supplied through the load_immF encoding and compared as a memory operand
// (note REX_reg_mem and the brackets in the format).  Same ucomiss opcode
// (0F 2E) and cmpfp3 tail as cmpF_reg: per the format, dst is -1 for
// unordered or "below", 0 for equal and 1 for greater.  The flags register
// is clobbered.
10582 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10583 %{
10584   match(Set dst (CmpF3 src1 src2));
10585   effect(KILL cr);
10586 
10587   ins_cost(275);
10588   format %{ "ucomiss $src1, [$src2]\n\t"
10589             "movl    $dst, #-1\n\t"
10590             "jp,s    done\n\t"
10591             "jb,s    done\n\t"
10592             "setne   $dst\n\t"
10593             "movzbl  $dst, $dst\n"
10594     "done:" %}
10595 
10596   opcode(0x0F, 0x2E);
10597   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10598              cmpfp3(dst));
10599   ins_pipe(pipe_slow);
10600 %}
10601 
10602 // Compare into -1,0,1
// Matches CmpD3(src1, src2): a three-way double compare whose result (-1, 0
// or 1) is materialised in the int register dst.  Opcode bytes 66 0F 2E
// (ucomisd), with the 0x66 byte emitted ahead of the REX prefix, followed by
// the cmpfp3 tail: per the format, dst is preloaded with -1 and kept for
// unordered (jp) or "below" (jb); otherwise setne/movzbl give 0 for equal
// and 1 for greater.  The flags register is clobbered.
10603 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10604 %{
10605   match(Set dst (CmpD3 src1 src2));
10606   effect(KILL cr);
10607 
10608   ins_cost(275);
10609   format %{ "ucomisd $src1, $src2\n\t"
10610             "movl    $dst, #-1\n\t"
10611             "jp,s    done\n\t"
10612             "jb,s    done\n\t"
10613             "setne   $dst\n\t"
10614             "movzbl  $dst, $dst\n"
10615     "done:" %}
10616 
10617   opcode(0x66, 0x0F, 0x2E);
10618   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10619              cmpfp3(dst));
10620   ins_pipe(pipe_slow);
10621 %}
10622 
10623 // Compare into -1,0,1
// Matches CmpD3(src1, LoadD(src2)): the three-way double compare with the
// second operand folded in as a memory operand.  Same ucomisd opcode bytes
// (66 0F 2E) and the same cmpfp3 tail as cmpD_reg: per the format, dst is
// -1 for unordered or "below", 0 for equal and 1 for greater.  The flags
// register is clobbered.
10624 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10625 %{
10626   match(Set dst (CmpD3 src1 (LoadD src2)));
10627   effect(KILL cr);
10628 
10629   ins_cost(275);
10630   format %{ "ucomisd $src1, $src2\n\t"
10631             "movl    $dst, #-1\n\t"
10632             "jp,s    done\n\t"
10633             "jb,s    done\n\t"
10634             "setne   $dst\n\t"
10635             "movzbl  $dst, $dst\n"
10636     "done:" %}
10637 
10638   opcode(0x66, 0x0F, 0x2E);
10639   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10640              cmpfp3(dst));
10641   ins_pipe(pipe_slow);
10642 %}
10643 
10644 // Compare into -1,0,1
// Double three-way compare against an immD operand: matches CmpD3 and issues
// ucomisd (66 0F 2E) with the constant supplied through load_immD.  Per the
// format listing, dst is preset to -1 and kept when jp (unordered) or jb
// (below) is taken; otherwise setne/movzbl leave 0 or 1.  Flags are
// clobbered (KILL cr).
10645 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10646 %{
10647   match(Set dst (CmpD3 src1 src2));
10648   effect(KILL cr);
10649 
10650   ins_cost(275);
10651   format %{ "ucomisd $src1, [$src2]\n\t"
10652             "movl    $dst, #-1\n\t"
10653             "jp,s    done\n\t"
10654             "jb,s    done\n\t"
10655             "setne   $dst\n\t"
10656             "movzbl  $dst, $dst\n"
10657     "done:" %}
10658 
10659   opcode(0x66, 0x0F, 0x2E);
10660   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10661              cmpfp3(dst));
10662   ins_pipe(pipe_slow);
10663 %}
10664 
// Float add, register source: matches (AddF dst src) and emits addss
// (opcode bytes F3 0F 58).  dst is both the left operand and the result.
10665 instruct addF_reg(regF dst, regF src)
10666 %{
10667   match(Set dst (AddF dst src));
10668 
10669   format %{ "addss   $dst, $src" %}
10670   ins_cost(150); // XXX
10671   opcode(0xF3, 0x0F, 0x58);
10672   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10673   ins_pipe(pipe_slow);
10674 %}
10675 
// Float add, memory source: matches (AddF dst (LoadF src)) so the load is
// folded into addss (F3 0F 58) as a memory operand.
10676 instruct addF_mem(regF dst, memory src)
10677 %{
10678   match(Set dst (AddF dst (LoadF src)));
10679 
10680   format %{ "addss   $dst, $src" %}
10681   ins_cost(150); // XXX
10682   opcode(0xF3, 0x0F, 0x58);
10683   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10684   ins_pipe(pipe_slow);
10685 %}
10686 
// Float add, constant source: matches (AddF dst src) with an immF operand
// and emits addss (F3 0F 58).  The operand bytes come from load_immF; the
// format prints it as a memory reference [$src].
// NOTE(review): presumably the constant is read from memory -- confirm in
// the load_immF enc_class.
10687 instruct addF_imm(regF dst, immF src)
10688 %{
10689   match(Set dst (AddF dst src));
10690 
10691   format %{ "addss   $dst, [$src]" %}
10692   ins_cost(150); // XXX
10693   opcode(0xF3, 0x0F, 0x58);
10694   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10695   ins_pipe(pipe_slow);
10696 %}
10697 
// Double add, register source: matches (AddD dst src) and emits addsd
// (opcode bytes F2 0F 58).  dst is both the left operand and the result.
10698 instruct addD_reg(regD dst, regD src)
10699 %{
10700   match(Set dst (AddD dst src));
10701 
10702   format %{ "addsd   $dst, $src" %}
10703   ins_cost(150); // XXX
10704   opcode(0xF2, 0x0F, 0x58);
10705   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10706   ins_pipe(pipe_slow);
10707 %}
10708 
// Double add, memory source: matches (AddD dst (LoadD src)) so the load is
// folded into addsd (F2 0F 58) as a memory operand.
10709 instruct addD_mem(regD dst, memory src)
10710 %{
10711   match(Set dst (AddD dst (LoadD src)));
10712 
10713   format %{ "addsd   $dst, $src" %}
10714   ins_cost(150); // XXX
10715   opcode(0xF2, 0x0F, 0x58);
10716   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10717   ins_pipe(pipe_slow);
10718 %}
10719 
// Double add, constant source: matches (AddD dst src) with an immD operand
// and emits addsd (F2 0F 58).  The operand bytes come from load_immD; the
// format prints it as a memory reference [$src].
// NOTE(review): presumably the constant is read from memory -- confirm in
// the load_immD enc_class.
10720 instruct addD_imm(regD dst, immD src)
10721 %{
10722   match(Set dst (AddD dst src));
10723 
10724   format %{ "addsd   $dst, [$src]" %}
10725   ins_cost(150); // XXX
10726   opcode(0xF2, 0x0F, 0x58);
10727   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10728   ins_pipe(pipe_slow);
10729 %}
10730 
// Float subtract, register source: matches (SubF dst src) and emits subss
// (opcode bytes F3 0F 5C).  dst is both the minuend and the result.
10731 instruct subF_reg(regF dst, regF src)
10732 %{
10733   match(Set dst (SubF dst src));
10734 
10735   format %{ "subss   $dst, $src" %}
10736   ins_cost(150); // XXX
10737   opcode(0xF3, 0x0F, 0x5C);
10738   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10739   ins_pipe(pipe_slow);
10740 %}
10741 
// Float subtract, memory source: matches (SubF dst (LoadF src)) so the load
// is folded into subss (F3 0F 5C) as a memory operand.
10742 instruct subF_mem(regF dst, memory src)
10743 %{
10744   match(Set dst (SubF dst (LoadF src)));
10745 
10746   format %{ "subss   $dst, $src" %}
10747   ins_cost(150); // XXX
10748   opcode(0xF3, 0x0F, 0x5C);
10749   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10750   ins_pipe(pipe_slow);
10751 %}
10752 
// Float subtract, constant source: matches (SubF dst src) with an immF
// operand and emits subss (F3 0F 5C).  The operand bytes come from
// load_immF; the format prints it as a memory reference [$src].
10753 instruct subF_imm(regF dst, immF src)
10754 %{
10755   match(Set dst (SubF dst src));
10756 
10757   format %{ "subss   $dst, [$src]" %}
10758   ins_cost(150); // XXX
10759   opcode(0xF3, 0x0F, 0x5C);
10760   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10761   ins_pipe(pipe_slow);
10762 %}
10763 
// Double subtract, register source: matches (SubD dst src) and emits subsd
// (opcode bytes F2 0F 5C).  dst is both the minuend and the result.
10764 instruct subD_reg(regD dst, regD src)
10765 %{
10766   match(Set dst (SubD dst src));
10767 
10768   format %{ "subsd   $dst, $src" %}
10769   ins_cost(150); // XXX
10770   opcode(0xF2, 0x0F, 0x5C);
10771   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10772   ins_pipe(pipe_slow);
10773 %}
10774 
// Double subtract, memory source: matches (SubD dst (LoadD src)) so the
// load is folded into subsd (F2 0F 5C) as a memory operand.
10775 instruct subD_mem(regD dst, memory src)
10776 %{
10777   match(Set dst (SubD dst (LoadD src)));
10778 
10779   format %{ "subsd   $dst, $src" %}
10780   ins_cost(150); // XXX
10781   opcode(0xF2, 0x0F, 0x5C);
10782   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10783   ins_pipe(pipe_slow);
10784 %}
10785 
// Double subtract, constant source: matches (SubD dst src) with an immD
// operand and emits subsd (F2 0F 5C).  The operand bytes come from
// load_immD; the format prints it as a memory reference [$src].
10786 instruct subD_imm(regD dst, immD src)
10787 %{
10788   match(Set dst (SubD dst src));
10789 
10790   format %{ "subsd   $dst, [$src]" %}
10791   ins_cost(150); // XXX
10792   opcode(0xF2, 0x0F, 0x5C);
10793   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10794   ins_pipe(pipe_slow);
10795 %}
10796 
// Float multiply, register source: matches (MulF dst src) and emits mulss
// (opcode bytes F3 0F 59).  dst is both an input and the result.
10797 instruct mulF_reg(regF dst, regF src)
10798 %{
10799   match(Set dst (MulF dst src));
10800 
10801   format %{ "mulss   $dst, $src" %}
10802   ins_cost(150); // XXX
10803   opcode(0xF3, 0x0F, 0x59);
10804   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10805   ins_pipe(pipe_slow);
10806 %}
10807 
// Float multiply, memory source: matches (MulF dst (LoadF src)) so the load
// is folded into mulss (F3 0F 59) as a memory operand.
10808 instruct mulF_mem(regF dst, memory src)
10809 %{
10810   match(Set dst (MulF dst (LoadF src)));
10811 
10812   format %{ "mulss   $dst, $src" %}
10813   ins_cost(150); // XXX
10814   opcode(0xF3, 0x0F, 0x59);
10815   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10816   ins_pipe(pipe_slow);
10817 %}
10818 
// Float multiply, constant source: matches (MulF dst src) with an immF
// operand and emits mulss (F3 0F 59).  The operand bytes come from
// load_immF; the format prints it as a memory reference [$src].
10819 instruct mulF_imm(regF dst, immF src)
10820 %{
10821   match(Set dst (MulF dst src));
10822 
10823   format %{ "mulss   $dst, [$src]" %}
10824   ins_cost(150); // XXX
10825   opcode(0xF3, 0x0F, 0x59);
10826   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10827   ins_pipe(pipe_slow);
10828 %}
10829 
// Double multiply, register source: matches (MulD dst src) and emits mulsd
// (opcode bytes F2 0F 59).  dst is both an input and the result.
10830 instruct mulD_reg(regD dst, regD src)
10831 %{
10832   match(Set dst (MulD dst src));
10833 
10834   format %{ "mulsd   $dst, $src" %}
10835   ins_cost(150); // XXX
10836   opcode(0xF2, 0x0F, 0x59);
10837   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10838   ins_pipe(pipe_slow);
10839 %}
10840 
// Double multiply, memory source: matches (MulD dst (LoadD src)) so the
// load is folded into mulsd (F2 0F 59) as a memory operand.
10841 instruct mulD_mem(regD dst, memory src)
10842 %{
10843   match(Set dst (MulD dst (LoadD src)));
10844 
10845   format %{ "mulsd   $dst, $src" %}
10846   ins_cost(150); // XXX
10847   opcode(0xF2, 0x0F, 0x59);
10848   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10849   ins_pipe(pipe_slow);
10850 %}
10851 
// Double multiply, constant source: matches (MulD dst src) with an immD
// operand and emits mulsd (F2 0F 59).  The operand bytes come from
// load_immD; the format prints it as a memory reference [$src].
10852 instruct mulD_imm(regD dst, immD src)
10853 %{
10854   match(Set dst (MulD dst src));
10855 
10856   format %{ "mulsd   $dst, [$src]" %}
10857   ins_cost(150); // XXX
10858   opcode(0xF2, 0x0F, 0x59);
10859   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10860   ins_pipe(pipe_slow);
10861 %}
10862 
// Float divide, register source: matches (DivF dst src) and emits divss
// (opcode bytes F3 0F 5E).  dst is both the dividend and the result.
10863 instruct divF_reg(regF dst, regF src)
10864 %{
10865   match(Set dst (DivF dst src));
10866 
10867   format %{ "divss   $dst, $src" %}
10868   ins_cost(150); // XXX
10869   opcode(0xF3, 0x0F, 0x5E);
10870   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10871   ins_pipe(pipe_slow);
10872 %}
10873 
// Float divide, memory source: matches (DivF dst (LoadF src)) so the load
// is folded into divss (F3 0F 5E) as a memory operand.
10874 instruct divF_mem(regF dst, memory src)
10875 %{
10876   match(Set dst (DivF dst (LoadF src)));
10877 
10878   format %{ "divss   $dst, $src" %}
10879   ins_cost(150); // XXX
10880   opcode(0xF3, 0x0F, 0x5E);
10881   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10882   ins_pipe(pipe_slow);
10883 %}
10884 
// Float divide, constant source: matches (DivF dst src) with an immF
// operand and emits divss (F3 0F 5E).  The operand bytes come from
// load_immF; the format prints it as a memory reference [$src].
10885 instruct divF_imm(regF dst, immF src)
10886 %{
10887   match(Set dst (DivF dst src));
10888 
10889   format %{ "divss   $dst, [$src]" %}
10890   ins_cost(150); // XXX
10891   opcode(0xF3, 0x0F, 0x5E);
10892   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10893   ins_pipe(pipe_slow);
10894 %}
10895 
// Double divide, register source: matches (DivD dst src) and emits divsd
// (opcode bytes F2 0F 5E).  dst is both the dividend and the result.
10896 instruct divD_reg(regD dst, regD src)
10897 %{
10898   match(Set dst (DivD dst src));
10899 
10900   format %{ "divsd   $dst, $src" %}
10901   ins_cost(150); // XXX
10902   opcode(0xF2, 0x0F, 0x5E);
10903   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10904   ins_pipe(pipe_slow);
10905 %}
10906 
// Double divide, memory source: matches (DivD dst (LoadD src)) so the load
// is folded into divsd (F2 0F 5E) as a memory operand.
10907 instruct divD_mem(regD dst, memory src)
10908 %{
10909   match(Set dst (DivD dst (LoadD src)));
10910 
10911   format %{ "divsd   $dst, $src" %}
10912   ins_cost(150); // XXX
10913   opcode(0xF2, 0x0F, 0x5E);
10914   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10915   ins_pipe(pipe_slow);
10916 %}
10917 
// Double divide, constant source: matches (DivD dst src) with an immD
// operand and emits divsd (F2 0F 5E).  The operand bytes come from
// load_immD; the format prints it as a memory reference [$src].
10918 instruct divD_imm(regD dst, immD src)
10919 %{
10920   match(Set dst (DivD dst src));
10921 
10922   format %{ "divsd   $dst, [$src]" %}
10923   ins_cost(150); // XXX
10924   opcode(0xF2, 0x0F, 0x5E);
10925   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10926   ins_pipe(pipe_slow);
10927 %}
10928 
// Float square root, register source.  There is no float sqrt node matched
// here; instead the rule recognizes the whole tree
// ConvD2F(SqrtD(ConvF2D src)) and replaces it with one sqrtss
// (opcode bytes F3 0F 51).
10929 instruct sqrtF_reg(regF dst, regF src)
10930 %{
10931   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10932 
10933   format %{ "sqrtss  $dst, $src" %}
10934   ins_cost(150); // XXX
10935   opcode(0xF3, 0x0F, 0x51);
10936   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10937   ins_pipe(pipe_slow);
10938 %}
10939 
// Float square root, memory source: recognizes
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and replaces the whole tree with one
// sqrtss (F3 0F 51) taking a memory operand.
10940 instruct sqrtF_mem(regF dst, memory src)
10941 %{
10942   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10943 
10944   format %{ "sqrtss  $dst, $src" %}
10945   ins_cost(150); // XXX
10946   opcode(0xF3, 0x0F, 0x51);
10947   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10948   ins_pipe(pipe_slow);
10949 %}
10950 
// Float square root of a constant: recognizes ConvD2F(SqrtD(ConvF2D src))
// with an immF operand and emits sqrtss (F3 0F 51).  The operand bytes come
// from load_immF; the format prints it as a memory reference [$src].
10951 instruct sqrtF_imm(regF dst, immF src)
10952 %{
10953   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10954 
10955   format %{ "sqrtss  $dst, [$src]" %}
10956   ins_cost(150); // XXX
10957   opcode(0xF3, 0x0F, 0x51);
10958   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10959   ins_pipe(pipe_slow);
10960 %}
10961 
// Double square root, register source: matches (SqrtD src) and emits sqrtsd
// (opcode bytes F2 0F 51).  Unlike the two-operand arithmetic rules, dst is
// write-only here.
10962 instruct sqrtD_reg(regD dst, regD src)
10963 %{
10964   match(Set dst (SqrtD src));
10965 
10966   format %{ "sqrtsd  $dst, $src" %}
10967   ins_cost(150); // XXX
10968   opcode(0xF2, 0x0F, 0x51);
10969   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10970   ins_pipe(pipe_slow);
10971 %}
10972 
// Double square root, memory source: matches (SqrtD (LoadD src)) so the
// load is folded into sqrtsd (F2 0F 51) as a memory operand.
10973 instruct sqrtD_mem(regD dst, memory src)
10974 %{
10975   match(Set dst (SqrtD (LoadD src)));
10976 
10977   format %{ "sqrtsd  $dst, $src" %}
10978   ins_cost(150); // XXX
10979   opcode(0xF2, 0x0F, 0x51);
10980   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10981   ins_pipe(pipe_slow);
10982 %}
10983 
// Double square root of a constant: matches (SqrtD src) with an immD
// operand and emits sqrtsd (F2 0F 51).  The operand bytes come from
// load_immD; the format prints it as a memory reference [$src].
10984 instruct sqrtD_imm(regD dst, immD src)
10985 %{
10986   match(Set dst (SqrtD src));
10987 
10988   format %{ "sqrtsd  $dst, [$src]" %}
10989   ins_cost(150); // XXX
10990   opcode(0xF2, 0x0F, 0x51);
10991   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10992   ins_pipe(pipe_slow);
10993 %}
10994 
// Float absolute value, in place: matches (AbsF dst).  The format shows an
// andps against a 0x7fffffff mask (clears the sign bit); the actual bytes
// are produced by the absF_encoding enc_class defined outside this section.
10995 instruct absF_reg(regF dst)
10996 %{
10997   match(Set dst (AbsF dst));
10998 
10999   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
11000   ins_encode(absF_encoding(dst));
11001   ins_pipe(pipe_slow);
11002 %}
11003 
// Double absolute value, in place: matches (AbsD dst).  The format shows an
// andpd against a 0x7fffffffffffffff mask (clears the sign bit); the actual
// bytes are produced by the absD_encoding enc_class defined outside this
// section.
11004 instruct absD_reg(regD dst)
11005 %{
11006   match(Set dst (AbsD dst));
11007 
11008   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11009             "# abs double by sign masking" %}
11010   ins_encode(absD_encoding(dst));
11011   ins_pipe(pipe_slow);
11012 %}
11013 
// Float negate, in place: matches (NegF dst).  The format shows an xorps
// against a 0x80000000 mask (flips the sign bit); the actual bytes are
// produced by the negF_encoding enc_class defined outside this section.
11014 instruct negF_reg(regF dst)
11015 %{
11016   match(Set dst (NegF dst));
11017 
11018   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11019   ins_encode(negF_encoding(dst));
11020   ins_pipe(pipe_slow);
11021 %}
11022 
// Double negate, in place: matches (NegD dst).  The format shows an xorpd
// against a 0x8000000000000000 mask (flips the sign bit); the actual bytes
// are produced by the negD_encoding enc_class defined outside this section.
11023 instruct negD_reg(regD dst)
11024 %{
11025   match(Set dst (NegD dst));
11026 
11027   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11028             "# neg double by sign flipping" %}
11029   ins_encode(negD_encoding(dst));
11030   ins_pipe(pipe_slow);
11031 %}
11032 
11033 // -----------Trig and Transcendental Instructions------------------------------
// Double cosine, in place: matches (CosD dst).  The two opcode bytes D9 FF
// (x87 fcos) are emitted between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): those enc_classes presumably move the XMM value onto the
// x87 stack and back -- confirm at their definitions.
11034 instruct cosD_reg(regD dst) %{
11035   match(Set dst (CosD dst));
11036 
11037   format %{ "dcos   $dst\n\t" %}
11038   opcode(0xD9, 0xFF);
11039   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11040   ins_pipe( pipe_slow );
11041 %}
11042 
// Double sine, in place: matches (SinD dst).  The two opcode bytes D9 FE
// (x87 fsin) are emitted between Push_SrcXD(dst) and Push_ResultXD(dst).
// NOTE(review): those enc_classes presumably move the XMM value onto the
// x87 stack and back -- confirm at their definitions.
11043 instruct sinD_reg(regD dst) %{
11044   match(Set dst (SinD dst));
11045 
11046   format %{ "dsin   $dst\n\t" %}
11047   opcode(0xD9, 0xFE);
11048   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11049   ins_pipe( pipe_slow );
11050 %}
11051 
// Double tangent, in place: matches (TanD dst).  Between Push_SrcXD(dst)
// and Push_ResultXD(dst) the encoding emits fptan followed by an fstp that
// discards the top x87 stack entry (fptan leaves an extra 1.0 above the
// result, per the Intel instruction reference).
11052 instruct tanD_reg(regD dst) %{
11053   match(Set dst (TanD dst));
11054 
11055   format %{ "dtan   $dst\n\t" %}
11056   ins_encode( Push_SrcXD(dst),
11057               Opcode(0xD9), Opcode(0xF2),   //fptan
11058               Opcode(0xDD), Opcode(0xD8),   //fstp st
11059               Push_ResultXD(dst) );
11060   ins_pipe( pipe_slow );
11061 %}
11062 
// Base-10 logarithm of a double, in place: matches (Log10D dst).  The
// constant is pushed with fldlg2 *before* Push_SrcXD(dst) so that fyl2x
// finds the source on top of the x87 stack and the constant beneath it.
11063 instruct log10D_reg(regD dst) %{
11064   // The source and result Double operands in XMM registers
11065   match(Set dst (Log10D dst));
11066   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11067   // fyl2x        ; compute log_10(2) * log_2(x)
11068   format %{ "fldlg2\t\t\t#Log10\n\t"
11069             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11070          %}
11071    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11072               Push_SrcXD(dst),
11073               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11074               Push_ResultXD(dst));
11075 
11076   ins_pipe( pipe_slow );
11077 %}
11078 
// Natural logarithm of a double, in place: matches (LogD dst).  The
// constant is pushed with fldln2 *before* Push_SrcXD(dst) so that fyl2x
// finds the source on top of the x87 stack and the constant beneath it.
11079 instruct logD_reg(regD dst) %{
11080   // The source and result Double operands in XMM registers
11081   match(Set dst (LogD dst));
11082   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11083   // fyl2x        ; compute log_e(2) * log_2(x)
11084   format %{ "fldln2\t\t\t#Log_e\n\t"
11085             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11086          %}
11087   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11088               Push_SrcXD(dst),
11089               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11090               Push_ResultXD(dst));
11091   ins_pipe( pipe_slow );
11092 %}
11093 
11094 
11095 
11096 //----------Arithmetic Conversion Instructions---------------------------------
11097 
// RoundFloat is a no-op in this description: the rule matches
// (RoundFloat dst) in place with zero cost and an empty encoding, so no
// bytes are emitted for it.
11098 instruct roundFloat_nop(regF dst)
11099 %{
11100   match(Set dst (RoundFloat dst));
11101 
11102   ins_cost(0);
11103   ins_encode();
11104   ins_pipe(empty);
11105 %}
11106 
// RoundDouble is a no-op in this description: the rule matches
// (RoundDouble dst) in place with zero cost and an empty encoding, so no
// bytes are emitted for it.
11107 instruct roundDouble_nop(regD dst)
11108 %{
11109   match(Set dst (RoundDouble dst));
11110 
11111   ins_cost(0);
11112   ins_encode();
11113   ins_pipe(empty);
11114 %}
11115 
// Float -> double conversion, register source: matches (ConvF2D src) and
// emits cvtss2sd (opcode bytes F3 0F 5A).
11116 instruct convF2D_reg_reg(regD dst, regF src)
11117 %{
11118   match(Set dst (ConvF2D src));
11119 
11120   format %{ "cvtss2sd $dst, $src" %}
11121   opcode(0xF3, 0x0F, 0x5A);
11122   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11123   ins_pipe(pipe_slow); // XXX
11124 %}
11125 
// Float -> double conversion, memory source: matches (ConvF2D (LoadF src))
// so the load is folded into cvtss2sd (F3 0F 5A) as a memory operand.
11126 instruct convF2D_reg_mem(regD dst, memory src)
11127 %{
11128   match(Set dst (ConvF2D (LoadF src)));
11129 
11130   format %{ "cvtss2sd $dst, $src" %}
11131   opcode(0xF3, 0x0F, 0x5A);
11132   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11133   ins_pipe(pipe_slow); // XXX
11134 %}
11135 
// Double -> float conversion, register source: matches (ConvD2F src) and
// emits cvtsd2ss (opcode bytes F2 0F 5A).
11136 instruct convD2F_reg_reg(regF dst, regD src)
11137 %{
11138   match(Set dst (ConvD2F src));
11139 
11140   format %{ "cvtsd2ss $dst, $src" %}
11141   opcode(0xF2, 0x0F, 0x5A);
11142   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11143   ins_pipe(pipe_slow); // XXX
11144 %}
11145 
// Double -> float conversion, memory source: matches (ConvD2F (LoadD src))
// so the load is folded into cvtsd2ss (F2 0F 5A) as a memory operand.
11146 instruct convD2F_reg_mem(regF dst, memory src)
11147 %{
11148   match(Set dst (ConvD2F (LoadD src)));
11149 
11150   format %{ "cvtsd2ss $dst, $src" %}
11151   opcode(0xF2, 0x0F, 0x5A);
11152   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11153   ins_pipe(pipe_slow); // XXX
11154 %}
11155 
11156 // XXX do mem variants
// Float -> int conversion: matches (ConvF2I src) and emits the truncating
// cvttss2si (opcode bytes F3 0F 2C).  Per the format listing, a result of
// 0x80000000 sends control to a slow path that spills src to the stack,
// calls f2i_fixup and pops the corrected value into dst; any other result
// skips it.  The slow-path bytes come from the f2i_fixup enc_class defined
// outside this section.  The compare clobbers the flags (KILL cr).
11157 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11158 %{
11159   match(Set dst (ConvF2I src));
11160   effect(KILL cr);
11161 
11162   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11163             "cmpl    $dst, #0x80000000\n\t"
11164             "jne,s   done\n\t"
11165             "subq    rsp, #8\n\t"
11166             "movss   [rsp], $src\n\t"
11167             "call    f2i_fixup\n\t"
11168             "popq    $dst\n"
11169     "done:   "%}
11170   opcode(0xF3, 0x0F, 0x2C);
11171   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11172              f2i_fixup(dst, src));
11173   ins_pipe(pipe_slow);
11174 %}
11175 
// Float -> long conversion: matches (ConvF2L src) and emits the truncating
// cvttss2si in its 64-bit form (F3 0F 2C with the REX_reg_reg_wide prefix).
// Per the format listing, a result of 0x8000000000000000 sends control to a
// slow path that spills src, calls f2l_fixup and pops the corrected value
// into dst.  The compare clobbers the flags (KILL cr).
11176 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11177 %{
11178   match(Set dst (ConvF2L src));
11179   effect(KILL cr);
11180 
11181   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11182             "cmpq    $dst, [0x8000000000000000]\n\t"
11183             "jne,s   done\n\t"
11184             "subq    rsp, #8\n\t"
11185             "movss   [rsp], $src\n\t"
11186             "call    f2l_fixup\n\t"
11187             "popq    $dst\n"
11188     "done:   "%}
11189   opcode(0xF3, 0x0F, 0x2C);
11190   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11191              f2l_fixup(dst, src));
11192   ins_pipe(pipe_slow);
11193 %}
11194 
// Double -> int conversion: matches (ConvD2I src) and emits the truncating
// cvttsd2si (opcode bytes F2 0F 2C).  Per the format listing, a result of
// 0x80000000 sends control to a slow path that spills src, calls d2i_fixup
// and pops the corrected value into dst.  The compare clobbers the flags
// (KILL cr).
11195 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11196 %{
11197   match(Set dst (ConvD2I src));
11198   effect(KILL cr);
11199 
11200   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11201             "cmpl    $dst, #0x80000000\n\t"
11202             "jne,s   done\n\t"
11203             "subq    rsp, #8\n\t"
11204             "movsd   [rsp], $src\n\t"
11205             "call    d2i_fixup\n\t"
11206             "popq    $dst\n"
11207     "done:   "%}
11208   opcode(0xF2, 0x0F, 0x2C);
11209   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11210              d2i_fixup(dst, src));
11211   ins_pipe(pipe_slow);
11212 %}
11213 
// Double -> long conversion: matches (ConvD2L src) and emits the truncating
// cvttsd2si in its 64-bit form (F2 0F 2C with the REX_reg_reg_wide prefix).
// Per the format listing, a result of 0x8000000000000000 sends control to a
// slow path that spills src, calls d2l_fixup and pops the corrected value
// into dst.  The compare clobbers the flags (KILL cr).
11214 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11215 %{
11216   match(Set dst (ConvD2L src));
11217   effect(KILL cr);
11218 
11219   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11220             "cmpq    $dst, [0x8000000000000000]\n\t"
11221             "jne,s   done\n\t"
11222             "subq    rsp, #8\n\t"
11223             "movsd   [rsp], $src\n\t"
11224             "call    d2l_fixup\n\t"
11225             "popq    $dst\n"
11226     "done:   "%}
11227   opcode(0xF2, 0x0F, 0x2C);
11228   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11229              d2l_fixup(dst, src));
11230   ins_pipe(pipe_slow);
11231 %}
11232 
// Int -> float conversion from a general register: matches (ConvI2F src)
// and emits cvtsi2ss (opcode bytes F3 0F 2A).  Only selected when the
// UseXmmI2F flag is off; convXI2F_reg covers the flag-on case.
11233 instruct convI2F_reg_reg(regF dst, rRegI src)
11234 %{
11235   predicate(!UseXmmI2F);
11236   match(Set dst (ConvI2F src));
11237 
11238   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11239   opcode(0xF3, 0x0F, 0x2A);
11240   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11241   ins_pipe(pipe_slow); // XXX
11242 %}
11243 
// Int -> float conversion with the int loaded from memory: matches
// (ConvI2F (LoadI src)) so the load is folded into cvtsi2ss (F3 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2F predicate.
11244 instruct convI2F_reg_mem(regF dst, memory src)
11245 %{
11246   match(Set dst (ConvI2F (LoadI src)));
11247 
11248   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11249   opcode(0xF3, 0x0F, 0x2A);
11250   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11251   ins_pipe(pipe_slow); // XXX
11252 %}
11253 
// Int -> double conversion from a general register: matches (ConvI2D src)
// and emits cvtsi2sd (opcode bytes F2 0F 2A).  Only selected when the
// UseXmmI2D flag is off; convXI2D_reg covers the flag-on case.
11254 instruct convI2D_reg_reg(regD dst, rRegI src)
11255 %{
11256   predicate(!UseXmmI2D);
11257   match(Set dst (ConvI2D src));
11258 
11259   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11260   opcode(0xF2, 0x0F, 0x2A);
11261   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11262   ins_pipe(pipe_slow); // XXX
11263 %}
11264 
// Int -> double conversion with the int loaded from memory: matches
// (ConvI2D (LoadI src)) so the load is folded into cvtsi2sd (F2 0F 2A).
// Unlike the register form, this rule carries no UseXmmI2D predicate.
11265 instruct convI2D_reg_mem(regD dst, memory src)
11266 %{
11267   match(Set dst (ConvI2D (LoadI src)));
11268 
11269   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11270   opcode(0xF2, 0x0F, 0x2A);
11271   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11272   ins_pipe(pipe_slow); // XXX
11273 %}
11274 
// Int -> float conversion done inside the XMM unit: selected for
// (ConvI2F src) when the UseXmmI2F flag is on.  The int is first copied
// into dst with movdl and then converted in place with cvtdq2ps, both
// emitted through the macro assembler.
11275 instruct convXI2F_reg(regF dst, rRegI src)
11276 %{
11277   predicate(UseXmmI2F);
11278   match(Set dst (ConvI2F src));
11279 
11280   format %{ "movdl $dst, $src\n\t"
11281             "cvtdq2psl $dst, $dst\t# i2f" %}
11282   ins_encode %{
11283     __ movdl($dst$$XMMRegister, $src$$Register);
11284     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11285   %}
11286   ins_pipe(pipe_slow); // XXX
11287 %}
11288 
// Int -> double conversion done inside the XMM unit: selected for
// (ConvI2D src) when the UseXmmI2D flag is on.  The int is first copied
// into dst with movdl and then converted in place with cvtdq2pd, both
// emitted through the macro assembler.
11289 instruct convXI2D_reg(regD dst, rRegI src)
11290 %{
11291   predicate(UseXmmI2D);
11292   match(Set dst (ConvI2D src));
11293 
11294   format %{ "movdl $dst, $src\n\t"
11295             "cvtdq2pdl $dst, $dst\t# i2d" %}
11296   ins_encode %{
11297     __ movdl($dst$$XMMRegister, $src$$Register);
11298     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11299   %}
11300   ins_pipe(pipe_slow); // XXX
11301 %}
11302 
// Long -> float conversion from a general register: matches (ConvL2F src)
// and emits the 64-bit form of cvtsi2ss (F3 0F 2A with the
// REX_reg_reg_wide prefix).
11303 instruct convL2F_reg_reg(regF dst, rRegL src)
11304 %{
11305   match(Set dst (ConvL2F src));
11306 
11307   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11308   opcode(0xF3, 0x0F, 0x2A);
11309   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11310   ins_pipe(pipe_slow); // XXX
11311 %}
11312 
// Long -> float conversion with the long loaded from memory: matches
// (ConvL2F (LoadL src)) so the load is folded into the 64-bit form of
// cvtsi2ss (F3 0F 2A with the REX_reg_mem_wide prefix).
11313 instruct convL2F_reg_mem(regF dst, memory src)
11314 %{
11315   match(Set dst (ConvL2F (LoadL src)));
11316 
11317   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11318   opcode(0xF3, 0x0F, 0x2A);
11319   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11320   ins_pipe(pipe_slow); // XXX
11321 %}
11322 
// Long -> double conversion from a general register: matches (ConvL2D src)
// and emits the 64-bit form of cvtsi2sd (F2 0F 2A with the
// REX_reg_reg_wide prefix).
11323 instruct convL2D_reg_reg(regD dst, rRegL src)
11324 %{
11325   match(Set dst (ConvL2D src));
11326 
11327   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11328   opcode(0xF2, 0x0F, 0x2A);
11329   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11330   ins_pipe(pipe_slow); // XXX
11331 %}
11332 
// Long -> double conversion with the long loaded from memory: matches
// (ConvL2D (LoadL src)) so the load is folded into the 64-bit form of
// cvtsi2sd (F2 0F 2A with the REX_reg_mem_wide prefix).
11333 instruct convL2D_reg_mem(regD dst, memory src)
11334 %{
11335   match(Set dst (ConvL2D (LoadL src)));
11336 
11337   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11338   opcode(0xF2, 0x0F, 0x2A);
11339   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11340   ins_pipe(pipe_slow); // XXX
11341 %}
11342 
// Int -> long conversion (sign-extending): matches (ConvI2L src) and emits
// movslq through the macro assembler.
11343 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11344 %{
11345   match(Set dst (ConvI2L src));
11346 
11347   ins_cost(125);
11348   format %{ "movslq  $dst, $src\t# i2l" %}
11349   ins_encode %{
11350     __ movslq($dst$$Register, $src$$Register);
11351   %}
11352   ins_pipe(ialu_reg_reg);
11353 %}
11354 
11355 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11356 // %{
11357 //   match(Set dst (ConvI2L src));
11358 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11359 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11360 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11361 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11362 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11363 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11364 
11365 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11366 //   ins_encode(enc_copy(dst, src));
11367 // //   opcode(0x63); // needs REX.W
11368 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11369 //   ins_pipe(ialu_reg_reg);
11370 // %}
11371 
11372 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) as a whole, so the sign extension plus
// masking collapses into a single 32-bit movl emitted by enc_copy.
// NOTE(review): this relies on immL_32bits accepting only the 0xFFFFFFFF
// mask -- confirm at the operand definition.
11373 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11374 %{
11375   match(Set dst (AndL (ConvI2L src) mask));
11376 
11377   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11378   ins_encode(enc_copy(dst, src));
11379   ins_pipe(ialu_reg_reg);
11380 %}
11381 
11382 // Zero-extend convert int to long
// Memory form: matches (AndL (ConvI2L (LoadI src)) mask) as a whole, so the
// load, sign extension and masking collapse into one 32-bit movl load
// (opcode byte 8B).
// NOTE(review): this relies on immL_32bits accepting only the 0xFFFFFFFF
// mask -- confirm at the operand definition.
11383 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11384 %{
11385   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11386 
11387   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11388   opcode(0x8B);
11389   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11390   ins_pipe(ialu_reg_mem);
11391 %}
11392 
// Zero-extend the low 32 bits of a long: matches (AndL src mask) with an
// immL_32bits mask and replaces the AND with a 32-bit movl.  It uses
// enc_copy_always rather than enc_copy.
// NOTE(review): presumably so the move is emitted even when dst and src
// are the same register (it performs the zeroing) -- confirm in the
// enc_class.
11393 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11394 %{
11395   match(Set dst (AndL src mask));
11396 
11397   format %{ "movl    $dst, $src\t# zero-extend long" %}
11398   ins_encode(enc_copy_always(dst, src));
11399   ins_pipe(ialu_reg_reg);
11400 %}
11401 
// Long -> int conversion: matches (ConvL2I src) and emits a 32-bit movl
// through enc_copy_always, keeping only the low half of src.
11402 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11403 %{
11404   match(Set dst (ConvL2I src));
11405 
11406   format %{ "movl    $dst, $src\t# l2i" %}
11407   ins_encode(enc_copy_always(dst, src));
11408   ins_pipe(ialu_reg_reg);
11409 %}
11410 
11411 
// Raw bit move float -> int, stack slot to general register: matches
// (MoveF2I src) and loads the slot with movl (opcode byte 8B).
11412 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11413   match(Set dst (MoveF2I src));
11414   effect(DEF dst, USE src);
11415 
11416   ins_cost(125);
11417   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11418   opcode(0x8B);
11419   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11420   ins_pipe(ialu_reg_mem);
11421 %}
11422 
// Raw bit move int -> float, stack slot to XMM register: matches
// (MoveI2F src) and loads the slot with movss (opcode bytes F3 0F 10).
11423 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11424   match(Set dst (MoveI2F src));
11425   effect(DEF dst, USE src);
11426 
11427   ins_cost(125);
11428   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11429   opcode(0xF3, 0x0F, 0x10);
11430   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11431   ins_pipe(pipe_slow);
11432 %}
11433 
// Raw bit move double -> long, stack slot to general register: matches
// (MoveD2L src) and loads the slot with a 64-bit movq (opcode byte 8B with
// the REX_reg_mem_wide prefix).
11434 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11435   match(Set dst (MoveD2L src));
11436   effect(DEF dst, USE src);
11437 
11438   ins_cost(125);
11439   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11440   opcode(0x8B);
11441   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11442   ins_pipe(ialu_reg_mem);
11443 %}
11444 
// Raw bit move long -> double, stack slot to XMM register, movlpd flavour:
// matches (MoveL2D src) and loads the slot with movlpd (66 0F 12).  Only
// selected when the UseXmmLoadAndClearUpper flag is off; MoveL2D_stack_reg
// is the flag-on counterpart.  The format text carries the label
// "MoveL2D_stack_reg", i.e. the same label as that counterpart.
11445 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11446   predicate(!UseXmmLoadAndClearUpper);
11447   match(Set dst (MoveL2D src));
11448   effect(DEF dst, USE src);
11449 
11450   ins_cost(125);
11451   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11452   opcode(0x66, 0x0F, 0x12);
11453   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11454   ins_pipe(pipe_slow);
11455 %}
11456 
// Raw bit move long -> double, stack slot to XMM register, movsd flavour:
// matches (MoveL2D src) and loads the slot with movsd (F2 0F 10).  Only
// selected when the UseXmmLoadAndClearUpper flag is on;
// MoveL2D_stack_reg_partial is the flag-off counterpart.
11457 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11458   predicate(UseXmmLoadAndClearUpper);
11459   match(Set dst (MoveL2D src));
11460   effect(DEF dst, USE src);
11461 
11462   ins_cost(125);
11463   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11464   opcode(0xF2, 0x0F, 0x10);
11465   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11466   ins_pipe(pipe_slow);
11467 %}
11468 
11469 
// Raw bit move float -> int, XMM register to stack slot: matches
// (MoveF2I src) and stores with movss (opcode bytes F3 0F 11).  Because
// this is a store, the encoding classes receive (src, dst): the register
// comes first and the memory operand second.
11470 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11471   match(Set dst (MoveF2I src));
11472   effect(DEF dst, USE src);
11473 
11474   ins_cost(95); // XXX
11475   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11476   opcode(0xF3, 0x0F, 0x11);
11477   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11478   ins_pipe(pipe_slow);
11479 %}
11480 
// Raw bit move int -> float, general register to stack slot: matches
// (MoveI2F src) and stores with movl (opcode byte 89).  Because this is a
// store, the encoding classes receive (src, dst): the register comes first
// and the memory operand second.
11481 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11482   match(Set dst (MoveI2F src));
11483   effect(DEF dst, USE src);
11484 
11485   ins_cost(100);
11486   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11487   opcode(0x89);
11488   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11489   ins_pipe( ialu_mem_reg );
11490 %}
11491 
// Raw bit move double -> long, XMM register to stack slot: matches
// (MoveD2L src) and stores with movsd (opcode bytes F2 0F 11).  Because
// this is a store, the encoding classes receive (src, dst): the register
// comes first and the memory operand second.
// The format label names this rule (MoveD2L_reg_stack) so that listings can
// be told apart from the long -> double store rule MoveL2D_reg_stack.
11492 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11493   match(Set dst (MoveD2L src));
11494   effect(DEF dst, USE src);
11495 
11496   ins_cost(95); // XXX
11497   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11498   opcode(0xF2, 0x0F, 0x11);
11499   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11500   ins_pipe(pipe_slow);
11501 %}
11502 
// Raw bit move long -> double, general register to stack slot: matches
// (MoveL2D src) and stores with a 64-bit movq (opcode byte 89 with the
// REX_reg_mem_wide prefix).  Because this is a store, the encoding classes
// receive (src, dst): the register comes first and the memory operand
// second.
11503 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11504   match(Set dst (MoveL2D src));
11505   effect(DEF dst, USE src);
11506 
11507   ins_cost(100);
11508   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11509   opcode(0x89);
11510   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11511   ins_pipe(ialu_mem_reg);
11512 %}
11513 
// Raw bit move float -> int directly between registers: matches
// (MoveF2I src) and emits the macro assembler's movdl from the XMM register
// to the general register.  Its cost (85) is lower than the stack-based
// MoveF2I rules (95 and 125).
11514 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11515   match(Set dst (MoveF2I src));
11516   effect(DEF dst, USE src);
11517   ins_cost(85);
11518   format %{ "movd    $dst,$src\t# MoveF2I" %}
11519   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11520   ins_pipe( pipe_slow );
11521 %}
11522 
// Raw bit move double -> long directly between registers: matches
// (MoveD2L src) and emits the macro assembler's movdq from the XMM register
// to the general register.  The format prints the mnemonic as "movd".
11523 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11524   match(Set dst (MoveD2L src));
11525   effect(DEF dst, USE src);
11526   ins_cost(85);
11527   format %{ "movd    $dst,$src\t# MoveD2L" %}
11528   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11529   ins_pipe( pipe_slow );
11530 %}
11531 
11532 // The next instructions have long latency and use Int unit. Set high cost.
// Raw bit move int -> float directly between registers: matches
// (MoveI2F src) and emits the macro assembler's movdl from the general
// register to the XMM register.  The cost of 300 is higher than the
// stack-based MoveI2F rules (100 and 125), so the matcher prefers those.
11533 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11534   match(Set dst (MoveI2F src));
11535   effect(DEF dst, USE src);
11536   ins_cost(300);
11537   format %{ "movd    $dst,$src\t# MoveI2F" %}
11538   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11539   ins_pipe( pipe_slow );
11540 %}
11541 
// Raw bit move long -> double directly between registers: matches
// (MoveL2D src) and emits the macro assembler's movdq from the general
// register to the XMM register.  The cost of 300 is higher than the
// stack-based MoveL2D rules (100 and 125), so the matcher prefers those.
11542 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11543   match(Set dst (MoveL2D src));
11544   effect(DEF dst, USE src);
11545   ins_cost(300);
11546   format %{ "movd    $dst,$src\t# MoveL2D" %}
11547   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11548   ins_pipe( pipe_slow );
11549 %}
11550 
11551 // Replicate scalar to packed byte (1 byte) values in xmm
// XMM-source form: matches (Replicate8B src).  The format lists a
// MOVDQA / PUNPCKLBW / PSHUFLW sequence; the bytes themselves come from the
// pshufd_8x8 enc_class defined outside this section.
11552 instruct Repl8B_reg(regD dst, regD src) %{
11553   match(Set dst (Replicate8B src));
11554   format %{ "MOVDQA  $dst,$src\n\t"
11555             "PUNPCKLBW $dst,$dst\n\t"
11556             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11557   ins_encode( pshufd_8x8(dst, src));
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 // Replicate scalar to packed byte (1 byte) values in xmm
// General-register-source form: matches (Replicate8B src).  The value is
// first moved into dst with mov_i2x, then pshufd_8x8 runs with dst as both
// source and destination.
11562 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11563   match(Set dst (Replicate8B src));
11564   format %{ "MOVD    $dst,$src\n\t"
11565             "PUNPCKLBW $dst,$dst\n\t"
11566             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11567   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11568   ins_pipe( pipe_slow );
11569 %}
11570 
11571 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Matches (Replicate8B zero) for the immI0 operand and clears dst by
// XOR-ing it with itself (PXOR dst,dst); no source register is needed.
11572 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11573   match(Set dst (Replicate8B zero));
11574   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11575   ins_encode( pxor(dst, dst));
11576   ins_pipe( fpu_reg_reg );
11577 %}
11578 
11579 // Replicate scalar to packed short (2 byte) values in xmm
// XMM-source form: matches (Replicate4S src) and emits PSHUFLW with
// selector 0x00 through the pshufd_4x16 enc_class.
11580 instruct Repl4S_reg(regD dst, regD src) %{
11581   match(Set dst (Replicate4S src));
11582   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11583   ins_encode( pshufd_4x16(dst, src));
11584   ins_pipe( fpu_reg_reg );
11585 %}
11586 
11587 // Replicate scalar to packed short (2 byte) values in xmm
// General-register-source form: matches (Replicate4S src).  The value is
// first moved into dst with mov_i2x, then pshufd_4x16 runs with dst as both
// source and destination.
11588 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11589   match(Set dst (Replicate4S src));
11590   format %{ "MOVD    $dst,$src\n\t"
11591             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11592   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11593   ins_pipe( fpu_reg_reg );
11594 %}
11595 
11596 // Replicate scalar zero to packed short (2 byte) values in xmm
// Matches (Replicate4S zero) for the immI0 operand and clears dst by
// XOR-ing it with itself (PXOR dst,dst); no source register is needed.
11597 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11598   match(Set dst (Replicate4S zero));
11599   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11600   ins_encode( pxor(dst, dst));
11601   ins_pipe( fpu_reg_reg );
11602 %}
11603 
11604 // Replicate scalar to packed char (2 byte) values in xmm
// XMM-source form: matches (Replicate4C src) and emits PSHUFLW with
// selector 0x00 through the pshufd_4x16 enc_class.
11605 instruct Repl4C_reg(regD dst, regD src) %{
11606   match(Set dst (Replicate4C src));
11607   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11608   ins_encode( pshufd_4x16(dst, src));
11609   ins_pipe( fpu_reg_reg );
11610 %}
11611 
11612 // Replicate scalar to packed char (2 byte) values in xmm
      // Source is a general-purpose int register: mov_i2x into $dst, then
      // shuffle within $dst.
11613 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11614   match(Set dst (Replicate4C src));
11615   format %{ "MOVD    $dst,$src\n\t"
11616             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11617   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11618   ins_pipe( fpu_reg_reg );
11619 %}
11620 
11621 // Replicate scalar zero to packed char (2 byte) values in xmm
      // Zero case: PXOR of $dst with itself; 'zero' is not referenced by the
      // encoding.
11622 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11623   match(Set dst (Replicate4C zero));
11624   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11625   ins_encode( pxor(dst, dst));
11626   ins_pipe( fpu_reg_reg );
11627 %}
11628 
11629 // Replicate scalar to packed integer (4 byte) values in xmm
      // Source already lives in an xmm register; PSHUFD with selector 0x00
      // picks dword 0 of $src for every destination dword.
11630 instruct Repl2I_reg(regD dst, regD src) %{
11631   match(Set dst (Replicate2I src));
11632   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11633   ins_encode( pshufd(dst, src, 0x00));
11634   ins_pipe( fpu_reg_reg );
11635 %}
11636 
11637 // Replicate scalar to packed integer (4 byte) values in xmm
      // Source is a general-purpose int register: mov_i2x into $dst, then
      // PSHUFD with selector 0x00 within $dst.
11638 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11639   match(Set dst (Replicate2I src));
11640   format %{ "MOVD   $dst,$src\n\t"
11641             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11642   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11643   ins_pipe( fpu_reg_reg );
11644 %}
11645 
11646 // Replicate scalar zero to packed integer (4 byte) values in xmm
      // Zero case: PXOR of $dst with itself; 'zero' is not referenced by the
      // encoding.
11647 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11648   match(Set dst (Replicate2I zero));
11649   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11650   ins_encode( pxor(dst, dst));
11651   ins_pipe( fpu_reg_reg );
11652 %}
11653 
11654 // Replicate scalar to packed single precision floating point values in xmm
      // Source operand is declared regD here; the regF-source twin is
      // Repl2F_regF.  Selector 0xe0 (binary 11 10 00 00) takes dword 0 of
      // $src for the two low destination dwords and keeps dwords 2 and 3.
11655 instruct Repl2F_reg(regD dst, regD src) %{
11656   match(Set dst (Replicate2F src));
11657   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11658   ins_encode( pshufd(dst, src, 0xe0));
11659   ins_pipe( fpu_reg_reg );
11660 %}
11661 
11662 // Replicate scalar to packed single precision floating point values in xmm
      // Identical encoding to Repl2F_reg, but the source operand is a
      // single-precision register (regF).
11663 instruct Repl2F_regF(regD dst, regF src) %{
11664   match(Set dst (Replicate2F src));
11665   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11666   ins_encode( pshufd(dst, src, 0xe0));
11667   ins_pipe( fpu_reg_reg );
11668 %}
11669 
11670 // Replicate scalar zero to packed single precision floating point values in xmm
      // Matches only the immF0 constant operand; PXOR of $dst with itself
      // produces the all-zero bit pattern.
11671 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11672   match(Set dst (Replicate2F zero));
11673   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11674   ins_encode( pxor(dst, dst));
11675   ins_pipe( fpu_reg_reg );
11676 %}
11677 
11678 
11679 // =======================================================================
11680 // fast clearing of an array
      // Matches ClearArray(cnt, base).  The emitted sequence zeroes eax and then
      // executes REP STOSQ, which stores rax at [rdi], advancing rdi and
      // decrementing rcx.  Hence cnt and base are USE_KILL, and the 'zero'
      // operand and the flags (clobbered by the xor) are KILL.
11681 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11682                   rFlagsReg cr)
11683 %{
11684   match(Set dummy (ClearArray cnt base));
11685   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11686 
11687   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11688             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11689   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11690              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11691   ins_pipe(pipe_slow);
11692 %}
11693 
11694 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11695                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11696 %{
        // Matches StrComp over two (pointer, count) pairs and delegates code
        // generation to the macro assembler's string_compare().  All four
        // inputs are USE_KILL, the two xmm registers are TEMPs, and flags are
        // killed.
11697   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11698   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11699 
11700   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11701   ins_encode %{
11702     __ string_compare($str1$$Register, $str2$$Register,
11703                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11704                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11705   %}
11706   ins_pipe( pipe_slow );
11707 %}
11708 
11709 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11710                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11711 %{
        // Matches StrIndexOf; only selectable when UseSSE42Intrinsics is set.
        // Code generation is delegated to the macro assembler's
        // string_indexof().  Inputs are USE_KILL, tmp1 (xmm) is a TEMP, and
        // tmp2 and flags are KILL.
11712   predicate(UseSSE42Intrinsics);
11713   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11714   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11715 
11716   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11717   ins_encode %{
11718     __ string_indexof($str1$$Register, $str2$$Register,
11719                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11720                       $tmp1$$XMMRegister, $tmp2$$Register);
11721   %}
11722   ins_pipe( pipe_slow );
11723 %}
11724 
11725 // fast string equals
      // Matches StrEquals(str1, str2, cnt).  Shares the macro assembler routine
      // char_arrays_equals() with array_equals; the leading 'false' argument
      // distinguishes this caller from array_equals, which passes 'true'.
11726 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11727                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11728 %{
11729   match(Set result (StrEquals (Binary str1 str2) cnt));
11730   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11731 
11732   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11733   ins_encode %{
11734     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11735                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11736                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11737   %}
11738   ins_pipe( pipe_slow );
11739 %}
11740 
11741 // fast array equals
      // Matches AryEq(ary1, ary2).  There is no count input: tmp3 is passed in
      // the argument position where string_equals passes its count, so it is
      // presumably filled in by char_arrays_equals() itself when the leading
      // flag is 'true' -- TODO confirm against the macro assembler.
11742 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11743                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11744 %{
11745   match(Set result (AryEq ary1 ary2));
11746   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11747   //ins_cost(300);
11748 
11749   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11750   ins_encode %{
11751     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11752                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11753                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11754   %}
11755   ins_pipe( pipe_slow );
11756 %}
11757 
11758 //----------Control Flow Instructions------------------------------------------
11759 // Signed compare Instructions
11760 
11761 // XXX more variants!!
11762 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11763 %{
        // Signed 32-bit register/register compare producing flags.
        // Besides being matched directly, this instruct is instantiated by
        // the expand rules of minI_rReg and maxI_rReg.
11764   match(Set cr (CmpI op1 op2));
11765   effect(DEF cr, USE op1, USE op2);
11766 
11767   format %{ "cmpl    $op1, $op2" %}
11768   opcode(0x3B);  /* Opcode 3B /r */
11769   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11770   ins_pipe(ialu_cr_reg_reg);
11771 %}
11772 
11773 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11774 %{
        // Signed 32-bit compare of a register against an immediate.
        // The immediate is emitted via Con8or32, i.e. as either an 8-bit or a
        // 32-bit constant; OpcSErm presumably selects the matching opcode
        // form -- see the enc_class definitions.
11775   match(Set cr (CmpI op1 op2));
11776 
11777   format %{ "cmpl    $op1, $op2" %}
11778   opcode(0x81, 0x07); /* Opcode 81 /7 */
11779   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11780   ins_pipe(ialu_cr_reg_imm);
11781 %}
11782 
11783 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11784 %{
        // Signed 32-bit compare of a register against a value loaded from
        // memory; the LoadI is folded into the compare's memory operand.
11785   match(Set cr (CmpI op1 (LoadI op2)));
11786 
11787   ins_cost(500); // XXX
11788   format %{ "cmpl    $op1, $op2" %}
11789   opcode(0x3B); /* Opcode 3B /r */
11790   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11791   ins_pipe(ialu_cr_reg_mem);
11792 %}
11793 
11794 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11795 %{
        // Compare of an int register against constant zero, emitted as
        // TEST src,src (opcode 0x85) so no immediate needs to be encoded.
11796   match(Set cr (CmpI src zero));
11797 
11798   format %{ "testl   $src, $src" %}
11799   opcode(0x85);
11800   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11801   ins_pipe(ialu_cr_reg_imm);
11802 %}
11803 
11804 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11805 %{
        // Folds (src & con) compared with zero into one TEST r32, imm32
        // (opcode F7 /0); the AndI result is never materialized.
11806   match(Set cr (CmpI (AndI src con) zero));
11807 
11808   format %{ "testl   $src, $con" %}
11809   opcode(0xF7, 0x00);
11810   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11811   ins_pipe(ialu_cr_reg_imm);
11812 %}
11813 
11814 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11815 %{
        // Folds (src & load(mem)) compared with zero into one TEST with a
        // memory operand (opcode 0x85).
11816   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11817 
11818   format %{ "testl   $src, $mem" %}
11819   opcode(0x85);
11820   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11821   ins_pipe(ialu_cr_reg_mem);
11822 %}
11823 
11824 // Unsigned compare Instructions; really, same as signed except they
11825 // produce an rFlagsRegU instead of rFlagsReg.
      // The emitted bytes are those of compI_rReg (opcode 0x3B); only the
      // flags operand class, and therefore which consumers can match it, differs.
11826 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11827 %{
11828   match(Set cr (CmpU op1 op2));
11829 
11830   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11831   opcode(0x3B); /* Opcode 3B /r */
11832   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11833   ins_pipe(ialu_cr_reg_reg);
11834 %}
11835 
11836 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11837 %{
        // Unsigned 32-bit compare of a register against an immediate; same
        // encoding as compI_rReg_imm but yields rFlagsRegU.
11838   match(Set cr (CmpU op1 op2));
11839 
11840   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11841   opcode(0x81,0x07); /* Opcode 81 /7 */
11842   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11843   ins_pipe(ialu_cr_reg_imm);
11844 %}
11845 
11846 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11847 %{
        // Unsigned 32-bit compare of a register against a value loaded from
        // memory; same encoding as compI_rReg_mem but yields rFlagsRegU.
11848   match(Set cr (CmpU op1 (LoadI op2)));
11849 
11850   ins_cost(500); // XXX
11851   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11852   opcode(0x3B); /* Opcode 3B /r */
11853   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11854   ins_pipe(ialu_cr_reg_mem);
11855 %}
11856 
11857 // // // Cisc-spilled version of cmpU_rReg
11858 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11859 // //%{
11860 // //  match(Set cr (CmpU (LoadI op1) op2));
11861 // //
11862 // //  format %{ "CMPu   $op1,$op2" %}
11863 // //  ins_cost(500);
11864 // //  opcode(0x39);  /* Opcode 39 /r */
11865 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11866 // //%}
11867 
11868 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11869 %{
        // Unsigned compare of an int register against constant zero, emitted
        // as TEST src,src (opcode 0x85).
11870   match(Set cr (CmpU src zero));
11871 
11872   format %{ "testl  $src, $src\t# unsigned" %}
11873   opcode(0x85);
11874   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11875   ins_pipe(ialu_cr_reg_imm);
11876 %}
11877 
11878 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11879 %{
        // Pointer compare of two registers.  Uses the *_wide REX encoding for
        // a 64-bit cmpq and produces unsigned flags (rFlagsRegU).
11880   match(Set cr (CmpP op1 op2));
11881 
11882   format %{ "cmpq    $op1, $op2\t# ptr" %}
11883   opcode(0x3B); /* Opcode 3B /r */
11884   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11885   ins_pipe(ialu_cr_reg_reg);
11886 %}
11887 
11888 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11889 %{
        // Pointer compare of a register against a pointer loaded from memory
        // (LoadP folded into the cmpq).  Shares its match rule with
        // compP_mem_rReg, which adds a predicate and omits this ins_cost.
11890   match(Set cr (CmpP op1 (LoadP op2)));
11891 
11892   ins_cost(500); // XXX
11893   format %{ "cmpq    $op1, $op2\t# ptr" %}
11894   opcode(0x3B); /* Opcode 3B /r */
11895   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11896   ins_pipe(ialu_cr_reg_mem);
11897 %}
11898 
11899 // // // Cisc-spilled version of cmpP_rReg
11900 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11901 // //%{
11902 // //  match(Set cr (CmpP (LoadP op1) op2));
11903 // //
11904 // //  format %{ "CMPu   $op1,$op2" %}
11905 // //  ins_cost(500);
11906 // //  opcode(0x39);  /* Opcode 39 /r */
11907 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11908 // //%}
11909 
11910 // XXX this is generalized by compP_rReg_mem???
11911 // Compare raw pointer (used in out-of-heap check).
11912 // Only works because non-oop pointers must be raw pointers
11913 // and raw pointers have no anti-dependencies.
      // The predicate walks from the CmpP node (n) to its second input (the
      // LoadP) and then to that node's in(2) -- presumably the load's address
      // input, TODO confirm -- and accepts the match only when that type is
      // not an oop pointer.
11914 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11915 %{
11916   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11917   match(Set cr (CmpP op1 (LoadP op2)));
11918 
11919   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11920   opcode(0x3B); /* Opcode 3B /r */
11921   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11922   ins_pipe(ialu_cr_reg_mem);
11923 %}
11924 
11925 // This will generate a signed flags result. This should be OK since
11926 // any compare to a zero should be eq/neq.
      // Null check of a pointer register, emitted as a 64-bit TEST src,src.
11927 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11928 %{
11929   match(Set cr (CmpP src zero));
11930 
11931   format %{ "testq   $src, $src\t# ptr" %}
11932   opcode(0x85);
11933   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11934   ins_pipe(ialu_cr_reg_imm);
11935 %}
11936 
11937 // This will generate a signed flags result. This should be OK since
11938 // any compare to a zero should be eq/neq.
      // Null check of a pointer in memory.  Selected when compressed oops are
      // off or the narrow-oop base is non-NULL; testP_mem_reg0 covers the
      // complementary case.  The 32-bit immediate 0xFFFFFFFF is sign-extended
      // by the REX.W form of F7 /0, giving the all-ones 64-bit mask shown in
      // the format string.
11939 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11940 %{
11941   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11942   match(Set cr (CmpP (LoadP op) zero));
11943 
11944   ins_cost(500); // XXX
11945   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11946   opcode(0xF7); /* Opcode F7 /0 */
11947   ins_encode(REX_mem_wide(op),
11948              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11949   ins_pipe(ialu_cr_reg_imm);
11950 %}
11951 
11952 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11953 %{
        // Null check of a pointer in memory for the configuration where
        // compressed oops are on and the narrow-oop base is NULL.  Per the
        // format string R12 (heap base) then holds zero, so comparing R12
        // with memory avoids encoding an immediate.  Note the compare is
        // R12 - mem, so only eq/ne conditions are meaningful.
11954   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11955   match(Set cr (CmpP (LoadP mem) zero));
11956 
11957   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11958   ins_encode %{
11959     __ cmpq(r12, $mem$$Address);
11960   %}
11961   ins_pipe(ialu_cr_reg_mem);
11962 %}
11963 
11964 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11965 %{
        // Compare of two compressed (narrow) oop registers; a 32-bit cmpl
        // producing unsigned flags.
11966   match(Set cr (CmpN op1 op2));
11967 
11968   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11969   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11970   ins_pipe(ialu_cr_reg_reg);
11971 %}
11972 
11973 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11974 %{
        // Compare of a narrow oop register against a narrow oop loaded from
        // memory (LoadN folded into the cmpl).
11975   match(Set cr (CmpN src (LoadN mem)));
11976 
11977   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11978   ins_encode %{
11979     __ cmpl($src$$Register, $mem$$Address);
11980   %}
11981   ins_pipe(ialu_cr_reg_mem);
11982 %}
11983 
11984 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a narrow oop register against a narrow oop constant.
        // The constant is handed to cmp_narrow_oop() as a jobject.
11985   match(Set cr (CmpN op1 op2));
11986 
11987   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11988   ins_encode %{
11989     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11990   %}
11991   ins_pipe(ialu_cr_reg_imm);
11992 %}
11993 
11994 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11995 %{
        // Compare of a narrow oop constant against a narrow oop in memory.
        // NOTE(review): the match rule orders the operands (src, load mem)
        // while the emitted compare is "mem, src", so the flags reflect the
        // swapped operand order; this is only harmless for eq/ne tests --
        // confirm that consumers never use ordering conditions here.
11996   match(Set cr (CmpN src (LoadN mem)));
11997 
11998   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11999   ins_encode %{
12000     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12001   %}
12002   ins_pipe(ialu_cr_reg_mem);
12003 %}
12004 
12005 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a narrow oop register, emitted as a 32-bit
        // TEST src,src.
12006   match(Set cr (CmpN src zero));
12007 
12008   format %{ "testl   $src, $src\t# compressed ptr" %}
12009   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12010   ins_pipe(ialu_cr_reg_imm);
12011 %}
12012 
12013 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12014 %{
        // Null check of a narrow oop in memory when the narrow-oop base is
        // non-NULL (testN_mem_reg0 covers the base == NULL case).
        //
        // The match rule compares the loaded value with zero, so the flags
        // must report "equal" exactly when the memory word is 0.  The compare
        // is therefore made against the immediate 0; comparing against
        // 0xFFFFFFFF would instead report "equal" for an all-ones word.
        // "cmpl mem, 0" produces the same ZF/SF (and cleared CF/OF) as
        // "testl mem, 0xffffffff" and uses the cmpl(Address, imm32)
        // assembler form this instruct already relied on.
12015   predicate(Universe::narrow_oop_base() != NULL);
12016   match(Set cr (CmpN (LoadN mem) zero));
12017 
12018   ins_cost(500); // XXX
12019   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
12020   ins_encode %{
12021     __ cmpl($mem$$Address, 0);
12022   %}
12023   ins_pipe(ialu_cr_reg_mem);
12024 %}
12025 
12026 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12027 %{
        // Null check of a narrow oop in memory when the narrow-oop base is
        // NULL.  Per the format string R12 (heap base) then holds zero, so it
        // serves as the zero operand of a 32-bit compare.  The compare is
        // R12 - mem, so only eq/ne conditions are meaningful.
12028   predicate(Universe::narrow_oop_base() == NULL);
12029   match(Set cr (CmpN (LoadN mem) zero));
12030 
12031   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12032   ins_encode %{
12033     __ cmpl(r12, $mem$$Address);
12034   %}
12035   ins_pipe(ialu_cr_reg_mem);
12036 %}
12037 
12038 // Yanked all unsigned pointer compare operations.
12039 // Pointer compares are done with CmpP which is already unsigned.
12040 
12041 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12042 %{
        // Signed 64-bit register/register compare (REX-wide cmpq).
12043   match(Set cr (CmpL op1 op2));
12044 
12045   format %{ "cmpq    $op1, $op2" %}
12046   opcode(0x3B);  /* Opcode 3B /r */
12047   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12048   ins_pipe(ialu_cr_reg_reg);
12049 %}
12050 
12051 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12052 %{
        // Signed 64-bit compare of a register against an immediate.  The
        // operand class immL32 restricts the constant so it can be emitted by
        // Con8or32 as an 8- or 32-bit immediate.
12053   match(Set cr (CmpL op1 op2));
12054 
12055   format %{ "cmpq    $op1, $op2" %}
12056   opcode(0x81, 0x07); /* Opcode 81 /7 */
12057   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12058   ins_pipe(ialu_cr_reg_imm);
12059 %}
12060 
12061 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12062 %{
        // Signed 64-bit compare of a register against a long loaded from
        // memory.  NOTE(review): unlike the int and pointer reg/mem compares
        // in this section, no explicit ins_cost is given here -- confirm
        // whether that is intentional.
12063   match(Set cr (CmpL op1 (LoadL op2)));
12064 
12065   format %{ "cmpq    $op1, $op2" %}
12066   opcode(0x3B); /* Opcode 3B /r */
12067   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12068   ins_pipe(ialu_cr_reg_mem);
12069 %}
12070 
12071 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12072 %{
        // Compare of a long register against constant zero, emitted as a
        // 64-bit TEST src,src.
12073   match(Set cr (CmpL src zero));
12074 
12075   format %{ "testq   $src, $src" %}
12076   opcode(0x85);
12077   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12078   ins_pipe(ialu_cr_reg_imm);
12079 %}
12080 
12081 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12082 %{
        // Folds (src & con) compared with zero into one 64-bit TEST with a
        // 32-bit immediate (F7 /0 with a wide REX prefix).
12083   match(Set cr (CmpL (AndL src con) zero));
12084 
12085   format %{ "testq   $src, $con\t# long" %}
12086   opcode(0xF7, 0x00);
12087   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12088   ins_pipe(ialu_cr_reg_imm);
12089 %}
12090 
12091 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12092 %{
        // Folds (src & load(mem)) compared with zero into one 64-bit TEST
        // with a memory operand.
12093   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12094 
12095   format %{ "testq   $src, $mem" %}
12096   opcode(0x85);
12097   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12098   ins_pipe(ialu_cr_reg_mem);
12099 %}
12100 
12101 // Manifest a CmpL result in an integer register.  Very painful.
12102 // This is the test to avoid.
      // Per the format string: $dst is preset to -1 and kept if src1 < src2;
      // otherwise setne/movzbl leave 0 (equal) or 1 (greater) in $dst.  The
      // compare clobbers flags, hence KILL flags.
12103 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12104 %{
12105   match(Set dst (CmpL3 src1 src2));
12106   effect(KILL flags);
12107 
12108   ins_cost(275); // XXX
12109   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12110             "movl    $dst, -1\n\t"
12111             "jl,s    done\n\t"
12112             "setne   $dst\n\t"
12113             "movzbl  $dst, $dst\n\t"
12114     "done:" %}
12115   ins_encode(cmpl3_flag(src1, src2, dst));
12116   ins_pipe(pipe_slow);
12117 %}
12118 
12119 //----------Max and Min--------------------------------------------------------
12120 // Min Instructions
12121 
12122 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12123 %{
        // Helper with no match rule: it is instantiated only through the
        // expand block of minI_rReg.  Conditional move (0F 4F, "greater"):
        // $dst is replaced by $src when the preceding compare found
        // dst > src, leaving the minimum in $dst.
12124   effect(USE_DEF dst, USE src, USE cr);
12125 
12126   format %{ "cmovlgt $dst, $src\t# min" %}
12127   opcode(0x0F, 0x4F);
12128   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12129   ins_pipe(pipe_cmov_reg);
12130 %}
12131 
12132 
12133 instruct minI_rReg(rRegI dst, rRegI src)
12134 %{
        // dst = min(dst, src), expanded into a compare of dst with src
        // followed by a conditional move of src into dst on "greater".
        // The flags register is a temporary local to the expansion.
12135   match(Set dst (MinI dst src));
12136 
12137   ins_cost(200);
12138   expand %{
12139     rFlagsReg cr;
12140     compI_rReg(cr, dst, src);
12141     cmovI_reg_g(dst, src, cr);
12142   %}
12143 %}
12144 
12145 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12146 %{
        // Helper with no match rule: it is instantiated only through the
        // expand block of maxI_rReg.  Conditional move (0F 4C, "less"):
        // $dst is replaced by $src when the preceding compare found
        // dst < src, leaving the maximum in $dst.
12147   effect(USE_DEF dst, USE src, USE cr);
12148 
12149   format %{ "cmovllt $dst, $src\t# max" %}
12150   opcode(0x0F, 0x4C);
12151   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12152   ins_pipe(pipe_cmov_reg);
12153 %}
12154 
12155 
12156 instruct maxI_rReg(rRegI dst, rRegI src)
12157 %{
        // dst = max(dst, src), expanded into a compare of dst with src
        // followed by a conditional move of src into dst on "less".
        // The flags register is a temporary local to the expansion.
12158   match(Set dst (MaxI dst src));
12159 
12160   ins_cost(200);
12161   expand %{
12162     rFlagsReg cr;
12163     compI_rReg(cr, dst, src);
12164     cmovI_reg_l(dst, src, cr);
12165   %}
12166 %}
12167 
12168 // ============================================================================
12169 // Branch Instructions
12170 
12171 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional jump: opcode 0xE9 followed by the
      // label displacement, 5 bytes in total (size(5)).  jmpDir_short is the
      // 2-byte replacement used when the target is close enough.
12172 instruct jmpDir(label labl)
12173 %{
12174   match(Goto);
12175   effect(USE labl);
12176 
12177   ins_cost(300);
12178   format %{ "jmp     $labl" %}
12179   size(5);
12180   opcode(0xE9);
12181   ins_encode(OpcP, Lbl(labl));
12182   ins_pipe(pipe_jmp);
12183   ins_pc_relative(1);
12184 %}
12185 
12186 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the signed conditional branch: two opcode bytes
      // (0x0F, 0x80 combined with the condition by the Jcc encoding) plus the
      // displacement, 6 bytes in total (size(6)).
12187 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12188 %{
12189   match(If cop cr);
12190   effect(USE labl);
12191 
12192   ins_cost(300);
12193   format %{ "j$cop     $labl" %}
12194   size(6);
12195   opcode(0x0F, 0x80);
12196   ins_encode(Jcc(cop, labl));
12197   ins_pipe(pipe_jcc);
12198   ins_pc_relative(1);
12199 %}
12200 
12201 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matches the CountedLoopEnd ideal node
      // (back-branch of a counted loop) with signed flags.
12202 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12203 %{
12204   match(CountedLoopEnd cop cr);
12205   effect(USE labl);
12206 
12207   ins_cost(300);
12208   format %{ "j$cop     $labl\t# loop end" %}
12209   size(6);
12210   opcode(0x0F, 0x80);
12211   ins_encode(Jcc(cop, labl));
12212   ins_pipe(pipe_jcc);
12213   ins_pc_relative(1);
12214 %}
12215 
12216 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd variant consuming unsigned flags (cmpOpU/rFlagsRegU);
      // the encoding is the same 6-byte Jcc.
12217 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12218   match(CountedLoopEnd cop cmp);
12219   effect(USE labl);
12220 
12221   ins_cost(300);
12222   format %{ "j$cop,u   $labl\t# loop end" %}
12223   size(6);
12224   opcode(0x0F, 0x80);
12225   ins_encode(Jcc(cop, labl));
12226   ins_pipe(pipe_jcc);
12227   ins_pc_relative(1);
12228 %}
12229 
12230 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd variant for the cmpOpUCF/rFlagsRegUCF operand
        // classes.  Same 6-byte Jcc encoding as jmpLoopEndU but with a lower
        // ins_cost (200 versus 300).
12231   match(CountedLoopEnd cop cmp);
12232   effect(USE labl);
12233 
12234   ins_cost(200);
12235   format %{ "j$cop,u   $labl\t# loop end" %}
12236   size(6);
12237   opcode(0x0F, 0x80);
12238   ins_encode(Jcc(cop, labl));
12239   ins_pipe(pipe_jcc);
12240   ins_pc_relative(1);
12241 %}
12242 
12243 // Jump Direct Conditional - using unsigned comparison
      // Long (6-byte) conditional branch on unsigned flags; matches the If
      // node with a cmpOpU condition.
12244 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12245   match(If cop cmp);
12246   effect(USE labl);
12247 
12248   ins_cost(300);
12249   format %{ "j$cop,u  $labl" %}
12250   size(6);
12251   opcode(0x0F, 0x80);
12252   ins_encode(Jcc(cop, labl));
12253   ins_pipe(pipe_jcc);
12254   ins_pc_relative(1);
12255 %}
12256 
12257 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Long (6-byte) conditional branch for the cmpOpUCF/rFlagsRegUCF
        // operand classes; same encoding as jmpConU with a lower ins_cost
        // (200 versus 300).
12258   match(If cop cmp);
12259   effect(USE labl);
12260 
12261   ins_cost(200);
12262   format %{ "j$cop,u  $labl" %}
12263   size(6);
12264   opcode(0x0F, 0x80);
12265   ins_encode(Jcc(cop, labl));
12266   ins_pipe(pipe_jcc);
12267   ins_pc_relative(1);
12268 %}
12269 
12270 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch that needs two jumps: a parity jump followed by
        // the real condition.  Only notEqual and equal are handled; any other
        // condition hits ShouldNotReachHere().  Two 6-byte long Jcc
        // instructions are emitted, hence size(12).
        //   notEqual: the parity jump also goes to $labl.
        //   equal:    the parity jump skips over the second jump ("done").
12271   match(If cop cmp);
12272   effect(USE labl);
12273 
12274   ins_cost(200);
12275   format %{ $$template
12276     if ($cop$$cmpcode == Assembler::notEqual) {
12277       $$emit$$"jp,u   $labl\n\t"
12278       $$emit$$"j$cop,u   $labl"
12279     } else {
12280       $$emit$$"jp,u   done\n\t"
12281       $$emit$$"j$cop,u   $labl\n\t"
12282       $$emit$$"done:"
12283     }
12284   %}
12285   size(12);
12286   opcode(0x0F, 0x80);
12287   ins_encode %{
12288     Label* l = $labl$$label;
          // First jump: opcode bytes for "jump if parity".
12289     $$$emit8$primary;
12290     emit_cc(cbuf, $secondary, Assembler::parity);
12291     int parity_disp = -1;
12292     if ($cop$$cmpcode == Assembler::notEqual) {
12293        // the two jumps 6 bytes apart so the jump distances are too
12294        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12295     } else if ($cop$$cmpcode == Assembler::equal) {
             // Skip exactly the 6 bytes of the second jump.
12296        parity_disp = 6;
12297     } else {
12298        ShouldNotReachHere();
12299     }
12300     emit_d32(cbuf, parity_disp);
          // Second jump: the requested condition, targeting the label.
          // Displacements are relative to the end of the 4-byte field about
          // to be emitted; a missing label yields displacement 0.
12301     $$$emit8$primary;
12302     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12303     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12304     emit_d32(cbuf, disp);
12305   %}
12306   ins_pipe(pipe_jcc);
12307   ins_pc_relative(1);
12308 %}
12309 
12310 // ============================================================================
12311 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12312 // superklass array for an instance of the superklass.  Set a hidden
12313 // internal cache on a hit (cache is checked with exposed code in
12314 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12315 // encoding ALSO sets flags.
12316 
12317 instruct partialSubtypeCheck(rdi_RegP result,
12318                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12319                              rFlagsReg cr)
12320 %{
        // Slow half of a subtype check that materializes its result in a
        // register: per the format string, $result (rdi) is zeroed on a hit
        // and left non-zero on a miss.  rcx serves as the scan counter and
        // the flags are modified, so both are KILL.  opcode(0x1) requests the
        // final XOR of RDI from the shared enc_PartialSubtypeCheck encoding;
        // partialSubtypeCheck_vs_Zero uses opcode(0x0) to omit it.
        // The format string is debug-listing text only; it names
        // arrayOopDesc and marks every trailing remark with '#'.
12321   match(Set result (PartialSubtypeCheck sub super));
12322   effect(KILL rcx, KILL cr);
12323 
12324   ins_cost(1100);  // slightly larger than the next version
12325   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12326             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12327             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12328             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12329             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12330             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12331             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12332     "miss:\t" %}
12333 
12334   opcode(0x1); // Force a XOR of RDI
12335   ins_encode(enc_PartialSubtypeCheck());
12336   ins_pipe(pipe_slow);
12337 %}
12338 
12339 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12340                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12341                                      immP0 zero,
12342                                      rdi_RegP result)
12343 %{
        // Variant for when the subtype-check result is only compared with
        // NULL: the flags set by the scan are the output, so the register
        // result is not needed.  rdi is still used by the scan and is
        // therefore KILL, as is rcx.  opcode(0x0) tells the shared
        // enc_PartialSubtypeCheck encoding to omit the final XOR of RDI.
        // The format string is debug-listing text only; it names
        // arrayOopDesc.
12344   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12345   effect(KILL rcx, KILL result);
12346 
12347   ins_cost(1000);
12348   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12349             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12350             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12351             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12352             "jne,s   miss\t\t# Missed: flags nz\n\t"
12353             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12354     "miss:\t" %}
12355 
12356   opcode(0x0); // No need to XOR RDI
12357   ins_encode(enc_PartialSubtypeCheck());
12358   ins_pipe(pipe_slow);
12359 %}
12360 
12361 // ============================================================================
12362 // Branch Instructions -- short offset versions
12363 //
12364 // These instructions are used to replace jumps of a long offset (the default
12365 // match) with jumps of a shorter offset.  These instructions are all tagged
12366 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12367 // match rules in general matching.  Instead, the ADLC generates a conversion
12368 // method in the MachNode which can be used to do in-place replacement of the
12369 // long variant with the shorter variant.  The compiler will determine if a
12370 // branch can be taken by the is_short_branch_offset() predicate in the machine
12371 // specific code section of the file.
12372 
12373 // Jump Direct - Label defines a relative address from JMP+1
      // Short form of jmpDir: opcode 0xEB plus the LblShort displacement,
      // 2 bytes in total.  ins_short_branch(1) marks it as a replacement
      // candidate rather than a normally matched rule.
12374 instruct jmpDir_short(label labl) %{
12375   match(Goto);
12376   effect(USE labl);
12377 
12378   ins_cost(300);
12379   format %{ "jmp,s   $labl" %}
12380   size(2);
12381   opcode(0xEB);
12382   ins_encode(OpcP, LblShort(labl));
12383   ins_pipe(pipe_jmp);
12384   ins_pc_relative(1);
12385   ins_short_branch(1);
12386 %}
12387 
12388 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpCon: base opcode 0x70 (combined with the condition
      // by the JccShort encoding) plus the displacement, 2 bytes in total.
12389 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12390   match(If cop cr);
12391   effect(USE labl);
12392 
12393   ins_cost(300);
12394   format %{ "j$cop,s   $labl" %}
12395   size(2);
12396   opcode(0x70);
12397   ins_encode(JccShort(cop, labl));
12398   ins_pipe(pipe_jcc);
12399   ins_pc_relative(1);
12400   ins_short_branch(1);
12401 %}
12402 
12403 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) form of jmpLoopEnd for signed flags.
12404 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12405   match(CountedLoopEnd cop cr);
12406   effect(USE labl);
12407 
12408   ins_cost(300);
12409   format %{ "j$cop,s   $labl\t# loop end" %}
12410   size(2);
12411   opcode(0x70);
12412   ins_encode(JccShort(cop, labl));
12413   ins_pipe(pipe_jcc);
12414   ins_pc_relative(1);
12415   ins_short_branch(1);
12416 %}
12417 
12418 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) form of jmpLoopEndU for unsigned flags.
12419 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12420   match(CountedLoopEnd cop cmp);
12421   effect(USE labl);
12422 
12423   ins_cost(300);
12424   format %{ "j$cop,us  $labl\t# loop end" %}
12425   size(2);
12426   opcode(0x70);
12427   ins_encode(JccShort(cop, labl));
12428   ins_pipe(pipe_jcc);
12429   ins_pc_relative(1);
12430   ins_short_branch(1);
12431 %}
12432 
12433 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) form of jmpLoopEndUCF.  Note that its ins_cost is
        // 300 whereas the long form declares 200.
12434   match(CountedLoopEnd cop cmp);
12435   effect(USE labl);
12436 
12437   ins_cost(300);
12438   format %{ "j$cop,us  $labl\t# loop end" %}
12439   size(2);
12440   opcode(0x70);
12441   ins_encode(JccShort(cop, labl));
12442   ins_pipe(pipe_jcc);
12443   ins_pc_relative(1);
12444   ins_short_branch(1);
12445 %}
12446 
12447 // Jump Direct Conditional - using unsigned comparison
      // Short (2-byte) form of jmpConU.
12448 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12449   match(If cop cmp);
12450   effect(USE labl);
12451 
12452   ins_cost(300);
12453   format %{ "j$cop,us  $labl" %}
12454   size(2);
12455   opcode(0x70);
12456   ins_encode(JccShort(cop, labl));
12457   ins_pipe(pipe_jcc);
12458   ins_pc_relative(1);
12459   ins_short_branch(1);
12460 %}
12461 
12462 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) form of jmpConUCF.  Note that its ins_cost is 300
        // whereas the long form declares 200.
12463   match(If cop cmp);
12464   effect(USE labl);
12465 
12466   ins_cost(300);
12467   format %{ "j$cop,us  $labl" %}
12468   size(2);
12469   opcode(0x70);
12470   ins_encode(JccShort(cop, labl));
12471   ins_pipe(pipe_jcc);
12472   ins_pc_relative(1);
12473   ins_short_branch(1);
12474 %}
12475 
12476 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF2: two 2-byte short jumps (parity jump, then
        // the real condition), hence size(4).  Only notEqual and equal are
        // handled; any other condition hits ShouldNotReachHere().
        //   notEqual: the parity jump also goes to $labl.
        //   equal:    the parity jump skips over the second jump ("done").
12477   match(If cop cmp);
12478   effect(USE labl);
12479 
12480   ins_cost(300);
12481   format %{ $$template
12482     if ($cop$$cmpcode == Assembler::notEqual) {
12483       $$emit$$"jp,u,s   $labl\n\t"
12484       $$emit$$"j$cop,u,s   $labl"
12485     } else {
12486       $$emit$$"jp,u,s   done\n\t"
12487       $$emit$$"j$cop,u,s  $labl\n\t"
12488       $$emit$$"done:"
12489     }
12490   %}
12491   size(4);
12492   opcode(0x70);
12493   ins_encode %{
12494     Label* l = $labl$$label;
          // First jump: "jump if parity".
12495     emit_cc(cbuf, $primary, Assembler::parity);
12496     int parity_disp = -1;
12497     if ($cop$$cmpcode == Assembler::notEqual) {
            // Relative to the end of the 1-byte displacement about to be
            // emitted; a missing label yields displacement 0.
12498       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12499     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip exactly the 2 bytes of the second jump.
12500       parity_disp = 2;
12501     } else {
12502       ShouldNotReachHere();
12503     }
12504     emit_d8(cbuf, parity_disp);
          // Second jump: the requested condition, targeting the label.
12505     emit_cc(cbuf, $primary, $cop$$cmpcode);
12506     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12507     emit_d8(cbuf, disp);
          // Range checks run after both displacements have been emitted.
12508     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12509     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12510   %}
12511   ins_pipe(pipe_jcc);
12512   ins_pc_relative(1);
12513   ins_short_branch(1);
12514 %}
12515 
12516 // ============================================================================
12517 // inlined locking and unlocking
12518 
12519 instruct cmpFastLock(rFlagsReg cr,
12520                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12521 %{  // Inlined lock: sets the flags register cr from (FastLock object box)
12522   match(Set cr (FastLock object box));
12523   effect(TEMP tmp, TEMP scr);  // tmp (an rax_RegI operand) and scr are scratch registers for the encoding
12524   // The actual code sequence is produced by the Fast_Lock encoding class, which is not shown here.
12525   ins_cost(300);
12526   format %{ "fastlock $object,$box,$tmp,$scr" %}
12527   ins_encode(Fast_Lock(object, box, tmp, scr));
12528   ins_pipe(pipe_slow);
12529   ins_pc_relative(1);
12530 %}
12531 
12532 instruct cmpFastUnlock(rFlagsReg cr,
12533                        rRegP object, rax_RegP box, rRegP tmp)
12534 %{  // Inlined unlock: sets the flags register cr from (FastUnlock object box)
12535   match(Set cr (FastUnlock object box));
12536   effect(TEMP tmp);  // tmp is a scratch register; box is declared as an rax_RegP operand
12537   // The actual code sequence is produced by the Fast_Unlock encoding class, which is not shown here.
12538   ins_cost(300);
12539   format %{ "fastunlock $object, $box, $tmp" %}
12540   ins_encode(Fast_Unlock(object, box, tmp));
12541   ins_pipe(pipe_slow);
12542   ins_pc_relative(1);
12543 %}
12544 
12545 
12546 // ============================================================================
12547 // Safepoint Instructions
12548 instruct safePoint_poll(rFlagsReg cr)
12549 %{  // Safepoint poll, shown in the format as a rip-relative testl against the poll page
12550   match(SafePoint);
12551   effect(KILL cr);  // the flags register is clobbered by the poll
12552   // The bytes are produced by enc_safepoint_poll (not shown here); size(6) must agree with it.
12553   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12554             "# Safepoint: poll for GC" %}
12555   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12556   ins_cost(125);
12557   ins_encode(enc_safepoint_poll);
12558   ins_pipe(ialu_reg_mem);
12559 %}
12560 
12561 // ============================================================================
12562 // Procedure Call/Return Instructions
12563 // Call Java Static Instruction (used when the call is not a method handle invoke)
12564 // Note: If this code changes, the corresponding ret_addr_offset() and
12565 //       compute_padding() functions will have to be adjusted.
12566 instruct CallStaticJavaDirect(method meth) %{
12567   match(CallStaticJava);
12568   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());  // excludes method handle invokes
12569   effect(USE meth);
12570   // Encoded as Java_Static_Call followed by call_epilog, with primary opcode 0xE8.
12571   ins_cost(300);
12572   format %{ "call,static " %}
12573   opcode(0xE8); /* E8 cd */
12574   ins_encode(Java_Static_Call(meth), call_epilog);
12575   ins_pipe(pipe_slow);
12576   ins_pc_relative(1);
12577   ins_alignment(4);  // alignment requirement; see the compute_padding() note above
12578 %}
12579 
12580 // Call Java Static Instruction (method handle version)
12581 // Note: If this code changes, the corresponding ret_addr_offset() and
12582 //       compute_padding() functions will have to be adjusted.
12583 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp) %{
12584   match(CallStaticJava);
12585   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());  // selects only method handle invokes
12586   effect(USE meth);
12587   // RBP is saved by all callees (for interpreter stack correction).
12588   // We use it here for a similar purpose, in {preserve,restore}_SP.
12589   // The call is bracketed by preserve_SP before and restore_SP after, then call_epilog.
12590   ins_cost(300);
12591   format %{ "call,static/MethodHandle " %}
12592   opcode(0xE8); /* E8 cd */
12593   ins_encode(preserve_SP,
12594              Java_Static_Call(meth),
12595              restore_SP,
12596              call_epilog);
12597   ins_pipe(pipe_slow);
12598   ins_pc_relative(1);
12599   ins_alignment(4);  // alignment requirement; see the compute_padding() note above
12600 %}
12601 
12602 // Call Java Dynamic Instruction
12603 // Note: If this code changes, the corresponding ret_addr_offset() and
12604 //       compute_padding() functions will have to be adjusted.
12605 instruct CallDynamicJavaDirect(method meth)
12606 %{
12607   match(CallDynamicJava);
12608   effect(USE meth);
12609   // The format shows a movq of Universe::non_oop_word() into rax ahead of the call itself.
12610   ins_cost(300);
12611   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12612             "call,dynamic " %}
12613   opcode(0xE8); /* E8 cd */
12614   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12615   ins_pipe(pipe_slow);
12616   ins_pc_relative(1);
12617   ins_alignment(4);  // alignment requirement; see the compute_padding() note above
12618 %}
12619 
12620 // Call Runtime Instruction
12621 instruct CallRuntimeDirect(method meth)
12622 %{
12623   match(CallRuntime);
12624   effect(USE meth);
12625   // Encoded by Java_To_Runtime with primary opcode 0xE8; no call_epilog or ins_alignment is declared here.
12626   ins_cost(300);
12627   format %{ "call,runtime " %}
12628   opcode(0xE8); /* E8 cd */
12629   ins_encode(Java_To_Runtime(meth));
12630   ins_pipe(pipe_slow);
12631   ins_pc_relative(1);
12632 %}
12633 
12634 // Call runtime without safepoint (matches CallLeaf)
12635 instruct CallLeafDirect(method meth)
12636 %{
12637   match(CallLeaf);
12638   effect(USE meth);
12639   // Encoded by Java_To_Runtime with primary opcode 0xE8.
12640   ins_cost(300);
12641   format %{ "call_leaf,runtime " %}
12642   opcode(0xE8); /* E8 cd */
12643   ins_encode(Java_To_Runtime(meth));
12644   ins_pipe(pipe_slow);
12645   ins_pc_relative(1);
12646 %}
12647 
12648 // Call runtime without safepoint (CallLeafNoFP variant)
12649 instruct CallLeafNoFPDirect(method meth)
12650 %{
12651   match(CallLeafNoFP);
12652   effect(USE meth);
12653   // Uses the same Java_To_Runtime encoding and 0xE8 opcode as the CallLeaf rule; only the matched node and format differ.
12654   ins_cost(300);
12655   format %{ "call_leaf_nofp,runtime " %}
12656   opcode(0xE8); /* E8 cd */
12657   ins_encode(Java_To_Runtime(meth));
12658   ins_pipe(pipe_slow);
12659   ins_pc_relative(1);
12660 %}
12661 
12662 // Return Instruction
12663 // Remove the return address & jump to it.
12664 // NOTE(review): an older note here said a nop is always emitted after the ret
12665 // for safepoint patching; the encoding below lists only OpcP (0xC3) -- confirm.
12666 instruct Ret()
12667 %{
12668   match(Return);
12669   // No operands and no explicit size; the only encoding element is the primary opcode.
12670   format %{ "ret" %}
12671   opcode(0xC3);
12672   ins_encode(OpcP);
12673   ins_pipe(pipe_jmp);
12674 %}
12675 
12676 // Tail Call; Jump from runtime stub to Java code.
12677 // Also known as an 'interprocedural jump'.
12678 // Target of jump will eventually return to caller.
12679 // TailJump below removes the return address.
12680 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12681 %{
12682   match(TailCall jump_target method_oop);
12683   // Indirect jump through the jump_target register; method_oop is declared as an rbx_RegP operand.
12684   ins_cost(300);
12685   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12686   opcode(0xFF, 0x4); /* Opcode FF /4 */
12687   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));  // optional REX prefix, opcode byte, then the register operand byte
12688   ins_pipe(pipe_jmp);
12689 %}
12690 
12691 // Tail Jump; remove the return address; jump to target.
12692 // TailCall above leaves the return address around.
12693 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12694 %{
12695   match(TailJump jump_target ex_oop);
12696   // Pops the return address into rdx, then jumps indirectly through jump_target; ex_oop is declared as an rax_RegP operand.
12697   ins_cost(300);
12698   format %{ "popq    rdx\t# pop return address\n\t"
12699             "jmp     $jump_target" %}
12700   opcode(0xFF, 0x4); /* Opcode FF /4 */
12701   ins_encode(Opcode(0x5a), // popq rdx
12702              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12703   ins_pipe(pipe_jmp);
12704 %}
12705 
12706 // Create exception oop: created by stack-crawling runtime code.
12707 // Created exception is now available to this handler, and is setup
12708 // just prior to jumping to this handler.  No code emitted.
12709 instruct CreateException(rax_RegP ex_oop)
12710 %{
12711   match(Set ex_oop (CreateEx));
12712   // Zero-size placeholder: the empty ins_encode() emits nothing; ex_oop is declared as an rax_RegP operand.
12713   size(0);
12714   // use the following format syntax
12715   format %{ "# exception oop is in rax; no code emitted" %}
12716   ins_encode();
12717   ins_pipe(empty);
12718 %}
12719 
12720 // Rethrow exception:
12721 // The exception oop will come in the first argument position.
12722 // Then JUMP (not call) to the rethrow stub code.
12723 instruct RethrowException()
12724 %{
12725   match(Rethrow);
12726   // No operands; the jump is produced by the enc_rethrow encoding class (not shown here).
12727   // use the following format syntax
12728   format %{ "jmp     rethrow_stub" %}
12729   ins_encode(enc_rethrow);
12730   ins_pipe(pipe_jmp);
12731 %}
12732 
12733 
12734 //----------PEEPHOLE RULES-----------------------------------------------------
12735 // These must follow all instruction definitions as they use the names
12736 // defined in the instruction definitions.
12737 //
12738 // peepmatch ( root_instr_name [preceding_instruction]* );
12739 //
12740 // peepconstraint %{
12741 // (instruction_number.operand_name relational_op instruction_number.operand_name
12742 //  [, ...] );
12743 // // instruction numbers are zero-based using left to right order in peepmatch
12744 //
12745 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12746 // // provide an instruction_number.operand_name for each operand that appears
12747 // // in the replacement instruction's match rule
12748 //
12749 // ---------VM FLAGS---------------------------------------------------------
12750 //
12751 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12752 //
12753 // Each peephole rule is given an identifying number starting with zero and
12754 // increasing by one in the order seen by the parser.  An individual peephole
12755 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12756 // on the command-line.
12757 //
12758 // ---------CURRENT LIMITATIONS----------------------------------------------
12759 //
12760 // Only match adjacent instructions in same basic block
12761 // Only equality constraints
12762 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12763 // Only one replacement instruction
12764 //
12765 // ---------EXAMPLE----------------------------------------------------------
12766 //
12767 // // pertinent parts of existing instructions in architecture description
12768 // instruct movI(rRegI dst, rRegI src)
12769 // %{
12770 //   match(Set dst (CopyI src));
12771 // %}
12772 //
12773 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12774 // %{
12775 //   match(Set dst (AddI dst src));
12776 //   effect(KILL cr);
12777 // %}
12778 //
12779 // // Change (inc mov) to lea
12780 // peephole %{
12781 //   // increment preceded by register-register move
12782 //   peepmatch ( incI_rReg movI );
12783 //   // require that the destination register of the increment
12784 //   // match the destination register of the move
12785 //   peepconstraint ( 0.dst == 1.dst );
12786 //   // construct a replacement instruction that sets
12787 //   // the destination to ( move's source register + one )
12788 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12789 // %}
12790 //
12791 
12792 // Implementation no longer uses movX instructions since
12793 // machine-independent system no longer uses CopyX nodes.
12794 //
12795 // peephole
12796 // %{
12797 //   peepmatch (incI_rReg movI);
12798 //   peepconstraint (0.dst == 1.dst);
12799 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12800 // %}
12801 
12802 // peephole
12803 // %{
12804 //   peepmatch (decI_rReg movI);
12805 //   peepconstraint (0.dst == 1.dst);
12806 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12807 // %}
12808 
12809 // peephole
12810 // %{
12811 //   peepmatch (addI_rReg_imm movI);
12812 //   peepconstraint (0.dst == 1.dst);
12813 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12814 // %}
12815 
12816 // peephole
12817 // %{
12818 //   peepmatch (incL_rReg movL);
12819 //   peepconstraint (0.dst == 1.dst);
12820 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12821 // %}
12822 
12823 // peephole
12824 // %{
12825 //   peepmatch (decL_rReg movL);
12826 //   peepconstraint (0.dst == 1.dst);
12827 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12828 // %}
12829 
12830 // peephole
12831 // %{
12832 //   peepmatch (addL_rReg_imm movL);
12833 //   peepconstraint (0.dst == 1.dst);
12834 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12835 // %}
12836 
12837 // peephole
12838 // %{
12839 //   peepmatch (addP_rReg_imm movP);
12840 //   peepconstraint (0.dst == 1.dst);
12841 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12842 // %}
12843 
12844 // // Change load of spilled value to only a spill
12845 // instruct storeI(memory mem, rRegI src)
12846 // %{
12847 //   match(Set mem (StoreI mem src));
12848 // %}
12849 //
12850 // instruct loadI(rRegI dst, memory mem)
12851 // %{
12852 //   match(Set dst (LoadI mem));
12853 // %}
12854 //
12855 
12856 peephole
12857 %{  // Collapse "storeI then loadI of the same value" into the store alone
12858   peepmatch (loadI storeI);  // instruction 0 = loadI (root), instruction 1 = storeI (the preceding instruction)
12859   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the load targets the register just stored, from the same memory operand
12860   peepreplace (storeI(1.mem 1.mem 1.src));  // replacement keeps only the store, built from the store's own operands
12861 %}
12862 
12863 peephole
12864 %{  // Collapse "storeL then loadL of the same value" into the store alone
12865   peepmatch (loadL storeL);  // instruction 0 = loadL (root), instruction 1 = storeL (the preceding instruction)
12866   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the load targets the register just stored, from the same memory operand
12867   peepreplace (storeL(1.mem 1.mem 1.src));  // replacement keeps only the store, built from the store's own operands
12868 %}
12869 
12870 //----------SMARTSPILL RULES---------------------------------------------------
12871 // These must follow all instruction definitions as they use the names
12872 // defined in the instruction definitions.