1 //
   2 // Copyright 2003-2010 Sun Microsystems, Inc.  All Rights Reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20 // CA 95054 USA or visit www.sun.com if you need additional information or
  21 // have any questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
   30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
  281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
  282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
  303 // Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
  495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
  498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
 614 #ifndef PRODUCT
 615 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 616 {
 617   st->print("INT3");
 618 }
 619 #endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3)
 623 {
 624   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 625   *(cbuf.code_end()) = c;
 626   cbuf.set_code_end(cbuf.code_end() + 1);
 627 }
 628 
 629 // EMIT_CC()
 630 void emit_cc(CodeBuffer &cbuf, int f1, int f2)
 631 {
 632   unsigned char c = (unsigned char) (f1 | f2);
 633   *(cbuf.code_end()) = c;
 634   cbuf.set_code_end(cbuf.code_end() + 1);
 635 }
 636 
 637 // EMIT_OPCODE()
 638 void emit_opcode(CodeBuffer &cbuf, int code)
 639 {
 640   *(cbuf.code_end()) = (unsigned char) code;
 641   cbuf.set_code_end(cbuf.code_end() + 1);
 642 }
 643 
 644 // EMIT_OPCODE() w/ relocation information
 645 void emit_opcode(CodeBuffer &cbuf,
 646                  int code, relocInfo::relocType reloc, int offset, int format)
 647 {
 648   cbuf.relocate(cbuf.inst_mark() + offset, reloc, format);
 649   emit_opcode(cbuf, code);
 650 }
 651 
 652 // EMIT_D8()
 653 void emit_d8(CodeBuffer &cbuf, int d8)
 654 {
 655   *(cbuf.code_end()) = (unsigned char) d8;
 656   cbuf.set_code_end(cbuf.code_end() + 1);
 657 }
 658 
 659 // EMIT_D16()
 660 void emit_d16(CodeBuffer &cbuf, int d16)
 661 {
 662   *((short *)(cbuf.code_end())) = d16;
 663   cbuf.set_code_end(cbuf.code_end() + 2);
 664 }
 665 
 666 // EMIT_D32()
 667 void emit_d32(CodeBuffer &cbuf, int d32)
 668 {
 669   *((int *)(cbuf.code_end())) = d32;
 670   cbuf.set_code_end(cbuf.code_end() + 4);
 671 }
 672 
 673 // EMIT_D64()
 674 void emit_d64(CodeBuffer &cbuf, int64_t d64)
 675 {
 676   *((int64_t*) (cbuf.code_end())) = d64;
 677   cbuf.set_code_end(cbuf.code_end() + 8);
 678 }
 679 
 680 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 681 void emit_d32_reloc(CodeBuffer& cbuf,
 682                     int d32,
 683                     relocInfo::relocType reloc,
 684                     int format)
 685 {
 686   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 687   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 688 
 689   *((int*) (cbuf.code_end())) = d32;
 690   cbuf.set_code_end(cbuf.code_end() + 4);
 691 }
 692 
 693 // emit 32 bit value and construct relocation entry from RelocationHolder
 694 void emit_d32_reloc(CodeBuffer& cbuf,
 695                     int d32,
 696                     RelocationHolder const& rspec,
 697                     int format)
 698 {
 699 #ifdef ASSERT
 700   if (rspec.reloc()->type() == relocInfo::oop_type &&
 701       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 702     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 703   }
 704 #endif
 705   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 706 
 707   *((int* )(cbuf.code_end())) = d32;
 708   cbuf.set_code_end(cbuf.code_end() + 4);
 709 }
 710 
 711 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 712   address next_ip = cbuf.code_end() + 4;
 713   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 714                  external_word_Relocation::spec(addr),
 715                  RELOC_DISP32);
 716 }
 717 
 718 
 719 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 720 void emit_d64_reloc(CodeBuffer& cbuf,
 721                     int64_t d64,
 722                     relocInfo::relocType reloc,
 723                     int format)
 724 {
 725   cbuf.relocate(cbuf.inst_mark(), reloc, format);
 726 
 727   *((int64_t*) (cbuf.code_end())) = d64;
 728   cbuf.set_code_end(cbuf.code_end() + 8);
 729 }
 730 
 731 // emit 64 bit value and construct relocation entry from RelocationHolder
 732 void emit_d64_reloc(CodeBuffer& cbuf,
 733                     int64_t d64,
 734                     RelocationHolder const& rspec,
 735                     int format)
 736 {
 737 #ifdef ASSERT
 738   if (rspec.reloc()->type() == relocInfo::oop_type &&
 739       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 740     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 741            "cannot embed scavengable oops in code");
 742   }
 743 #endif
 744   cbuf.relocate(cbuf.inst_mark(), rspec, format);
 745 
 746   *((int64_t*) (cbuf.code_end())) = d64;
 747   cbuf.set_code_end(cbuf.code_end() + 8);
 748 }
 749 
 750 // Access stack slot for load or store
 751 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 752 {
 753   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 754   if (-0x80 <= disp && disp < 0x80) {
 755     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 756     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 757     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 758   } else {
 759     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 760     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 761     emit_d32(cbuf, disp);     // Displacement // R/M byte
 762   }
 763 }
 764 
 765    // rRegI ereg, memory mem) %{    // emit_reg_mem
 766 void encode_RegMem(CodeBuffer &cbuf,
 767                    int reg,
 768                    int base, int index, int scale, int disp, bool disp_is_oop)
 769 {
 770   assert(!disp_is_oop, "cannot have disp");
 771   int regenc = reg & 7;
 772   int baseenc = base & 7;
 773   int indexenc = index & 7;
 774 
 775   // There is no index & no scale, use form without SIB byte
 776   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 777     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 778     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 779       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 780     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 781       // If 8-bit displacement, mode 0x1
 782       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 783       emit_d8(cbuf, disp);
 784     } else {
 785       // If 32-bit displacement
 786       if (base == -1) { // Special flag for absolute address
 787         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 788         if (disp_is_oop) {
 789           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 790         } else {
 791           emit_d32(cbuf, disp);
 792         }
 793       } else {
 794         // Normal base + offset
 795         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 796         if (disp_is_oop) {
 797           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 798         } else {
 799           emit_d32(cbuf, disp);
 800         }
 801       }
 802     }
 803   } else {
 804     // Else, encode with the SIB byte
 805     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 806     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 807       // If no displacement
 808       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 809       emit_rm(cbuf, scale, indexenc, baseenc);
 810     } else {
 811       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 812         // If 8-bit displacement, mode 0x1
 813         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 814         emit_rm(cbuf, scale, indexenc, baseenc);
 815         emit_d8(cbuf, disp);
 816       } else {
 817         // If 32-bit displacement
 818         if (base == 0x04 ) {
 819           emit_rm(cbuf, 0x2, regenc, 0x4);
 820           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 821         } else {
 822           emit_rm(cbuf, 0x2, regenc, 0x4);
 823           emit_rm(cbuf, scale, indexenc, baseenc); // *
 824         }
 825         if (disp_is_oop) {
 826           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 827         } else {
 828           emit_d32(cbuf, disp);
 829         }
 830       }
 831     }
 832   }
 833 }
 834 
 835 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 836 {
 837   if (dstenc != srcenc) {
 838     if (dstenc < 8) {
 839       if (srcenc >= 8) {
 840         emit_opcode(cbuf, Assembler::REX_B);
 841         srcenc -= 8;
 842       }
 843     } else {
 844       if (srcenc < 8) {
 845         emit_opcode(cbuf, Assembler::REX_R);
 846       } else {
 847         emit_opcode(cbuf, Assembler::REX_RB);
 848         srcenc -= 8;
 849       }
 850       dstenc -= 8;
 851     }
 852 
 853     emit_opcode(cbuf, 0x8B);
 854     emit_rm(cbuf, 0x3, dstenc, srcenc);
 855   }
 856 }
 857 
 858 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 859   if( dst_encoding == src_encoding ) {
 860     // reg-reg copy, use an empty encoding
 861   } else {
 862     MacroAssembler _masm(&cbuf);
 863 
 864     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 865   }
 866 }
 867 
 868 
 869 //=============================================================================
 870 #ifndef PRODUCT
 871 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 872 {
 873   Compile* C = ra_->C;
 874 
 875   int framesize = C->frame_slots() << LogBytesPerInt;
 876   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 877   // Remove wordSize for return adr already pushed
 878   // and another for the RBP we are going to save
 879   framesize -= 2*wordSize;
 880   bool need_nop = true;
 881 
 882   // Calls to C2R adapters often do not accept exceptional returns.
 883   // We require that their callers must bang for them.  But be
 884   // careful, because some VM calls (such as call site linkage) can
 885   // use several kilobytes of stack.  But the stack safety zone should
 886   // account for that.  See bugs 4446381, 4468289, 4497237.
 887   if (C->need_stack_bang(framesize)) {
 888     st->print_cr("# stack bang"); st->print("\t");
 889     need_nop = false;
 890   }
 891   st->print_cr("pushq   rbp"); st->print("\t");
 892 
 893   if (VerifyStackAtCalls) {
 894     // Majik cookie to verify stack depth
 895     st->print_cr("pushq   0xffffffffbadb100d"
 896                   "\t# Majik cookie for stack depth check");
 897     st->print("\t");
 898     framesize -= wordSize; // Remove 2 for cookie
 899     need_nop = false;
 900   }
 901 
 902   if (framesize) {
 903     st->print("subq    rsp, #%d\t# Create frame", framesize);
 904     if (framesize < 0x80 && need_nop) {
 905       st->print("\n\tnop\t# nop for patch_verified_entry");
 906     }
 907   }
 908 }
 909 #endif
 910 
 911 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 912 {
 913   Compile* C = ra_->C;
 914 
 915   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 916   // NativeJump::patch_verified_entry will be able to patch out the entry
 917   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 918   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 919   // 6 bytes. So if we don't do the fldcw or the push then we must
 920   // use the 6 byte frame allocation even if we have no frame. :-(
 921   // If method sets FPU control word do it now
 922 
 923   int framesize = C->frame_slots() << LogBytesPerInt;
 924   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 925   // Remove wordSize for return adr already pushed
 926   // and another for the RBP we are going to save
 927   framesize -= 2*wordSize;
 928   bool need_nop = true;
 929 
 930   // Calls to C2R adapters often do not accept exceptional returns.
 931   // We require that their callers must bang for them.  But be
 932   // careful, because some VM calls (such as call site linkage) can
 933   // use several kilobytes of stack.  But the stack safety zone should
 934   // account for that.  See bugs 4446381, 4468289, 4497237.
 935   if (C->need_stack_bang(framesize)) {
 936     MacroAssembler masm(&cbuf);
 937     masm.generate_stack_overflow_check(framesize);
 938     need_nop = false;
 939   }
 940 
 941   // We always push rbp so that on return to interpreter rbp will be
 942   // restored correctly and we can correct the stack.
 943   emit_opcode(cbuf, 0x50 | RBP_enc);
 944 
 945   if (VerifyStackAtCalls) {
 946     // Majik cookie to verify stack depth
 947     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 948     emit_d32(cbuf, 0xbadb100d);
 949     framesize -= wordSize; // Remove 2 for cookie
 950     need_nop = false;
 951   }
 952 
 953   if (framesize) {
 954     emit_opcode(cbuf, Assembler::REX_W);
 955     if (framesize < 0x80) {
 956       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 957       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 958       emit_d8(cbuf, framesize);
 959       if (need_nop) {
 960         emit_opcode(cbuf, 0x90); // nop
 961       }
 962     } else {
 963       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 964       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 965       emit_d32(cbuf, framesize);
 966     }
 967   }
 968 
 969   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
 970 
 971 #ifdef ASSERT
 972   if (VerifyStackAtCalls) {
 973     Label L;
 974     MacroAssembler masm(&cbuf);
 975     masm.push(rax);
 976     masm.mov(rax, rsp);
 977     masm.andptr(rax, StackAlignmentInBytes-1);
 978     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 979     masm.pop(rax);
 980     masm.jcc(Assembler::equal, L);
 981     masm.stop("Stack is not properly aligned!");
 982     masm.bind(L);
 983   }
 984 #endif
 985 }
 986 
 987 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 988 {
 989   return MachNode::size(ra_); // too many variables; just compute it
 990                               // the hard way
 991 }
 992 
 993 int MachPrologNode::reloc() const
 994 {
 995   return 0; // a large enough number
 996 }
 997 
 998 //=============================================================================
 999 #ifndef PRODUCT
1000 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1001 {
1002   Compile* C = ra_->C;
1003   int framesize = C->frame_slots() << LogBytesPerInt;
1004   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1005   // Remove word for return adr already pushed
1006   // and RBP
1007   framesize -= 2*wordSize;
1008 
1009   if (framesize) {
1010     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1011     st->print("\t");
1012   }
1013 
1014   st->print_cr("popq\trbp");
1015   if (do_polling() && C->is_method_compilation()) {
1016     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1017                   "# Safepoint: poll for GC");
1018     st->print("\t");
1019   }
1020 }
1021 #endif
1022 
1023 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1024 {
1025   Compile* C = ra_->C;
1026   int framesize = C->frame_slots() << LogBytesPerInt;
1027   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1028   // Remove word for return adr already pushed
1029   // and RBP
1030   framesize -= 2*wordSize;
1031 
1032   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1033 
1034   if (framesize) {
1035     emit_opcode(cbuf, Assembler::REX_W);
1036     if (framesize < 0x80) {
1037       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1038       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1039       emit_d8(cbuf, framesize);
1040     } else {
1041       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1042       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1043       emit_d32(cbuf, framesize);
1044     }
1045   }
1046 
1047   // popq rbp
1048   emit_opcode(cbuf, 0x58 | RBP_enc);
1049 
1050   if (do_polling() && C->is_method_compilation()) {
1051     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1052     // XXX reg_mem doesn't support RIP-relative addressing yet
1053     cbuf.set_inst_mark();
1054     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_return_type, 0); // XXX
1055     emit_opcode(cbuf, 0x85); // testl
1056     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1057     // cbuf.inst_mark() is beginning of instruction
1058     emit_d32_reloc(cbuf, os::get_polling_page());
1059 //                    relocInfo::poll_return_type,
1060   }
1061 }
1062 
1063 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1064 {
1065   Compile* C = ra_->C;
1066   int framesize = C->frame_slots() << LogBytesPerInt;
1067   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1068   // Remove word for return adr already pushed
1069   // and RBP
1070   framesize -= 2*wordSize;
1071 
1072   uint size = 0;
1073 
1074   if (do_polling() && C->is_method_compilation()) {
1075     size += 6;
1076   }
1077 
1078   // count popq rbp
1079   size++;
1080 
1081   if (framesize) {
1082     if (framesize < 0x80) {
1083       size += 4;
1084     } else if (framesize) {
1085       size += 7;
1086     }
1087   }
1088 
1089   return size;
1090 }
1091 
1092 int MachEpilogNode::reloc() const
1093 {
1094   return 2; // a large enough number
1095 }
1096 
1097 const Pipeline* MachEpilogNode::pipeline() const
1098 {
1099   return MachNode::pipeline_class();
1100 }
1101 
1102 int MachEpilogNode::safepoint_offset() const
1103 {
1104   return 0;
1105 }
1106 
1107 //=============================================================================
1108 
// Register class of an OptoReg, as reported by rc_class().
enum RC {
  rc_bad   = 0, // not a valid register
  rc_int   = 1, // general-purpose register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack slot
};
1115 
1116 static enum RC rc_class(OptoReg::Name reg)
1117 {
1118   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1119 
1120   if (OptoReg::is_stack(reg)) return rc_stack;
1121 
1122   VMReg r = OptoReg::as_VMReg(reg);
1123 
1124   if (r->is_Register()) return rc_int;
1125 
1126   assert(r->is_XMMRegister(), "must be");
1127   return rc_float;
1128 }
1129 
1130 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1131                                        PhaseRegAlloc* ra_,
1132                                        bool do_size,
1133                                        outputStream* st) const
1134 {
1135 
1136   // Get registers to move
1137   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1138   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1139   OptoReg::Name dst_second = ra_->get_reg_second(this);
1140   OptoReg::Name dst_first = ra_->get_reg_first(this);
1141 
1142   enum RC src_second_rc = rc_class(src_second);
1143   enum RC src_first_rc = rc_class(src_first);
1144   enum RC dst_second_rc = rc_class(dst_second);
1145   enum RC dst_first_rc = rc_class(dst_first);
1146 
1147   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1148          "must move at least 1 register" );
1149 
1150   if (src_first == dst_first && src_second == dst_second) {
1151     // Self copy, no move
1152     return 0;
1153   } else if (src_first_rc == rc_stack) {
1154     // mem ->
1155     if (dst_first_rc == rc_stack) {
1156       // mem -> mem
1157       assert(src_second != dst_first, "overlap");
1158       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1159           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1160         // 64-bit
1161         int src_offset = ra_->reg2offset(src_first);
1162         int dst_offset = ra_->reg2offset(dst_first);
1163         if (cbuf) {
1164           emit_opcode(*cbuf, 0xFF);
1165           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1166 
1167           emit_opcode(*cbuf, 0x8F);
1168           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1169 
1170 #ifndef PRODUCT
1171         } else if (!do_size) {
1172           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1173                      "popq    [rsp + #%d]",
1174                      src_offset,
1175                      dst_offset);
1176 #endif
1177         }
1178         return
1179           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1180           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1181       } else {
1182         // 32-bit
1183         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1184         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1185         // No pushl/popl, so:
1186         int src_offset = ra_->reg2offset(src_first);
1187         int dst_offset = ra_->reg2offset(dst_first);
1188         if (cbuf) {
1189           emit_opcode(*cbuf, Assembler::REX_W);
1190           emit_opcode(*cbuf, 0x89);
1191           emit_opcode(*cbuf, 0x44);
1192           emit_opcode(*cbuf, 0x24);
1193           emit_opcode(*cbuf, 0xF8);
1194 
1195           emit_opcode(*cbuf, 0x8B);
1196           encode_RegMem(*cbuf,
1197                         RAX_enc,
1198                         RSP_enc, 0x4, 0, src_offset,
1199                         false);
1200 
1201           emit_opcode(*cbuf, 0x89);
1202           encode_RegMem(*cbuf,
1203                         RAX_enc,
1204                         RSP_enc, 0x4, 0, dst_offset,
1205                         false);
1206 
1207           emit_opcode(*cbuf, Assembler::REX_W);
1208           emit_opcode(*cbuf, 0x8B);
1209           emit_opcode(*cbuf, 0x44);
1210           emit_opcode(*cbuf, 0x24);
1211           emit_opcode(*cbuf, 0xF8);
1212 
1213 #ifndef PRODUCT
1214         } else if (!do_size) {
1215           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1216                      "movl    rax, [rsp + #%d]\n\t"
1217                      "movl    [rsp + #%d], rax\n\t"
1218                      "movq    rax, [rsp - #8]",
1219                      src_offset,
1220                      dst_offset);
1221 #endif
1222         }
1223         return
1224           5 + // movq
1225           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1226           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1227           5; // movq
1228       }
1229     } else if (dst_first_rc == rc_int) {
1230       // mem -> gpr
1231       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1232           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1233         // 64-bit
1234         int offset = ra_->reg2offset(src_first);
1235         if (cbuf) {
1236           if (Matcher::_regEncode[dst_first] < 8) {
1237             emit_opcode(*cbuf, Assembler::REX_W);
1238           } else {
1239             emit_opcode(*cbuf, Assembler::REX_WR);
1240           }
1241           emit_opcode(*cbuf, 0x8B);
1242           encode_RegMem(*cbuf,
1243                         Matcher::_regEncode[dst_first],
1244                         RSP_enc, 0x4, 0, offset,
1245                         false);
1246 #ifndef PRODUCT
1247         } else if (!do_size) {
1248           st->print("movq    %s, [rsp + #%d]\t# spill",
1249                      Matcher::regName[dst_first],
1250                      offset);
1251 #endif
1252         }
1253         return
1254           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1255       } else {
1256         // 32-bit
1257         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1258         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1259         int offset = ra_->reg2offset(src_first);
1260         if (cbuf) {
1261           if (Matcher::_regEncode[dst_first] >= 8) {
1262             emit_opcode(*cbuf, Assembler::REX_R);
1263           }
1264           emit_opcode(*cbuf, 0x8B);
1265           encode_RegMem(*cbuf,
1266                         Matcher::_regEncode[dst_first],
1267                         RSP_enc, 0x4, 0, offset,
1268                         false);
1269 #ifndef PRODUCT
1270         } else if (!do_size) {
1271           st->print("movl    %s, [rsp + #%d]\t# spill",
1272                      Matcher::regName[dst_first],
1273                      offset);
1274 #endif
1275         }
1276         return
1277           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1278           ((Matcher::_regEncode[dst_first] < 8)
1279            ? 3
1280            : 4); // REX
1281       }
1282     } else if (dst_first_rc == rc_float) {
1283       // mem-> xmm
1284       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1285           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1286         // 64-bit
1287         int offset = ra_->reg2offset(src_first);
1288         if (cbuf) {
1289           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1290           if (Matcher::_regEncode[dst_first] >= 8) {
1291             emit_opcode(*cbuf, Assembler::REX_R);
1292           }
1293           emit_opcode(*cbuf, 0x0F);
1294           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1295           encode_RegMem(*cbuf,
1296                         Matcher::_regEncode[dst_first],
1297                         RSP_enc, 0x4, 0, offset,
1298                         false);
1299 #ifndef PRODUCT
1300         } else if (!do_size) {
1301           st->print("%s  %s, [rsp + #%d]\t# spill",
1302                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1303                      Matcher::regName[dst_first],
1304                      offset);
1305 #endif
1306         }
1307         return
1308           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1309           ((Matcher::_regEncode[dst_first] < 8)
1310            ? 5
1311            : 6); // REX
1312       } else {
1313         // 32-bit
1314         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1315         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1316         int offset = ra_->reg2offset(src_first);
1317         if (cbuf) {
1318           emit_opcode(*cbuf, 0xF3);
1319           if (Matcher::_regEncode[dst_first] >= 8) {
1320             emit_opcode(*cbuf, Assembler::REX_R);
1321           }
1322           emit_opcode(*cbuf, 0x0F);
1323           emit_opcode(*cbuf, 0x10);
1324           encode_RegMem(*cbuf,
1325                         Matcher::_regEncode[dst_first],
1326                         RSP_enc, 0x4, 0, offset,
1327                         false);
1328 #ifndef PRODUCT
1329         } else if (!do_size) {
1330           st->print("movss   %s, [rsp + #%d]\t# spill",
1331                      Matcher::regName[dst_first],
1332                      offset);
1333 #endif
1334         }
1335         return
1336           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1337           ((Matcher::_regEncode[dst_first] < 8)
1338            ? 5
1339            : 6); // REX
1340       }
1341     }
1342   } else if (src_first_rc == rc_int) {
1343     // gpr ->
1344     if (dst_first_rc == rc_stack) {
1345       // gpr -> mem
1346       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1347           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1348         // 64-bit
1349         int offset = ra_->reg2offset(dst_first);
1350         if (cbuf) {
1351           if (Matcher::_regEncode[src_first] < 8) {
1352             emit_opcode(*cbuf, Assembler::REX_W);
1353           } else {
1354             emit_opcode(*cbuf, Assembler::REX_WR);
1355           }
1356           emit_opcode(*cbuf, 0x89);
1357           encode_RegMem(*cbuf,
1358                         Matcher::_regEncode[src_first],
1359                         RSP_enc, 0x4, 0, offset,
1360                         false);
1361 #ifndef PRODUCT
1362         } else if (!do_size) {
1363           st->print("movq    [rsp + #%d], %s\t# spill",
1364                      offset,
1365                      Matcher::regName[src_first]);
1366 #endif
1367         }
1368         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1369       } else {
1370         // 32-bit
1371         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1372         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1373         int offset = ra_->reg2offset(dst_first);
1374         if (cbuf) {
1375           if (Matcher::_regEncode[src_first] >= 8) {
1376             emit_opcode(*cbuf, Assembler::REX_R);
1377           }
1378           emit_opcode(*cbuf, 0x89);
1379           encode_RegMem(*cbuf,
1380                         Matcher::_regEncode[src_first],
1381                         RSP_enc, 0x4, 0, offset,
1382                         false);
1383 #ifndef PRODUCT
1384         } else if (!do_size) {
1385           st->print("movl    [rsp + #%d], %s\t# spill",
1386                      offset,
1387                      Matcher::regName[src_first]);
1388 #endif
1389         }
1390         return
1391           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1392           ((Matcher::_regEncode[src_first] < 8)
1393            ? 3
1394            : 4); // REX
1395       }
1396     } else if (dst_first_rc == rc_int) {
1397       // gpr -> gpr
1398       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1399           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1400         // 64-bit
1401         if (cbuf) {
1402           if (Matcher::_regEncode[dst_first] < 8) {
1403             if (Matcher::_regEncode[src_first] < 8) {
1404               emit_opcode(*cbuf, Assembler::REX_W);
1405             } else {
1406               emit_opcode(*cbuf, Assembler::REX_WB);
1407             }
1408           } else {
1409             if (Matcher::_regEncode[src_first] < 8) {
1410               emit_opcode(*cbuf, Assembler::REX_WR);
1411             } else {
1412               emit_opcode(*cbuf, Assembler::REX_WRB);
1413             }
1414           }
1415           emit_opcode(*cbuf, 0x8B);
1416           emit_rm(*cbuf, 0x3,
1417                   Matcher::_regEncode[dst_first] & 7,
1418                   Matcher::_regEncode[src_first] & 7);
1419 #ifndef PRODUCT
1420         } else if (!do_size) {
1421           st->print("movq    %s, %s\t# spill",
1422                      Matcher::regName[dst_first],
1423                      Matcher::regName[src_first]);
1424 #endif
1425         }
1426         return 3; // REX
1427       } else {
1428         // 32-bit
1429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1431         if (cbuf) {
1432           if (Matcher::_regEncode[dst_first] < 8) {
1433             if (Matcher::_regEncode[src_first] >= 8) {
1434               emit_opcode(*cbuf, Assembler::REX_B);
1435             }
1436           } else {
1437             if (Matcher::_regEncode[src_first] < 8) {
1438               emit_opcode(*cbuf, Assembler::REX_R);
1439             } else {
1440               emit_opcode(*cbuf, Assembler::REX_RB);
1441             }
1442           }
1443           emit_opcode(*cbuf, 0x8B);
1444           emit_rm(*cbuf, 0x3,
1445                   Matcher::_regEncode[dst_first] & 7,
1446                   Matcher::_regEncode[src_first] & 7);
1447 #ifndef PRODUCT
1448         } else if (!do_size) {
1449           st->print("movl    %s, %s\t# spill",
1450                      Matcher::regName[dst_first],
1451                      Matcher::regName[src_first]);
1452 #endif
1453         }
1454         return
1455           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1456           ? 2
1457           : 3; // REX
1458       }
1459     } else if (dst_first_rc == rc_float) {
1460       // gpr -> xmm
1461       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1462           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1463         // 64-bit
1464         if (cbuf) {
1465           emit_opcode(*cbuf, 0x66);
1466           if (Matcher::_regEncode[dst_first] < 8) {
1467             if (Matcher::_regEncode[src_first] < 8) {
1468               emit_opcode(*cbuf, Assembler::REX_W);
1469             } else {
1470               emit_opcode(*cbuf, Assembler::REX_WB);
1471             }
1472           } else {
1473             if (Matcher::_regEncode[src_first] < 8) {
1474               emit_opcode(*cbuf, Assembler::REX_WR);
1475             } else {
1476               emit_opcode(*cbuf, Assembler::REX_WRB);
1477             }
1478           }
1479           emit_opcode(*cbuf, 0x0F);
1480           emit_opcode(*cbuf, 0x6E);
1481           emit_rm(*cbuf, 0x3,
1482                   Matcher::_regEncode[dst_first] & 7,
1483                   Matcher::_regEncode[src_first] & 7);
1484 #ifndef PRODUCT
1485         } else if (!do_size) {
1486           st->print("movdq   %s, %s\t# spill",
1487                      Matcher::regName[dst_first],
1488                      Matcher::regName[src_first]);
1489 #endif
1490         }
1491         return 5; // REX
1492       } else {
1493         // 32-bit
1494         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1495         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1496         if (cbuf) {
1497           emit_opcode(*cbuf, 0x66);
1498           if (Matcher::_regEncode[dst_first] < 8) {
1499             if (Matcher::_regEncode[src_first] >= 8) {
1500               emit_opcode(*cbuf, Assembler::REX_B);
1501             }
1502           } else {
1503             if (Matcher::_regEncode[src_first] < 8) {
1504               emit_opcode(*cbuf, Assembler::REX_R);
1505             } else {
1506               emit_opcode(*cbuf, Assembler::REX_RB);
1507             }
1508           }
1509           emit_opcode(*cbuf, 0x0F);
1510           emit_opcode(*cbuf, 0x6E);
1511           emit_rm(*cbuf, 0x3,
1512                   Matcher::_regEncode[dst_first] & 7,
1513                   Matcher::_regEncode[src_first] & 7);
1514 #ifndef PRODUCT
1515         } else if (!do_size) {
1516           st->print("movdl   %s, %s\t# spill",
1517                      Matcher::regName[dst_first],
1518                      Matcher::regName[src_first]);
1519 #endif
1520         }
1521         return
1522           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1523           ? 4
1524           : 5; // REX
1525       }
1526     }
1527   } else if (src_first_rc == rc_float) {
1528     // xmm ->
1529     if (dst_first_rc == rc_stack) {
1530       // xmm -> mem
1531       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1532           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1533         // 64-bit
1534         int offset = ra_->reg2offset(dst_first);
1535         if (cbuf) {
1536           emit_opcode(*cbuf, 0xF2);
1537           if (Matcher::_regEncode[src_first] >= 8) {
1538               emit_opcode(*cbuf, Assembler::REX_R);
1539           }
1540           emit_opcode(*cbuf, 0x0F);
1541           emit_opcode(*cbuf, 0x11);
1542           encode_RegMem(*cbuf,
1543                         Matcher::_regEncode[src_first],
1544                         RSP_enc, 0x4, 0, offset,
1545                         false);
1546 #ifndef PRODUCT
1547         } else if (!do_size) {
1548           st->print("movsd   [rsp + #%d], %s\t# spill",
1549                      offset,
1550                      Matcher::regName[src_first]);
1551 #endif
1552         }
1553         return
1554           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1555           ((Matcher::_regEncode[src_first] < 8)
1556            ? 5
1557            : 6); // REX
1558       } else {
1559         // 32-bit
1560         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1561         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1562         int offset = ra_->reg2offset(dst_first);
1563         if (cbuf) {
1564           emit_opcode(*cbuf, 0xF3);
1565           if (Matcher::_regEncode[src_first] >= 8) {
1566               emit_opcode(*cbuf, Assembler::REX_R);
1567           }
1568           emit_opcode(*cbuf, 0x0F);
1569           emit_opcode(*cbuf, 0x11);
1570           encode_RegMem(*cbuf,
1571                         Matcher::_regEncode[src_first],
1572                         RSP_enc, 0x4, 0, offset,
1573                         false);
1574 #ifndef PRODUCT
1575         } else if (!do_size) {
1576           st->print("movss   [rsp + #%d], %s\t# spill",
1577                      offset,
1578                      Matcher::regName[src_first]);
1579 #endif
1580         }
1581         return
1582           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1583           ((Matcher::_regEncode[src_first] < 8)
1584            ? 5
1585            : 6); // REX
1586       }
1587     } else if (dst_first_rc == rc_int) {
1588       // xmm -> gpr
1589       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1590           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1591         // 64-bit
1592         if (cbuf) {
1593           emit_opcode(*cbuf, 0x66);
1594           if (Matcher::_regEncode[dst_first] < 8) {
1595             if (Matcher::_regEncode[src_first] < 8) {
1596               emit_opcode(*cbuf, Assembler::REX_W);
1597             } else {
1598               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1599             }
1600           } else {
1601             if (Matcher::_regEncode[src_first] < 8) {
1602               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1603             } else {
1604               emit_opcode(*cbuf, Assembler::REX_WRB);
1605             }
1606           }
1607           emit_opcode(*cbuf, 0x0F);
1608           emit_opcode(*cbuf, 0x7E);
1609           emit_rm(*cbuf, 0x3,
1610                   Matcher::_regEncode[dst_first] & 7,
1611                   Matcher::_regEncode[src_first] & 7);
1612 #ifndef PRODUCT
1613         } else if (!do_size) {
1614           st->print("movdq   %s, %s\t# spill",
1615                      Matcher::regName[dst_first],
1616                      Matcher::regName[src_first]);
1617 #endif
1618         }
1619         return 5; // REX
1620       } else {
1621         // 32-bit
1622         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1623         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1624         if (cbuf) {
1625           emit_opcode(*cbuf, 0x66);
1626           if (Matcher::_regEncode[dst_first] < 8) {
1627             if (Matcher::_regEncode[src_first] >= 8) {
1628               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1629             }
1630           } else {
1631             if (Matcher::_regEncode[src_first] < 8) {
1632               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1633             } else {
1634               emit_opcode(*cbuf, Assembler::REX_RB);
1635             }
1636           }
1637           emit_opcode(*cbuf, 0x0F);
1638           emit_opcode(*cbuf, 0x7E);
1639           emit_rm(*cbuf, 0x3,
1640                   Matcher::_regEncode[dst_first] & 7,
1641                   Matcher::_regEncode[src_first] & 7);
1642 #ifndef PRODUCT
1643         } else if (!do_size) {
1644           st->print("movdl   %s, %s\t# spill",
1645                      Matcher::regName[dst_first],
1646                      Matcher::regName[src_first]);
1647 #endif
1648         }
1649         return
1650           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1651           ? 4
1652           : 5; // REX
1653       }
1654     } else if (dst_first_rc == rc_float) {
1655       // xmm -> xmm
1656       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1657           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1658         // 64-bit
1659         if (cbuf) {
1660           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1661           if (Matcher::_regEncode[dst_first] < 8) {
1662             if (Matcher::_regEncode[src_first] >= 8) {
1663               emit_opcode(*cbuf, Assembler::REX_B);
1664             }
1665           } else {
1666             if (Matcher::_regEncode[src_first] < 8) {
1667               emit_opcode(*cbuf, Assembler::REX_R);
1668             } else {
1669               emit_opcode(*cbuf, Assembler::REX_RB);
1670             }
1671           }
1672           emit_opcode(*cbuf, 0x0F);
1673           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1674           emit_rm(*cbuf, 0x3,
1675                   Matcher::_regEncode[dst_first] & 7,
1676                   Matcher::_regEncode[src_first] & 7);
1677 #ifndef PRODUCT
1678         } else if (!do_size) {
1679           st->print("%s  %s, %s\t# spill",
1680                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1681                      Matcher::regName[dst_first],
1682                      Matcher::regName[src_first]);
1683 #endif
1684         }
1685         return
1686           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1687           ? 4
1688           : 5; // REX
1689       } else {
1690         // 32-bit
1691         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1692         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1693         if (cbuf) {
1694           if (!UseXmmRegToRegMoveAll)
1695             emit_opcode(*cbuf, 0xF3);
1696           if (Matcher::_regEncode[dst_first] < 8) {
1697             if (Matcher::_regEncode[src_first] >= 8) {
1698               emit_opcode(*cbuf, Assembler::REX_B);
1699             }
1700           } else {
1701             if (Matcher::_regEncode[src_first] < 8) {
1702               emit_opcode(*cbuf, Assembler::REX_R);
1703             } else {
1704               emit_opcode(*cbuf, Assembler::REX_RB);
1705             }
1706           }
1707           emit_opcode(*cbuf, 0x0F);
1708           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1709           emit_rm(*cbuf, 0x3,
1710                   Matcher::_regEncode[dst_first] & 7,
1711                   Matcher::_regEncode[src_first] & 7);
1712 #ifndef PRODUCT
1713         } else if (!do_size) {
1714           st->print("%s  %s, %s\t# spill",
1715                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1716                      Matcher::regName[dst_first],
1717                      Matcher::regName[src_first]);
1718 #endif
1719         }
1720         return
1721           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1722           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1723           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1724       }
1725     }
1726   }
1727 
1728   assert(0," foo ");
1729   Unimplemented();
1730 
1731   return 0;
1732 }
1733 
1734 #ifndef PRODUCT
1735 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1736 {
1737   implementation(NULL, ra_, false, st);
1738 }
1739 #endif
1740 
1741 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1742 {
1743   implementation(&cbuf, ra_, false, NULL);
1744 }
1745 
1746 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1747 {
1748   return implementation(NULL, ra_, true, NULL);
1749 }
1750 
1751 //=============================================================================
1752 #ifndef PRODUCT
1753 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1754 {
1755   st->print("nop \t# %d bytes pad for loops and calls", _count);
1756 }
1757 #endif
1758 
1759 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1760 {
1761   MacroAssembler _masm(&cbuf);
1762   __ nop(_count);
1763 }
1764 
1765 uint MachNopNode::size(PhaseRegAlloc*) const
1766 {
1767   return _count;
1768 }
1769 
1770 
1771 //=============================================================================
1772 #ifndef PRODUCT
1773 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1774 {
1775   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1776   int reg = ra_->get_reg_first(this);
1777   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1778             Matcher::regName[reg], offset);
1779 }
1780 #endif
1781 
1782 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1783 {
1784   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1785   int reg = ra_->get_encode(this);
1786   if (offset >= 0x80) {
1787     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1788     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1789     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1790     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1791     emit_d32(cbuf, offset);
1792   } else {
1793     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1794     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1795     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1796     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1797     emit_d8(cbuf, offset);
1798   }
1799 }
1800 
1801 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1802 {
1803   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1804   return (offset < 0x80) ? 5 : 8; // REX
1805 }
1806 
1807 //=============================================================================
1808 
1809 // emit call stub, compiled java to interpreter
1810 void emit_java_to_interp(CodeBuffer& cbuf)
1811 {
1812   // Stub is fixed up when the corresponding call is converted from
1813   // calling compiled code to calling interpreted code.
1814   // movq rbx, 0
1815   // jmp -5 # to self
1816 
1817   address mark = cbuf.inst_mark();  // get mark within main instrs section
1818 
1819   // Note that the code buffer's inst_mark is always relative to insts.
1820   // That's why we must use the macroassembler to generate a stub.
1821   MacroAssembler _masm(&cbuf);
1822 
1823   address base =
1824   __ start_a_stub(Compile::MAX_stubs_size);
1825   if (base == NULL)  return;  // CodeBuffer::expand failed
1826   // static stub relocation stores the instruction address of the call
1827   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1828   // static stub relocation also tags the methodOop in the code-stream.
1829   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1830   // This is recognized as unresolved by relocs/nativeinst/ic code
1831   __ jump(RuntimeAddress(__ pc()));
1832 
1833   // Update current stubs pointer and restore code_end.
1834   __ end_a_stub();
1835 }
1836 
1837 // size of call stub, compiled java to interpretor
1838 uint size_java_to_interp()
1839 {
1840   return 15;  // movq (1+1+8); jmp (1+4)
1841 }
1842 
1843 // relocation entries for call stub, compiled java to interpretor
1844 uint reloc_java_to_interp()
1845 {
1846   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1847 }
1848 
1849 //=============================================================================
1850 #ifndef PRODUCT
1851 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1852 {
1853   if (UseCompressedOops) {
1854     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
1855     if (Universe::narrow_oop_shift() != 0) {
1856       st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
1857     }
1858     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
1859   } else {
1860     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
1861                  "# Inline cache check", oopDesc::klass_offset_in_bytes());
1862   }
1863   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1864   st->print_cr("\tnop");
1865   if (!OptoBreakpoint) {
1866     st->print_cr("\tnop");
1867   }
1868 }
1869 #endif
1870 
1871 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1872 {
1873   MacroAssembler masm(&cbuf);
1874 #ifdef ASSERT
1875   uint code_size = cbuf.code_size();
1876 #endif
1877   if (UseCompressedOops) {
1878     masm.load_klass(rscratch1, j_rarg0);
1879     masm.cmpptr(rax, rscratch1);
1880   } else {
1881     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1882   }
1883 
1884   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1885 
1886   /* WARNING these NOPs are critical so that verified entry point is properly
1887      aligned for patching by NativeJump::patch_verified_entry() */
1888   int nops_cnt = 1;
1889   if (!OptoBreakpoint) {
1890     // Leave space for int3
1891      nops_cnt += 1;
1892   }
1893   if (UseCompressedOops) {
1894     // ??? divisible by 4 is aligned?
1895     nops_cnt += 1;
1896   }
1897   masm.nop(nops_cnt);
1898 
1899   assert(cbuf.code_size() - code_size == size(ra_),
1900          "checking code size of inline cache node");
1901 }
1902 
1903 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1904 {
1905   if (UseCompressedOops) {
1906     if (Universe::narrow_oop_shift() == 0) {
1907       return OptoBreakpoint ? 15 : 16;
1908     } else {
1909       return OptoBreakpoint ? 19 : 20;
1910     }
1911   } else {
1912     return OptoBreakpoint ? 11 : 12;
1913   }
1914 }
1915 
1916 
1917 //=============================================================================
1918 uint size_exception_handler()
1919 {
1920   // NativeCall instruction size is the same as NativeJump.
1921   // Note that this value is also credited (in output.cpp) to
1922   // the size of the code section.
1923   return NativeJump::instruction_size;
1924 }
1925 
1926 // Emit exception handler code.
1927 int emit_exception_handler(CodeBuffer& cbuf)
1928 {
1929 
1930   // Note that the code buffer's inst_mark is always relative to insts.
1931   // That's why we must use the macroassembler to generate a handler.
1932   MacroAssembler _masm(&cbuf);
1933   address base =
1934   __ start_a_stub(size_exception_handler());
1935   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1936   int offset = __ offset();
1937   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1938   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1939   __ end_a_stub();
1940   return offset;
1941 }
1942 
1943 uint size_deopt_handler()
1944 {
1945   // three 5 byte instructions
1946   return 15;
1947 }
1948 
1949 // Emit deopt handler code.
1950 int emit_deopt_handler(CodeBuffer& cbuf)
1951 {
1952 
1953   // Note that the code buffer's inst_mark is always relative to insts.
1954   // That's why we must use the macroassembler to generate a handler.
1955   MacroAssembler _masm(&cbuf);
1956   address base =
1957   __ start_a_stub(size_deopt_handler());
1958   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1959   int offset = __ offset();
1960   address the_pc = (address) __ pc();
1961   Label next;
1962   // push a "the_pc" on the stack without destroying any registers
1963   // as they all may be live.
1964 
1965   // push address of "next"
1966   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1967   __ bind(next);
1968   // adjust it so it matches "the_pc"
1969   __ subptr(Address(rsp, 0), __ offset() - offset);
1970   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1971   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1972   __ end_a_stub();
1973   return offset;
1974 }
1975 
1976 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1977   int mark = cbuf.insts()->mark_off();
1978   MacroAssembler _masm(&cbuf);
1979   address double_address = __ double_constant(x);
1980   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1981   emit_d32_reloc(cbuf,
1982                  (int) (double_address - cbuf.code_end() - 4),
1983                  internal_word_Relocation::spec(double_address),
1984                  RELOC_DISP32);
1985 }
1986 
1987 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1988   int mark = cbuf.insts()->mark_off();
1989   MacroAssembler _masm(&cbuf);
1990   address float_address = __ float_constant(x);
1991   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1992   emit_d32_reloc(cbuf,
1993                  (int) (float_address - cbuf.code_end() - 4),
1994                  internal_word_Relocation::spec(float_address),
1995                  RELOC_DISP32);
1996 }
1997 
1998 
1999 const bool Matcher::match_rule_supported(int opcode) {
2000   if (!has_match_rule(opcode))
2001     return false;
2002 
2003   return true;  // Per default match rules are supported.
2004 }
2005 
2006 int Matcher::regnum_to_fpu_offset(int regnum)
2007 {
2008   return regnum - 32; // The FP registers are in the second chunk
2009 }
2010 
2011 // This is UltraSparc specific, true just means we have fast l2f conversion
2012 const bool Matcher::convL2FSupported(void) {
2013   return true;
2014 }
2015 
2016 // Vector width in bytes
2017 const uint Matcher::vector_width_in_bytes(void) {
2018   return 8;
2019 }
2020 
2021 // Vector ideal reg
2022 const uint Matcher::vector_ideal_reg(void) {
2023   return Op_RegD;
2024 }
2025 
2026 // Is this branch offset short enough that a short branch can be used?
2027 //
2028 // NOTE: If the platform does not provide any short branch variants, then
2029 //       this method should return false for offset 0.
2030 bool Matcher::is_short_branch_offset(int rule, int offset) {
2031   // the short version of jmpConUCF2 contains multiple branches,
2032   // making the reach slightly less
2033   if (rule == jmpConUCF2_rule)
2034     return (-126 <= offset && offset <= 125);
2035   return (-128 <= offset && offset <= 127);
2036 }
2037 
2038 const bool Matcher::isSimpleConstant64(jlong value) {
2039   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
2040   //return value == (int) value;  // Cf. storeImmL and immL32.
2041 
2042   // Probably always true, even if a temp register is required.
2043   return true;
2044 }
2045 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// so the count is not expressed in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, expressed as eight longs' worth of bytes.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;
2056 
2057 bool Matcher::narrow_oop_use_complex_address() {
2058   assert(UseCompressedOops, "only for comressed oops code");
2059   return (LogMinObjAlignmentInBytes <= 3);
2060 }
2061 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
// Set to true here, i.e. float constants are rematerialized.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// Set to true here, i.e. no fixup is requested.
const bool Matcher::misaligned_doubles_ok = true;
2074 
// No-op on amd64: the body is intentionally empty and both arguments
// are ignored.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2077 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Advertised as required (true) here.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
2081 
2082 // Are floats conerted to double when stored to stack during deoptimization?
2083 // On x64 it is stored without convertion so we can use normal access.
2084 bool Matcher::float_in_double() { return false; }
2085 
// Do ints take an entire long register or just half?
// true here: an int takes an entire long register.
const bool Matcher::int_in_long = true;
2088 
2089 // Return whether or not this register is ever used as an argument.
2090 // This function is used on startup to build the trampoline stubs in
2091 // generateOptoStub.  Registers not mentioned will be killed by the VM
2092 // call in the trampoline, and arguments in those registers not be
2093 // available to the callee.
2094 bool Matcher::can_be_java_arg(int reg)
2095 {
2096   return
2097     reg ==  RDI_num || reg ==  RDI_H_num ||
2098     reg ==  RSI_num || reg ==  RSI_H_num ||
2099     reg ==  RDX_num || reg ==  RDX_H_num ||
2100     reg ==  RCX_num || reg ==  RCX_H_num ||
2101     reg ==   R8_num || reg ==   R8_H_num ||
2102     reg ==   R9_num || reg ==   R9_H_num ||
2103     reg ==  R12_num || reg ==  R12_H_num ||
2104     reg == XMM0_num || reg == XMM0_H_num ||
2105     reg == XMM1_num || reg == XMM1_H_num ||
2106     reg == XMM2_num || reg == XMM2_H_num ||
2107     reg == XMM3_num || reg == XMM3_H_num ||
2108     reg == XMM4_num || reg == XMM4_H_num ||
2109     reg == XMM5_num || reg == XMM5_H_num ||
2110     reg == XMM6_num || reg == XMM6_H_num ||
2111     reg == XMM7_num || reg == XMM7_H_num;
2112 }
2113 
2114 bool Matcher::is_spillable_arg(int reg)
2115 {
2116   return can_be_java_arg(reg);
2117 }
2118 
2119 // Register for DIVI projection of divmodI
2120 RegMask Matcher::divI_proj_mask() {
2121   return INT_RAX_REG_mask;
2122 }
2123 
2124 // Register for MODI projection of divmodI
2125 RegMask Matcher::modI_proj_mask() {
2126   return INT_RDX_REG_mask;
2127 }
2128 
2129 // Register for DIVL projection of divmodL
2130 RegMask Matcher::divL_proj_mask() {
2131   return LONG_RAX_REG_mask;
2132 }
2133 
2134 // Register for MODL projection of divmodL
2135 RegMask Matcher::modL_proj_mask() {
2136   return LONG_RDX_REG_mask;
2137 }
2138 
2139 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2140   return PTR_RBP_REG_mask;
2141 }
2142 
2143 static Address build_address(int b, int i, int s, int d) {
2144   Register index = as_Register(i);
2145   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2146   if (index == rsp) {
2147     index = noreg;
2148     scale = Address::no_scale;
2149   }
2150   Address addr(as_Register(b), index, scale, d);
2151   return addr;
2152 }
2153 
2154 %}
2155 
2156 //----------ENCODING BLOCK-----------------------------------------------------
2157 // This block specifies the encoding classes used by the compiler to
2158 // output byte streams.  Encoding classes are parameterized macros
2159 // used by Machine Instruction Nodes in order to generate the bit
2160 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
2164 // which returns its register number when queried.  CONST_INTER causes
2165 // an operand to generate a function which returns the value of the
2166 // constant when queried.  MEMORY_INTER causes an operand to generate
2167 // four functions which return the Base Register, the Index Register,
2168 // the Scale Value, and the Offset Value of the operand when queried.
2169 // COND_INTER causes an operand to generate six functions which return
2170 // the encoding code (ie - encoding bits for the instruction)
2171 // associated with each basic boolean condition for a conditional
2172 // instruction.
2173 //
2174 // Instructions specify two basic values for encoding.  Again, a
2175 // function is available to check if the constant displacement is an
2176 // oop. They use the ins_encode keyword to specify their encoding
2177 // classes (which must be a sequence of enc_class names, and their
2178 // parameters, specified in the encoding block), and they use the
2179 // opcode keyword to specify, in order, their primary, secondary, and
2180 // tertiary opcode.  Only the opcode sections which a particular
2181 // instruction needs for encoding need to be specified.
2182 encode %{
2183   // Build emit functions for each basic byte or larger field in the
2184   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2185   // from C++ code in the enc_class source block.  Emit functions will
2186   // live in the main source block for now.  In future, we can
2187   // generalize this by adding a syntax that specifies the sizes of
2188   // fields in an order, so that the adlc can build the emit functions
2189   // automagically
2190 
2191   // Emit primary opcode
2192   enc_class OpcP
2193   %{
2194     emit_opcode(cbuf, $primary);
2195   %}
2196 
2197   // Emit secondary opcode
2198   enc_class OpcS
2199   %{
2200     emit_opcode(cbuf, $secondary);
2201   %}
2202 
2203   // Emit tertiary opcode
2204   enc_class OpcT
2205   %{
2206     emit_opcode(cbuf, $tertiary);
2207   %}
2208 
2209   // Emit opcode directly
2210   enc_class Opcode(immI d8)
2211   %{
2212     emit_opcode(cbuf, $d8$$constant);
2213   %}
2214 
2215   // Emit size prefix
2216   enc_class SizePrefix
2217   %{
2218     emit_opcode(cbuf, 0x66);
2219   %}
2220 
2221   enc_class reg(rRegI reg)
2222   %{
2223     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2224   %}
2225 
2226   enc_class reg_reg(rRegI dst, rRegI src)
2227   %{
2228     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2229   %}
2230 
2231   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2232   %{
2233     emit_opcode(cbuf, $opcode$$constant);
2234     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2235   %}
2236 
  // Hand-assembled flags fixup: unless the short jnp is taken, the flags
  // are pushed, masked on the stack with 0xffffff2b, and popped back.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // The 0x0A displacement skips the 10 bytes emitted below before the
    // final nop: pushfq (1) + andq (8) + popfq (1).
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    // The low byte 0x2b clears bits 2, 4, 6 and 7 of the saved flags.
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2259 
  // Hand-assembled sequence: $dst is preloaded with -1; the jp and jb
  // branches skip the rest, otherwise $dst becomes the zero-extended
  // result of setne.
  enc_class cmpfp3(rRegI dst)
  %{
    int dstenc = $dst$$reg;

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jp,s done
    // Skips jb (2 bytes) + setne + movzbl; each of the latter two is
    // 3 bytes, or 4 when a REX prefix is emitted (dstenc >= 4).
    emit_opcode(cbuf, 0x7A);
    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);

    // jb,s done
    // Skips setne + movzbl only.
    emit_opcode(cbuf, 0x72);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Encodings 4 and above get a REX prefix (REX_B from 8 upward).
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // $dst fills both ModRM register fields, hence REX_RB from 8 upward.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2295 
  // Prologue for 32-bit signed division.  The idivl itself is NOT emitted
  // here; the branch displacements below already account for its size.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // Displacement grows by one when the cmp below carries a REX_B prefix.
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // Skips cltd (1 byte) plus the idiv supplied by the user of this rule
    // (2 bytes, or 3 with a REX_B prefix).
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
2359 
  // Prologue for 64-bit signed division.  The idivq itself is NOT emitted
  // here; the branch displacements below already account for its size.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    // The 64-bit immediate is emitted byte by byte, lowest byte first.
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    // Skips xor (2) + cmp (4) + je (2).
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    // A REX prefix is always present here, so the length does not depend
    // on which register $div is.
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    // Skips cqto (2) plus the 3-byte idiv supplied by the user of this rule.
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2430 
2431   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2432   enc_class OpcSE(immI imm)
2433   %{
2434     // Emit primary opcode and set sign-extend bit
2435     // Check for 8-bit immediate, and set sign extend bit in opcode
2436     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2437       emit_opcode(cbuf, $primary | 0x02);
2438     } else {
2439       // 32-bit immediate
2440       emit_opcode(cbuf, $primary);
2441     }
2442   %}
2443 
2444   enc_class OpcSErm(rRegI dst, immI imm)
2445   %{
2446     // OpcSEr/m
2447     int dstenc = $dst$$reg;
2448     if (dstenc >= 8) {
2449       emit_opcode(cbuf, Assembler::REX_B);
2450       dstenc -= 8;
2451     }
2452     // Emit primary opcode and set sign-extend bit
2453     // Check for 8-bit immediate, and set sign extend bit in opcode
2454     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2455       emit_opcode(cbuf, $primary | 0x02);
2456     } else {
2457       // 32-bit immediate
2458       emit_opcode(cbuf, $primary);
2459     }
2460     // Emit r/m byte with secondary opcode, after primary opcode.
2461     emit_rm(cbuf, 0x3, $secondary, dstenc);
2462   %}
2463 
2464   enc_class OpcSErm_wide(rRegL dst, immI imm)
2465   %{
2466     // OpcSEr/m
2467     int dstenc = $dst$$reg;
2468     if (dstenc < 8) {
2469       emit_opcode(cbuf, Assembler::REX_W);
2470     } else {
2471       emit_opcode(cbuf, Assembler::REX_WB);
2472       dstenc -= 8;
2473     }
2474     // Emit primary opcode and set sign-extend bit
2475     // Check for 8-bit immediate, and set sign extend bit in opcode
2476     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2477       emit_opcode(cbuf, $primary | 0x02);
2478     } else {
2479       // 32-bit immediate
2480       emit_opcode(cbuf, $primary);
2481     }
2482     // Emit r/m byte with secondary opcode, after primary opcode.
2483     emit_rm(cbuf, 0x3, $secondary, dstenc);
2484   %}
2485 
2486   enc_class Con8or32(immI imm)
2487   %{
2488     // Check for 8-bit immediate, and set sign extend bit in opcode
2489     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2490       $$$emit8$imm$$constant;
2491     } else {
2492       // 32-bit immediate
2493       $$$emit32$imm$$constant;
2494     }
2495   %}
2496 
2497   enc_class Lbl(label labl)
2498   %{
2499     // JMP, CALL
2500     Label* l = $labl$$label;
2501     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2502   %}
2503 
2504   enc_class LblShort(label labl)
2505   %{
2506     // JMP, CALL
2507     Label* l = $labl$$label;
2508     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2509     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2510     emit_d8(cbuf, disp);
2511   %}
2512 
2513   enc_class opc2_reg(rRegI dst)
2514   %{
2515     // BSWAP
2516     emit_cc(cbuf, $secondary, $dst$$reg);
2517   %}
2518 
2519   enc_class opc3_reg(rRegI dst)
2520   %{
2521     // BSWAP
2522     emit_cc(cbuf, $tertiary, $dst$$reg);
2523   %}
2524 
2525   enc_class reg_opc(rRegI div)
2526   %{
2527     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2528     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2529   %}
2530 
2531   enc_class Jcc(cmpOp cop, label labl)
2532   %{
2533     // JCC
2534     Label* l = $labl$$label;
2535     $$$emit8$primary;
2536     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2537     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0);
2538   %}
2539 
2540   enc_class JccShort (cmpOp cop, label labl)
2541   %{
2542   // JCC
2543     Label *l = $labl$$label;
2544     emit_cc(cbuf, $primary, $cop$$cmpcode);
2545     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
2546     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2547     emit_d8(cbuf, disp);
2548   %}
2549 
2550   enc_class enc_cmov(cmpOp cop)
2551   %{
2552     // CMOV
2553     $$$emit8$primary;
2554     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2555   %}
2556 
  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
  %{
    // Conditional float move, encoded as a short conditional branch that
    // jumps over an XMM register-to-register move.
    //
    // Invert sense of branch from sense of cmov
    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
    // The branch displacement equals the byte length of the move emitted
    // below: 0F, opcode and ModRM (3 bytes), plus one byte for the F3 prefix
    // when UseXmmRegToRegMoveAll is off, plus one byte for a REX prefix when
    // either register encoding is 8 or above.
    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
    // REX prefix: R bit for a high dst, B bit for a high src; omitted when
    // both registers are below 8.
    if ($dst$$reg < 8) {
      if ($src$$reg >= 8) {
        emit_opcode(cbuf, Assembler::REX_B);
      }
    } else {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_R);
      } else {
        emit_opcode(cbuf, Assembler::REX_RB);
      }
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
    // Register-direct ModRM with the low three bits of each register.
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2581 
  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
  %{
    // Conditional double move, encoded as a short conditional branch that
    // jumps over an XMM register-to-register move.
    //
    // Invert sense of branch from sense of cmov
    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
    // The branch displacement equals the byte length of the move emitted
    // below: prefix, 0F, opcode and ModRM (4 bytes), plus one byte for a REX
    // prefix when either register encoding is 8 or above.
    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX

    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
    // REX prefix: R bit for a high dst, B bit for a high src; omitted when
    // both registers are below 8.
    if ($dst$$reg < 8) {
      if ($src$$reg >= 8) {
        emit_opcode(cbuf, Assembler::REX_B);
      }
    } else {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_R);
      } else {
        emit_opcode(cbuf, Assembler::REX_RB);
      }
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
    // Register-direct ModRM with the low three bits of each register.
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2605 
2606   enc_class enc_PartialSubtypeCheck()
2607   %{
2608     Register Rrdi = as_Register(RDI_enc); // result register
2609     Register Rrax = as_Register(RAX_enc); // super class
2610     Register Rrcx = as_Register(RCX_enc); // killed
2611     Register Rrsi = as_Register(RSI_enc); // sub class
2612     Label miss;
2613     const bool set_cond_codes = true;
2614 
2615     MacroAssembler _masm(&cbuf);
2616     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2617                                      NULL, &miss,
2618                                      /*set_cond_codes:*/ true);
2619     if ($primary) {
2620       __ xorptr(Rrdi, Rrdi);
2621     }
2622     __ bind(miss);
2623   %}
2624 
2625   enc_class Java_To_Interpreter(method meth)
2626   %{
2627     // CALL Java_To_Interpreter
2628     // This is the instruction starting address for relocation info.
2629     cbuf.set_inst_mark();
2630     $$$emit8$primary;
2631     // CALL directly to the runtime
2632     emit_d32_reloc(cbuf,
2633                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2634                    runtime_call_Relocation::spec(),
2635                    RELOC_DISP32);
2636   %}
2637 
2638   enc_class preserve_SP %{
2639     debug_only(int off0 = cbuf.code_size());
2640     MacroAssembler _masm(&cbuf);
2641     // RBP is preserved across all calls, even compiled calls.
2642     // Use it to preserve RSP in places where the callee might change the SP.
2643     __ movptr(rbp, rsp);
2644     debug_only(int off1 = cbuf.code_size());
2645     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2646   %}
2647 
  enc_class restore_SP %{
    // Copy RBP back into RSP, the reverse of the move emitted by
    // preserve_SP.
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp);
  %}
2652 
2653   enc_class Java_Static_Call(method meth)
2654   %{
2655     // JAVA STATIC CALL
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2657     // determine who we intended to call.
2658     cbuf.set_inst_mark();
2659     $$$emit8$primary;
2660 
2661     if (!_method) {
2662       emit_d32_reloc(cbuf,
2663                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2664                      runtime_call_Relocation::spec(),
2665                      RELOC_DISP32);
2666     } else if (_optimized_virtual) {
2667       emit_d32_reloc(cbuf,
2668                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2669                      opt_virtual_call_Relocation::spec(),
2670                      RELOC_DISP32);
2671     } else {
2672       emit_d32_reloc(cbuf,
2673                      (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2674                      static_call_Relocation::spec(),
2675                      RELOC_DISP32);
2676     }
2677     if (_method) {
2678       // Emit stub for static call
2679       emit_java_to_interp(cbuf);
2680     }
2681   %}
2682 
2683   enc_class Java_Dynamic_Call(method meth)
2684   %{
2685     // JAVA DYNAMIC CALL
2686     // !!!!!
2687     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2688     // emit_call_dynamic_prologue( cbuf );
2689     cbuf.set_inst_mark();
2690 
2691     // movq rax, -1
2692     emit_opcode(cbuf, Assembler::REX_W);
2693     emit_opcode(cbuf, 0xB8 | RAX_enc);
2694     emit_d64_reloc(cbuf,
2695                    (int64_t) Universe::non_oop_word(),
2696                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2697     address virtual_call_oop_addr = cbuf.inst_mark();
2698     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2699     // who we intended to call.
2700     cbuf.set_inst_mark();
2701     $$$emit8$primary;
2702     emit_d32_reloc(cbuf,
2703                    (int) ($meth$$method - ((intptr_t) cbuf.code_end()) - 4),
2704                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2705                    RELOC_DISP32);
2706   %}
2707 
2708   enc_class Java_Compiled_Call(method meth)
2709   %{
2710     // JAVA COMPILED CALL
2711     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2712 
2713     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2714     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2715 
2716     // callq *disp(%rax)
2717     cbuf.set_inst_mark();
2718     $$$emit8$primary;
2719     if (disp < 0x80) {
2720       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2721       emit_d8(cbuf, disp); // Displacement
2722     } else {
2723       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2724       emit_d32(cbuf, disp); // Displacement
2725     }
2726   %}
2727 
2728   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2729   %{
2730     // SAL, SAR, SHR
2731     int dstenc = $dst$$reg;
2732     if (dstenc >= 8) {
2733       emit_opcode(cbuf, Assembler::REX_B);
2734       dstenc -= 8;
2735     }
2736     $$$emit8$primary;
2737     emit_rm(cbuf, 0x3, $secondary, dstenc);
2738     $$$emit8$shift$$constant;
2739   %}
2740 
2741   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2742   %{
2743     // SAL, SAR, SHR
2744     int dstenc = $dst$$reg;
2745     if (dstenc < 8) {
2746       emit_opcode(cbuf, Assembler::REX_W);
2747     } else {
2748       emit_opcode(cbuf, Assembler::REX_WB);
2749       dstenc -= 8;
2750     }
2751     $$$emit8$primary;
2752     emit_rm(cbuf, 0x3, $secondary, dstenc);
2753     $$$emit8$shift$$constant;
2754   %}
2755 
2756   enc_class load_immI(rRegI dst, immI src)
2757   %{
2758     int dstenc = $dst$$reg;
2759     if (dstenc >= 8) {
2760       emit_opcode(cbuf, Assembler::REX_B);
2761       dstenc -= 8;
2762     }
2763     emit_opcode(cbuf, 0xB8 | dstenc);
2764     $$$emit32$src$$constant;
2765   %}
2766 
2767   enc_class load_immL(rRegL dst, immL src)
2768   %{
2769     int dstenc = $dst$$reg;
2770     if (dstenc < 8) {
2771       emit_opcode(cbuf, Assembler::REX_W);
2772     } else {
2773       emit_opcode(cbuf, Assembler::REX_WB);
2774       dstenc -= 8;
2775     }
2776     emit_opcode(cbuf, 0xB8 | dstenc);
2777     emit_d64(cbuf, $src$$constant);
2778   %}
2779 
2780   enc_class load_immUL32(rRegL dst, immUL32 src)
2781   %{
2782     // same as load_immI, but this time we care about zeroes in the high word
2783     int dstenc = $dst$$reg;
2784     if (dstenc >= 8) {
2785       emit_opcode(cbuf, Assembler::REX_B);
2786       dstenc -= 8;
2787     }
2788     emit_opcode(cbuf, 0xB8 | dstenc);
2789     $$$emit32$src$$constant;
2790   %}
2791 
2792   enc_class load_immL32(rRegL dst, immL32 src)
2793   %{
2794     int dstenc = $dst$$reg;
2795     if (dstenc < 8) {
2796       emit_opcode(cbuf, Assembler::REX_W);
2797     } else {
2798       emit_opcode(cbuf, Assembler::REX_WB);
2799       dstenc -= 8;
2800     }
2801     emit_opcode(cbuf, 0xC7);
2802     emit_rm(cbuf, 0x03, 0x00, dstenc);
2803     $$$emit32$src$$constant;
2804   %}
2805 
2806   enc_class load_immP31(rRegP dst, immP32 src)
2807   %{
2808     // same as load_immI, but this time we care about zeroes in the high word
2809     int dstenc = $dst$$reg;
2810     if (dstenc >= 8) {
2811       emit_opcode(cbuf, Assembler::REX_B);
2812       dstenc -= 8;
2813     }
2814     emit_opcode(cbuf, 0xB8 | dstenc);
2815     $$$emit32$src$$constant;
2816   %}
2817 
2818   enc_class load_immP(rRegP dst, immP src)
2819   %{
2820     int dstenc = $dst$$reg;
2821     if (dstenc < 8) {
2822       emit_opcode(cbuf, Assembler::REX_W);
2823     } else {
2824       emit_opcode(cbuf, Assembler::REX_WB);
2825       dstenc -= 8;
2826     }
2827     emit_opcode(cbuf, 0xB8 | dstenc);
2828     // This next line should be generated from ADLC
2829     if ($src->constant_is_oop()) {
2830       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2831     } else {
2832       emit_d64(cbuf, $src$$constant);
2833     }
2834   %}
2835 
2836   enc_class load_immF(regF dst, immF con)
2837   %{
2838     // XXX reg_mem doesn't support RIP-relative addressing yet
2839     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2840     emit_float_constant(cbuf, $con$$constant);
2841   %}
2842 
2843   enc_class load_immD(regD dst, immD con)
2844   %{
2845     // XXX reg_mem doesn't support RIP-relative addressing yet
2846     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2847     emit_double_constant(cbuf, $con$$constant);
2848   %}
2849 
2850   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2851     emit_opcode(cbuf, 0xF3);
2852     if ($dst$$reg >= 8) {
2853       emit_opcode(cbuf, Assembler::REX_R);
2854     }
2855     emit_opcode(cbuf, 0x0F);
2856     emit_opcode(cbuf, 0x10);
2857     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2858     emit_float_constant(cbuf, $con$$constant);
2859   %}
2860 
2861   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2862     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2863     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2864     if ($dst$$reg >= 8) {
2865       emit_opcode(cbuf, Assembler::REX_R);
2866     }
2867     emit_opcode(cbuf, 0x0F);
2868     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2869     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2870     emit_double_constant(cbuf, $con$$constant);
2871   %}
2872 
2873   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2874   enc_class enc_copy(rRegI dst, rRegI src)
2875   %{
2876     encode_copy(cbuf, $dst$$reg, $src$$reg);
2877   %}
2878 
2879   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2880   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2881     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2882   %}
2883 
2884   enc_class enc_copy_always(rRegI dst, rRegI src)
2885   %{
2886     int srcenc = $src$$reg;
2887     int dstenc = $dst$$reg;
2888 
2889     if (dstenc < 8) {
2890       if (srcenc >= 8) {
2891         emit_opcode(cbuf, Assembler::REX_B);
2892         srcenc -= 8;
2893       }
2894     } else {
2895       if (srcenc < 8) {
2896         emit_opcode(cbuf, Assembler::REX_R);
2897       } else {
2898         emit_opcode(cbuf, Assembler::REX_RB);
2899         srcenc -= 8;
2900       }
2901       dstenc -= 8;
2902     }
2903 
2904     emit_opcode(cbuf, 0x8B);
2905     emit_rm(cbuf, 0x3, dstenc, srcenc);
2906   %}
2907 
2908   enc_class enc_copy_wide(rRegL dst, rRegL src)
2909   %{
2910     int srcenc = $src$$reg;
2911     int dstenc = $dst$$reg;
2912 
2913     if (dstenc != srcenc) {
2914       if (dstenc < 8) {
2915         if (srcenc < 8) {
2916           emit_opcode(cbuf, Assembler::REX_W);
2917         } else {
2918           emit_opcode(cbuf, Assembler::REX_WB);
2919           srcenc -= 8;
2920         }
2921       } else {
2922         if (srcenc < 8) {
2923           emit_opcode(cbuf, Assembler::REX_WR);
2924         } else {
2925           emit_opcode(cbuf, Assembler::REX_WRB);
2926           srcenc -= 8;
2927         }
2928         dstenc -= 8;
2929       }
2930       emit_opcode(cbuf, 0x8B);
2931       emit_rm(cbuf, 0x3, dstenc, srcenc);
2932     }
2933   %}
2934 
  enc_class Con32(immI src)
  %{
    // Output the operand's constant as a 32-bit immediate, using the ADLC
    // emit32 macro form.
    $$$emit32$src$$constant;
  %}
2940 
2941   enc_class Con64(immL src)
2942   %{
2943     // Output immediate
2944     emit_d64($src$$constant);
2945   %}
2946 
2947   enc_class Con32F_as_bits(immF src)
2948   %{
2949     // Output Float immediate bits
2950     jfloat jf = $src$$constant;
2951     jint jf_as_bits = jint_cast(jf);
2952     emit_d32(cbuf, jf_as_bits);
2953   %}
2954 
  enc_class Con16(immI src)
  %{
    // Output the operand's constant as a 16-bit immediate, using the ADLC
    // emit16 macro form.
    $$$emit16$src$$constant;
  %}
2960 
2961   // How is this different from Con32??? XXX
2962   enc_class Con_d32(immI src)
2963   %{
2964     emit_d32(cbuf,$src$$constant);
2965   %}
2966 
2967   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2968     // Output immediate memory reference
2969     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2970     emit_d32(cbuf, 0x00);
2971   %}
2972 
2973   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2974     MacroAssembler masm(&cbuf);
2975 
2976     Register switch_reg = as_Register($switch_val$$reg);
2977     Register dest_reg   = as_Register($dest$$reg);
2978     address table_base  = masm.address_table_constant(_index2label);
2979 
2980     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2981     // to do that and the compiler is using that register as one it can allocate.
2982     // So we build it all by hand.
2983     // Address index(noreg, switch_reg, Address::times_1);
2984     // ArrayAddress dispatch(table, index);
2985 
2986     Address dispatch(dest_reg, switch_reg, Address::times_1);
2987 
2988     masm.lea(dest_reg, InternalAddress(table_base));
2989     masm.jmp(dispatch);
2990   %}
2991 
2992   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2993     MacroAssembler masm(&cbuf);
2994 
2995     Register switch_reg = as_Register($switch_val$$reg);
2996     Register dest_reg   = as_Register($dest$$reg);
2997     address table_base  = masm.address_table_constant(_index2label);
2998 
2999     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
3000     // to do that and the compiler is using that register as one it can allocate.
3001     // So we build it all by hand.
3002     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
3003     // ArrayAddress dispatch(table, index);
3004 
3005     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
3006 
3007     masm.lea(dest_reg, InternalAddress(table_base));
3008     masm.jmp(dispatch);
3009   %}
3010 
3011   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
3012     MacroAssembler masm(&cbuf);
3013 
3014     Register switch_reg = as_Register($switch_val$$reg);
3015     Register dest_reg   = as_Register($dest$$reg);
3016     address table_base  = masm.address_table_constant(_index2label);
3017 
3018     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
3019     // to do that and the compiler is using that register as one it can allocate.
3020     // So we build it all by hand.
3021     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3022     // ArrayAddress dispatch(table, index);
3023 
3024     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
3025     masm.lea(dest_reg, InternalAddress(table_base));
3026     masm.jmp(dispatch);
3027 
3028   %}
3029 
3030   enc_class lock_prefix()
3031   %{
3032     if (os::is_MP()) {
3033       emit_opcode(cbuf, 0xF0); // lock
3034     }
3035   %}
3036 
3037   enc_class REX_mem(memory mem)
3038   %{
3039     if ($mem$$base >= 8) {
3040       if ($mem$$index < 8) {
3041         emit_opcode(cbuf, Assembler::REX_B);
3042       } else {
3043         emit_opcode(cbuf, Assembler::REX_XB);
3044       }
3045     } else {
3046       if ($mem$$index >= 8) {
3047         emit_opcode(cbuf, Assembler::REX_X);
3048       }
3049     }
3050   %}
3051 
3052   enc_class REX_mem_wide(memory mem)
3053   %{
3054     if ($mem$$base >= 8) {
3055       if ($mem$$index < 8) {
3056         emit_opcode(cbuf, Assembler::REX_WB);
3057       } else {
3058         emit_opcode(cbuf, Assembler::REX_WXB);
3059       }
3060     } else {
3061       if ($mem$$index < 8) {
3062         emit_opcode(cbuf, Assembler::REX_W);
3063       } else {
3064         emit_opcode(cbuf, Assembler::REX_WX);
3065       }
3066     }
3067   %}
3068 
3069   // for byte regs
3070   enc_class REX_breg(rRegI reg)
3071   %{
3072     if ($reg$$reg >= 4) {
3073       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3074     }
3075   %}
3076 
3077   // for byte regs
3078   enc_class REX_reg_breg(rRegI dst, rRegI src)
3079   %{
3080     if ($dst$$reg < 8) {
3081       if ($src$$reg >= 4) {
3082         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3083       }
3084     } else {
3085       if ($src$$reg < 8) {
3086         emit_opcode(cbuf, Assembler::REX_R);
3087       } else {
3088         emit_opcode(cbuf, Assembler::REX_RB);
3089       }
3090     }
3091   %}
3092 
3093   // for byte regs
3094   enc_class REX_breg_mem(rRegI reg, memory mem)
3095   %{
3096     if ($reg$$reg < 8) {
3097       if ($mem$$base < 8) {
3098         if ($mem$$index >= 8) {
3099           emit_opcode(cbuf, Assembler::REX_X);
3100         } else if ($reg$$reg >= 4) {
3101           emit_opcode(cbuf, Assembler::REX);
3102         }
3103       } else {
3104         if ($mem$$index < 8) {
3105           emit_opcode(cbuf, Assembler::REX_B);
3106         } else {
3107           emit_opcode(cbuf, Assembler::REX_XB);
3108         }
3109       }
3110     } else {
3111       if ($mem$$base < 8) {
3112         if ($mem$$index < 8) {
3113           emit_opcode(cbuf, Assembler::REX_R);
3114         } else {
3115           emit_opcode(cbuf, Assembler::REX_RX);
3116         }
3117       } else {
3118         if ($mem$$index < 8) {
3119           emit_opcode(cbuf, Assembler::REX_RB);
3120         } else {
3121           emit_opcode(cbuf, Assembler::REX_RXB);
3122         }
3123       }
3124     }
3125   %}
3126 
3127   enc_class REX_reg(rRegI reg)
3128   %{
3129     if ($reg$$reg >= 8) {
3130       emit_opcode(cbuf, Assembler::REX_B);
3131     }
3132   %}
3133 
3134   enc_class REX_reg_wide(rRegI reg)
3135   %{
3136     if ($reg$$reg < 8) {
3137       emit_opcode(cbuf, Assembler::REX_W);
3138     } else {
3139       emit_opcode(cbuf, Assembler::REX_WB);
3140     }
3141   %}
3142 
3143   enc_class REX_reg_reg(rRegI dst, rRegI src)
3144   %{
3145     if ($dst$$reg < 8) {
3146       if ($src$$reg >= 8) {
3147         emit_opcode(cbuf, Assembler::REX_B);
3148       }
3149     } else {
3150       if ($src$$reg < 8) {
3151         emit_opcode(cbuf, Assembler::REX_R);
3152       } else {
3153         emit_opcode(cbuf, Assembler::REX_RB);
3154       }
3155     }
3156   %}
3157 
3158   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3159   %{
3160     if ($dst$$reg < 8) {
3161       if ($src$$reg < 8) {
3162         emit_opcode(cbuf, Assembler::REX_W);
3163       } else {
3164         emit_opcode(cbuf, Assembler::REX_WB);
3165       }
3166     } else {
3167       if ($src$$reg < 8) {
3168         emit_opcode(cbuf, Assembler::REX_WR);
3169       } else {
3170         emit_opcode(cbuf, Assembler::REX_WRB);
3171       }
3172     }
3173   %}
3174 
3175   enc_class REX_reg_mem(rRegI reg, memory mem)
3176   %{
3177     if ($reg$$reg < 8) {
3178       if ($mem$$base < 8) {
3179         if ($mem$$index >= 8) {
3180           emit_opcode(cbuf, Assembler::REX_X);
3181         }
3182       } else {
3183         if ($mem$$index < 8) {
3184           emit_opcode(cbuf, Assembler::REX_B);
3185         } else {
3186           emit_opcode(cbuf, Assembler::REX_XB);
3187         }
3188       }
3189     } else {
3190       if ($mem$$base < 8) {
3191         if ($mem$$index < 8) {
3192           emit_opcode(cbuf, Assembler::REX_R);
3193         } else {
3194           emit_opcode(cbuf, Assembler::REX_RX);
3195         }
3196       } else {
3197         if ($mem$$index < 8) {
3198           emit_opcode(cbuf, Assembler::REX_RB);
3199         } else {
3200           emit_opcode(cbuf, Assembler::REX_RXB);
3201         }
3202       }
3203     }
3204   %}
3205 
3206   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3207   %{
3208     if ($reg$$reg < 8) {
3209       if ($mem$$base < 8) {
3210         if ($mem$$index < 8) {
3211           emit_opcode(cbuf, Assembler::REX_W);
3212         } else {
3213           emit_opcode(cbuf, Assembler::REX_WX);
3214         }
3215       } else {
3216         if ($mem$$index < 8) {
3217           emit_opcode(cbuf, Assembler::REX_WB);
3218         } else {
3219           emit_opcode(cbuf, Assembler::REX_WXB);
3220         }
3221       }
3222     } else {
3223       if ($mem$$base < 8) {
3224         if ($mem$$index < 8) {
3225           emit_opcode(cbuf, Assembler::REX_WR);
3226         } else {
3227           emit_opcode(cbuf, Assembler::REX_WRX);
3228         }
3229       } else {
3230         if ($mem$$index < 8) {
3231           emit_opcode(cbuf, Assembler::REX_WRB);
3232         } else {
3233           emit_opcode(cbuf, Assembler::REX_WRXB);
3234         }
3235       }
3236     }
3237   %}
3238 
3239   enc_class reg_mem(rRegI ereg, memory mem)
3240   %{
3241     // High registers handle in encode_RegMem
3242     int reg = $ereg$$reg;
3243     int base = $mem$$base;
3244     int index = $mem$$index;
3245     int scale = $mem$$scale;
3246     int disp = $mem$$disp;
3247     bool disp_is_oop = $mem->disp_is_oop();
3248 
3249     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3250   %}
3251 
3252   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3253   %{
3254     int rm_byte_opcode = $rm_opcode$$constant;
3255 
3256     // High registers handle in encode_RegMem
3257     int base = $mem$$base;
3258     int index = $mem$$index;
3259     int scale = $mem$$scale;
3260     int displace = $mem$$disp;
3261 
3262     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3263                                             // working with static
3264                                             // globals
3265     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3266                   disp_is_oop);
3267   %}
3268 
3269   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3270   %{
3271     int reg_encoding = $dst$$reg;
3272     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3273     int index        = 0x04;            // 0x04 indicates no index
3274     int scale        = 0x00;            // 0x00 indicates no scale
3275     int displace     = $src1$$constant; // 0x00 indicates no displacement
3276     bool disp_is_oop = false;
3277     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3278                   disp_is_oop);
3279   %}
3280 
3281   enc_class neg_reg(rRegI dst)
3282   %{
3283     int dstenc = $dst$$reg;
3284     if (dstenc >= 8) {
3285       emit_opcode(cbuf, Assembler::REX_B);
3286       dstenc -= 8;
3287     }
3288     // NEG $dst
3289     emit_opcode(cbuf, 0xF7);
3290     emit_rm(cbuf, 0x3, 0x03, dstenc);
3291   %}
3292 
3293   enc_class neg_reg_wide(rRegI dst)
3294   %{
3295     int dstenc = $dst$$reg;
3296     if (dstenc < 8) {
3297       emit_opcode(cbuf, Assembler::REX_W);
3298     } else {
3299       emit_opcode(cbuf, Assembler::REX_WB);
3300       dstenc -= 8;
3301     }
3302     // NEG $dst
3303     emit_opcode(cbuf, 0xF7);
3304     emit_rm(cbuf, 0x3, 0x03, dstenc);
3305   %}
3306 
3307   enc_class setLT_reg(rRegI dst)
3308   %{
3309     int dstenc = $dst$$reg;
3310     if (dstenc >= 8) {
3311       emit_opcode(cbuf, Assembler::REX_B);
3312       dstenc -= 8;
3313     } else if (dstenc >= 4) {
3314       emit_opcode(cbuf, Assembler::REX);
3315     }
3316     // SETLT $dst
3317     emit_opcode(cbuf, 0x0F);
3318     emit_opcode(cbuf, 0x9C);
3319     emit_rm(cbuf, 0x3, 0x0, dstenc);
3320   %}
3321 
3322   enc_class setNZ_reg(rRegI dst)
3323   %{
3324     int dstenc = $dst$$reg;
3325     if (dstenc >= 8) {
3326       emit_opcode(cbuf, Assembler::REX_B);
3327       dstenc -= 8;
3328     } else if (dstenc >= 4) {
3329       emit_opcode(cbuf, Assembler::REX);
3330     }
3331     // SETNZ $dst
3332     emit_opcode(cbuf, 0x0F);
3333     emit_opcode(cbuf, 0x95);
3334     emit_rm(cbuf, 0x3, 0x0, dstenc);
3335   %}
3336 
3337   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3338                        rcx_RegI tmp)
3339   %{
3340     // cadd_cmpLT
3341 
3342     int tmpReg = $tmp$$reg;
3343 
3344     int penc = $p$$reg;
3345     int qenc = $q$$reg;
3346     int yenc = $y$$reg;
3347 
3348     // subl $p,$q
3349     if (penc < 8) {
3350       if (qenc >= 8) {
3351         emit_opcode(cbuf, Assembler::REX_B);
3352       }
3353     } else {
3354       if (qenc < 8) {
3355         emit_opcode(cbuf, Assembler::REX_R);
3356       } else {
3357         emit_opcode(cbuf, Assembler::REX_RB);
3358       }
3359     }
3360     emit_opcode(cbuf, 0x2B);
3361     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3362 
3363     // sbbl $tmp, $tmp
3364     emit_opcode(cbuf, 0x1B);
3365     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3366 
3367     // andl $tmp, $y
3368     if (yenc >= 8) {
3369       emit_opcode(cbuf, Assembler::REX_B);
3370     }
3371     emit_opcode(cbuf, 0x23);
3372     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3373 
3374     // addl $p,$tmp
3375     if (penc >= 8) {
3376         emit_opcode(cbuf, Assembler::REX_R);
3377     }
3378     emit_opcode(cbuf, 0x03);
3379     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3380   %}
3381 
  // Compare the longs and set -1, 0, or 1 into dst
  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  %{
    // Three-way compare of two 64-bit registers.  Emits, in order: a 64-bit
    // compare, a load of -1 into dst, a short jump-if-less over the
    // remainder, then setne and movzbl on dst.
    int src1enc = $src1$$reg;
    int src2enc = $src2$$reg;
    int dstenc = $dst$$reg;

    // cmpq $src1, $src2
    // REX.W always; R bit for a high src1, B bit for a high src2.
    if (src1enc < 8) {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
    emit_opcode(cbuf, 0x3B);
    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jl,s done
    // The displacement skips the setne and movzbl below: three bytes each,
    // plus one REX byte each when dst has encoding 4 or above.
    emit_opcode(cbuf, 0x7C);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Byte destination: plain REX for encodings 4..7, REX.B for 8 and above.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // dst appears in both ModRM fields, so a high dst needs both R and B.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
3433 
3434   enc_class Push_ResultXD(regD dst) %{
3435     int dstenc = $dst$$reg;
3436 
3437     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3438 
3439     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3440     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3441     if (dstenc >= 8) {
3442       emit_opcode(cbuf, Assembler::REX_R);
3443     }
3444     emit_opcode  (cbuf, 0x0F );
3445     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3446     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3447 
3448     // add rsp,8
3449     emit_opcode(cbuf, Assembler::REX_W);
3450     emit_opcode(cbuf,0x83);
3451     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3452     emit_d8(cbuf,0x08);
3453   %}
3454 
3455   enc_class Push_SrcXD(regD src) %{
3456     int srcenc = $src$$reg;
3457 
3458     // subq rsp,#8
3459     emit_opcode(cbuf, Assembler::REX_W);
3460     emit_opcode(cbuf, 0x83);
3461     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3462     emit_d8(cbuf, 0x8);
3463 
3464     // movsd [rsp],src
3465     emit_opcode(cbuf, 0xF2);
3466     if (srcenc >= 8) {
3467       emit_opcode(cbuf, Assembler::REX_R);
3468     }
3469     emit_opcode(cbuf, 0x0F);
3470     emit_opcode(cbuf, 0x11);
3471     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3472 
3473     // fldd [rsp]
3474     emit_opcode(cbuf, 0x66);
3475     emit_opcode(cbuf, 0xDD);
3476     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3477   %}
3478 
3479 
3480   enc_class movq_ld(regD dst, memory mem) %{
3481     MacroAssembler _masm(&cbuf);
3482     __ movq($dst$$XMMRegister, $mem$$Address);
3483   %}
3484 
3485   enc_class movq_st(memory mem, regD src) %{
3486     MacroAssembler _masm(&cbuf);
3487     __ movq($mem$$Address, $src$$XMMRegister);
3488   %}
3489 
3490   enc_class pshufd_8x8(regF dst, regF src) %{
3491     MacroAssembler _masm(&cbuf);
3492 
3493     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3494     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3495     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3496   %}
3497 
3498   enc_class pshufd_4x16(regF dst, regF src) %{
3499     MacroAssembler _masm(&cbuf);
3500 
3501     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3502   %}
3503 
3504   enc_class pshufd(regD dst, regD src, int mode) %{
3505     MacroAssembler _masm(&cbuf);
3506 
3507     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3508   %}
3509 
3510   enc_class pxor(regD dst, regD src) %{
3511     MacroAssembler _masm(&cbuf);
3512 
3513     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3514   %}
3515 
3516   enc_class mov_i2x(regD dst, rRegI src) %{
         // Emits movdl from the general-purpose register src into the XMM
         // register dst.
3517     MacroAssembler _masm(&cbuf);
3518 
3519     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3520   %}
3521 
3522   // obj: object to lock
3523   // box: box address (header location) -- killed
3524   // tmp: rax -- killed
3525   // scr: rbx -- killed
3526   //
3527   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3528   // from i486.ad.  See that file for comments.
3529   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3530   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3531 
3532 
3533   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3534   %{
         // Emits the inline monitor-enter sequence.  One of three code shapes is
         // generated, selected by bits of the EmitSync flag:
         //   EmitSync & 1 : store unused_mark into the box, then compare rsp with 0
         //   EmitSync & 2 : optional biased-locking entry, then a CAS on the mark
         //                  word with a recursive stack-lock check on failure
         //   otherwise    : triage on the mark word (inflated vs. not) with
         //                  separate stack-lock and inflated-monitor paths
         // No value is returned in a register; the outcome is left in the
         // condition flags by the last emitted instruction (presumably ZF=1 means
         // the lock was acquired, mirroring Fast_Unlock -- TODO confirm against
         // the instruct that uses this encoding).
3535     Register objReg = as_Register((int)$obj$$reg);
3536     Register boxReg = as_Register((int)$box$$reg);
3537     Register tmpReg = as_Register($tmp$$reg);
3538     Register scrReg = as_Register($scr$$reg);
3539     MacroAssembler masm(&cbuf);
3540 
3541     // Verify uniqueness of register assignments -- necessary but not sufficient
         // (boxReg is only checked against objReg, not against tmpReg or scrReg).
3542     assert (objReg != boxReg && objReg != tmpReg &&
3543             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3544 
         // Optional statistics: count every entry into this sequence.
3545     if (_counters != NULL) {
3546       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3547     }
3548     if (EmitSync & 1) {
             // Minimal variant: rsp is never 0, so the compare leaves ZF=0
             // (presumably forcing the caller onto the slow path -- TODO confirm).
3549         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3550         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3551         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3552     } else
3553     if (EmitSync & 2) {
3554         Label DONE_LABEL;
3555         if (UseBiasedLocking) {
3556            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3557           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3558         }
3559         // QQQ was movl...
             // tmpReg = mark word | 1; saved into the box as the displaced header
             // and used as the expected value of the CAS below.
3560         masm.movptr(tmpReg, 0x1);
3561         masm.orptr(tmpReg, Address(objReg, 0));
3562         masm.movptr(Address(boxReg, 0), tmpReg);
3563         if (os::is_MP()) {
3564           masm.lock();
3565         }
3566         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3567         masm.jcc(Assembler::equal, DONE_LABEL);
3568 
3569         // Recursive locking
             // (observed mark - rsp) masked with 7 - page_size, i.e.
             // ~(page_size - 1) | 7 for a power-of-two page size.  The masked value
             // is stored into the box and the flags of the andptr are the result.
3570         masm.subptr(tmpReg, rsp);
3571         masm.andptr(tmpReg, 7 - os::vm_page_size());
3572         masm.movptr(Address(boxReg, 0), tmpReg);
3573 
3574         masm.bind(DONE_LABEL);
3575         masm.nop(); // avoid branch to branch
3576     } else {
3577         Label DONE_LABEL, IsInflated, Egress;
3578 
3579         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3580         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3581         masm.jcc   (Assembler::notZero, IsInflated) ; 
3582          
3583         // it's stack-locked, biased or neutral
3584         // TODO: optimize markword triage order to reduce the number of
3585         // conditional branches in the most common cases.
3586         // Beware -- there's a subtle invariant that fetch of the markword
3587         // at [FETCH], below, will never observe a biased encoding (*101b).
3588         // If this invariant is not held we'll suffer exclusion (safety) failure.
3589 
3590         if (UseBiasedLocking && !UseOptoBiasInlining) {
3591           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3592           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3593         }
3594 
3595         // was q will it destroy high?
             // tmpReg = mark | 1 becomes both the displaced header stored in the
             // box and the expected value of the CAS that installs the box address.
3596         masm.orl   (tmpReg, 1) ; 
3597         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3598         if (os::is_MP()) { masm.lock(); } 
3599         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3600         if (_counters != NULL) {
3601            masm.cond_inc32(Assembler::equal,
3602                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3603         }
3604         masm.jcc   (Assembler::equal, DONE_LABEL);
3605 
3606         // Recursive locking
             // Same mask as in the EmitSync & 2 variant: ~(page_size - 1) | 7 for a
             // power-of-two page size.
3607         masm.subptr(tmpReg, rsp);
3608         masm.andptr(tmpReg, 7 - os::vm_page_size());
3609         masm.movptr(Address(boxReg, 0), tmpReg);
3610         if (_counters != NULL) {
3611            masm.cond_inc32(Assembler::equal,
3612                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3613         }
3614         masm.jmp   (DONE_LABEL) ;
3615 
3616         masm.bind  (IsInflated) ;
3617         // It's inflated
3618 
3619         // TODO: someday avoid the ST-before-CAS penalty by
3620         // relocating (deferring) the following ST.
3621         // We should also think about trying a CAS without having
3622         // fetched _owner.  If the CAS is successful we may
3623         // avoid an RTO->RTS upgrade on the $line.
3624         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3625         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3626 
             // tmpReg still holds the mark word with bit 0x02 set; the "-2" in the
             // field offsets below presumably cancels that tag so the address
             // lands on the ObjectMonitor field (TODO confirm).
3627         masm.mov    (boxReg, tmpReg) ; 
3628         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3629         masm.testptr(tmpReg, tmpReg) ;   
3630         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3631 
3632         // It's inflated and appears unlocked
             // tmpReg is zero here (tested above), so the CAS swings the owner
             // field from 0 to r15_thread.
3633         if (os::is_MP()) { masm.lock(); } 
3634         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3635         // Intentional fall-through into DONE_LABEL ...
3636 
3637         masm.bind  (DONE_LABEL) ;
3638         masm.nop   () ;                 // avoid jmp to jmp
3639     }
3640   %}
3641 
3642   // obj: object to unlock
3643   // box: box address (displaced header location), killed
3644   // RBX: killed tmp; cannot be obj nor box
3645   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3646   %{
         // Emits the inline monitor-exit sequence.  One of three code shapes is
         // generated, selected by bits of the EmitSync flag:
         //   EmitSync & 4 : only a compare of rsp with 0
         //   EmitSync & 8 : optional biased-locking exit, recursive check on the
         //                  displaced header, then a CAS restoring the mark word
         //   otherwise    : full triage with stack-locked and inflated paths
         // The outcome is left in the condition flags (ZF=1 indicates success,
         // ZF=0 failure, per the LSuccess / LGoSlowPath comments below).
         // Additional EmitSync bits used here: 65536 omits the successor-check
         // code (CheckSucc is then bound next to DONE_LABEL), and 32768 adds a
         // nop after DONE_LABEL.
3647 
3648     Register objReg = as_Register($obj$$reg);
3649     Register boxReg = as_Register($box$$reg);
3650     Register tmpReg = as_Register($tmp$$reg);
3651     MacroAssembler masm(&cbuf);
3652 
3653     if (EmitSync & 4) { 
            // rsp is never 0, so this leaves ZF=0.
3654        masm.cmpptr(rsp, 0) ; 
3655     } else
3656     if (EmitSync & 8) {
3657        Label DONE_LABEL;
3658        if (UseBiasedLocking) {
3659          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3660        }
3661 
3662        // Check whether the displaced header is 0
3663        //(=> recursive unlock)
3664        masm.movptr(tmpReg, Address(boxReg, 0));
3665        masm.testptr(tmpReg, tmpReg);
3666        masm.jcc(Assembler::zero, DONE_LABEL);
3667 
3668        // If not recursive lock, reset the header to displaced header
3669        if (os::is_MP()) {
3670          masm.lock();
3671        }
3672        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3673        masm.bind(DONE_LABEL);
3674        masm.nop(); // avoid branch to branch
3675     } else {
3676        Label DONE_LABEL, Stacked, CheckSucc ;
3677 
3678        if (UseBiasedLocking && !UseOptoBiasInlining) {
3679          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3680        }
3681         
            // tmpReg = mark word.  A zero displaced header in the box means a
            // recursive unlock (done, ZF=1); otherwise bit 0x02 of the mark
            // separates the inflated path from the stack-locked one.
3682        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3683        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3684        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3685        masm.testl (tmpReg, 0x02) ; 
3686        masm.jcc   (Assembler::zero, Stacked) ; 
3687         
3688        // It's inflated
            // The "-2" in the field offsets presumably cancels the 0x02 tag still
            // present in tmpReg (TODO confirm).  boxReg becomes
            // (owner ^ r15_thread) | recursions; non-zero exits with ZF=0.
3689        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3690        masm.xorptr(boxReg, r15_thread) ; 
3691        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3692        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // cxq | EntryList non-zero -> take the successor check; otherwise
            // clear the owner field and finish.
3693        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3694        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3695        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3696        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3697        masm.jmp   (DONE_LABEL) ; 
3698         
3699        if ((EmitSync & 65536) == 0) { 
3700          Label LSuccess, LGoSlowPath ;
3701          masm.bind  (CheckSucc) ;
3702          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3703          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3704 
3705          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3706          // explicit ST;MEMBAR combination, but masm doesn't currently support
3707          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3708          // are all faster when the write buffer is populated.
3709          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3710          if (os::is_MP()) {
3711             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3712          }
              // Re-check the successor field after the store and the locked add.
3713          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3714          masm.jcc   (Assembler::notZero, LSuccess) ;
3715 
              // Try to swing the owner field from 0 (in RAX) back to r15_thread;
              // if the CAS fails the code treats the exit as successful.
3716          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3717          if (os::is_MP()) { masm.lock(); }
3718          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3719          masm.jcc   (Assembler::notEqual, LSuccess) ;
3720          // Intentional fall-through into slow-path
3721 
3722          masm.bind  (LGoSlowPath) ;
3723          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3724          masm.jmp   (DONE_LABEL) ;
3725 
3726          masm.bind  (LSuccess) ;
3727          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3728          masm.jmp   (DONE_LABEL) ;
3729        }
3730 
3731        masm.bind  (Stacked) ; 
            // Stack-locked: CAS the displaced header (tmpReg) back into the mark
            // word, expecting the box address held in RAX.
3732        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3733        if (os::is_MP()) { masm.lock(); } 
3734        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3735 
            // With the successor-check code omitted, CheckSucc still has to be
            // bound somewhere; it is bound here, next to DONE_LABEL.
3736        if (EmitSync & 65536) {
3737           masm.bind (CheckSucc) ;
3738        }
3739        masm.bind(DONE_LABEL);
3740        if (EmitSync & 32768) {
3741           masm.nop();                      // avoid branch to branch
3742        }
3743     }
3744   %}
3745 
3746 
3747   enc_class enc_rethrow()
3748   %{
         // Emits a 5-byte jmp rel32 (opcode 0xE9) to OptoRuntime::rethrow_stub(),
         // recorded as a runtime-call relocation.  The displacement is computed
         // as target - code_end - 4, i.e. relative to the end of the 4-byte
         // displacement field that is about to be written.
3749     cbuf.set_inst_mark();
3750     emit_opcode(cbuf, 0xE9); // jmp entry
3751     emit_d32_reloc(cbuf,
3752                    (int) (OptoRuntime::rethrow_stub() - cbuf.code_end() - 4),
3753                    runtime_call_Relocation::spec(),
3754                    RELOC_DISP32);
3755   %}
3756 
3757   enc_class absF_encoding(regF dst)
3758   %{
         // Hand-encodes opcode 0F 54 (ANDPS) with dst as the register operand and
         // a disp32 memory operand referring to
         // StubRoutines::x86::float_sign_mask().
3759     int dstenc = $dst$$reg;
3760     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3761 
3762     cbuf.set_inst_mark();
         // Registers 8-15 need a REX.R prefix; only the low 3 bits go in ModRM.
3763     if (dstenc >= 8) {
3764       emit_opcode(cbuf, Assembler::REX_R);
3765       dstenc -= 8;
3766     }
3767     // XXX reg_mem doesn't support RIP-relative addressing yet
3768     emit_opcode(cbuf, 0x0F);
3769     emit_opcode(cbuf, 0x54);
3770     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // The two-argument emit_d32_reloc presumably converts the absolute
         // address into the RIP-relative displacement (helper not visible here).
3771     emit_d32_reloc(cbuf, signmask_address);
3772   %}
3773 
3774   enc_class absD_encoding(regD dst)
3775   %{
         // Hand-encodes 66 0F 54 (ANDPD) with dst as the register operand and a
         // disp32 memory operand referring to
         // StubRoutines::x86::double_sign_mask().
3776     int dstenc = $dst$$reg;
3777     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3778 
3779     cbuf.set_inst_mark();
         // The 0x66 prefix is emitted first, before any REX prefix.
3780     emit_opcode(cbuf, 0x66);
3781     if (dstenc >= 8) {
3782       emit_opcode(cbuf, Assembler::REX_R);
3783       dstenc -= 8;
3784     }
3785     // XXX reg_mem doesn't support RIP-relative addressing yet
3786     emit_opcode(cbuf, 0x0F);
3787     emit_opcode(cbuf, 0x54);
3788     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3789     emit_d32_reloc(cbuf, signmask_address);
3790   %}
3791 
3792   enc_class negF_encoding(regF dst)
3793   %{
         // Hand-encodes opcode 0F 57 (XORPS) with dst as the register operand and
         // a disp32 memory operand referring to
         // StubRoutines::x86::float_sign_flip().
3794     int dstenc = $dst$$reg;
3795     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3796 
3797     cbuf.set_inst_mark();
         // Registers 8-15 need a REX.R prefix; only the low 3 bits go in ModRM.
3798     if (dstenc >= 8) {
3799       emit_opcode(cbuf, Assembler::REX_R);
3800       dstenc -= 8;
3801     }
3802     // XXX reg_mem doesn't support RIP-relative addressing yet
3803     emit_opcode(cbuf, 0x0F);
3804     emit_opcode(cbuf, 0x57);
3805     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3806     emit_d32_reloc(cbuf, signflip_address);
3807   %}
3808 
3809   enc_class negD_encoding(regD dst)
3810   %{
         // Hand-encodes 66 0F 57 (XORPD) with dst as the register operand and a
         // disp32 memory operand referring to
         // StubRoutines::x86::double_sign_flip().
3811     int dstenc = $dst$$reg;
3812     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3813 
3814     cbuf.set_inst_mark();
         // The 0x66 prefix is emitted first, before any REX prefix.
3815     emit_opcode(cbuf, 0x66);
3816     if (dstenc >= 8) {
3817       emit_opcode(cbuf, Assembler::REX_R);
3818       dstenc -= 8;
3819     }
3820     // XXX reg_mem doesn't support RIP-relative addressing yet
3821     emit_opcode(cbuf, 0x0F);
3822     emit_opcode(cbuf, 0x57);
3823     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3824     emit_d32_reloc(cbuf, signflip_address);
3825   %}
3826 
3827   enc_class f2i_fixup(rRegI dst, regF src)
3828   %{
         // Post-conversion fix-up for float -> int.  dst is compared with
         // 0x80000000; if it differs, a short jne skips everything below.
         // Otherwise src is spilled to a fresh 8-byte stack slot, the
         // StubRoutines::x86::f2i_fixup() stub is called, and the slot is popped
         // into dst (presumably the stub leaves the corrected result in that
         // slot -- stub not visible here).
         //
         // The jne displacement is hand-computed from the bytes emitted after it:
         //   subq rsp, 8        4 bytes
         //   movss [rsp], src   5 bytes (+1 when src needs a REX prefix)
         //   call rel32         5 bytes
         //   popq dst           1 byte  (+1 when dst needs a REX prefix)
         // which gives 0xF, 0x10 or 0x11.  Any change to the emitted sequence
         // must be reflected in those constants.
3829     int dstenc = $dst$$reg;
3830     int srcenc = $src$$reg;
3831 
3832     // cmpl $dst, #0x80000000
3833     if (dstenc >= 8) {
3834       emit_opcode(cbuf, Assembler::REX_B);
3835     }
3836     emit_opcode(cbuf, 0x81);
3837     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3838     emit_d32(cbuf, 0x80000000);
3839 
3840     // jne,s done
3841     emit_opcode(cbuf, 0x75);
3842     if (srcenc < 8 && dstenc < 8) {
3843       emit_d8(cbuf, 0xF);
3844     } else if (srcenc >= 8 && dstenc >= 8) {
3845       emit_d8(cbuf, 0x11);
3846     } else {
3847       emit_d8(cbuf, 0x10);
3848     }
3849 
3850     // subq rsp, #8
3851     emit_opcode(cbuf, Assembler::REX_W);
3852     emit_opcode(cbuf, 0x83);
3853     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3854     emit_d8(cbuf, 8);
3855 
3856     // movss [rsp], $src
3857     emit_opcode(cbuf, 0xF3);
3858     if (srcenc >= 8) {
3859       emit_opcode(cbuf, Assembler::REX_R);
3860     }
3861     emit_opcode(cbuf, 0x0F);
3862     emit_opcode(cbuf, 0x11);
3863     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3864 
3865     // call f2i_fixup
         // Displacement is relative to the end of the 4-byte field being written.
3866     cbuf.set_inst_mark();
3867     emit_opcode(cbuf, 0xE8);
3868     emit_d32_reloc(cbuf,
3869                    (int)
3870                    (StubRoutines::x86::f2i_fixup() - cbuf.code_end() - 4),
3871                    runtime_call_Relocation::spec(),
3872                    RELOC_DISP32);
3873 
3874     // popq $dst
3875     if (dstenc >= 8) {
3876       emit_opcode(cbuf, Assembler::REX_B);
3877     }
3878     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3879 
3880     // done:
3881   %}
3882 
3883   enc_class f2l_fixup(rRegL dst, regF src)
3884   %{
         // Post-conversion fix-up for float -> long.  dst is compared (64-bit)
         // with the constant at StubRoutines::x86::double_sign_flip(); if it
         // differs, a short jne skips everything below.  Otherwise src is spilled
         // to a fresh 8-byte stack slot, the StubRoutines::x86::f2l_fixup() stub
         // is called, and the slot is popped into dst (presumably the stub leaves
         // the corrected result in that slot -- stub not visible here).
         //
         // The jne displacement is hand-computed from the bytes emitted after it:
         //   subq rsp, 8        4 bytes
         //   movss [rsp], src   5 bytes (+1 when src needs a REX prefix)
         //   call rel32         5 bytes
         //   popq dst           1 byte  (+1 when dst needs a REX prefix)
         // which gives 0xF, 0x10 or 0x11.
3885     int dstenc = $dst$$reg;
3886     int srcenc = $src$$reg;
3887     address const_address = (address) StubRoutines::x86::double_sign_flip();
3888 
3889     // cmpq $dst, [0x8000000000000000]
         // dst sits in the ModRM reg field here, hence REX_WR (not REX_WB) for
         // registers 8-15.
3890     cbuf.set_inst_mark();
3891     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3892     emit_opcode(cbuf, 0x39);
3893     // XXX reg_mem doesn't support RIP-relative addressing yet
3894     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3895     emit_d32_reloc(cbuf, const_address);
3896 
3897 
3898     // jne,s done
3899     emit_opcode(cbuf, 0x75);
3900     if (srcenc < 8 && dstenc < 8) {
3901       emit_d8(cbuf, 0xF);
3902     } else if (srcenc >= 8 && dstenc >= 8) {
3903       emit_d8(cbuf, 0x11);
3904     } else {
3905       emit_d8(cbuf, 0x10);
3906     }
3907 
3908     // subq rsp, #8
3909     emit_opcode(cbuf, Assembler::REX_W);
3910     emit_opcode(cbuf, 0x83);
3911     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3912     emit_d8(cbuf, 8);
3913 
3914     // movss [rsp], $src
3915     emit_opcode(cbuf, 0xF3);
3916     if (srcenc >= 8) {
3917       emit_opcode(cbuf, Assembler::REX_R);
3918     }
3919     emit_opcode(cbuf, 0x0F);
3920     emit_opcode(cbuf, 0x11);
3921     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3922 
3923     // call f2l_fixup
3924     cbuf.set_inst_mark();
3925     emit_opcode(cbuf, 0xE8);
3926     emit_d32_reloc(cbuf,
3927                    (int)
3928                    (StubRoutines::x86::f2l_fixup() - cbuf.code_end() - 4),
3929                    runtime_call_Relocation::spec(),
3930                    RELOC_DISP32);
3931 
3932     // popq $dst
3933     if (dstenc >= 8) {
3934       emit_opcode(cbuf, Assembler::REX_B);
3935     }
3936     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3937 
3938     // done:
3939   %}
3940 
3941   enc_class d2i_fixup(rRegI dst, regD src)
3942   %{
         // Post-conversion fix-up for double -> int.  dst is compared with
         // 0x80000000; if it differs, a short jne skips everything below.
         // Otherwise src is spilled to a fresh 8-byte stack slot, the
         // StubRoutines::x86::d2i_fixup() stub is called, and the slot is popped
         // into dst (presumably the stub leaves the corrected result in that
         // slot -- stub not visible here).
         //
         // The jne displacement is hand-computed from the bytes emitted after it:
         //   subq rsp, 8        4 bytes
         //   movsd [rsp], src   5 bytes (+1 when src needs a REX prefix)
         //   call rel32         5 bytes
         //   popq dst           1 byte  (+1 when dst needs a REX prefix)
         // which gives 0xF, 0x10 or 0x11.
3943     int dstenc = $dst$$reg;
3944     int srcenc = $src$$reg;
3945 
3946     // cmpl $dst, #0x80000000
3947     if (dstenc >= 8) {
3948       emit_opcode(cbuf, Assembler::REX_B);
3949     }
3950     emit_opcode(cbuf, 0x81);
3951     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3952     emit_d32(cbuf, 0x80000000);
3953 
3954     // jne,s done
3955     emit_opcode(cbuf, 0x75);
3956     if (srcenc < 8 && dstenc < 8) {
3957       emit_d8(cbuf, 0xF);
3958     } else if (srcenc >= 8 && dstenc >= 8) {
3959       emit_d8(cbuf, 0x11);
3960     } else {
3961       emit_d8(cbuf, 0x10);
3962     }
3963 
3964     // subq rsp, #8
3965     emit_opcode(cbuf, Assembler::REX_W);
3966     emit_opcode(cbuf, 0x83);
3967     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3968     emit_d8(cbuf, 8);
3969 
3970     // movsd [rsp], $src
3971     emit_opcode(cbuf, 0xF2);
3972     if (srcenc >= 8) {
3973       emit_opcode(cbuf, Assembler::REX_R);
3974     }
3975     emit_opcode(cbuf, 0x0F);
3976     emit_opcode(cbuf, 0x11);
3977     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3978 
3979     // call d2i_fixup
3980     cbuf.set_inst_mark();
3981     emit_opcode(cbuf, 0xE8);
3982     emit_d32_reloc(cbuf,
3983                    (int)
3984                    (StubRoutines::x86::d2i_fixup() - cbuf.code_end() - 4),
3985                    runtime_call_Relocation::spec(),
3986                    RELOC_DISP32);
3987 
3988     // popq $dst
3989     if (dstenc >= 8) {
3990       emit_opcode(cbuf, Assembler::REX_B);
3991     }
3992     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3993 
3994     // done:
3995   %}
3996 
3997   enc_class d2l_fixup(rRegL dst, regD src)
3998   %{
         // Post-conversion fix-up for double -> long.  dst is compared (64-bit)
         // with the constant at StubRoutines::x86::double_sign_flip(); if it
         // differs, a short jne skips everything below.  Otherwise src is spilled
         // to a fresh 8-byte stack slot, the StubRoutines::x86::d2l_fixup() stub
         // is called, and the slot is popped into dst (presumably the stub leaves
         // the corrected result in that slot -- stub not visible here).
         //
         // The jne displacement is hand-computed from the bytes emitted after it:
         //   subq rsp, 8        4 bytes
         //   movsd [rsp], src   5 bytes (+1 when src needs a REX prefix)
         //   call rel32         5 bytes
         //   popq dst           1 byte  (+1 when dst needs a REX prefix)
         // which gives 0xF, 0x10 or 0x11.
3999     int dstenc = $dst$$reg;
4000     int srcenc = $src$$reg;
4001     address const_address = (address) StubRoutines::x86::double_sign_flip();
4002 
4003     // cmpq $dst, [0x8000000000000000]
         // dst sits in the ModRM reg field here, hence REX_WR (not REX_WB) for
         // registers 8-15.
4004     cbuf.set_inst_mark();
4005     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
4006     emit_opcode(cbuf, 0x39);
4007     // XXX reg_mem doesn't support RIP-relative addressing yet
4008     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
4009     emit_d32_reloc(cbuf, const_address);
4010 
4011 
4012     // jne,s done
4013     emit_opcode(cbuf, 0x75);
4014     if (srcenc < 8 && dstenc < 8) {
4015       emit_d8(cbuf, 0xF);
4016     } else if (srcenc >= 8 && dstenc >= 8) {
4017       emit_d8(cbuf, 0x11);
4018     } else {
4019       emit_d8(cbuf, 0x10);
4020     }
4021 
4022     // subq rsp, #8
4023     emit_opcode(cbuf, Assembler::REX_W);
4024     emit_opcode(cbuf, 0x83);
4025     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
4026     emit_d8(cbuf, 8);
4027 
4028     // movsd [rsp], $src
4029     emit_opcode(cbuf, 0xF2);
4030     if (srcenc >= 8) {
4031       emit_opcode(cbuf, Assembler::REX_R);
4032     }
4033     emit_opcode(cbuf, 0x0F);
4034     emit_opcode(cbuf, 0x11);
4035     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
4036 
4037     // call d2l_fixup
4038     cbuf.set_inst_mark();
4039     emit_opcode(cbuf, 0xE8);
4040     emit_d32_reloc(cbuf,
4041                    (int)
4042                    (StubRoutines::x86::d2l_fixup() - cbuf.code_end() - 4),
4043                    runtime_call_Relocation::spec(),
4044                    RELOC_DISP32);
4045 
4046     // popq $dst
4047     if (dstenc >= 8) {
4048       emit_opcode(cbuf, Assembler::REX_B);
4049     }
4050     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4051 
4052     // done:
4053   %}
4054 
4055   // Safepoint Poll.  This polls the safepoint page, and causes an
4056   // exception if it is not readable. Unfortunately, it kills
4057   // RFLAGS in the process.
4058   enc_class enc_safepoint_poll
4059   %{
         // Emits a 6-byte "testl rax, [polling page]" and attaches a poll_type
         // relocation to the start of the instruction.
4060     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4061     // XXX reg_mem doesn't support RIP-relative addressing yet
         // The relocation is recorded at the instruction mark, i.e. before any
         // byte of the instruction has been emitted.
4062     cbuf.set_inst_mark();
4063     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0); // XXX
4064     emit_opcode(cbuf, 0x85); // testl
4065     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4066     // cbuf.inst_mark() is beginning of instruction
4067     emit_d32_reloc(cbuf, os::get_polling_page());
4068 //                    (leftover: relocInfo::poll_type is recorded by cbuf.relocate above)
4069   %}
4070 %}
4071 
4072 
4073 
4074 //----------FRAME--------------------------------------------------------------
4075 // Definition of frame structure and management information.
4076 //
4077 //  S T A C K   L A Y O U T    Allocators stack-slot number
4078 //                             |   (to get allocators register number
4079 //  G  Owned by    |        |  v    add OptoReg::stack0())
4080 //  r   CALLER     |        |
4081 //  o     |        +--------+      pad to even-align allocators stack-slot
4082 //  w     V        |  pad0  |        numbers; owned by CALLER
4083 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4084 //  h     ^        |   in   |  5
4085 //        |        |  args  |  4   Holes in incoming args owned by SELF
4086 //  |     |        |        |  3
4087 //  |     |        +--------+
4088 //  V     |        | old out|      Empty on Intel, window on Sparc
4089 //        |    old |preserve|      Must be even aligned.
4090 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4091 //        |        |   in   |  3   area for Intel ret address
4092 //     Owned by    |preserve|      Empty on Sparc.
4093 //       SELF      +--------+
4094 //        |        |  pad2  |  2   pad to align old SP
4095 //        |        +--------+  1
4096 //        |        | locks  |  0
4097 //        |        +--------+----> OptoReg::stack0(), even aligned
4098 //        |        |  pad1  | 11   pad to align new SP
4099 //        |        +--------+
4100 //        |        |        | 10
4101 //        |        | spills |  9   spills
4102 //        V        |        |  8   (pad0 slot for callee)
4103 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4104 //        ^        |  out   |  7
4105 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4106 //     Owned by    +--------+
4107 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4108 //        |    new |preserve|      Must be even-aligned.
4109 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4110 //        |        |        |
4111 //
4112 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4113 //         known from SELF's arguments and the Java calling convention.
4114 //         Region 6-7 is determined per call site.
4115 // Note 2: If the calling convention leaves holes in the incoming argument
4116 //         area, those holes are owned by SELF.  Holes in the outgoing area
4117 //         are owned by the CALLEE.  Holes should not be necessary in the
4118 //         incoming area, as the Java calling convention is completely under
4119 //         the control of the AD file.  Doubles can be sorted and packed to
4120 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4121 //         varargs C calling conventions.
4122 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4123 //         even aligned with pad0 as needed.
4124 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4125 //         region 6-11 is even aligned; it may be padded out more so that
4126 //         the region from SP to FP meets the minimum stack alignment.
4127 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4128 //         alignment.  Region 11, pad1, may be dynamically extended so that
4129 //         SP meets the minimum alignment.
4130 
4131 frame
4132 %{
       // Frame description: stack growth direction, the registers reserved by
       // the compiled/interpreted calling convention, stack-slot bookkeeping,
       // the return-address location, and the Java / C calling-convention and
       // return-value hooks.
4133   // What direction does stack grow in (assumed to be same for C & Java)
4134   stack_direction(TOWARDS_LOW);
4135 
4136   // These registers define part of the calling convention
4137   // between compiled code and the interpreter.
4138   inline_cache_reg(RAX);                // Inline Cache Register
4139   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4140                                         // calling interpreter
4141 
4142   // Optional: name the operand used by cisc-spilling to access
4143   // [stack_pointer + offset]
4144   cisc_spilling_operand_name(indOffset32);
4145 
4146   // Number of stack slots consumed by locking an object
4147   sync_stack_slots(2);
4148 
4149   // Compiled code's Frame Pointer
4150   frame_pointer(RSP);
4151 
4152   // Interpreter stores its frame pointer in a register which is
4153   // stored to the stack by I2CAdaptors.
4154   // I2CAdaptors convert from interpreted java to compiled java.
4155   interpreter_frame_pointer(RBP);
4156 
4157   // Stack alignment requirement
4158   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4159 
4160   // Number of stack slots between incoming argument block and the start of
4161   // a new frame.  The PROLOG must add this many slots to the stack.  The
4162   // EPILOG must remove this many slots.  amd64 needs two slots for
4163   // return address.
       // NOTE(review): the base value below is 4 slots, not 2; presumably the
       // other two slots hold the saved frame pointer -- confirm and update the
       // sentence above.  VerifyStackAtCalls adds two more slots when enabled.
4164   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4165 
4166   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4167   // for calls to C.  Supports the var-args backing area for register parms.
4168   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4169 
4170   // The after-PROLOG location of the return address.  Location of
4171   // return address specifies a type (REG or STACK) and a number
4172   // representing the register number (i.e. - use a register name) or
4173   // stack slot.
4174   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4175   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2, plus 2 for VerifyStackAtCalls, plus the fixed slots
       // of the current compilation) is rounded up to a multiple of
       // WordsPerLong * 2 before the "- 2" adjustment.
4176   return_addr(STACK - 2 +
4177               round_to(2 + 2 * VerifyStackAtCalls +
4178                        Compile::current()->fixed_slots(),
4179                        WordsPerLong * 2));
4180 
4181   // Body of function which returns an integer array locating
4182   // arguments either in registers or in stack slots.  Passed an array
4183   // of ideal registers called "sig" and a "length" count.  Stack-slot
4184   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4185   // arguments for a CALLEE.  Incoming stack arguments are
4186   // automatically biased by the preserve_stack_slots field above.
4187 
4188   calling_convention
4189   %{
4190     // No difference between ingoing/outgoing just pass false
4191     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4192   %}
4193 
4194   c_calling_convention
4195   %{
4196     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
4197     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4198   %}
4199 
4200   // Location of compiled Java return values.  Same as C for now.
4201   return_value
4202   %{
         // Maps ideal_reg to the (hi, lo) register pair holding a return value,
         // using two tables indexed directly by ideal_reg.  The first two
         // entries of each table are placeholders for the indices below Op_RegN.
         // NOTE(review): the tables carry an Op_RegN entry, but going by the
         // per-entry comments Op_RegN precedes Op_RegI, so the assert below
         // would reject it -- confirm whether that entry is reachable.
4203     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4204            "only return normal values");
4205 
4206     static const int lo[Op_RegL + 1] = {
4207       0,
4208       0,
4209       RAX_num,  // Op_RegN
4210       RAX_num,  // Op_RegI
4211       RAX_num,  // Op_RegP
4212       XMM0_num, // Op_RegF
4213       XMM0_num, // Op_RegD
4214       RAX_num   // Op_RegL
4215     };
         // OptoReg::Bad marks values that occupy no upper half.
4216     static const int hi[Op_RegL + 1] = {
4217       0,
4218       0,
4219       OptoReg::Bad, // Op_RegN
4220       OptoReg::Bad, // Op_RegI
4221       RAX_H_num,    // Op_RegP
4222       OptoReg::Bad, // Op_RegF
4223       XMM0_H_num,   // Op_RegD
4224       RAX_H_num     // Op_RegL
4225     };
         // Guards against a new machine leaf type being added without extending
         // the tables above.
4226     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4227     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4228   %}
4229 %}
4230 
4231 //----------ATTRIBUTES---------------------------------------------------------
4232 //----------Operand Attributes-------------------------------------------------
4233 op_attrib op_cost(0);        // Required cost attribute; declared here with value 0,
                                  // individual operands set their own op_cost(n)
4234 
4235 //----------Instruction Attributes---------------------------------------------
     // Each ins_attrib line declares an instruction attribute together with the
     // value written in parentheses.
4236 ins_attrib ins_cost(100);       // Required cost attribute
4237 ins_attrib ins_size(8);         // Required size attribute (in bits)
4238 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4239 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4240                                 // a non-matching short branch variant
4241                                 // of some long branch?
4242 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4243                                 // be a power of 2) specifies the
4244                                 // alignment that some part of the
4245                                 // instruction (not necessarily the
4246                                 // start) requires.  If > 1, a
4247                                 // compute_padding() function must be
4248                                 // provided for the instruction
4249 
4250 //----------OPERANDS-----------------------------------------------------------
4251 // Operand definitions must precede instruction definitions for correct parsing
4252 // in the ADLC because operands constitute user defined types which are used in
4253 // instruction definitions.
4254 
4255 //----------Simple Operands----------------------------------------------------
4256 // Immediate Operands
4257 // Integer Immediate: matches any ConI node (no predicate), cost 10.
4258 operand immI()
4259 %{
4260   match(ConI);
4261 
4262   op_cost(10);
4263   format %{ %}
4264   interface(CONST_INTER);
4265 %}
4266 
4267 // Constant for test vs zero: matches a ConI whose value is exactly 0, cost 0.
4268 operand immI0()
4269 %{
4270   predicate(n->get_int() == 0);
4271   match(ConI);
4272 
4273   op_cost(0);
4274   format %{ %}
4275   interface(CONST_INTER);
4276 %}
4277 
4278 // Constant for increment: matches a ConI whose value is exactly 1, cost 0.
4279 operand immI1()
4280 %{
4281   predicate(n->get_int() == 1);
4282   match(ConI);
4283 
4284   op_cost(0);
4285   format %{ %}
4286   interface(CONST_INTER);
4287 %}
4288 
4289 // Constant for decrement: matches a ConI whose value is exactly -1, cost 0.
4290 operand immI_M1()
4291 %{
4292   predicate(n->get_int() == -1);
4293   match(ConI);
4294 
4295   op_cost(0);
4296   format %{ %}
4297   interface(CONST_INTER);
4298 %}
4299 
4300 // Valid scale values for addressing modes: matches a ConI in the range 0..3
     // inclusive.  No op_cost is specified for this operand.
4301 operand immI2()
4302 %{
4303   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4304   match(ConI);
4305 
4306   format %{ %}
4307   interface(CONST_INTER);
4308 %}
4309 
4310 operand immI8()
4311 %{
       // Integer immediate that fits in a signed 8-bit value:
       // -0x80 <= value < 0x80.  Cost 5.
4312   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4313   match(ConI);
4314 
4315   op_cost(5);
4316   format %{ %}
4317   interface(CONST_INTER);
4318 %}
4319 
4320 operand immI16()
4321 %{
       // Integer immediate that fits in a signed 16-bit value:
       // -32768 <= value <= 32767.  Cost 10.
4322   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4323   match(ConI);
4324 
4325   op_cost(10);
4326   format %{ %}
4327   interface(CONST_INTER);
4328 %}
4329 
4330 // Constant for long shifts: matches a ConI whose value is exactly 32, cost 0.
4331 operand immI_32()
4332 %{
4333   predicate( n->get_int() == 32 );
4334   match(ConI);
4335 
4336   op_cost(0);
4337   format %{ %}
4338   interface(CONST_INTER);
4339 %}
4340 
4341 // Constant for long shifts: matches a ConI whose value is exactly 64, cost 0.
4342 operand immI_64()
4343 %{
4344   predicate( n->get_int() == 64 );
4345   match(ConI);
4346 
4347   op_cost(0);
4348   format %{ %}
4349   interface(CONST_INTER);
4350 %}
4351 
4352 // Pointer Immediate: matches any ConP node (no predicate), op_cost 10.
4353 operand immP()
4354 %{
4355   match(ConP);
4356 
4357   op_cost(10);
4358   format %{ %}
4359   interface(CONST_INTER);
4360 %}
4361 
4362 // NULL Pointer Immediate: ConP whose get_ptr() value is 0.
4363 operand immP0()
4364 %{
4365   predicate(n->get_ptr() == 0);
4366   match(ConP);
4367 
4368   op_cost(5);
4369   format %{ %}
4370   interface(CONST_INTER);
4371 %}
4372 
4373 // Narrow Pointer Immediate: matches any ConN node (no predicate), op_cost 10.
4374 operand immN() %{
4375   match(ConN);
4376 
4377   op_cost(10);
4378   format %{ %}
4379   interface(CONST_INTER);
4380 %}
4381 
4382 // NULL Narrow Pointer Immediate: ConN whose get_narrowcon() value is 0.
4383 operand immN0() %{
4384   predicate(n->get_narrowcon() == 0);
4385   match(ConN);
4386 
4387   op_cost(5);
4388   format %{ %}
4389   interface(CONST_INTER);
4390 %}
4391 
4392 operand immP31()
4393 %{
4394   predicate(!n->as_Type()->type()->isa_oopptr()
4395             && (n->get_ptr() >> 31) == 0);  // non-oop ConP whose value shifted right by 31 is zero
4396   match(ConP);
4397 
4398   op_cost(5);
4399   format %{ %}
4400   interface(CONST_INTER);
4401 %}
4402 
4403 
4404 // Long Immediate: matches any ConL node (no predicate), op_cost 20.
4405 operand immL()
4406 %{
4407   match(ConL);
4408 
4409   op_cost(20);
4410   format %{ %}
4411   interface(CONST_INTER);
4412 %}
4413 
4414 // Long Immediate 8-bit: ConL whose value lies in [-0x80, 0x80).
4415 operand immL8()
4416 %{
4417   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4418   match(ConL);
4419 
4420   op_cost(5);
4421   format %{ %}
4422   interface(CONST_INTER);
4423 %}
4424 
4425 // Long Immediate 32-bit unsigned: ConL that is unchanged by a cast to unsigned int.
4426 operand immUL32()
4427 %{
4428   predicate(n->get_long() == (unsigned int) (n->get_long()));
4429   match(ConL);
4430 
4431   op_cost(10);
4432   format %{ %}
4433   interface(CONST_INTER);
4434 %}
4435 
4436 // Long Immediate 32-bit signed: ConL that is unchanged by a cast to int.
4437 operand immL32()
4438 %{
4439   predicate(n->get_long() == (int) (n->get_long()));
4440   match(ConL);
4441 
4442   op_cost(15);
4443   format %{ %}
4444   interface(CONST_INTER);
4445 %}
4446 
4447 // Long Immediate zero: ConL whose value is exactly 0.
4448 operand immL0()
4449 %{
4450   predicate(n->get_long() == 0L);
4451   match(ConL);
4452 
4453   op_cost(10);
4454   format %{ %}
4455   interface(CONST_INTER);
4456 %}
4457 
4458 // Constant for increment: ConL whose value is exactly 1.
4459 operand immL1()
4460 %{
4461   predicate(n->get_long() == 1);
4462   match(ConL);
4463 
4464   format %{ %}
4465   interface(CONST_INTER);
4466 %}
4467 
4468 // Constant for decrement: ConL whose value is exactly -1.
4469 operand immL_M1()
4470 %{
4471   predicate(n->get_long() == -1);
4472   match(ConL);
4473 
4474   format %{ %}
4475   interface(CONST_INTER);
4476 %}
4477 
4478 // Long Immediate: ConL whose value is exactly 10.
4479 operand immL10()
4480 %{
4481   predicate(n->get_long() == 10);
4482   match(ConL);
4483 
4484   format %{ %}
4485   interface(CONST_INTER);
4486 %}
4487 
4488 // Long immediate from 0 to 127 inclusive (predicate: 0 <= value < 0x80).
4489 // Used for a shorter form of long mul by 10.
4490 operand immL_127()
4491 %{
4492   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4493   match(ConL);
4494 
4495   op_cost(10);
4496   format %{ %}
4497   interface(CONST_INTER);
4498 %}
4499 
4500 // Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF.
4501 operand immL_32bits()
4502 %{
4503   predicate(n->get_long() == 0xFFFFFFFFL);
4504   match(ConL);
4505   op_cost(20);
4506 
4507   format %{ %}
4508   interface(CONST_INTER);
4509 %}
4510 
4511 // Float Immediate zero: ConF for which jint_cast(n->getf()) == 0.
4512 operand immF0()
4513 %{
4514   predicate(jint_cast(n->getf()) == 0);  // NOTE(review): presumably a bit-pattern test, so -0.0f would not match - confirm
4515   match(ConF);
4516 
4517   op_cost(5);
4518   format %{ %}
4519   interface(CONST_INTER);
4520 %}
4521 
4522 // Float Immediate: matches any ConF node (no predicate), op_cost 15.
4523 operand immF()
4524 %{
4525   match(ConF);
4526 
4527   op_cost(15);
4528   format %{ %}
4529   interface(CONST_INTER);
4530 %}
4531 
4532 // Double Immediate zero: ConD for which jlong_cast(n->getd()) == 0.
4533 operand immD0()
4534 %{
4535   predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): presumably a bit-pattern test, so -0.0 would not match - confirm
4536   match(ConD);
4537 
4538   op_cost(5);
4539   format %{ %}
4540   interface(CONST_INTER);
4541 %}
4542 
4543 // Double Immediate: matches any ConD node (no predicate), op_cost 15.
4544 operand immD()
4545 %{
4546   match(ConD);
4547 
4548   op_cost(15);
4549   format %{ %}
4550   interface(CONST_INTER);
4551 %}
4552 
4553 // Immediates for special shifts (sign extend)
4554 
4555 // Constant for special shifts: ConI whose value is exactly 16.
4556 operand immI_16()
4557 %{
4558   predicate(n->get_int() == 16);
4559   match(ConI);
4560 
4561   format %{ %}
4562   interface(CONST_INTER);
4563 %}
4564 
4565 operand immI_24()
4566 %{
4567   predicate(n->get_int() == 24);  // ConI whose value is exactly 24
4568   match(ConI);
4569 
4570   format %{ %}
4571   interface(CONST_INTER);
4572 %}
4573 
4574 // Constant for byte-wide masking: ConI whose value is exactly 255 (0xFF).
4575 operand immI_255()
4576 %{
4577   predicate(n->get_int() == 255);
4578   match(ConI);
4579 
4580   format %{ %}
4581   interface(CONST_INTER);
4582 %}
4583 
4584 // Constant for short-wide masking: ConI whose value is exactly 65535 (0xFFFF).
4585 operand immI_65535()
4586 %{
4587   predicate(n->get_int() == 65535);
4588   match(ConI);
4589 
4590   format %{ %}
4591   interface(CONST_INTER);
4592 %}
4593 
4594 // Constant for byte-wide masking: ConL whose value is exactly 255 (0xFF).
4595 operand immL_255()
4596 %{
4597   predicate(n->get_long() == 255);
4598   match(ConL);
4599 
4600   format %{ %}
4601   interface(CONST_INTER);
4602 %}
4603 
4604 // Constant for short-wide masking: ConL whose value is exactly 65535 (0xFFFF).
4605 operand immL_65535()
4606 %{
4607   predicate(n->get_long() == 65535);
4608   match(ConL);
4609 
4610   format %{ %}
4611   interface(CONST_INTER);
4612 %}
4613 
4614 // Register Operands
4615 // Integer Register: RegI operand allocated from the int_reg register class.
4616 operand rRegI()
4617 %{
4618   constraint(ALLOC_IN_RC(int_reg));
4619   match(RegI);
4620   // The fixed-register int operands listed here are also accepted as an rRegI.
4621   match(rax_RegI);
4622   match(rbx_RegI);
4623   match(rcx_RegI);
4624   match(rdx_RegI);
4625   match(rdi_RegI);
4626 
4627   format %{ %}
4628   interface(REG_INTER);
4629 %}
4630 
4631 // Special Register: RegI operand restricted to the int_rax_reg class; formats as "RAX".
4632 operand rax_RegI()
4633 %{
4634   constraint(ALLOC_IN_RC(int_rax_reg));
4635   match(RegI);
4636   match(rRegI);
4637 
4638   format %{ "RAX" %}
4639   interface(REG_INTER);
4640 %}
4641 
4642 // Special Register: RegI operand restricted to the int_rbx_reg class; formats as "RBX".
4643 operand rbx_RegI()
4644 %{
4645   constraint(ALLOC_IN_RC(int_rbx_reg));
4646   match(RegI);
4647   match(rRegI);
4648 
4649   format %{ "RBX" %}
4650   interface(REG_INTER);
4651 %}
4652 
4653 operand rcx_RegI()
4654 %{
4655   constraint(ALLOC_IN_RC(int_rcx_reg));  // RegI operand restricted to the int_rcx_reg class
4656   match(RegI);
4657   match(rRegI);
4658 
4659   format %{ "RCX" %}
4660   interface(REG_INTER);
4661 %}
4662 
4663 operand rdx_RegI()
4664 %{
4665   constraint(ALLOC_IN_RC(int_rdx_reg));  // RegI operand restricted to the int_rdx_reg class
4666   match(RegI);
4667   match(rRegI);
4668 
4669   format %{ "RDX" %}
4670   interface(REG_INTER);
4671 %}
4672 
4673 operand rdi_RegI()
4674 %{
4675   constraint(ALLOC_IN_RC(int_rdi_reg));  // RegI operand restricted to the int_rdi_reg class
4676   match(RegI);
4677   match(rRegI);
4678 
4679   format %{ "RDI" %}
4680   interface(REG_INTER);
4681 %}
4682 
4683 operand no_rcx_RegI()
4684 %{
4685   constraint(ALLOC_IN_RC(int_no_rcx_reg));  // RegI operand allocated from int_no_rcx_reg
4686   match(RegI);
4687   match(rax_RegI);  // every fixed int operand except rcx_RegI is accepted
4688   match(rbx_RegI);
4689   match(rdx_RegI);
4690   match(rdi_RegI);
4691 
4692   format %{ %}
4693   interface(REG_INTER);
4694 %}
4695 
4696 operand no_rax_rdx_RegI()
4697 %{
4698   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // RegI operand allocated from int_no_rax_rdx_reg
4699   match(RegI);
4700   match(rbx_RegI);  // fixed int operands other than rax_RegI and rdx_RegI are accepted
4701   match(rcx_RegI);
4702   match(rdi_RegI);
4703 
4704   format %{ %}
4705   interface(REG_INTER);
4706 %}
4707 
4708 // Pointer Register: RegP operand allocated from any_reg; also matches rRegP and the fixed pointer operands.
4709 operand any_RegP()
4710 %{
4711   constraint(ALLOC_IN_RC(any_reg));
4712   match(RegP);
4713   match(rax_RegP);
4714   match(rbx_RegP);
4715   match(rdi_RegP);
4716   match(rsi_RegP);
4717   match(rbp_RegP);
4718   match(r15_RegP);
4719   match(rRegP);
4720 
4721   format %{ %}
4722   interface(REG_INTER);
4723 %}
4724 
4725 operand rRegP()
4726 %{
4727   constraint(ALLOC_IN_RC(ptr_reg));  // RegP operand allocated from the ptr_reg class
4728   match(RegP);
4729   match(rax_RegP);
4730   match(rbx_RegP);
4731   match(rdi_RegP);
4732   match(rsi_RegP);
4733   match(rbp_RegP);
4734   match(r15_RegP);  // See Q&A below about r15_RegP.
4735 
4736   format %{ %}
4737   interface(REG_INTER);
4738 %}
4739 
4740 operand rRegN() %{
4741   constraint(ALLOC_IN_RC(int_reg));  // RegN operand; allocated from int_reg, the same class rRegI uses
4742   match(RegN);
4743 
4744   format %{ %}
4745   interface(REG_INTER);
4746 %}
4747 
4748 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4749 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4750 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4751 // The output of an instruction is controlled by the allocator, which respects
4752 // register class masks, not match rules.  Unless an instruction mentions
4753 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4754 // by the allocator as an input.
4755 
4756 operand no_rax_RegP()
4757 %{
4758   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // RegP operand allocated from ptr_no_rax_reg
4759   match(RegP);
4760   match(rbx_RegP);
4761   match(rsi_RegP);
4762   match(rdi_RegP);
4763 
4764   format %{ %}
4765   interface(REG_INTER);
4766 %}
4767 
4768 operand no_rbp_RegP()
4769 %{
4770   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // RegP operand allocated from ptr_no_rbp_reg
4771   match(RegP);
4772   match(rbx_RegP);
4773   match(rsi_RegP);
4774   match(rdi_RegP);
4775 
4776   format %{ %}
4777   interface(REG_INTER);
4778 %}
4779 
4780 operand no_rax_rbx_RegP()
4781 %{
4782   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // RegP operand allocated from ptr_no_rax_rbx_reg
4783   match(RegP);
4784   match(rsi_RegP);
4785   match(rdi_RegP);
4786 
4787   format %{ %}
4788   interface(REG_INTER);
4789 %}
4790 
4791 // Special Registers
4792 // Return a pointer value: RegP operand restricted to the ptr_rax_reg class.
4793 operand rax_RegP()
4794 %{
4795   constraint(ALLOC_IN_RC(ptr_rax_reg));
4796   match(RegP);
4797   match(rRegP);
4798 
4799   format %{ %}
4800   interface(REG_INTER);
4801 %}
4802 
4803 // Special Registers
4804 // Return a compressed pointer value: RegN operand restricted to the int_rax_reg class.
4805 operand rax_RegN()
4806 %{
4807   constraint(ALLOC_IN_RC(int_rax_reg));
4808   match(RegN);
4809   match(rRegN);
4810 
4811   format %{ %}
4812   interface(REG_INTER);
4813 %}
4814 
4815 // Used in AtomicAdd: RegP operand restricted to the ptr_rbx_reg class.
4816 operand rbx_RegP()
4817 %{
4818   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4819   match(RegP);
4820   match(rRegP);
4821 
4822   format %{ %}
4823   interface(REG_INTER);
4824 %}
4825 
4826 operand rsi_RegP()
4827 %{
4828   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // RegP operand restricted to the ptr_rsi_reg class
4829   match(RegP);
4830   match(rRegP);
4831 
4832   format %{ %}
4833   interface(REG_INTER);
4834 %}
4835 
4836 // Used in rep stosq: RegP operand restricted to the ptr_rdi_reg class.
4837 operand rdi_RegP()
4838 %{
4839   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4840   match(RegP);
4841   match(rRegP);
4842 
4843   format %{ %}
4844   interface(REG_INTER);
4845 %}
4846 
4847 operand rbp_RegP()
4848 %{
4849   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // RegP operand restricted to the ptr_rbp_reg class
4850   match(RegP);
4851   match(rRegP);
4852 
4853   format %{ %}
4854   interface(REG_INTER);
4855 %}
4856 
4857 operand r15_RegP()
4858 %{
4859   constraint(ALLOC_IN_RC(ptr_r15_reg));  // RegP operand restricted to ptr_r15_reg; see the Q&A comment after rRegN
4860   match(RegP);
4861   match(rRegP);
4862 
4863   format %{ %}
4864   interface(REG_INTER);
4865 %}
4866 
4867 operand rRegL()
4868 %{
4869   constraint(ALLOC_IN_RC(long_reg));  // RegL operand allocated from the long_reg class
4870   match(RegL);
4871   match(rax_RegL);
4872   match(rdx_RegL);
4873 
4874   format %{ %}
4875   interface(REG_INTER);
4876 %}
4877 
4878 // Special Registers: RegL operand allocated from the long_no_rax_rdx_reg class.
4879 operand no_rax_rdx_RegL()
4880 %{
4881   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4882   match(RegL);
4883   match(rRegL);
4884 
4885   format %{ %}
4886   interface(REG_INTER);
4887 %}
4888 
4889 operand no_rax_RegL()
4890 %{
4891   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL, yet rdx_RegL is matched below - confirm intended
4892   match(RegL);
4893   match(rRegL);
4894   match(rdx_RegL);
4895 
4896   format %{ %}
4897   interface(REG_INTER);
4898 %}
4899 
4900 operand no_rcx_RegL()
4901 %{
4902   constraint(ALLOC_IN_RC(long_no_rcx_reg));  // RegL operand allocated from long_no_rcx_reg
4903   match(RegL);
4904   match(rRegL);
4905 
4906   format %{ %}
4907   interface(REG_INTER);
4908 %}
4909 
4910 operand rax_RegL()
4911 %{
4912   constraint(ALLOC_IN_RC(long_rax_reg));  // RegL operand restricted to the long_rax_reg class
4913   match(RegL);
4914   match(rRegL);
4915 
4916   format %{ "RAX" %}
4917   interface(REG_INTER);
4918 %}
4919 
4920 operand rcx_RegL()
4921 %{
4922   constraint(ALLOC_IN_RC(long_rcx_reg));  // RegL operand restricted to the long_rcx_reg class
4923   match(RegL);
4924   match(rRegL);
4925 
4926   format %{ %}
4927   interface(REG_INTER);
4928 %}
4929 
4930 operand rdx_RegL()
4931 %{
4932   constraint(ALLOC_IN_RC(long_rdx_reg));  // RegL operand restricted to the long_rdx_reg class
4933   match(RegL);
4934   match(rRegL);
4935 
4936   format %{ %}
4937   interface(REG_INTER);
4938 %}
4939 
4940 // Flags register, used as output of compare instructions (int_flags class; formats as "RFLAGS")
4941 operand rFlagsReg()
4942 %{
4943   constraint(ALLOC_IN_RC(int_flags));
4944   match(RegFlags);
4945 
4946   format %{ "RFLAGS" %}
4947   interface(REG_INTER);
4948 %}
4949 
4950 // Flags register, used as output of FLOATING POINT compare instructions
4951 operand rFlagsRegU()
4952 %{
4953   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsReg; only the operand type differs
4954   match(RegFlags);
4955 
4956   format %{ "RFLAGS_U" %}
4957   interface(REG_INTER);
4958 %}
4959 
4960 operand rFlagsRegUCF() %{
4961   constraint(ALLOC_IN_RC(int_flags));
4962   match(RegFlags);
4963   predicate(false);  // constant-false predicate; presumably this operand is only used where an instruction names it explicitly - confirm
4964 
4965   format %{ "RFLAGS_U_CF" %}
4966   interface(REG_INTER);
4967 %}
4968 
4969 // Float register operands: RegF allocated from the float_reg class
4970 operand regF()
4971 %{
4972   constraint(ALLOC_IN_RC(float_reg));
4973   match(RegF);
4974 
4975   format %{ %}
4976   interface(REG_INTER);
4977 %}
4978 
4979 // Double register operands: RegD allocated from the double_reg class
4980 operand regD() 
4981 %{
4982   constraint(ALLOC_IN_RC(double_reg));
4983   match(RegD);
4984 
4985   format %{ %}
4986   interface(REG_INTER);
4987 %}
4988 
4989 
4990 //----------Memory Operands----------------------------------------------------
4991 // Direct Memory Operand
4992 // operand direct(immP addr)
4993 // %{
4994 //   match(addr);
4995 
4996 //   format %{ "[$addr]" %}
4997 //   interface(MEMORY_INTER) %{
4998 //     base(0xFFFFFFFF);
4999 //     index(0x4);
5000 //     scale(0x0);
5001 //     disp($addr);
5002 //   %}
5003 // %}
5004 
5005 // Indirect Memory Operand: the address is the base register alone, [$reg].
5006 operand indirect(any_RegP reg)
5007 %{
5008   constraint(ALLOC_IN_RC(ptr_reg));
5009   match(reg);
5010 
5011   format %{ "[$reg]" %}
5012   interface(MEMORY_INTER) %{
5013     base($reg);
5014     index(0x4);  // No Index
5015     scale(0x0);  // No Scale
5016     disp(0x0);   // No Displacement
5017   %}
5018 %}
5019 
5020 // Indirect Memory Plus Short Offset Operand: [$reg + $off], $off being an immL8 constant.
5021 operand indOffset8(any_RegP reg, immL8 off)
5022 %{
5023   constraint(ALLOC_IN_RC(ptr_reg));
5024   match(AddP reg off);
5025 
5026   format %{ "[$reg + $off (8-bit)]" %}
5027   interface(MEMORY_INTER) %{
5028     base($reg);
5029     index(0x4);  // No Index
5030     scale(0x0);  // No Scale
5031     disp($off);
5032   %}
5033 %}
5034 
5035 // Indirect Memory Plus Long Offset Operand: [$reg + $off], $off being an immL32 constant.
5036 operand indOffset32(any_RegP reg, immL32 off)
5037 %{
5038   constraint(ALLOC_IN_RC(ptr_reg));
5039   match(AddP reg off);
5040 
5041   format %{ "[$reg + $off (32-bit)]" %}
5042   interface(MEMORY_INTER) %{
5043     base($reg);
5044     index(0x4);  // No Index
5045     scale(0x0);  // No Scale
5046     disp($off);
5047   %}
5048 %}
5049 
5050 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $lreg + $off], index unscaled.
5051 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5052 %{
5053   constraint(ALLOC_IN_RC(ptr_reg));
5054   match(AddP (AddP reg lreg) off);
5055 
5056   op_cost(10);
5057   format %{"[$reg + $off + $lreg]" %}
5058   interface(MEMORY_INTER) %{
5059     base($reg);
5060     index($lreg);
5061     scale(0x0);  // No Scale
5062     disp($off);
5063   %}
5064 %}
5065 
5066 // Indirect Memory Plus Index Register Operand: [$reg + $lreg], no scale and no displacement.
5067 operand indIndex(any_RegP reg, rRegL lreg)
5068 %{
5069   constraint(ALLOC_IN_RC(ptr_reg));
5070   match(AddP reg lreg);
5071 
5072   op_cost(10);
5073   format %{"[$reg + $lreg]" %}
5074   interface(MEMORY_INTER) %{
5075     base($reg);
5076     index($lreg);
5077     scale(0x0);  // No Scale
5078     disp(0x0);   // No Displacement
5079   %}
5080 %}
5081 
5082 // Indirect Memory Times Scale Plus Index Register: [$reg + ($lreg << $scale)], scale is an immI2 (0..3).
5083 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5084 %{
5085   constraint(ALLOC_IN_RC(ptr_reg));
5086   match(AddP reg (LShiftL lreg scale));
5087 
5088   op_cost(10);
5089   format %{"[$reg + $lreg << $scale]" %}
5090   interface(MEMORY_INTER) %{
5091     base($reg);
5092     index($lreg);
5093     scale($scale);
5094     disp(0x0);   // No Displacement
5095   %}
5096 %}
5097 
5098 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [$reg + ($lreg << $scale) + $off]
5099 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5100 %{
5101   constraint(ALLOC_IN_RC(ptr_reg));
5102   match(AddP (AddP reg (LShiftL lreg scale)) off);
5103 
5104   op_cost(10);
5105   format %{"[$reg + $off + $lreg << $scale]" %}
5106   interface(MEMORY_INTER) %{
5107     base($reg);
5108     index($lreg);
5109     scale($scale);
5110     disp($off);
5111   %}
5112 %}
5113 
5114 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5115 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5116 %{
5117   constraint(ALLOC_IN_RC(ptr_reg));
5118   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5119   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5120   // The predicate requires a long type with _lo >= 0 (presumably the ConvI2L node's type - confirm); $idx itself is the int register.
5121   op_cost(10);
5122   format %{"[$reg + $off + $idx << $scale]" %}
5123   interface(MEMORY_INTER) %{
5124     base($reg);
5125     index($idx);
5126     scale($scale);
5127     disp($off);
5128   %}
5129 %}
5130 
5131 // Indirect Narrow Oop Plus Offset Operand: [R12 + ($reg << 3) + $off], only when narrow_oop_shift() != 0.
5132 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
5133 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
5134 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5135   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5136   constraint(ALLOC_IN_RC(ptr_reg));
5137   match(AddP (DecodeN reg) off);
5138 
5139   op_cost(10);
5140   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5141   interface(MEMORY_INTER) %{
5142     base(0xc); // R12
5143     index($reg);
5144     scale(0x3);
5145     disp($off);
5146   %}
5147 %}
5148 
5149 // Indirect Memory Operand, narrow base: matches (DecodeN reg) when narrow_oop_shift() == 0.
5150 operand indirectNarrow(rRegN reg)
5151 %{
5152   predicate(Universe::narrow_oop_shift() == 0);
5153   constraint(ALLOC_IN_RC(ptr_reg));
5154   match(DecodeN reg);
5155 
5156   format %{ "[$reg]" %}
5157   interface(MEMORY_INTER) %{
5158     base($reg);
5159     index(0x4);  // No Index
5160     scale(0x0);  // No Scale
5161     disp(0x0);   // No Displacement
5162   %}
5163 %}
5164 
5165 // Indirect Memory Plus Short Offset Operand, narrow base (DecodeN reg), when narrow_oop_shift() == 0.
5166 operand indOffset8Narrow(rRegN reg, immL8 off)
5167 %{
5168   predicate(Universe::narrow_oop_shift() == 0);
5169   constraint(ALLOC_IN_RC(ptr_reg));
5170   match(AddP (DecodeN reg) off);
5171 
5172   format %{ "[$reg + $off (8-bit)]" %}
5173   interface(MEMORY_INTER) %{
5174     base($reg);
5175     index(0x4);  // No Index
5176     scale(0x0);  // No Scale
5177     disp($off);
5178   %}
5179 %}
5180 
5181 // Indirect Memory Plus Long Offset Operand, narrow base (DecodeN reg), when narrow_oop_shift() == 0.
5182 operand indOffset32Narrow(rRegN reg, immL32 off)
5183 %{
5184   predicate(Universe::narrow_oop_shift() == 0);
5185   constraint(ALLOC_IN_RC(ptr_reg));
5186   match(AddP (DecodeN reg) off);
5187 
5188   format %{ "[$reg + $off (32-bit)]" %}
5189   interface(MEMORY_INTER) %{
5190     base($reg);
5191     index(0x4);  // No Index
5192     scale(0x0);  // No Scale
5193     disp($off);
5194   %}
5195 %}
5196 
5197 // Indirect Memory Plus Index Register Plus Offset Operand, narrow base (DecodeN reg), when narrow_oop_shift() == 0.
5198 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5199 %{
5200   predicate(Universe::narrow_oop_shift() == 0);
5201   constraint(ALLOC_IN_RC(ptr_reg));
5202   match(AddP (AddP (DecodeN reg) lreg) off);
5203 
5204   op_cost(10);
5205   format %{"[$reg + $off + $lreg]" %}
5206   interface(MEMORY_INTER) %{
5207     base($reg);
5208     index($lreg);
5209     scale(0x0);  // No Scale
5210     disp($off);
5211   %}
5212 %}
5213 
5214 // Indirect Memory Plus Index Register Operand, narrow base (DecodeN reg), when narrow_oop_shift() == 0.
5215 operand indIndexNarrow(rRegN reg, rRegL lreg)
5216 %{
5217   predicate(Universe::narrow_oop_shift() == 0);
5218   constraint(ALLOC_IN_RC(ptr_reg));
5219   match(AddP (DecodeN reg) lreg);
5220 
5221   op_cost(10);
5222   format %{"[$reg + $lreg]" %}
5223   interface(MEMORY_INTER) %{
5224     base($reg);
5225     index($lreg);
5226     scale(0x0);  // No Scale
5227     disp(0x0);   // No Displacement
5228   %}
5229 %}
5230 
5231 // Indirect Memory Times Scale Plus Index Register, narrow base (DecodeN reg), when narrow_oop_shift() == 0.
5232 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5233 %{
5234   predicate(Universe::narrow_oop_shift() == 0);
5235   constraint(ALLOC_IN_RC(ptr_reg));
5236   match(AddP (DecodeN reg) (LShiftL lreg scale));
5237 
5238   op_cost(10);
5239   format %{"[$reg + $lreg << $scale]" %}
5240   interface(MEMORY_INTER) %{
5241     base($reg);
5242     index($lreg);
5243     scale($scale);
5244     disp(0x0);   // No Displacement
5245   %}
5246 %}
5247 
5248 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand, narrow base, when narrow_oop_shift() == 0.
5249 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5250 %{
5251   predicate(Universe::narrow_oop_shift() == 0);
5252   constraint(ALLOC_IN_RC(ptr_reg));
5253   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5254 
5255   op_cost(10);
5256   format %{"[$reg + $off + $lreg << $scale]" %}
5257   interface(MEMORY_INTER) %{
5258     base($reg);
5259     index($lreg);
5260     scale($scale);
5261     disp($off);
5262   %}
5263 %}
5264 
5265 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow base (DecodeN reg).
5266 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5267 %{
5268   constraint(ALLOC_IN_RC(ptr_reg));
5269   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5270   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5271   // Requires narrow_oop_shift() == 0 and a long type with _lo >= 0 (presumably the ConvI2L node's type - confirm).
5272   op_cost(10);
5273   format %{"[$reg + $off + $idx << $scale]" %}
5274   interface(MEMORY_INTER) %{
5275     base($reg);
5276     index($idx);
5277     scale($scale);
5278     disp($off);
5279   %}
5280 %}
5281 
5282 
5283 //----------Special Memory Operands--------------------------------------------
5284 // Stack Slot Operand - This operand is used for loading and storing temporary
5285 //                      values on the stack where a match requires a value to
5286 //                      flow through memory.
5287 operand stackSlotP(sRegP reg)
5288 %{
5289   constraint(ALLOC_IN_RC(stack_slots));
5290   // No match rule because this operand is only generated in matching
5291   // Addressed as RSP plus the stack offset of the sRegP slot $reg.
5292   format %{ "[$reg]" %}
5293   interface(MEMORY_INTER) %{
5294     base(0x4);   // RSP
5295     index(0x4);  // No Index
5296     scale(0x0);  // No Scale
5297     disp($reg);  // Stack Offset
5298   %}
5299 %}
5300 
5301 operand stackSlotI(sRegI reg)
5302 %{
5303   constraint(ALLOC_IN_RC(stack_slots));
5304   // No match rule because this operand is only generated in matching
5305   // Addressed as RSP plus the stack offset of the sRegI slot $reg.
5306   format %{ "[$reg]" %}
5307   interface(MEMORY_INTER) %{
5308     base(0x4);   // RSP
5309     index(0x4);  // No Index
5310     scale(0x0);  // No Scale
5311     disp($reg);  // Stack Offset
5312   %}
5313 %}
5314 
5315 operand stackSlotF(sRegF reg)
5316 %{
5317   constraint(ALLOC_IN_RC(stack_slots));
5318   // No match rule because this operand is only generated in matching
5319   // Addressed as RSP plus the stack offset of the sRegF slot $reg.
5320   format %{ "[$reg]" %}
5321   interface(MEMORY_INTER) %{
5322     base(0x4);   // RSP
5323     index(0x4);  // No Index
5324     scale(0x0);  // No Scale
5325     disp($reg);  // Stack Offset
5326   %}
5327 %}
5328 
5329 operand stackSlotD(sRegD reg)
5330 %{
5331   constraint(ALLOC_IN_RC(stack_slots));
5332   // No match rule because this operand is only generated in matching
5333   // Addressed as RSP plus the stack offset of the sRegD slot $reg.
5334   format %{ "[$reg]" %}
5335   interface(MEMORY_INTER) %{
5336     base(0x4);   // RSP
5337     index(0x4);  // No Index
5338     scale(0x0);  // No Scale
5339     disp($reg);  // Stack Offset
5340   %}
5341 %}
5342 operand stackSlotL(sRegL reg)
5343 %{
5344   constraint(ALLOC_IN_RC(stack_slots));
5345   // No match rule because this operand is only generated in matching
5346   // Addressed as RSP plus the stack offset of the sRegL slot $reg.
5347   format %{ "[$reg]" %}
5348   interface(MEMORY_INTER) %{
5349     base(0x4);   // RSP
5350     index(0x4);  // No Index
5351     scale(0x0);  // No Scale
5352     disp($reg);  // Stack Offset
5353   %}
5354 %}
5355 
5356 //----------Conditional Branch Operands----------------------------------------
5357 // Comparison Op  - This is the operation of the comparison, and is limited to
5358 //                  the following set of codes:
5359 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5360 //
5361 // Other attributes of the comparison, such as unsignedness, are specified
5362 // by the comparison instruction that sets a condition code flags register.
5363 // That result is represented by a flags operand whose subtype is appropriate
5364 // to the unsignedness (etc.) of the comparison.
5365 //
5366 // Later, the instruction which matches both the Comparison Op (a Bool) and
5367 // the flags (produced by the Cmp) specifies the coding of the comparison op
5368 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5369 
5370 // Comparison Code: maps each Bool test to a condition encoding and its e/ne/l/ge/le/g suffix.
5371 operand cmpOp()
5372 %{
5373   match(Bool);
5374 
5375   format %{ "" %}
5376   interface(COND_INTER) %{
5377     equal(0x4, "e");
5378     not_equal(0x5, "ne");
5379     less(0xC, "l");
5380     greater_equal(0xD, "ge");
5381     less_equal(0xE, "le");
5382     greater(0xF, "g");
5383   %}
5384 %}
5385 
5386 // Comparison Code, unsigned compare.  Used by FP also, with
5387 // C2 (unordered) turned into GT or LT already.  The other bits
5388 // C0 and C3 are turned into Carry & Zero flags.
5389 operand cmpOpU()
5390 %{
5391   match(Bool);
5392   // Same equal/not_equal encodings as cmpOp; the ordering tests use the b/nb/be/nbe suffixes.
5393   format %{ "" %}
5394   interface(COND_INTER) %{
5395     equal(0x4, "e");
5396     not_equal(0x5, "ne");
5397     less(0x2, "b");
5398     greater_equal(0x3, "nb");
5399     less_equal(0x6, "be");
5400     greater(0x7, "nbe");
5401   %}
5402 %}
5403 
5404 
5405 // Floating comparisons that don't require any fixup for the unordered case: only lt, ge, le and gt tests match.
5406 operand cmpOpUCF() %{
5407   match(Bool);
5408   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5409             n->as_Bool()->_test._test == BoolTest::ge ||
5410             n->as_Bool()->_test._test == BoolTest::le ||
5411             n->as_Bool()->_test._test == BoolTest::gt);
5412   format %{ "" %}
5413   interface(COND_INTER) %{
5414     equal(0x4, "e");
5415     not_equal(0x5, "ne");
5416     less(0x2, "b");
5417     greater_equal(0x3, "nb");
5418     less_equal(0x6, "be");
5419     greater(0x7, "nbe");
5420   %}
5421 %}
5422 
5423 
5424 // Floating comparisons that can be fixed up with extra conditional jumps: only ne and eq tests match.
5425 operand cmpOpUCF2() %{
5426   match(Bool);
5427   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5428             n->as_Bool()->_test._test == BoolTest::eq);
5429   format %{ "" %}
5430   interface(COND_INTER) %{
5431     equal(0x4, "e");
5432     not_equal(0x5, "ne");
5433     less(0x2, "b");
5434     greater_equal(0x3, "nb");
5435     less_equal(0x6, "be");
5436     greater(0x7, "nbe");
5437   %}
5438 %}
5439 
5440 
5441 //----------OPERAND CLASSES----------------------------------------------------
5442 // Operand Classes are groups of operands that are used to simplify
5443 // instruction definitions by not requiring the AD writer to specify separate
5444 // instructions for every form of operand when the instruction accepts
5445 // multiple operand types with the same basic encoding and format.  The classic
5446 // case of this is memory operands.
5447 
5448 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5449                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5450                indCompressedOopOffset,
5451                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5452                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5453                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // all ind* operands above; the stackSlot* operands are not members
5454 
5455 //----------PIPELINE-----------------------------------------------------------
5456 // Rules which define the behavior of the target architecture's pipeline.
5457 pipeline %{
5458 
5459 //----------ATTRIBUTES---------------------------------------------------------
5460 attributes %{
5461   variable_size_instructions;        // Variable size instructions
5462   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5463   instruction_unit_size = 1;         // Instruction unit size is 1 byte
5464   instruction_fetch_unit_size = 16;  // The processor fetches one line
5465   instruction_fetch_units = 1;       // of 16 bytes
5466 
5467   // List of nop instructions
5468   nops( MachNop );
5469 %}
5470 
5471 //----------RESOURCES----------------------------------------------------------
5472 // Resources are the functional units available to the machine
5473 
5474 // Generic P2/P3 pipeline
5475 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5476 // 3 instructions decoded per cycle.
5477 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below), 1 branch, 1 FPU,
5478 // 3 ALU op, only ALU0 handles mul instructions.
5479 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5480            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5481            BR, FPU,
5482            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5483 
5484 //----------PIPELINE DESCRIPTION-----------------------------------------------
5485 // Pipeline Description specifies the stages in the machine's pipeline
5486 
5487 // Generic P2/P3 pipeline: six stages, S0 through S5
5488 pipe_desc(S0, S1, S2, S3, S4, S5);
5489 
5490 //----------PIPELINE CLASSES---------------------------------------------------
5491 // Pipeline Classes describe the stages in which input and output are
5492 // referenced by the hardware pipeline.
5493 
5494 // Naming convention: ialu or fpu
5495 // Then: _reg
5496 // Then: _reg if there is a 2nd register
5497 // Then: _long if it's a pair of instructions implementing a long
5498 // Then: _fat if it requires the big decoder
5499 //   Or: _mem if it requires the big decoder and a memory unit.
5500 
5501 // Integer ALU reg operation: dst is read in S3 and written in S4.
5502 pipe_class ialu_reg(rRegI dst)
5503 %{
5504     single_instruction;
5505     dst    : S4(write);
5506     dst    : S3(read);
5507     DECODE : S0;        // any decoder
5508     ALU    : S3;        // any alu
5509 %}
5510 
5511 // Long ALU reg operation: counted as two instructions; dst is read in S3 and written in S4.
5512 pipe_class ialu_reg_long(rRegL dst)
5513 %{
5514     instruction_count(2);
5515     dst    : S4(write);
5516     dst    : S3(read);
5517     DECODE : S0(2);     // any 2 decoders
5518     ALU    : S3(2);     // any 2 alus
5519 %}
5520 
5521 // Integer ALU reg operation using big decoder: same as ialu_reg but decoded on D0 only.
5522 pipe_class ialu_reg_fat(rRegI dst)
5523 %{
5524     single_instruction;
5525     dst    : S4(write);
5526     dst    : S3(read);
5527     D0     : S0;        // big decoder only
5528     ALU    : S3;        // any alu
5529 %}
5530 
5531 // Long ALU reg operation using big decoder: same as ialu_reg_long but decoded on D0 only.
5532 pipe_class ialu_reg_long_fat(rRegL dst)
5533 %{
5534     instruction_count(2);
5535     dst    : S4(write);
5536     dst    : S3(read);
5537     D0     : S0(2);     // big decoder only; twice
5538     ALU    : S3(2);     // any 2 alus
5539 %}
5540 
5541 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
5542 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5543 %{
5544     single_instruction;
5545     dst    : S4(write);
5546     src    : S3(read);
5547     DECODE : S0;        // any decoder
5548     ALU    : S3;        // any alu
5549 %}
5550 
5551 // Long ALU reg-reg operation: counted as two instructions; src is read in S3, dst is written in S4.
5552 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5553 %{
5554     instruction_count(2);
5555     dst    : S4(write);
5556     src    : S3(read);
5557     DECODE : S0(2);     // any 2 decoders
5558     ALU    : S3(2);     // any 2 alus
5559 %}
5560 
5561 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand - confirm intended)
5562 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5563 %{
5564     single_instruction;
5565     dst    : S4(write);
5566     src    : S3(read);
5567     D0     : S0;        // big decoder only
5568     ALU    : S3;        // any alu
5569 %}
5570 
5571 // Long ALU reg-reg operation using big decoder: counted as two instructions, both decoded on D0.
5572 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5573 %{
5574     instruction_count(2);
5575     dst    : S4(write);
5576     src    : S3(read);
5577     D0     : S0(2);     // big decoder only; twice
5578     ALU    : S3(2);     // any 2 alus
5579 %}
5580 
5581 // Integer ALU reg-mem operation: mem is read in S3, the ALU is used in S4, dst is written in S5.
5582 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5583 %{
5584     single_instruction;
5585     dst    : S5(write);
5586     mem    : S3(read);
5587     D0     : S0;        // big decoder only
5588     ALU    : S4;        // any alu
5589     MEM    : S3;        // any mem
5590 %}
5591 
5592 // Integer mem operation (prefetch)
     // One instruction with no register result: the memory operand is read in
     // S3 using a MEM unit; only the big decoder (D0) is claimed, no ALU.
5593 pipe_class ialu_mem(memory mem)
5594 %{
5595     single_instruction;
5596     mem    : S3(read);
5597     D0     : S0;        // big decoder only
5598     MEM    : S3;        // any mem
5599 %}
5600 
5601 // Integer Store to Memory
     // One instruction storing register src: the address operand is read in
     // S3, the stored value (src) is read in S5; uses the big decoder in S0,
     // a MEM unit in S3 and an ALU in S4.
5602 pipe_class ialu_mem_reg(memory mem, rRegI src)
5603 %{
5604     single_instruction;
5605     mem    : S3(read);
5606     src    : S5(read);
5607     D0     : S0;        // big decoder only
5608     ALU    : S4;        // any alu
5609     MEM    : S3;
5610 %}
5611 
5612 // // Long Store to Memory
5613 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5614 // %{
5615 //     instruction_count(2);
5616 //     mem    : S3(read);
5617 //     src    : S5(read);
5618 //     D0     : S0(2);          // big decoder only; twice
5619 //     ALU    : S4(2);     // any 2 alus
5620 //     MEM    : S3(2);  // Both mems
5621 // %}
5622 
5623 // Integer Store of an immediate to Memory
     // Same shape as ialu_mem_reg but without a register source: the address
     // operand is read in S3; uses the big decoder in S0, a MEM unit in S3 and
     // an ALU in S4.
5624 pipe_class ialu_mem_imm(memory mem)
5625 %{
5626     single_instruction;
5627     mem    : S3(read);
5628     D0     : S0;        // big decoder only
5629     ALU    : S4;        // any alu
5630     MEM    : S3;
5631 %}
5632 
5633 // Integer ALU0 reg-reg operation
     // Like ialu_reg_reg, but restricted to the big decoder (D0) and to the
     // specific ALU0 unit: src is read in S3, dst is written in S4.
5634 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5635 %{
5636     single_instruction;
5637     dst    : S4(write);
5638     src    : S3(read);
5639     D0     : S0;        // Big decoder only
5640     ALU0   : S3;        // only alu0
5641 %}
5642 
5643 // Integer ALU0 reg-mem operation
     // Like ialu_reg_mem, but the ALU stage (S4) must use ALU0: the memory
     // operand is read in S3 via a MEM unit and dst is written in S5.
5644 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5645 %{
5646     single_instruction;
5647     dst    : S5(write);
5648     mem    : S3(read);
5649     D0     : S0;        // big decoder only
5650     ALU0   : S4;        // ALU0 only
5651     MEM    : S3;        // any mem
5652 %}
5653 
5654 // Integer ALU reg-reg operation producing flags
     // One instruction whose only result is the flags register: src1 and src2
     // are read in S3 and cr is written in S4; any decoder, any ALU.
5655 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5656 %{
5657     single_instruction;
5658     cr     : S4(write);
5659     src1   : S3(read);
5660     src2   : S3(read);
5661     DECODE : S0;        // any decoder
5662     ALU    : S3;        // any alu
5663 %}
5664 
5665 // Integer ALU reg-imm operation producing flags
     // One instruction: src1 is read in S3 and cr is written in S4; the
     // immediate is not modelled as an operand.  Any decoder, any ALU.
5666 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5667 %{
5668     single_instruction;
5669     cr     : S4(write);
5670     src1   : S3(read);
5671     DECODE : S0;        // any decoder
5672     ALU    : S3;        // any alu
5673 %}
5674 
5675 // Integer ALU reg-mem operation producing flags
     // One instruction: register src1 and memory operand src2 are read in S3
     // (MEM unit in S3), the ALU runs in S4 and cr is declared written in S4.
5676 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5677 %{
5678     single_instruction;
5679     cr     : S4(write);
5680     src1   : S3(read);
5681     src2   : S3(read);
5682     D0     : S0;        // big decoder only
5683     ALU    : S4;        // any alu
5684     MEM    : S3;
5685 %}
5686 
5687 // Four-instruction compare idiom over three integer registers
     // Modelled as four instructions: p and q are read in S3 and y in S4; four
     // decoder slots are claimed in S0.  No operand is declared as written and
     // no ALU resource is claimed.
     // NOTE(review): the previous header read "Conditional move reg-reg", which
     // looks copied from pipe_cmov_reg; presumably this models a compare-less-
     // than mask sequence -- confirm against the instructions that use it.
5688 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5689 %{
5690     instruction_count(4);
5691     y      : S4(read);
5692     q      : S3(read);
5693     p      : S3(read);
5694     DECODE : S0(4);     // any 4 decoders
5695 %}
5696 
5697 // Conditional move reg-reg
     // One instruction: src and the flags register cr are read in S3 and dst
     // is written in S4; any decoder.  No ALU resource is claimed.
5698 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5699 %{
5700     single_instruction;
5701     dst    : S4(write);
5702     src    : S3(read);
5703     cr     : S3(read);
5704     DECODE : S0;        // any decoder
5705 %}
5706 
5707 // Conditional move reg-mem
     // One instruction: memory operand src and flags cr are read in S3 (MEM
     // unit in S3) and dst is written in S4; any decoder.
     // Note the parameter order differs from pipe_cmov_reg: cr comes first.
5708 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5709 %{
5710     single_instruction;
5711     dst    : S4(write);
5712     src    : S3(read);
5713     cr     : S3(read);
5714     DECODE : S0;        // any decoder
5715     MEM    : S3;
5716 %}
5717 
5718 // Conditional move reg-reg long
     // src and flags cr are read in S3 and dst is written in S4.
     // NOTE(review): declared single_instruction yet claims two decoder slots;
     // the other long classes here use instruction_count(2) -- confirm intent.
5719 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5720 %{
5721     single_instruction;
5722     dst    : S4(write);
5723     src    : S3(read);
5724     cr     : S3(read);
5725     DECODE : S0(2);     // any 2 decoders
5726 %}
5727 
5728 // XXX
5729 // // Conditional move double reg-reg
5730 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5731 // %{
5732 //     single_instruction;
5733 //     dst    : S4(write);
5734 //     src    : S3(read);
5735 //     cr     : S3(read);
5736 //     DECODE : S0;     // any decoder
5737 // %}
5738 
5739 // Float single-register operation
     // Modelled as two instructions: dst is read in S3 (no write is declared);
     // two decoder slots in S0 and the FPU in S3.
5740 pipe_class fpu_reg(regD dst)
5741 %{
5742     instruction_count(2);
5743     dst    : S3(read);
5744     DECODE : S0(2);     // any 2 decoders
5745     FPU    : S3;
5746 %}
5747 
5748 // Float reg-reg operation
     // Modelled as two instructions: src is read in S3 and dst is written in
     // S4; two decoder slots in S0 and the FPU in S3.
5749 pipe_class fpu_reg_reg(regD dst, regD src)
5750 %{
5751     instruction_count(2);
5752     dst    : S4(write);
5753     src    : S3(read);
5754     DECODE : S0(2);     // any 2 decoders
5755     FPU    : S3;
5756 %}
5757 
5758 // Float reg-reg-reg operation (two register sources)
     // Modelled as three instructions: src1 and src2 are read in S3 and dst is
     // written in S4; three decoder slots in S0 and two FPU slots in S3.
5759 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5760 %{
5761     instruction_count(3);
5762     dst    : S4(write);
5763     src1   : S3(read);
5764     src2   : S3(read);
5765     DECODE : S0(3);     // any 3 decoders
5766     FPU    : S3(2);
5767 %}
5768 
5769 // Float operation with three register sources
     // Modelled as four instructions: src1..src3 are read in S3 and dst is
     // written in S4; four decoder slots in S0 and two FPU slots in S3.
5770 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5771 %{
5772     instruction_count(4);
5773     dst    : S4(write);
5774     src1   : S3(read);
5775     src2   : S3(read);
5776     src3   : S3(read);
5777     DECODE : S0(4);     // any 4 decoders
5778     FPU    : S3(2);
5779 %}
5780 
5781 // Float operation with one memory source and two register sources
     // Modelled as four instructions: src1 (memory), src2 and src3 are read in
     // S3 and dst is written in S4.  The big decoder is used in S0 and three
     // further decoder slots in S1; two FPU slots and one MEM unit in S3.
5782 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5783 %{
5784     instruction_count(4);
5785     dst    : S4(write);
5786     src1   : S3(read);
5787     src2   : S3(read);
5788     src3   : S3(read);
5789     DECODE : S1(3);     // any 3 decoders
5790     D0     : S0;        // Big decoder only
5791     FPU    : S3(2);
5792     MEM    : S3;
5793 %}
5794 
5795 // Float reg-mem operation
     // Modelled as two instructions: mem is read in S3 via a MEM unit, the FPU
     // is used in S4 and dst is written in S5.  The big decoder is used in S0
     // and one further decoder slot in S1.
5796 pipe_class fpu_reg_mem(regD dst, memory mem)
5797 %{
5798     instruction_count(2);
5799     dst    : S5(write);
5800     mem    : S3(read);
5801     D0     : S0;        // big decoder only
5802     DECODE : S1;        // any decoder for FPU POP
5803     FPU    : S4;
5804     MEM    : S3;        // any mem
5805 %}
5806 
5807 // Float reg-reg-mem operation (one register and one memory source)
     // Modelled as three instructions: src1 and mem are read in S3 (MEM unit in
     // S3), the FPU is used in S4 and dst is written in S5.  The big decoder is
     // used in S0 and two further decoder slots in S1.
5808 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5809 %{
5810     instruction_count(3);
5811     dst    : S5(write);
5812     src1   : S3(read);
5813     mem    : S3(read);
5814     D0     : S0;        // big decoder only
5815     DECODE : S1(2);     // any decoder for FPU POP
5816     FPU    : S4;
5817     MEM    : S3;        // any mem
5818 %}
5819 
5820 // Float mem-reg operation
     // Modelled as two instructions with a memory destination: the address
     // operand is read in S3 and the register source in S5; one decoder slot
     // in S0, the big decoder in S1, the FPU in S4 and a MEM unit in S3.
5821 pipe_class fpu_mem_reg(memory mem, regD src)
5822 %{
5823     instruction_count(2);
5824     src    : S5(read);
5825     mem    : S3(read);
5826     DECODE : S0;        // any decoder for FPU PUSH
5827     D0     : S1;        // big decoder only
5828     FPU    : S4;
5829     MEM    : S3;        // any mem
5830 %}
5831 
     // Float mem-reg-reg operation (memory destination, two register sources)
     // Modelled as three instructions: src1, src2 and the address operand are
     // all read in S3; two decoder slots in S0, the big decoder in S1, the FPU
     // in S4 and a MEM unit in S3.
5832 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5833 %{
5834     instruction_count(3);
5835     src1   : S3(read);
5836     src2   : S3(read);
5837     mem    : S3(read);
5838     DECODE : S0(2);     // any decoder for FPU PUSH
5839     D0     : S1;        // big decoder only
5840     FPU    : S4;
5841     MEM    : S3;        // any mem
5842 %}
5843 
     // Float mem-reg-mem operation (memory destination, register and memory
     // sources).  Modelled as three instructions: src1 and src2 are read in S3
     // and the destination address operand in S4; one decoder slot plus two
     // big-decoder slots in S0, the FPU in S4 and two MEM slots in S3.
5844 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5845 %{
5846     instruction_count(3);
5847     src1   : S3(read);
5848     src2   : S3(read);
5849     mem    : S4(read);
5850     DECODE : S0;        // any decoder for FPU PUSH
5851     D0     : S0(2);     // big decoder only
5852     FPU    : S4;
5853     MEM    : S3(2);     // any mem
5854 %}
5855 
     // Float mem-mem operation (memory source to memory destination)
     // Modelled as two instructions: src1 is read in S3 and the destination
     // address operand in S4; two big-decoder slots in S0 and two MEM slots in
     // S3.  No FPU resource is claimed.
5856 pipe_class fpu_mem_mem(memory dst, memory src1)
5857 %{
5858     instruction_count(2);
5859     src1   : S3(read);
5860     dst    : S4(read);
5861     D0     : S0(2);     // big decoder only
5862     MEM    : S3(2);     // any mem
5863 %}
5864 
     // Float mem-mem-mem operation (two memory sources, memory destination)
     // Modelled as three instructions: src1 and src2 are read in S3 and the
     // destination address operand in S4; three big-decoder slots in S0, the
     // FPU in S4 and three MEM slots in S3.
5865 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5866 %{
5867     instruction_count(3);
5868     src1   : S3(read);
5869     src2   : S3(read);
5870     dst    : S4(read);
5871     D0     : S0(3);     // big decoder only
5872     FPU    : S4;
5873     MEM    : S3(3);     // any mem
5874 %}
5875 
     // Float mem-reg-constant operation (memory destination, register source;
     // the constant is not modelled as an operand).  Modelled as three
     // instructions: src1 and the address operand are read in S4; one decoder
     // slot plus two big-decoder slots in S0, the FPU in S4, two MEM slots in S3.
5876 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5877 %{
5878     instruction_count(3);
5879     src1   : S4(read);
5880     mem    : S4(read);
5881     DECODE : S0;        // any decoder for FPU PUSH
5882     D0     : S0(2);     // big decoder only
5883     FPU    : S4;
5884     MEM    : S3(2);     // any mem
5885 %}
5886 
5887 // Float load constant
     // Modelled as two instructions: dst is written in S5; the big decoder is
     // used in S0 for the load and one more decoder slot in S1; a MEM unit in
     // S3 and the FPU in S4.  The constant itself is not an operand here.
5888 pipe_class fpu_reg_con(regD dst)
5889 %{
5890     instruction_count(2);
5891     dst    : S5(write);
5892     D0     : S0;        // big decoder only for the load
5893     DECODE : S1;        // any decoder for FPU POP
5894     FPU    : S4;
5895     MEM    : S3;        // any mem
5896 %}
5897 
5898 // Float reg-reg operation with a loaded constant
     // Modelled as three instructions: src is read in S3 and dst is written in
     // S5; the big decoder is used in S0 for the load and two more decoder
     // slots in S1; a MEM unit in S3 and the FPU in S4.
5899 pipe_class fpu_reg_reg_con(regD dst, regD src)
5900 %{
5901     instruction_count(3);
5902     dst    : S5(write);
5903     src    : S3(read);
5904     D0     : S0;        // big decoder only for the load
5905     DECODE : S1(2);     // any decoder for FPU POP
5906     FPU    : S4;
5907     MEM    : S3;        // any mem
5908 %}
5909 
5910 // Unconditional branch
     // One instruction that only claims the branch unit (BR) in S3; the label
     // operand has no read/write stage and no decoder is modelled.
5911 pipe_class pipe_jmp(label labl)
5912 %{
5913     single_instruction;
5914     BR   : S3;
5915 %}
5916 
5917 // Conditional branch
     // One instruction: the flags register cr is read early, in S1, and the
     // branch unit (BR) is claimed in S3.  cmp and labl have no stage entries.
5918 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5919 %{
5920     single_instruction;
5921     cr    : S1(read);
5922     BR    : S3;
5923 %}
5924 
5925 // Allocation idiom
     // A single serializing instruction with a fixed latency of 6: heap_ptr is
     // read in S3 and dst is written in S5.  It claims three decoder slots in
     // S0, the big decoder in S2, a MEM unit and two ALU slots in S3, and the
     // branch unit in S5.
5926 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5927 %{
5928     instruction_count(1); force_serialization;
5929     fixed_latency(6);
5930     heap_ptr : S3(read);
5931     DECODE   : S0(3);
5932     D0       : S2;
5933     MEM      : S3;
5934     ALU      : S3(2);
5935     dst      : S5(write);
5936     BR       : S5;
5937 %}
5938 
5939 // Generic big/slow expanded idiom
     // Catch-all class with no operands: counted as 10 instructions spanning
     // multiple bundles, serializing, with a fixed latency of 100.  It claims
     // two big-decoder slots in S0 and two MEM slots in S3.  Several load
     // instructions in this file use it as a placeholder (marked "XXX").
5940 pipe_class pipe_slow()
5941 %{
5942     instruction_count(10); multiple_bundles; force_serialization;
5943     fixed_latency(100);
5944     D0  : S0(2);
5945     MEM : S3(2);
5946 %}
5947 
5948 // The real do-nothing guy
     // Zero instructions, no operands, no resources; assigned to MachNop by
     // the 'define' block that follows.
5949 pipe_class empty()
5950 %{
5951     instruction_count(0);
5952 %}
5953 
5954 // Define the class for the Nop node
     // Binds MachNop to the 'empty' pipe_class, so a nop is scheduled as
     // consuming no instructions or resources.
5955 define
5956 %{
5957    MachNop = empty;
5958 %}
5959 
5960 %}
5961 
5962 //----------INSTRUCTIONS-------------------------------------------------------
5963 //
5964 // match      -- States which machine-independent subtree may be replaced
5965 //               by this instruction.
5966 // ins_cost   -- The estimated cost of this instruction is used by instruction
5967 //               selection to identify a minimum cost tree of machine
5968 //               instructions that matches a tree of machine-independent
5969 //               instructions.
5970 // format     -- A string providing the disassembly for this instruction.
5971 //               The value of an instruction's operand may be inserted
5972 //               by referring to it with a '$' prefix.
5973 // opcode     -- Three instruction opcodes may be provided.  These are referred
5974 //               to within an encode class as $primary, $secondary, and $tertiary
5975 //               respectively.  The primary opcode is commonly used to
5976 //               indicate the type of machine instruction, while secondary
5977 //               and tertiary are often used for prefix options or addressing
5978 //               modes.
5979 // ins_encode -- A list of encode classes with parameters. The encode class
5980 //               name must have been defined in an 'enc_class' specification
5981 //               in the encode section of the architecture description.
5982 
5983 
5984 //----------Load/Store/Move Instructions---------------------------------------
5985 //----------Load Instructions--------------------------------------------------
5986 
5987 // Load Byte (8 bit signed)
     // Matches an ideal LoadB into an int register and emits movsbl, which
     // sign-extends the loaded byte to 32 bits.
5988 instruct loadB(rRegI dst, memory mem)
5989 %{
5990   match(Set dst (LoadB mem));
5991 
5992   ins_cost(125);
5993   format %{ "movsbl  $dst, $mem\t# byte" %}
5994 
5995   ins_encode %{
5996     __ movsbl($dst$$Register, $mem$$Address);
5997   %}
5998 
5999   ins_pipe(ialu_reg_mem);
6000 %}
6001 
6002 // Load Byte (8 bit signed) into Long Register
     // Folds ConvI2L(LoadB) into a single movsbq, which sign-extends the byte
     // directly to 64 bits.
6003 instruct loadB2L(rRegL dst, memory mem)
6004 %{
6005   match(Set dst (ConvI2L (LoadB mem)));
6006 
6007   ins_cost(125);
6008   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
6009 
6010   ins_encode %{
6011     __ movsbq($dst$$Register, $mem$$Address);
6012   %}
6013 
6014   ins_pipe(ialu_reg_mem);
6015 %}
6016 
6017 // Load Unsigned Byte (8 bit UNsigned)
     // Matches an ideal LoadUB into an int register and emits movzbl, which
     // zero-extends the loaded byte to 32 bits.
6018 instruct loadUB(rRegI dst, memory mem)
6019 %{
6020   match(Set dst (LoadUB mem));
6021 
6022   ins_cost(125);
6023   format %{ "movzbl  $dst, $mem\t# ubyte" %}
6024 
6025   ins_encode %{
6026     __ movzbl($dst$$Register, $mem$$Address);
6027   %}
6028 
6029   ins_pipe(ialu_reg_mem);
6030 %}
6031 
6032 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUB) into a single movzbq.  The loaded value is never
     // negative, so zero extension equals the sign extension ConvI2L asks for.
6033 instruct loadUB2L(rRegL dst, memory mem)
6034 %{
6035   match(Set dst (ConvI2L (LoadUB mem)));
6036 
6037   ins_cost(125);
6038   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
6039 
6040   ins_encode %{
6041     __ movzbq($dst$$Register, $mem$$Address);
6042   %}
6043 
6044   ins_pipe(ialu_reg_mem);
6045 %}
6046 
6047 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
     // Folds ConvI2L(AndI(LoadUB, mask)) into movzbq followed by andl.  The
     // loaded value is in 0..255, so the masked result is non-negative whatever
     // the mask's sign and zero extension is correct.  andl modifies the
     // flags, hence KILL cr.  No ins_cost is given, so the default applies.
6048 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6049   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6050   effect(KILL cr);
6051 
6052   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6053             "andl    $dst, $mask" %}
6054   ins_encode %{
6055     Register Rdst = $dst$$Register;
6056     __ movzbq(Rdst, $mem$$Address);
6057     __ andl(Rdst, $mask$$constant);
6058   %}
6059   ins_pipe(ialu_reg_mem);
6060 %}
6061 
6062 // Load Short (16 bit signed)
     // Matches an ideal LoadS into an int register and emits movswl, which
     // sign-extends the loaded 16-bit value to 32 bits.
6063 instruct loadS(rRegI dst, memory mem)
6064 %{
6065   match(Set dst (LoadS mem));
6066 
6067   ins_cost(125);
6068   format %{ "movswl $dst, $mem\t# short" %}
6069 
6070   ins_encode %{
6071     __ movswl($dst$$Register, $mem$$Address);
6072   %}
6073 
6074   ins_pipe(ialu_reg_mem);
6075 %}
6076 
6077 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Recognizes the (x << 24) >> 24 narrowing idiom applied to a LoadS and
     // replaces the load plus both shifts with one movsbl from the same
     // address (the low-order byte comes first on little-endian x86).
6078 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6079   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6080 
6081   ins_cost(125);
6082   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6083   ins_encode %{
6084     __ movsbl($dst$$Register, $mem$$Address);
6085   %}
6086   ins_pipe(ialu_reg_mem);
6087 %}
6088 
6089 // Load Short (16 bit signed) into Long Register
     // Folds ConvI2L(LoadS) into a single movswq, which sign-extends the
     // 16-bit value directly to 64 bits.
6090 instruct loadS2L(rRegL dst, memory mem)
6091 %{
6092   match(Set dst (ConvI2L (LoadS mem)));
6093 
6094   ins_cost(125);
6095   format %{ "movswq $dst, $mem\t# short -> long" %}
6096 
6097   ins_encode %{
6098     __ movswq($dst$$Register, $mem$$Address);
6099   %}
6100 
6101   ins_pipe(ialu_reg_mem);
6102 %}
6103 
6104 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches an ideal LoadUS into an int register and emits movzwl, which
     // zero-extends the loaded 16-bit value to 32 bits.
6105 instruct loadUS(rRegI dst, memory mem)
6106 %{
6107   match(Set dst (LoadUS mem));
6108 
6109   ins_cost(125);
6110   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6111 
6112   ins_encode %{
6113     __ movzwl($dst$$Register, $mem$$Address);
6114   %}
6115 
6116   ins_pipe(ialu_reg_mem);
6117 %}
6118 
6119 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Recognizes the (x << 24) >> 24 narrowing idiom applied to a LoadUS and
     // replaces it with one movsbl of the low-order byte; only the low 8 bits
     // survive the shifts, so the load's signedness is irrelevant.
6120 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6121   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6122 
6123   ins_cost(125);
6124   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6125   ins_encode %{
6126     __ movsbl($dst$$Register, $mem$$Address);
6127   %}
6128   ins_pipe(ialu_reg_mem);
6129 %}
6130 
6131 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUS) into a single movzwq.  The loaded value is never
     // negative, so zero extension equals the sign extension ConvI2L asks for.
6132 instruct loadUS2L(rRegL dst, memory mem)
6133 %{
6134   match(Set dst (ConvI2L (LoadUS mem)));
6135 
6136   ins_cost(125);
6137   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6138 
6139   ins_encode %{
6140     __ movzwq($dst$$Register, $mem$$Address);
6141   %}
6142 
6143   ins_pipe(ialu_reg_mem);
6144 %}
6145 
6146 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // Masking with 0xFF keeps only the low byte, so the load, mask and
     // conversion collapse into one zero-extending byte load (movzbq).
     // No flags are touched and no ins_cost is given (default applies).
6147 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6148   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6149 
6150   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6151   ins_encode %{
6152     __ movzbq($dst$$Register, $mem$$Address);
6153   %}
6154   ins_pipe(ialu_reg_mem);
6155 %}
6156 
6157 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Folds ConvI2L(AndI(LoadUS, mask)) into movzwq followed by andl.  The
     // loaded value is in 0..65535, so the masked result is non-negative
     // whatever the mask's sign and zero extension is correct.  andl modifies
     // the flags, hence KILL cr.
6158 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6159   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6160   effect(KILL cr);
6161 
6162   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6163             "andl    $dst, $mask" %}
6164   ins_encode %{
6165     Register Rdst = $dst$$Register;
6166     __ movzwq(Rdst, $mem$$Address);
6167     __ andl(Rdst, $mask$$constant);
6168   %}
6169   ins_pipe(ialu_reg_mem);
6170 %}
6171 
6172 // Load Integer
     // Matches an ideal LoadI into an int register and emits a plain 32-bit
     // movl from memory.
6173 instruct loadI(rRegI dst, memory mem)
6174 %{
6175   match(Set dst (LoadI mem));
6176 
6177   ins_cost(125);
6178   format %{ "movl    $dst, $mem\t# int" %}
6179 
6180   ins_encode %{
6181     __ movl($dst$$Register, $mem$$Address);
6182   %}
6183 
6184   ins_pipe(ialu_reg_mem);
6185 %}
6186 
6187 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Recognizes the (x << 24) >> 24 narrowing idiom applied to a LoadI and
     // replaces the load plus both shifts with one sign-extending byte load
     // (movsbl) of the low-order byte.
6188 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6189   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6190 
6191   ins_cost(125);
6192   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6193   ins_encode %{
6194     __ movsbl($dst$$Register, $mem$$Address);
6195   %}
6196   ins_pipe(ialu_reg_mem);
6197 %}
6198 
6199 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Recognizes LoadI & 0xFF and replaces the load plus mask with one
     // zero-extending byte load (movzbl); no flags are modified.
6200 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6201   match(Set dst (AndI (LoadI mem) mask));
6202 
6203   ins_cost(125);
6204   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6205   ins_encode %{
6206     __ movzbl($dst$$Register, $mem$$Address);
6207   %}
6208   ins_pipe(ialu_reg_mem);
6209 %}
6210 
6211 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Recognizes the (x << 16) >> 16 narrowing idiom applied to a LoadI and
     // replaces the load plus both shifts with one sign-extending 16-bit load
     // (movswl).
6212 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6213   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6214 
6215   ins_cost(125);
6216   format %{ "movswl  $dst, $mem\t# int -> short" %}
6217   ins_encode %{
6218     __ movswl($dst$$Register, $mem$$Address);
6219   %}
6220   ins_pipe(ialu_reg_mem);
6221 %}
6222 
6223 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Recognizes LoadI & 0xFFFF and replaces the load plus mask with one
     // zero-extending 16-bit load (movzwl); no flags are modified.
6224 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6225   match(Set dst (AndI (LoadI mem) mask));
6226 
6227   ins_cost(125);
6228   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6229   ins_encode %{
6230     __ movzwl($dst$$Register, $mem$$Address);
6231   %}
6232   ins_pipe(ialu_reg_mem);
6233 %}
6234 
6235 // Load Integer into Long Register
     // Folds ConvI2L(LoadI) into a single movslq, which sign-extends the
     // loaded 32-bit value to 64 bits.
6236 instruct loadI2L(rRegL dst, memory mem)
6237 %{
6238   match(Set dst (ConvI2L (LoadI mem)));
6239 
6240   ins_cost(125);
6241   format %{ "movslq  $dst, $mem\t# int -> long" %}
6242 
6243   ins_encode %{
6244     __ movslq($dst$$Register, $mem$$Address);
6245   %}
6246 
6247   ins_pipe(ialu_reg_mem);
6248 %}
6249 
6250 // Load Integer with mask 0xFF into Long Register
     // Masking with 0xFF keeps only the low byte and yields a non-negative
     // value, so load, mask and conversion collapse into one zero-extending
     // byte load (movzbq).  No ins_cost is given (default applies).
6251 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6252   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6253 
6254   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6255   ins_encode %{
6256     __ movzbq($dst$$Register, $mem$$Address);
6257   %}
6258   ins_pipe(ialu_reg_mem);
6259 %}
6260 
6261 // Load Integer with mask 0xFFFF into Long Register
     // Masking with 0xFFFF keeps only the low 16 bits and yields a
     // non-negative value, so load, mask and conversion collapse into one
     // zero-extending 16-bit load (movzwq).  No ins_cost is given.
6262 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6263   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6264 
6265   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6266   ins_encode %{
6267     __ movzwq($dst$$Register, $mem$$Address);
6268   %}
6269   ins_pipe(ialu_reg_mem);
6270 %}
6271 
6272 // Load Integer with a non-negative 32-bit mask into Long Register
     //
     // Folds ConvI2L(AndI(LoadI, mask)) into movl followed by andl.  Both
     // instructions zero-extend their 32-bit result into the 64-bit register,
     // whereas ConvI2L requires sign extension.  The two agree only when the
     // AndI result cannot be negative, i.e. when the mask's sign bit is clear.
     // The predicate therefore restricts this rule to masks >= 0; for negative
     // masks the matcher falls back to the separate load/and/convert rules,
     // which sign-extend explicitly.  andl modifies the flags, hence KILL cr.
6273 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
       // n is the ConvI2L node: in(1) is the AndI, whose in(2) is the mask constant.
       predicate(n->in(1)->in(2)->get_int() >= 0);
6274   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6275   effect(KILL cr);
6276 
6277   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6278             "andl    $dst, $mask" %}
6279   ins_encode %{
6280     Register Rdst = $dst$$Register;
6281     __ movl(Rdst, $mem$$Address);
6282     __ andl(Rdst, $mask$$constant);
6283   %}
6284   ins_pipe(ialu_reg_mem);
6285 %}
6286 
6287 // Load Unsigned Integer into Long Register
     // Matches the ideal LoadUI2L node and emits a 32-bit movl; on x86-64 a
     // 32-bit register write zero-extends into the upper half, which is the
     // unsigned int -> long conversion.
6288 instruct loadUI2L(rRegL dst, memory mem)
6289 %{
6290   match(Set dst (LoadUI2L mem));
6291 
6292   ins_cost(125);
6293   format %{ "movl    $dst, $mem\t# uint -> long" %}
6294 
6295   ins_encode %{
6296     __ movl($dst$$Register, $mem$$Address);
6297   %}
6298 
6299   ins_pipe(ialu_reg_mem);
6300 %}
6301 
6302 // Load Long
     // Matches an ideal LoadL into a long register and emits a 64-bit movq
     // from memory.  The pipe class is marked XXX (provisional) upstream.
6303 instruct loadL(rRegL dst, memory mem)
6304 %{
6305   match(Set dst (LoadL mem));
6306 
6307   ins_cost(125);
6308   format %{ "movq    $dst, $mem\t# long" %}
6309 
6310   ins_encode %{
6311     __ movq($dst$$Register, $mem$$Address);
6312   %}
6313 
6314   ins_pipe(ialu_reg_mem); // XXX
6315 %}
6316 
6317 // Load Range
     // Matches an ideal LoadRange into an int register.  Encoded with the
     // older opcode/enc_class style: primary opcode 0x8B (MOV r, r/m) preceded
     // by a REX prefix from REX_reg_mem (presumably the non-wide, 32-bit form;
     // the enc_class is defined elsewhere in this file).
6318 instruct loadRange(rRegI dst, memory mem)
6319 %{
6320   match(Set dst (LoadRange mem));
6321 
6322   ins_cost(125); // XXX
6323   format %{ "movl    $dst, $mem\t# range" %}
6324   opcode(0x8B);
6325   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6326   ins_pipe(ialu_reg_mem);
6327 %}
6328 
6329 // Load Pointer
     // Matches an ideal LoadP into a pointer register.  Primary opcode 0x8B
     // (MOV r, r/m) with a REX prefix from REX_reg_mem_wide (presumably sets
     // REX.W for a 64-bit load, matching the movq in the format string).
6330 instruct loadP(rRegP dst, memory mem)
6331 %{
6332   match(Set dst (LoadP mem));
6333 
6334   ins_cost(125); // XXX
6335   format %{ "movq    $dst, $mem\t# ptr" %}
6336   opcode(0x8B);
6337   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6338   ins_pipe(ialu_reg_mem); // XXX
6339 %}
6340 
6341 // Load Compressed Pointer
     // Matches an ideal LoadN into a narrow-oop register and emits a 32-bit
     // movl; the value is loaded as-is, with no decoding performed here.
6342 instruct loadN(rRegN dst, memory mem)
6343 %{
6344    match(Set dst (LoadN mem));
6345 
6346    ins_cost(125); // XXX
6347    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6348    ins_encode %{
6349      __ movl($dst$$Register, $mem$$Address);
6350    %}
6351    ins_pipe(ialu_reg_mem); // XXX
6352 %}
6353 
6354 
6355 // Load Klass Pointer
     // Matches an ideal LoadKlass into a pointer register.  Same encoding as
     // loadP: primary opcode 0x8B (MOV r, r/m) with the REX_reg_mem_wide prefix.
6356 instruct loadKlass(rRegP dst, memory mem)
6357 %{
6358   match(Set dst (LoadKlass mem));
6359 
6360   ins_cost(125); // XXX
6361   format %{ "movq    $dst, $mem\t# class" %}
6362   opcode(0x8B);
6363   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6364   ins_pipe(ialu_reg_mem); // XXX
6365 %}
6366 
6367 // Load narrow Klass Pointer
     // Matches an ideal LoadNKlass into a narrow register and emits a 32-bit
     // movl; the compressed value is loaded as-is, with no decoding here.
6368 instruct loadNKlass(rRegN dst, memory mem)
6369 %{
6370   match(Set dst (LoadNKlass mem));
6371 
6372   ins_cost(125); // XXX
6373   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6374   ins_encode %{
6375     __ movl($dst$$Register, $mem$$Address);
6376   %}
6377   ins_pipe(ialu_reg_mem); // XXX
6378 %}
6379 
6380 // Load Float
     // Matches an ideal LoadF into an XMM float register.  The three opcode
     // bytes F3 0F 10 are MOVSS xmm, m32; the mandatory F3 prefix (OpcP) is
     // emitted before the REX prefix, then 0F 10 (OpcS, OpcT) and the ModRM.
     // Scheduled with the generic pipe_slow class (marked XXX upstream).
6381 instruct loadF(regF dst, memory mem)
6382 %{
6383   match(Set dst (LoadF mem));
6384 
6385   ins_cost(145); // XXX
6386   format %{ "movss   $dst, $mem\t# float" %}
6387   opcode(0xF3, 0x0F, 0x10);
6388   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6389   ins_pipe(pipe_slow); // XXX
6390 %}
6391 
6392 // Load Double (partial register write)
     // Selected when UseXmmLoadAndClearUpper is false.  Opcode bytes 66 0F 12
     // are MOVLPD xmm, m64, which loads the low 64 bits and leaves the upper
     // half of the XMM register unchanged.  loadD is the complementary rule.
6393 instruct loadD_partial(regD dst, memory mem)
6394 %{
6395   predicate(!UseXmmLoadAndClearUpper);
6396   match(Set dst (LoadD mem));
6397 
6398   ins_cost(145); // XXX
6399   format %{ "movlpd  $dst, $mem\t# double" %}
6400   opcode(0x66, 0x0F, 0x12);
6401   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6402   ins_pipe(pipe_slow); // XXX
6403 %}
6404 
     // Load Double (clearing the upper half)
     // Selected when UseXmmLoadAndClearUpper is true.  Opcode bytes F2 0F 10
     // are MOVSD xmm, m64, which loads the low 64 bits and zeroes the upper
     // half of the XMM register.  loadD_partial is the complementary rule.
6405 instruct loadD(regD dst, memory mem)
6406 %{
6407   predicate(UseXmmLoadAndClearUpper);
6408   match(Set dst (LoadD mem));
6409 
6410   ins_cost(145); // XXX
6411   format %{ "movsd   $dst, $mem\t# double" %}
6412   opcode(0xF2, 0x0F, 0x10);
6413   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6414   ins_pipe(pipe_slow); // XXX
6415 %}
6416 
6417 // Load Aligned Packed Byte to XMM register
     // Matches the ideal Load8B vector load (eight packed bytes) and encodes
     // it with the movq_ld enc_class, which is defined elsewhere in this file.
6418 instruct loadA8B(regD dst, memory mem) %{
6419   match(Set dst (Load8B mem));
6420   ins_cost(125);
6421   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6422   ins_encode( movq_ld(dst, mem));
6423   ins_pipe( pipe_slow );
6424 %}
6425 
6426 // Load Aligned Packed Short to XMM register
     // Matches the ideal Load4S vector load (four packed shorts) and encodes
     // it with the movq_ld enc_class, which is defined elsewhere in this file.
6427 instruct loadA4S(regD dst, memory mem) %{
6428   match(Set dst (Load4S mem));
6429   ins_cost(125);
6430   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6431   ins_encode( movq_ld(dst, mem));
6432   ins_pipe( pipe_slow );
6433 %}
6434 
6435 // Load Aligned Packed Char to XMM register
     // Matches the ideal Load4C vector load (four packed chars) and encodes
     // it with the movq_ld enc_class, which is defined elsewhere in this file.
6436 instruct loadA4C(regD dst, memory mem) %{
6437   match(Set dst (Load4C mem));
6438   ins_cost(125);
6439   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6440   ins_encode( movq_ld(dst, mem));
6441   ins_pipe( pipe_slow );
6442 %}
6443 
6444 // Load Aligned Packed Integer to XMM register
     // Matches the ideal Load2I vector load (two packed ints) and encodes it
     // with the movq_ld enc_class.  Note the name does not follow the loadA*
     // pattern of the neighbouring packed-load rules.
6445 instruct load2IU(regD dst, memory mem) %{
6446   match(Set dst (Load2I mem));
6447   ins_cost(125);
6448   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6449   ins_encode( movq_ld(dst, mem));
6450   ins_pipe( pipe_slow );
6451 %}
6452 
6453 // Load Aligned Packed Single to XMM
     // Matches the ideal Load2F vector load (two packed floats) and encodes it
     // with the movq_ld enc_class.  Costed at 145, like the scalar FP loads,
     // rather than the 125 used by the packed integer loads.
6454 instruct loadA2F(regD dst, memory mem) %{
6455   match(Set dst (Load2F mem));
6456   ins_cost(145);
6457   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6458   ins_encode( movq_ld(dst, mem));
6459   ins_pipe( pipe_slow );
6460 %}
6461 
6462 // Load Effective Address
     // Materializes an address expression of the indOffset8 operand shape into
     // a pointer register.  Primary opcode 0x8D is LEA; REX_reg_mem_wide
     // supplies the REX prefix for the 64-bit leaq shown in the format.
6463 instruct leaP8(rRegP dst, indOffset8 mem)
6464 %{
6465   match(Set dst mem);
6466 
6467   ins_cost(110); // XXX
6468   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6469   opcode(0x8D);
6470   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6471   ins_pipe(ialu_reg_reg_fat);
6472 %}
6473 
     // Load Effective Address for the indOffset32 operand shape.
     // Same encoding as leaP8: opcode 0x8D (LEA) with the REX_reg_mem_wide
     // prefix.
6474 instruct leaP32(rRegP dst, indOffset32 mem)
6475 %{
6476   match(Set dst mem);
6477 
6478   ins_cost(110);
6479   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6480   opcode(0x8D);
6481   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6482   ins_pipe(ialu_reg_reg_fat);
6483 %}
6484 
6485 // instruct leaPIdx(rRegP dst, indIndex mem)
6486 // %{
6487 //   match(Set dst mem);
6488 
6489 //   ins_cost(110);
6490 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6491 //   opcode(0x8D);
6492 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6493 //   ins_pipe(ialu_reg_reg_fat);
6494 // %}
6495 
     // Load Effective Address for the indIndexOffset operand shape.
     // Opcode 0x8D (LEA) with the REX_reg_mem_wide prefix.
6496 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6497 %{
6498   match(Set dst mem);
6499 
6500   ins_cost(110);
6501   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6502   opcode(0x8D);
6503   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6504   ins_pipe(ialu_reg_reg_fat);
6505 %}
6506 
     // Load Effective Address for the indIndexScale operand shape.
     // Opcode 0x8D (LEA) with the REX_reg_mem_wide prefix.
6507 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6508 %{
6509   match(Set dst mem);
6510 
6511   ins_cost(110);
6512   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6513   opcode(0x8D);
6514   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6515   ins_pipe(ialu_reg_reg_fat);
6516 %}
6517 
     // Load Effective Address for the indIndexScaleOffset operand shape.
     // Opcode 0x8D (LEA) with the REX_reg_mem_wide prefix.
6518 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6519 %{
6520   match(Set dst mem);
6521 
6522   ins_cost(110);
6523   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6524   opcode(0x8D);
6525   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6526   ins_pipe(ialu_reg_reg_fat);
6527 %}
6528 
     // Load Effective Address for the indPosIndexScaleOffset operand shape.
     // Opcode 0x8D (LEA) with the REX_reg_mem_wide prefix.
6529 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6530 %{
6531   match(Set dst mem);
6532 
6533   ins_cost(110);
6534   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6535   opcode(0x8D);
6536   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6537   ins_pipe(ialu_reg_reg_fat);
6538 %}
6539 
6540 // Load Effective Address which uses Narrow (32-bits) oop
     // For the indCompressedOopOffset operand shape.  Enabled only when
     // compressed oops are in use with a non-zero shift; the *Narrow rules
     // that follow cover the zero-shift case.  Opcode 0x8D (LEA) with the
     // REX_reg_mem_wide prefix.
6541 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6542 %{
6543   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6544   match(Set dst mem);
6545 
6546   ins_cost(110);
6547   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6548   opcode(0x8D);
6549   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6550   ins_pipe(ialu_reg_reg_fat);
6551 %}
6552 
     // Load Effective Address for the indOffset8Narrow operand shape.
     // Enabled only when the narrow-oop shift is zero.  Opcode 0x8D (LEA) with
     // the REX_reg_mem_wide prefix.
6553 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6554 %{
6555   predicate(Universe::narrow_oop_shift() == 0);
6556   match(Set dst mem);
6557 
6558   ins_cost(110); // XXX
6559   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6560   opcode(0x8D);
6561   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6562   ins_pipe(ialu_reg_reg_fat);
6563 %}
6564 
     // Load Effective Address for the indOffset32Narrow operand shape.
     // Enabled only when the narrow-oop shift is zero.  Opcode 0x8D (LEA) with
     // the REX_reg_mem_wide prefix.
6565 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6566 %{
6567   predicate(Universe::narrow_oop_shift() == 0);
6568   match(Set dst mem);
6569 
6570   ins_cost(110);
6571   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6572   opcode(0x8D);
6573   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6574   ins_pipe(ialu_reg_reg_fat);
6575 %}
6576 
     // Load Effective Address for the indIndexOffsetNarrow operand shape.
     // Enabled only when the narrow-oop shift is zero.  Opcode 0x8D (LEA) with
     // the REX_reg_mem_wide prefix.
6577 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6578 %{
6579   predicate(Universe::narrow_oop_shift() == 0);
6580   match(Set dst mem);
6581 
6582   ins_cost(110);
6583   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6584   opcode(0x8D);
6585   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6586   ins_pipe(ialu_reg_reg_fat);
6587 %}
6588 
     // Load Effective Address for the indIndexScaleNarrow operand shape.
     // Enabled only when the narrow-oop shift is zero.  Opcode 0x8D (LEA) with
     // the REX_reg_mem_wide prefix.
6589 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6590 %{
6591   predicate(Universe::narrow_oop_shift() == 0);
6592   match(Set dst mem);
6593 
6594   ins_cost(110);
6595   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6596   opcode(0x8D);
6597   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6598   ins_pipe(ialu_reg_reg_fat);
6599 %}
6600 
     // Load Effective Address for the indIndexScaleOffsetNarrow operand shape.
     // Enabled only when the narrow-oop shift is zero.  Opcode 0x8D (LEA) with
     // the REX_reg_mem_wide prefix.
6601 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6602 %{
6603   predicate(Universe::narrow_oop_shift() == 0);
6604   match(Set dst mem);
6605 
6606   ins_cost(110);
6607   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6608   opcode(0x8D);
6609   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6610   ins_pipe(ialu_reg_reg_fat);
6611 %}
6612 
     // Load Effective Address for the indPosIndexScaleOffsetNarrow operand
     // shape.  Enabled only when the narrow-oop shift is zero.  Opcode 0x8D
     // (LEA) with the REX_reg_mem_wide prefix.
6613 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6614 %{
6615   predicate(Universe::narrow_oop_shift() == 0);
6616   match(Set dst mem);
6617 
6618   ins_cost(110);
6619   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6620   opcode(0x8D);
6621   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6622   ins_pipe(ialu_reg_reg_fat);
6623 %}
6624 
     // Load an arbitrary int constant into an int register.
     // Encoded by the load_immI enc_class (defined elsewhere in this file);
     // the format shows a movl with an immediate.  No ins_cost is given, so
     // the default applies; loadConI0 is the cheaper rule for zero.
6625 instruct loadConI(rRegI dst, immI src)
6626 %{
6627   match(Set dst src);
6628 
6629   format %{ "movl    $dst, $src\t# int" %}
6630   ins_encode(load_immI(dst, src));
6631   ins_pipe(ialu_reg_fat); // XXX
6632 %}
6633 
     // Load the int constant zero by xor-ing the register with itself.
     // Opcode 0x33 is XOR r, r/m.  xor modifies the flags, hence KILL cr.
     // Costed at 50 so it is preferred over loadConI for zero.
6634 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6635 %{
6636   match(Set dst src);
6637   effect(KILL cr);
6638 
6639   ins_cost(50);
6640   format %{ "xorl    $dst, $dst\t# int" %}
6641   opcode(0x33); /* + rd */
6642   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6643   ins_pipe(ialu_reg);
6644 %}
6645 
     // Load an arbitrary long constant into a long register.
     // Encoded by the load_immL enc_class (defined elsewhere in this file).
     // Costed at 150, the most expensive of the long-constant rules, so the
     // cheaper loadConL0 / loadConUL32 / loadConL32 rules win when they apply.
6646 instruct loadConL(rRegL dst, immL src)
6647 %{
6648   match(Set dst src);
6649 
6650   ins_cost(150);
6651   format %{ "movq    $dst, $src\t# long" %}
6652   ins_encode(load_immL(dst, src));
6653   ins_pipe(ialu_reg);
6654 %}
6655 
6656 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6657 %{
       // Load long constant 0.  Uses the same 32-bit xor encoding as loadConI0
       // (REX_reg_reg, not a wide prefix); on x86-64 a 32-bit register write
       // zero-extends into the upper half.  The xor writes flags, hence KILL cr.
6658   match(Set dst src);
6659   effect(KILL cr);
6660 
6661   ins_cost(50);
6662   format %{ "xorl    $dst, $dst\t# long" %}
6663   opcode(0x33); /* + rd */
6664   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6665   ins_pipe(ialu_reg); // XXX
6666 %}
6667 
6668 instruct loadConUL32(rRegL dst, immUL32 src)
6669 %{
       // Load a long constant that matches the immUL32 operand (unsigned 32-bit,
       // per the format text) using a movl; cheaper (60) than loadConL (150).
6670   match(Set dst src);
6671 
6672   ins_cost(60);
6673   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6674   ins_encode(load_immUL32(dst, src));
6675   ins_pipe(ialu_reg);
6676 %}
6677 
6678 instruct loadConL32(rRegL dst, immL32 src)
6679 %{
       // Load a long constant that matches the immL32 operand (32-bit, per the
       // format text) using a movq; cost 70 sits between loadConUL32 and loadConL.
6680   match(Set dst src);
6681 
6682   ins_cost(70);
6683   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6684   ins_encode(load_immL32(dst, src));
6685   ins_pipe(ialu_reg);
6686 %}
6687 
6688 instruct loadConP(rRegP dst, immP src)
6689 %{
       // Load an arbitrary pointer constant into a pointer register (general
       // case; no explicit ins_cost is given here).
6690   match(Set dst src);
6691 
6692   format %{ "movq    $dst, $src\t# ptr" %}
6693   ins_encode(load_immP(dst, src));
6694   ins_pipe(ialu_reg_fat); // XXX
6695 %}
6696 
6697 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6698 %{
       // Load the NULL pointer constant by xor-ing the destination with itself
       // (32-bit xor form, same encoding as loadConI0).  The xor writes flags,
       // hence KILL cr.
6699   match(Set dst src);
6700   effect(KILL cr);
6701 
6702   ins_cost(50);
6703   format %{ "xorl    $dst, $dst\t# ptr" %}
6704   opcode(0x33); /* + rd */
6705   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6706   ins_pipe(ialu_reg);
6707 %}
6708 
6709 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6710 %{
       // Load a pointer constant that matches immP31 (positive 32-bit, per the
       // format text) with a movl.
       // NOTE(review): KILL cr is declared although the format shows a plain
       // movl; confirm whether load_immP31 can actually modify flags.
6711   match(Set dst src);
6712   effect(KILL cr);
6713 
6714   ins_cost(60);
6715   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6716   ins_encode(load_immP31(dst, src));
6717   ins_pipe(ialu_reg);
6718 %}
6719 
6720 instruct loadConF(regF dst, immF src)
6721 %{
       // Load a float constant into an XMM register; the format shows a movss
       // from memory ([$src]), i.e. the constant is fetched rather than encoded
       // as an immediate.
6722   match(Set dst src);
6723   ins_cost(125);
6724 
6725   format %{ "movss   $dst, [$src]" %}
6726   ins_encode(load_conF(dst, src));
6727   ins_pipe(pipe_slow);
6728 %}
6729 
6730 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
       // Load the compressed NULL pointer by xor-ing the destination with
       // itself.  The xor writes flags, hence KILL cr.
       // The format now prints "$dst, $dst" to agree with the emitted xorq;
       // it previously printed the immediate operand as the second argument.
6731   match(Set dst src);
6732   effect(KILL cr);
6733   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6734   ins_encode %{
6735     __ xorq($dst$$Register, $dst$$Register);
6736   %}
6737   ins_pipe(ialu_reg);
6738 %}
6739 
6740 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-NULL compressed-oop constant into a narrow-oop register.
6741   match(Set dst src);
6742 
6743   ins_cost(125);
6744   format %{ "movl    $dst, $src\t# compressed ptr" %}
6745   ins_encode %{
6746     address con = (address)$src$$constant;
6747     if (con == NULL) {
           // A NULL constant is treated as a fatal error here; presumably it is
           // always matched by loadConN0 instead -- verify against the matcher.
6748       ShouldNotReachHere();
6749     } else {
6750       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6751     }
6752   %}
6753   ins_pipe(ialu_reg_fat); // XXX
6754 %}
6755 
6756 instruct loadConF0(regF dst, immF0 src)
6757 %{
       // Load float 0.0 by xorps-ing the XMM destination with itself
       // (opcode bytes 0F 57); cheaper (100) than loadConF (125).
6758   match(Set dst src);
6759   ins_cost(100);
6760 
6761   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6762   opcode(0x0F, 0x57);
6763   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6764   ins_pipe(pipe_slow);
6765 %}
6766 
6767 // Use the same format since predicate() can not be used here.
     // Load a double constant into an XMM register; the format shows a movsd
     // from memory ([$src]).
6768 instruct loadConD(regD dst, immD src)
6769 %{
6770   match(Set dst src);
6771   ins_cost(125);
6772 
6773   format %{ "movsd   $dst, [$src]" %}
6774   ins_encode(load_conD(dst, src));
6775   ins_pipe(pipe_slow);
6776 %}
6777 
6778 instruct loadConD0(regD dst, immD0 src)
6779 %{
       // Load double 0.0 by xorpd-ing the XMM destination with itself
       // (opcode bytes 66 0F 57, with the 66 prefix emitted before the REX
       // prefix); cheaper (100) than loadConD (125).
6780   match(Set dst src);
6781   ins_cost(100);
6782 
6783   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6784   opcode(0x66, 0x0F, 0x57);
6785   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6786   ins_pipe(pipe_slow);
6787 %}
6788 
6789 instruct loadSSI(rRegI dst, stackSlotI src)
6790 %{
       // Reload an int from a stack slot into an int register (opcode 8B).
6791   match(Set dst src);
6792 
6793   ins_cost(125);
6794   format %{ "movl    $dst, $src\t# int stk" %}
6795   opcode(0x8B);
6796   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6797   ins_pipe(ialu_reg_mem);
6798 %}
6799 
6800 instruct loadSSL(rRegL dst, stackSlotL src)
6801 %{
       // Reload a long from a stack slot into a long register (opcode 8B with
       // the wide prefix encoding).
6802   match(Set dst src);
6803 
6804   ins_cost(125);
6805   format %{ "movq    $dst, $src\t# long stk" %}
6806   opcode(0x8B);
6807   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6808   ins_pipe(ialu_reg_mem);
6809 %}
6810 
6811 instruct loadSSP(rRegP dst, stackSlotP src)
6812 %{
       // Reload a pointer from a stack slot into a pointer register (opcode 8B
       // with the wide prefix encoding).
6813   match(Set dst src);
6814 
6815   ins_cost(125);
6816   format %{ "movq    $dst, $src\t# ptr stk" %}
6817   opcode(0x8B);
6818   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6819   ins_pipe(ialu_reg_mem);
6820 %}
6821 
6822 instruct loadSSF(regF dst, stackSlotF src)
6823 %{
       // Reload a float from a stack slot into an XMM register (movss, opcode
       // bytes F3 0F 10; the F3 prefix is emitted before the REX prefix).
6824   match(Set dst src);
6825 
6826   ins_cost(125);
6827   format %{ "movss   $dst, $src\t# float stk" %}
6828   opcode(0xF3, 0x0F, 0x10);
6829   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6830   ins_pipe(pipe_slow); // XXX
6831 %}
6832 
6833 // Use the same format since predicate() can not be used here.
     // Reload a double from a stack slot into an XMM register.  Unlike loadSSF
     // this goes through the macro assembler's movdbl, addressing the slot as
     // rsp + displacement.
6834 instruct loadSSD(regD dst, stackSlotD src)
6835 %{
6836   match(Set dst src);
6837 
6838   ins_cost(125);
6839   format %{ "movsd   $dst, $src\t# double stk" %}
6840   ins_encode  %{
6841     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
6842   %}
6843   ins_pipe(pipe_slow); // XXX
6844 %}
6845 
6846 // Prefetch instructions.
6847 // Must be safe to execute with invalid address (cannot fault).
6848 
6849 instruct prefetchr( memory mem ) %{
       // Read prefetch variant selected when ReadPrefetchInstr == 3:
       // opcode 0F 0D with ModRM reg field /0.
6850   predicate(ReadPrefetchInstr==3);
6851   match(PrefetchRead mem);
6852   ins_cost(125);
6853 
6854   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6855   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6856   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6857   ins_pipe(ialu_mem);
6858 %}
6859 
6860 instruct prefetchrNTA( memory mem ) %{
       // Read prefetch variant selected when ReadPrefetchInstr == 0:
       // PREFETCHNTA, opcode 0F 18 with ModRM reg field /0.
6861   predicate(ReadPrefetchInstr==0);
6862   match(PrefetchRead mem);
6863   ins_cost(125);
6864 
6865   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6866   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6867   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6868   ins_pipe(ialu_mem);
6869 %}
6870 
6871 instruct prefetchrT0( memory mem ) %{
       // Read prefetch variant selected when ReadPrefetchInstr == 1:
       // PREFETCHT0, opcode 0F 18 with ModRM reg field /1.
6872   predicate(ReadPrefetchInstr==1);
6873   match(PrefetchRead mem);
6874   ins_cost(125);
6875 
6876   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6877   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6878   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6879   ins_pipe(ialu_mem);
6880 %}
6881 
6882 instruct prefetchrT2( memory mem ) %{
       // Read prefetch variant selected when ReadPrefetchInstr == 2:
       // PREFETCHT2, opcode 0F 18 with ModRM reg field /3.
6883   predicate(ReadPrefetchInstr==2);
6884   match(PrefetchRead mem);
6885   ins_cost(125);
6886 
6887   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6888   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6889   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6890   ins_pipe(ialu_mem);
6891 %}
6892 
6893 instruct prefetchw( memory mem ) %{
       // Write prefetch variant selected when AllocatePrefetchInstr == 3:
       // PREFETCHW, opcode 0F 0D with ModRM reg field /1.
6894   predicate(AllocatePrefetchInstr==3);
6895   match(PrefetchWrite mem);
6896   ins_cost(125);
6897 
6898   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6899   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6900   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6901   ins_pipe(ialu_mem);
6902 %}
6903 
6904 instruct prefetchwNTA( memory mem ) %{
       // Write prefetch variant selected when AllocatePrefetchInstr == 0:
       // PREFETCHNTA, opcode 0F 18 with ModRM reg field /0.
6905   predicate(AllocatePrefetchInstr==0);
6906   match(PrefetchWrite mem);
6907   ins_cost(125);
6908 
6909   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6910   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6911   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6912   ins_pipe(ialu_mem);
6913 %}
6914 
6915 instruct prefetchwT0( memory mem ) %{
       // Write prefetch variant selected when AllocatePrefetchInstr == 1:
       // PREFETCHT0, opcode 0F 18 with ModRM reg field /1.
6916   predicate(AllocatePrefetchInstr==1);
6917   match(PrefetchWrite mem);
6918   ins_cost(125);
6919 
6920   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6921   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6922   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6923   ins_pipe(ialu_mem);
6924 %}
6925 
6926 instruct prefetchwT2( memory mem ) %{
       // Write prefetch variant selected when AllocatePrefetchInstr == 2:
       // PREFETCHT2, opcode 0F 18 with ModRM reg field /3.
6927   predicate(AllocatePrefetchInstr==2);
6928   match(PrefetchWrite mem);
6929   ins_cost(125);
6930 
6931   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6932   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6933   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6934   ins_pipe(ialu_mem);
6935 %}
6936 
6937 //----------Store Instructions-------------------------------------------------
6938 
6939 // Store Byte
     // Stores the low byte of an int register to memory (opcode 88).  Uses the
     // byte-register prefix encoding class REX_breg_mem rather than REX_reg_mem.
6940 instruct storeB(memory mem, rRegI src)
6941 %{
6942   match(Set mem (StoreB mem src));
6943 
6944   ins_cost(125); // XXX
6945   format %{ "movb    $mem, $src\t# byte" %}
6946   opcode(0x88);
6947   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6948   ins_pipe(ialu_mem_reg);
6949 %}
6950 
6951 // Store Char/Short
     // Stores the low 16 bits of an int register to memory: opcode 89 preceded
     // by the SizePrefix encoding (emitted before the REX prefix).
6952 instruct storeC(memory mem, rRegI src)
6953 %{
6954   match(Set mem (StoreC mem src));
6955 
6956   ins_cost(125); // XXX
6957   format %{ "movw    $mem, $src\t# char/short" %}
6958   opcode(0x89);
6959   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6960   ins_pipe(ialu_mem_reg);
6961 %}
6962 
6963 // Store Integer
     // Stores an int register to memory (opcode 89, non-wide prefix).
6964 instruct storeI(memory mem, rRegI src)
6965 %{
6966   match(Set mem (StoreI mem src));
6967 
6968   ins_cost(125); // XXX
6969   format %{ "movl    $mem, $src\t# int" %}
6970   opcode(0x89);
6971   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6972   ins_pipe(ialu_mem_reg);
6973 %}
6974 
6975 // Store Long
     // Stores a long register to memory (opcode 89 with the wide prefix).
6976 instruct storeL(memory mem, rRegL src)
6977 %{
6978   match(Set mem (StoreL mem src));
6979 
6980   ins_cost(125); // XXX
6981   format %{ "movq    $mem, $src\t# long" %}
6982   opcode(0x89);
6983   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6984   ins_pipe(ialu_mem_reg); // XXX
6985 %}
6986 
6987 // Store Pointer
     // Stores a pointer register to memory (opcode 89 with the wide prefix).
     // The source operand class is any_RegP rather than rRegP.
6988 instruct storeP(memory mem, any_RegP src)
6989 %{
6990   match(Set mem (StoreP mem src));
6991 
6992   ins_cost(125); // XXX
6993   format %{ "movq    $mem, $src\t# ptr" %}
6994   opcode(0x89);
6995   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6996   ins_pipe(ialu_mem_reg);
6997 %}
6998 
6999 instruct storeImmP0(memory mem, immP0 zero)
7000 %{
       // Store a NULL pointer by storing R12.  Only selected with compressed
       // oops and a NULL narrow-oop base, in which case (per the format text)
       // R12_heapbase holds 0 and can stand in for the zero immediate.
7001   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7002   match(Set mem (StoreP mem zero));
7003 
7004   ins_cost(125); // XXX
7005   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
7006   ins_encode %{
7007     __ movq($mem$$Address, r12);
7008   %}
7009   ins_pipe(ialu_mem_reg);
7010 %}
7011 
7012 // Store NULL Pointer, mark word, or other simple pointer constant.
     // The constant must match immP31; it is emitted as a 32-bit immediate
     // (Con32) of a wide C7 /0 store.
7013 instruct storeImmP(memory mem, immP31 src)
7014 %{
7015   match(Set mem (StoreP mem src));
7016 
7017   ins_cost(150); // XXX
7018   format %{ "movq    $mem, $src\t# ptr" %}
7019   opcode(0xC7); /* C7 /0 */
7020   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7021   ins_pipe(ialu_mem_imm);
7022 %}
7023 
7024 // Store Compressed Pointer
     // Stores a narrow-oop register to memory with a 32-bit movl.
7025 instruct storeN(memory mem, rRegN src)
7026 %{
7027   match(Set mem (StoreN mem src));
7028 
7029   ins_cost(125); // XXX
7030   format %{ "movl    $mem, $src\t# compressed ptr" %}
7031   ins_encode %{
7032     __ movl($mem$$Address, $src$$Register);
7033   %}
7034   ins_pipe(ialu_mem_reg);
7035 %}
7036 
7037 instruct storeImmN0(memory mem, immN0 zero)
7038 %{
       // Store a compressed NULL by storing the low 32 bits of R12.  Only
       // selected when the narrow-oop base is NULL, in which case (per the
       // format text) R12_heapbase holds 0.  Unlike the other R12-based zero
       // stores, the predicate does not test UseCompressedOops.
7039   predicate(Universe::narrow_oop_base() == NULL);
7040   match(Set mem (StoreN mem zero));
7041 
7042   ins_cost(125); // XXX
7043   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7044   ins_encode %{
7045     __ movl($mem$$Address, r12);
7046   %}
7047   ins_pipe(ialu_mem_reg);
7048 %}
7049 
7050 instruct storeImmN(memory mem, immN src)
7051 %{
       // Store a compressed-oop constant directly to memory.
7052   match(Set mem (StoreN mem src));
7053 
7054   ins_cost(150); // XXX
7055   format %{ "movl    $mem, $src\t# compressed ptr" %}
7056   ins_encode %{
7057     address con = (address)$src$$constant;
7058     if (con == NULL) {
           // NULL needs no oop handling: store a plain 32-bit zero.
7059       __ movl($mem$$Address, (int32_t)0);
7060     } else {
           // Non-NULL constants go through set_narrow_oop with the jobject.
7061       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
7062     }
7063   %}
7064   ins_pipe(ialu_mem_imm);
7065 %}
7066 
7067 // Store Integer Immediate
     // Zero variant: stores the low 32 bits of R12 instead of an immediate.
     // Only selected with compressed oops and a NULL narrow-oop base, in which
     // case (per the format text) R12_heapbase holds 0.
7068 instruct storeImmI0(memory mem, immI0 zero)
7069 %{
7070   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7071   match(Set mem (StoreI mem zero));
7072 
7073   ins_cost(125); // XXX
7074   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7075   ins_encode %{
7076     __ movl($mem$$Address, r12);
7077   %}
7078   ins_pipe(ialu_mem_reg);
7079 %}
7080 
7081 instruct storeImmI(memory mem, immI src)
7082 %{
       // Store an int immediate to memory: C7 /0 followed by a 32-bit constant.
       // Cost 150 is higher than storeImmI0 (125), which also matches zero.
7083   match(Set mem (StoreI mem src));
7084 
7085   ins_cost(150);
7086   format %{ "movl    $mem, $src\t# int" %}
7087   opcode(0xC7); /* C7 /0 */
7088   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7089   ins_pipe(ialu_mem_imm);
7090 %}
7091 
7092 // Store Long Immediate
     // Zero variant: stores all 64 bits of R12 instead of an immediate.
     // Only selected with compressed oops and a NULL narrow-oop base, in which
     // case (per the format text) R12_heapbase holds 0.
7093 instruct storeImmL0(memory mem, immL0 zero)
7094 %{
7095   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7096   match(Set mem (StoreL mem zero));
7097 
7098   ins_cost(125); // XXX
7099   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7100   ins_encode %{
7101     __ movq($mem$$Address, r12);
7102   %}
7103   ins_pipe(ialu_mem_reg);
7104 %}
7105 
7106 instruct storeImmL(memory mem, immL32 src)
7107 %{
       // Store a long constant that matches immL32: wide C7 /0 followed by a
       // 32-bit constant (Con32).
7108   match(Set mem (StoreL mem src));
7109 
7110   ins_cost(150);
7111   format %{ "movq    $mem, $src\t# long" %}
7112   opcode(0xC7); /* C7 /0 */
7113   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
7114   ins_pipe(ialu_mem_imm);
7115 %}
7116 
7117 // Store Short/Char Immediate
     // Zero variant: stores the low 16 bits of R12 instead of an immediate.
     // Only selected with compressed oops and a NULL narrow-oop base, in which
     // case (per the format text) R12_heapbase holds 0.
7118 instruct storeImmC0(memory mem, immI0 zero)
7119 %{
7120   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7121   match(Set mem (StoreC mem zero));
7122 
7123   ins_cost(125); // XXX
7124   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7125   ins_encode %{
7126     __ movw($mem$$Address, r12);
7127   %}
7128   ins_pipe(ialu_mem_reg);
7129 %}
7130 
7131 instruct storeImmI16(memory mem, immI16 src)
7132 %{
       // Store a 16-bit immediate to memory: SizePrefix + C7 /0 followed by a
       // 16-bit constant (Con16).  Only available when UseStoreImmI16 is set.
7133   predicate(UseStoreImmI16);
7134   match(Set mem (StoreC mem src));
7135 
7136   ins_cost(150);
7137   format %{ "movw    $mem, $src\t# short/char" %}
7138   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7139   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7140   ins_pipe(ialu_mem_imm);
7141 %}
7142 
7143 // Store Byte Immediate
     // Zero variant: stores the low byte of R12 instead of an immediate.
     // Only selected with compressed oops and a NULL narrow-oop base, in which
     // case (per the format text) R12_heapbase holds 0.
     // The format annotation now says "byte"; it previously read "short/char",
     // copied from the 16-bit variant, which mislabelled this store in listings.
7144 instruct storeImmB0(memory mem, immI0 zero)
7145 %{
7146   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7147   match(Set mem (StoreB mem zero));
7148 
7149   ins_cost(125); // XXX
7150   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7151   ins_encode %{
7152     __ movb($mem$$Address, r12);
7153   %}
7154   ins_pipe(ialu_mem_reg);
7155 %}
7156 
7157 instruct storeImmB(memory mem, immI8 src)
7158 %{
       // Store an 8-bit immediate to memory: C6 /0 followed by the constant
       // emitted through the Con8or32 encoding class.
7159   match(Set mem (StoreB mem src));
7160 
7161   ins_cost(150); // XXX
7162   format %{ "movb    $mem, $src\t# byte" %}
7163   opcode(0xC6); /* C6 /0 */
7164   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7165   ins_pipe(ialu_mem_imm);
7166 %}
7167 
7168 // Store Aligned Packed Byte XMM register to memory
     // Matches Store8B; the packed value lives in a regD operand and is written
     // with the shared movq_st encoding class.
7169 instruct storeA8B(memory mem, regD src) %{
7170   match(Set mem (Store8B mem src));
7171   ins_cost(145);
7172   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7173   ins_encode( movq_st(mem, src));
7174   ins_pipe( pipe_slow );
7175 %}
7176 
7177 // Store Aligned Packed Char/Short XMM register to memory
     // Matches Store4C; the packed value lives in a regD operand and is written
     // with the shared movq_st encoding class.
7178 instruct storeA4C(memory mem, regD src) %{
7179   match(Set mem (Store4C mem src));
7180   ins_cost(145);
7181   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7182   ins_encode( movq_st(mem, src));
7183   ins_pipe( pipe_slow );
7184 %}
7185 
7186 // Store Aligned Packed Integer XMM register to memory
     // Matches Store2I; the packed value lives in a regD operand and is written
     // with the shared movq_st encoding class.
7187 instruct storeA2I(memory mem, regD src) %{
7188   match(Set mem (Store2I mem src));
7189   ins_cost(145);
7190   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7191   ins_encode( movq_st(mem, src));
7192   ins_pipe( pipe_slow );
7193 %}
7194 
7195 // Store CMS card-mark Immediate
     // Register variant: writes the card-mark byte 0 by storing the low byte of
     // R12.  Only selected with compressed oops and a NULL narrow-oop base, in
     // which case (per the format text) R12_heapbase holds 0.
7196 instruct storeImmCM0_reg(memory mem, immI0 zero)
7197 %{
7198   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7199   match(Set mem (StoreCM mem zero));
7200 
7201   ins_cost(125); // XXX
7202   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7203   ins_encode %{
7204     __ movb($mem$$Address, r12);
7205   %}
7206   ins_pipe(ialu_mem_reg);
7207 %}
7208 
7209 instruct storeImmCM0(memory mem, immI0 src)
7210 %{
       // Immediate variant of the card-mark store: C6 /0 with a zero constant.
       // Has no predicate, so it is always available; cost 150 is higher than
       // the R12-based storeImmCM0_reg (125).
7211   match(Set mem (StoreCM mem src));
7212 
7213   ins_cost(150); // XXX
7214   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7215   opcode(0xC6); /* C6 /0 */
7216   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7217   ins_pipe(ialu_mem_imm);
7218 %}
7219 
7220 // Store Aligned Packed Single Float XMM register to memory
     // Matches Store2F; the packed value lives in a regD operand and is written
     // with the shared movq_st encoding class.
7221 instruct storeA2F(memory mem, regD src) %{
7222   match(Set mem (Store2F mem src));
7223   ins_cost(145);
7224   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7225   ins_encode( movq_st(mem, src));
7226   ins_pipe( pipe_slow );
7227 %}
7228 
7229 // Store Float
     // Stores an XMM float register to memory (movss, opcode bytes F3 0F 11;
     // the F3 prefix is emitted before the REX prefix).
7230 instruct storeF(memory mem, regF src)
7231 %{
7232   match(Set mem (StoreF mem src));
7233 
7234   ins_cost(95); // XXX
7235   format %{ "movss   $mem, $src\t# float" %}
7236   opcode(0xF3, 0x0F, 0x11);
7237   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7238   ins_pipe(pipe_slow); // XXX
7239 %}
7240 
7241 // Store immediate Float value (it is faster than store from XMM register)
     // Zero variant: stores the low 32 bits of R12 as the bit pattern of 0.0f.
     // Only selected with compressed oops and a NULL narrow-oop base, in which
     // case (per the format text) R12_heapbase holds 0.  Cheapest float store
     // (cost 25).
7242 instruct storeF0(memory mem, immF0 zero)
7243 %{
7244   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7245   match(Set mem (StoreF mem zero));
7246 
7247   ins_cost(25); // XXX
7248   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7249   ins_encode %{
7250     __ movl($mem$$Address, r12);
7251   %}
7252   ins_pipe(ialu_mem_reg);
7253 %}
7254 
7255 instruct storeF_imm(memory mem, immF src)
7256 %{
       // Store a float constant as an integer immediate: C7 /0 followed by the
       // constant's bits (Con32F_as_bits), avoiding an XMM register.
7257   match(Set mem (StoreF mem src));
7258 
7259   ins_cost(50);
7260   format %{ "movl    $mem, $src\t# float" %}
7261   opcode(0xC7); /* C7 /0 */
7262   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7263   ins_pipe(ialu_mem_imm);
7264 %}
7265 
7266 // Store Double
     // Stores an XMM double register to memory (movsd, opcode bytes F2 0F 11;
     // the F2 prefix is emitted before the REX prefix).
7267 instruct storeD(memory mem, regD src)
7268 %{
7269   match(Set mem (StoreD mem src));
7270 
7271   ins_cost(95); // XXX
7272   format %{ "movsd   $mem, $src\t# double" %}
7273   opcode(0xF2, 0x0F, 0x11);
7274   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7275   ins_pipe(pipe_slow); // XXX
7276 %}
7277 
7278 // Store immediate double 0.0 (it is faster than store from XMM register)
     // Immediate variant: wide C7 /0 followed by a 32-bit constant.  Its
     // predicate is the exact complement of storeD0's, so this form is used
     // whenever the R12-based zero store is not available.
7279 instruct storeD0_imm(memory mem, immD0 src)
7280 %{
7281   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7282   match(Set mem (StoreD mem src));
7283 
7284   ins_cost(50);
7285   format %{ "movq    $mem, $src\t# double 0." %}
7286   opcode(0xC7); /* C7 /0 */
7287   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7288   ins_pipe(ialu_mem_imm);
7289 %}
7290 
7291 instruct storeD0(memory mem, immD0 zero)
7292 %{
       // Store double 0.0 by storing all 64 bits of R12.  Only selected with
       // compressed oops and a NULL narrow-oop base, in which case (per the
       // format text) R12_heapbase holds 0.  Cheapest double store (cost 25).
7293   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7294   match(Set mem (StoreD mem zero));
7295 
7296   ins_cost(25); // XXX
7297   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7298   ins_encode %{
7299     __ movq($mem$$Address, r12);
7300   %}
7301   ins_pipe(ialu_mem_reg);
7302 %}
7303 
7304 instruct storeSSI(stackSlotI dst, rRegI src)
7305 %{
       // Spill an int register to a stack slot (opcode 89, non-wide prefix).
7306   match(Set dst src);
7307 
7308   ins_cost(100);
7309   format %{ "movl    $dst, $src\t# int stk" %}
7310   opcode(0x89);
7311   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7312   ins_pipe( ialu_mem_reg );
7313 %}
7314 
7315 instruct storeSSL(stackSlotL dst, rRegL src)
7316 %{
       // Spill a long register to a stack slot (opcode 89 with the wide prefix).
7317   match(Set dst src);
7318 
7319   ins_cost(100);
7320   format %{ "movq    $dst, $src\t# long stk" %}
7321   opcode(0x89);
7322   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7323   ins_pipe(ialu_mem_reg);
7324 %}
7325 
7326 instruct storeSSP(stackSlotP dst, rRegP src)
7327 %{
       // Spill a pointer register to a stack slot (opcode 89 with the wide
       // prefix).
7328   match(Set dst src);
7329 
7330   ins_cost(100);
7331   format %{ "movq    $dst, $src\t# ptr stk" %}
7332   opcode(0x89);
7333   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7334   ins_pipe(ialu_mem_reg);
7335 %}
7336 
7337 instruct storeSSF(stackSlotF dst, regF src)
7338 %{
       // Spill an XMM float register to a stack slot (movss, opcode bytes
       // F3 0F 11; the F3 prefix is emitted before the REX prefix).
7339   match(Set dst src);
7340 
7341   ins_cost(95); // XXX
7342   format %{ "movss   $dst, $src\t# float stk" %}
7343   opcode(0xF3, 0x0F, 0x11);
7344   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7345   ins_pipe(pipe_slow); // XXX
7346 %}
7347 
7348 instruct storeSSD(stackSlotD dst, regD src)
7349 %{
       // Spill an XMM double register to a stack slot (movsd, opcode bytes
       // F2 0F 11; the F2 prefix is emitted before the REX prefix).
7350   match(Set dst src);
7351 
7352   ins_cost(95); // XXX
7353   format %{ "movsd   $dst, $src\t# double stk" %}
7354   opcode(0xF2, 0x0F, 0x11);
7355   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7356   ins_pipe(pipe_slow); // XXX
7357 %}
7358 
7359 //----------BSWAP Instructions-------------------------------------------------
7360 instruct bytes_reverse_int(rRegI dst) %{
       // Reverse the byte order of an int in place (dst is both input and
       // output of the match rule).  Encoded as 0F followed by C8 combined with
       // the register via opc2_reg.
7361   match(Set dst (ReverseBytesI dst));
7362 
7363   format %{ "bswapl  $dst" %}
7364   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7365   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7366   ins_pipe( ialu_reg );
7367 %}
7368 
7369 instruct bytes_reverse_long(rRegL dst) %{
       // Reverse the byte order of a long in place (dst is both input and
       // output of the match rule).  Same opcode bytes as bytes_reverse_int but
       // with the wide prefix (REX_reg_wide).
7370   match(Set dst (ReverseBytesL dst));
7371 
7372   format %{ "bswapq  $dst" %}
7373 
7374   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7375   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7376   ins_pipe( ialu_reg);
7377 %}
7378 
7379 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // Reverse the two low-order bytes of dst and zero-extend the result:
       // bswapl moves them to the top of the 32-bit register, then a logical
       // right shift by 16 brings them back down.
       // shrl writes EFLAGS, so the flags register must be declared killed;
       // without KILL cr a live condition code could be kept across this node.
7380   match(Set dst (ReverseBytesUS dst));
       effect(KILL cr);
7381 
7382   format %{ "bswapl  $dst\n\t"
7383             "shrl    $dst,16\n\t" %}
7384   ins_encode %{
7385     __ bswapl($dst$$Register);
7386     __ shrl($dst$$Register, 16);
7387   %}
7388   ins_pipe( ialu_reg );
7389 %}
7390 
7391 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // Reverse the two low-order bytes of dst and sign-extend the result:
       // bswapl moves them to the top of the 32-bit register, then an
       // arithmetic right shift by 16 brings them back down.
       // sarl writes EFLAGS, so the flags register must be declared killed;
       // without KILL cr a live condition code could be kept across this node.
       // The format now prints "sarl" to match the instruction that is emitted.
7392   match(Set dst (ReverseBytesS dst));
       effect(KILL cr);
7393 
7394   format %{ "bswapl  $dst\n\t"
7395             "sarl    $dst,16\n\t" %}
7396   ins_encode %{
7397     __ bswapl($dst$$Register);
7398     __ sarl($dst$$Register, 16);
7399   %}
7400   ins_pipe( ialu_reg );
7401 %}
7402 
7403 instruct loadI_reversed(rRegI dst, memory src) %{
       // Fused load + byte reversal of an int.  Two instructions are emitted
       // back to back: a load (8B) into dst, then 0F C8+reg on dst, using the
       // same byte pattern as bytes_reverse_int.
7404   match(Set dst (ReverseBytesI (LoadI src)));
7405 
7406   format %{ "bswap_movl $dst, $src" %}
7407   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7408   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src), REX_reg(dst), OpcS, opc3_reg(dst));
7409   ins_pipe( ialu_reg_mem );
7410 %}
7411 
7412 instruct loadL_reversed(rRegL dst, memory src) %{
       // Fused load + byte reversal of a long.  Same sequence as loadI_reversed
       // but both instructions use the wide prefix encodings.
7413   match(Set dst (ReverseBytesL (LoadL src)));
7414 
7415   format %{ "bswap_movq $dst, $src" %}
7416   opcode(0x8B, 0x0F, 0xC8); /* Opcode 8B 0F C8 */
7417   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src), REX_reg_wide(dst), OpcS, opc3_reg(dst));
7418   ins_pipe( ialu_reg_mem );
7419 %}
7420 
7421 instruct storeI_reversed(memory dst, rRegI src) %{
       // Fused byte reversal + store of an int.  Two instructions are emitted:
       // 0F C8+reg on src (same byte pattern as bytes_reverse_int), then a
       // store (89) of src to memory.
       // NOTE(review): the first instruction appears to byte-swap $src in
       // place, yet src is a plain input with no effect() declaring that it is
       // modified.  Confirm src cannot be live after this node.
7422   match(Set dst (StoreI dst (ReverseBytesI  src)));
7423 
7424   format %{ "movl_bswap $dst, $src" %}
7425   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7426   ins_encode( REX_reg(src), OpcP, opc2_reg(src), REX_reg_mem(src, dst), OpcT, reg_mem(src, dst) );
7427   ins_pipe( ialu_mem_reg );
7428 %}
7429 
7430 instruct storeL_reversed(memory dst, rRegL src) %{
       // Fused byte reversal + store of a long.  Same sequence as
       // storeI_reversed but both instructions use the wide prefix encodings.
       // NOTE(review): the first instruction appears to byte-swap $src in
       // place, yet src is a plain input with no effect() declaring that it is
       // modified.  Confirm src cannot be live after this node.
7431   match(Set dst (StoreL dst (ReverseBytesL  src)));
7432 
7433   format %{ "movq_bswap $dst, $src" %}
7434   opcode(0x0F, 0xC8, 0x89); /* Opcode 0F C8 89 */
7435   ins_encode( REX_reg_wide(src), OpcP, opc2_reg(src), REX_reg_mem_wide(src, dst), OpcT, reg_mem(src, dst) );
7436   ins_pipe( ialu_mem_reg );
7437 %}
7438 
7439 
7440 //---------- Zeros Count Instructions ------------------------------------------
7441 
7442 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Count leading zeros of an int with a single lzcntl.  Only selected
       // when UseCountLeadingZerosInstruction is set; flags are declared killed.
7443   predicate(UseCountLeadingZerosInstruction);
7444   match(Set dst (CountLeadingZerosI src));
7445   effect(KILL cr);
7446 
7447   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7448   ins_encode %{
7449     __ lzcntl($dst$$Register, $src$$Register);
7450   %}
7451   ins_pipe(ialu_reg);
7452 %}
7453 
7454 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosI when UseCountLeadingZerosInstruction is
       // off: computes 31 - bsr(src), with src == 0 yielding 32.
7455   predicate(!UseCountLeadingZerosInstruction);
7456   match(Set dst (CountLeadingZerosI src));
7457   effect(KILL cr);
7458 
7459   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7460             "jnz     skip\n\t"
7461             "movl    $dst, -1\n"
7462       "skip:\n\t"
7463             "negl    $dst\n\t"
7464             "addl    $dst, 31" %}
7465   ins_encode %{
7466     Register Rdst = $dst$$Register;
7467     Register Rsrc = $src$$Register;
7468     Label skip;
7469     __ bsrl(Rdst, Rsrc);
7470     __ jccb(Assembler::notZero, skip);
         // Zero-input path (branch not taken): substitute -1 so that the
         // negate/add below produces -(-1) + 31 == 32.
7471     __ movl(Rdst, -1);
7472     __ bind(skip);
         // dst = (BitsPerInt - 1) - dst
7473     __ negl(Rdst);
7474     __ addl(Rdst, BitsPerInt - 1);
7475   %}
7476   ins_pipe(ialu_reg);
7477 %}
7478 
7479 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Count leading zeros of a long with a single lzcntq; the result goes to
       // an int register.  Only selected when UseCountLeadingZerosInstruction
       // is set; flags are declared killed.
7480   predicate(UseCountLeadingZerosInstruction);
7481   match(Set dst (CountLeadingZerosL src));
7482   effect(KILL cr);
7483 
7484   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7485   ins_encode %{
7486     __ lzcntq($dst$$Register, $src$$Register);
7487   %}
7488   ins_pipe(ialu_reg);
7489 %}
7490 
7491 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Fallback for CountLeadingZerosL when UseCountLeadingZerosInstruction is
       // off: computes 63 - bsr(src), with src == 0 yielding 64.
7492   predicate(!UseCountLeadingZerosInstruction);
7493   match(Set dst (CountLeadingZerosL src));
7494   effect(KILL cr);
7495 
7496   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7497             "jnz     skip\n\t"
7498             "movl    $dst, -1\n"
7499       "skip:\n\t"
7500             "negl    $dst\n\t"
7501             "addl    $dst, 63" %}
7502   ins_encode %{
7503     Register Rdst = $dst$$Register;
7504     Register Rsrc = $src$$Register;
7505     Label skip;
7506     __ bsrq(Rdst, Rsrc);
7507     __ jccb(Assembler::notZero, skip);
         // Zero-input path (branch not taken): substitute -1 so that the
         // negate/add below produces -(-1) + 63 == 64.
7508     __ movl(Rdst, -1);
7509     __ bind(skip);
         // dst = (BitsPerLong - 1) - dst; 32-bit arithmetic suffices since the
         // bit index is at most 63.
7510     __ negl(Rdst);
7511     __ addl(Rdst, BitsPerLong - 1);
7512   %}
7513   ins_pipe(ialu_reg);
7514 %}
7515 
7516 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Count trailing zeros of an int with bsfl; src == 0 yields BitsPerInt.
       // There is no predicate: this is the only CountTrailingZerosI rule in
       // this section.
7517   match(Set dst (CountTrailingZerosI src));
7518   effect(KILL cr);
7519 
7520   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7521             "jnz     done\n\t"
7522             "movl    $dst, 32\n"
7523       "done:" %}
7524   ins_encode %{
7525     Register Rdst = $dst$$Register;
7526     Label done;
7527     __ bsfl(Rdst, $src$$Register);
7528     __ jccb(Assembler::notZero, done);
         // Zero-input path (branch not taken): the answer is the full width.
7529     __ movl(Rdst, BitsPerInt);
7530     __ bind(done);
7531   %}
7532   ins_pipe(ialu_reg);
7533 %}
7534 
7535 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // Count trailing zeros of a long with bsfq; src == 0 yields BitsPerLong.
       // The result goes to an int register.
7536   match(Set dst (CountTrailingZerosL src));
7537   effect(KILL cr);
7538 
7539   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7540             "jnz     done\n\t"
7541             "movl    $dst, 64\n"
7542       "done:" %}
7543   ins_encode %{
7544     Register Rdst = $dst$$Register;
7545     Label done;
7546     __ bsfq(Rdst, $src$$Register);
7547     __ jccb(Assembler::notZero, done);
         // Zero-input path (branch not taken): the answer is the full width.
7548     __ movl(Rdst, BitsPerLong);
7549     __ bind(done);
7550   %}
7551   ins_pipe(ialu_reg);
7552 %}
7553 
7554 
7555 //---------- Population Count Instructions -------------------------------------
7556 
7557 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // Population count of an int register using popcntl.  Only selected
       // when UsePopCountInstruction is set.
       // POPCNT writes EFLAGS (ZF reflects a zero source, the other status
       // flags are cleared), so the flags register must be declared killed.
7558   predicate(UsePopCountInstruction);
7559   match(Set dst (PopCountI src));
       effect(KILL cr);
7560 
7561   format %{ "popcnt  $dst, $src" %}
7562   ins_encode %{
7563     __ popcntl($dst$$Register, $src$$Register);
7564   %}
7565   ins_pipe(ialu_reg);
7566 %}
7567 
7568 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // Population count of an int loaded from memory; the LoadI is folded
       // into popcntl's memory operand.  Only selected when
       // UsePopCountInstruction is set.
       // POPCNT writes EFLAGS (ZF reflects a zero source, the other status
       // flags are cleared), so the flags register must be declared killed.
7569   predicate(UsePopCountInstruction);
7570   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7571 
7572   format %{ "popcnt  $dst, $mem" %}
7573   ins_encode %{
7574     __ popcntl($dst$$Register, $mem$$Address);
7575   %}
7576   ins_pipe(ialu_reg);
7577 %}
7578 
7579 // Note: Long.bitCount(long) returns an int.
     // Population count of a long register using popcntq; the result goes to an
     // int register.  Only selected when UsePopCountInstruction is set.
     // POPCNT writes EFLAGS (ZF reflects a zero source, the other status flags
     // are cleared), so the flags register must be declared killed.
7580 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7581   predicate(UsePopCountInstruction);
7582   match(Set dst (PopCountL src));
       effect(KILL cr);
7583 
7584   format %{ "popcnt  $dst, $src" %}
7585   ins_encode %{
7586     __ popcntq($dst$$Register, $src$$Register);
7587   %}
7588   ins_pipe(ialu_reg);
7589 %}
7590 
7591 // Note: Long.bitCount(long) returns an int.
     // Population count of a long loaded from memory; the LoadL is folded into
     // popcntq's memory operand.  Only selected when UsePopCountInstruction is
     // set.
     // POPCNT writes EFLAGS (ZF reflects a zero source, the other status flags
     // are cleared), so the flags register must be declared killed.
7592 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7593   predicate(UsePopCountInstruction);
7594   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7595 
7596   format %{ "popcnt  $dst, $mem" %}
7597   ins_encode %{
7598     __ popcntq($dst$$Register, $mem$$Address);
7599   %}
7600   ins_pipe(ialu_reg);
7601 %}
7602 
7603 
7604 //----------MemBar Instructions-----------------------------------------------
7605 // Memory barrier flavors
7606 
7607 instruct membar_acquire()
7608 %{
       // Acquire barrier: matched but emits no code (size 0, empty encoding,
       // cost 0).
7609   match(MemBarAcquire);
7610   ins_cost(0);
7611 
7612   size(0);
7613   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7614   ins_encode();
7615   ins_pipe(empty);
7616 %}
7617 
7618 instruct membar_acquire_lock()
7619 %{
       // Acquire barrier variant selected when Matcher::prior_fast_lock(n)
       // holds; per the format text a preceding CMPXCHG in FastLock already
       // provides the ordering.  Emits no code, same as membar_acquire.
7620   match(MemBarAcquire);
7621   predicate(Matcher::prior_fast_lock(n));
7622   ins_cost(0);
7623 
7624   size(0);
7625   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7626   ins_encode();
7627   ins_pipe(empty);
7628 %}
7629 
7630 instruct membar_release()
7631 %{
       // Release barrier: matched but emits no code (size 0, empty encoding,
       // cost 0).
7632   match(MemBarRelease);
7633   ins_cost(0);
7634 
7635   size(0);
7636   format %{ "MEMBAR-release ! (empty encoding)" %}
7637   ins_encode();
7638   ins_pipe(empty);
7639 %}
7640 
7641 instruct membar_release_lock()
7642 %{
       // Release barrier variant selected when Matcher::post_fast_unlock(n)
       // holds; per the format text a FastUnlock follows.  Emits no code, same
       // as membar_release.
7643   match(MemBarRelease);
7644   predicate(Matcher::post_fast_unlock(n));
7645   ins_cost(0);
7646 
7647   size(0);
7648   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7649   ins_encode();
7650   ins_pipe(empty);
7651 %}
7652 
7653 instruct membar_volatile(rFlagsReg cr) %{
       // Volatile (StoreLoad) barrier.  The only barrier in this section that
       // can emit code: it calls membar(Assembler::StoreLoad) and declares the
       // flags killed.  The format template prints a "lock addl" on
       // multiprocessor systems and an empty encoding otherwise.  High cost
       // (400) so that unnecessary_membar_volatile (cost 0) wins when its
       // predicate holds.
7654   match(MemBarVolatile);
7655   effect(KILL cr);
7656   ins_cost(400);
7657 
7658   format %{ 
7659     $$template
7660     if (os::is_MP()) {
7661       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7662     } else {
7663       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7664     }
7665   %}
7666   ins_encode %{
7667     __ membar(Assembler::StoreLoad);
7668   %}
7669   ins_pipe(pipe_slow);
7670 %}
7671 
7672 instruct unnecessary_membar_volatile()
7673 %{
       // Volatile barrier variant selected when
       // Matcher::post_store_load_barrier(n) holds; emits no code (size 0,
       // empty encoding, cost 0).
7674   match(MemBarVolatile);
7675   predicate(Matcher::post_store_load_barrier(n));
7676   ins_cost(0);
7677 
7678   size(0);
7679   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7680   ins_encode();
7681   ins_pipe(empty);
7682 %}
7683 
7684 //----------Move Instructions--------------------------------------------------
7685 
7686 instruct castX2P(rRegP dst, rRegL src)
7687 %{
       // Reinterpret a long as a pointer: a register-to-register copy through
       // the enc_copy_wide encoding class.
7688   match(Set dst (CastX2P src));
7689 
7690   format %{ "movq    $dst, $src\t# long->ptr" %}
7691   ins_encode(enc_copy_wide(dst, src));
7692   ins_pipe(ialu_reg_reg); // XXX
7693 %}
7694 
7695 instruct castP2X(rRegL dst, rRegP src)
7696 %{
       // Reinterpret a pointer as a long: a register-to-register copy through
       // the enc_copy_wide encoding class.
7697   match(Set dst (CastP2X src));
7698 
7699   format %{ "movq    $dst, $src\t# ptr -> long" %}
7700   ins_encode(enc_copy_wide(dst, src));
7701   ins_pipe(ialu_reg_reg); // XXX
7702 %}
7703 
7704 
7705 // Convert oop pointer into compressed form
     // General variant: selected when the node's pointer type is not known to
     // be NotNull.  Flags are declared killed.
7706 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7707   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7708   match(Set dst (EncodeP src));
7709   effect(KILL cr);
7710   format %{ "encode_heap_oop $dst,$src" %}
7711   ins_encode %{
7712     Register s = $src$$Register;
7713     Register d = $dst$$Register;
         // encode_heap_oop works in place on one register, so copy the source
         // into the destination first unless they already coincide.
7714     if (s != d) {
7715       __ movq(d, s);
7716     }
7717     __ encode_heap_oop(d);
7718   %}
7719   ins_pipe(ialu_reg_long);
7720 %}
7721 
// EncodeP for a source whose pointer type is NotNull (see the predicate).
// Delegates to the two-register encode_heap_oop_not_null(dst, src);
// flags are declared killed.
7722 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7723   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7724   match(Set dst (EncodeP src));
7725   effect(KILL cr);
7726   format %{ "encode_heap_oop_not_null $dst,$src" %}
7727   ins_encode %{
7728     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7729   %}
7730   ins_pipe(ialu_reg_long);
7731 %}
7732 
// DecodeN whose result type is neither NotNull nor Constant (see the
// predicate).  The narrow oop is copied into dst when the two registers
// differ and then expanded in place with decode_heap_oop; flags are killed.
7733 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7734   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7735             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7736   match(Set dst (DecodeN src));
7737   effect(KILL cr);
7738   format %{ "decode_heap_oop $dst,$src" %}
7739   ins_encode %{
7740     Register dst_reg = $dst$$Register;
7741     Register src_reg = $src$$Register;
7742     if (dst_reg != src_reg) {
7743       __ movq(dst_reg, src_reg);
7744     }
7745     __ decode_heap_oop(dst_reg);
7746   %}
7747   ins_pipe(ialu_reg_long);
7748 %}
7749 
// DecodeN whose result type is NotNull or Constant (see the predicate).
// Uses the two-register decode_heap_oop_not_null(d, s) when dst and src
// differ, otherwise the in-place single-register form.
// The flags register is declared killed, consistent with decodeHeapOop and
// both encodeHeapOop variants.  NOTE(review): the decode helpers live in
// the MacroAssembler and are presumed able to emit flag-writing shift/add
// instructions; without KILL cr this instruction could be placed between a
// compare and its consumer -- confirm against the assembler source.
7750 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7751   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7752             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7753   match(Set dst (DecodeN src));
       effect(KILL cr);
7754   format %{ "decode_heap_oop_not_null $dst,$src" %}
7755   ins_encode %{
7756     Register s = $src$$Register;
7757     Register d = $dst$$Register;
7758     if (s != d) {
7759       __ decode_heap_oop_not_null(d, s);
7760     } else {
7761       __ decode_heap_oop_not_null(d);
7762     }
7763   %}
7764   ins_pipe(ialu_reg_long);
7765 %}
7766 
7767 
7768 //----------Conditional Move---------------------------------------------------
7769 // Jump
7770 // dummy instruction for generating temp registers
// Table jump on a shifted index: Jump (LShiftL switch_val shift).
// predicate(false) disables matching of this rule.  dest is a TEMP
// register that the format text shows receiving the table base.
7771 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7772   match(Jump (LShiftL switch_val shift));
7773   ins_cost(350);
7774   predicate(false);
7775   effect(TEMP dest);
7776 
7777   format %{ "leaq    $dest, table_base\n\t"
7778             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7779   ins_encode(jump_enc_offset(switch_val, shift, dest));
7780   ins_pipe(pipe_jmp);
7781   ins_pc_relative(1);
7782 %}
7783 
// Table jump on a shifted index plus constant offset:
// Jump (AddL (LShiftL switch_val shift) offset).  dest is a TEMP register
// that the format text shows receiving the table base; encoding is
// delegated to jump_enc_addr.
7784 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7785   match(Jump (AddL (LShiftL switch_val shift) offset));
7786   ins_cost(350);
7787   effect(TEMP dest);
7788 
7789   format %{ "leaq    $dest, table_base\n\t"
7790             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7791   ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
7792   ins_pipe(pipe_jmp);
7793   ins_pc_relative(1);
7794 %}
7795 
// Plain table jump: Jump switch_val.  dest is a TEMP register that the
// format text shows receiving the table base; encoding is delegated to
// jump_enc.
7796 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7797   match(Jump switch_val);
7798   ins_cost(350);
7799   effect(TEMP dest);
7800 
7801   format %{ "leaq    $dest, table_base\n\t"
7802             "jmp     [$dest + $switch_val]\n\t" %}
7803   ins_encode(jump_enc(switch_val, dest));
7804   ins_pipe(pipe_jmp);
7805   ins_pc_relative(1);
7806 %}
7807 
7808 // Conditional move
// CMoveI, register source, cmpOp/rFlagsReg condition ("signed" in the
// format text).  Opcode bytes 0x0F, 0x40 are combined with the condition
// by enc_cmov; dst is both an input and the result.
7809 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7810 %{
7811   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7812 
7813   ins_cost(200); // XXX
7814   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7815   opcode(0x0F, 0x40);
7816   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7817   ins_pipe(pipe_cmov_reg);
7818 %}
7819 
// CMoveI, register source, cmpOpU/rFlagsRegU condition ("unsigned" in the
// format text).  Same encoding sequence as cmovI_reg.
7820 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7821   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7822 
7823   ins_cost(200); // XXX
7824   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7825   opcode(0x0F, 0x40);
7826   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7827   ins_pipe(pipe_cmov_reg);
7828 %}
7829 
// CMoveI with cmpOpUCF/rFlagsRegUCF operands: no encoding of its own, it
// simply expands to cmovI_regU.
7830 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7831   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7832   ins_cost(200);
7833   expand %{
7834     cmovI_regU(cop, cr, dst, src);
7835   %}
7836 %}
7837 
7838 // Conditional move
// CMoveI whose second value is a LoadI folded into the instruction as a
// memory operand; cmpOp/rFlagsReg condition.  Cost 250 versus 200 for the
// register form.
7839 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7840   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7841 
7842   ins_cost(250); // XXX
7843   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7844   opcode(0x0F, 0x40);
7845   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7846   ins_pipe(pipe_cmov_mem);
7847 %}
7848 
7849 // Conditional move
// CMoveI with a folded LoadI memory operand and a cmpOpU/rFlagsRegU
// condition ("unsigned" in the format text).  Same encoding sequence as
// cmovI_mem.
7850 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7851 %{
7852   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7853 
7854   ins_cost(250); // XXX
7855   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7856   opcode(0x0F, 0x40);
7857   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7858   ins_pipe(pipe_cmov_mem);
7859 %}
7860 
// CMoveI with a folded LoadI and cmpOpUCF/rFlagsRegUCF operands: expands
// to cmovI_memU.
7861 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
7862   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7863   ins_cost(250);
7864   expand %{
7865     cmovI_memU(cop, cr, dst, src);
7866   %}
7867 %}
7868 
7869 // Conditional move
// CMoveN (compressed pointer), register source, cmpOp/rFlagsReg condition.
// Uses the same non-wide REX_reg_reg / cmovl form as the int variant.
7870 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7871 %{
7872   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7873 
7874   ins_cost(200); // XXX
7875   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7876   opcode(0x0F, 0x40);
7877   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7878   ins_pipe(pipe_cmov_reg);
7879 %}
7880 
7881 // Conditional move
// CMoveN (compressed pointer), register source, cmpOpU/rFlagsRegU
// condition.  Same encoding sequence as cmovN_reg.
7882 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7883 %{
7884   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7885 
7886   ins_cost(200); // XXX
7887   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7888   opcode(0x0F, 0x40);
7889   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7890   ins_pipe(pipe_cmov_reg);
7891 %}
7892 
// CMoveN with cmpOpUCF/rFlagsRegUCF operands: expands to cmovN_regU.
7893 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
7894   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7895   ins_cost(200);
7896   expand %{
7897     cmovN_regU(cop, cr, dst, src);
7898   %}
7899 %}
7900 
7901 // Conditional move
// CMoveP, register source, cmpOp/rFlagsReg condition.  Uses the wide
// prefix encoder REX_reg_reg_wide (cmovq in the format text).
7902 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7903 %{
7904   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7905 
7906   ins_cost(200); // XXX
7907   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7908   opcode(0x0F, 0x40);
7909   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7910   ins_pipe(pipe_cmov_reg);  // XXX
7911 %}
7912 
7913 // Conditional move
// CMoveP, register source, cmpOpU/rFlagsRegU condition.  Same encoding
// sequence as cmovP_reg.
7914 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7915 %{
7916   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7917 
7918   ins_cost(200); // XXX
7919   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7920   opcode(0x0F, 0x40);
7921   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7922   ins_pipe(pipe_cmov_reg); // XXX
7923 %}
7924 
// CMoveP with cmpOpUCF/rFlagsRegUCF operands: expands to cmovP_regU.
7925 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
7926   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7927   ins_cost(200);
7928   expand %{
7929     cmovP_regU(cop, cr, dst, src);
7930   %}
7931 %}
7932 
7933 // DISABLED: Requires the ADLC to emit a bottom_type call that
7934 // correctly meets the two pointer arguments; one is an incoming
7935 // register but the other is a memory operand.  ALSO appears to
7936 // be buggy with implicit null checks.
7937 //
7938 //// Conditional move
7939 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7940 //%{
7941 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7942 //  ins_cost(250);
7943 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7944 //  opcode(0x0F,0x40);
7945 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7946 //  ins_pipe( pipe_cmov_mem );
7947 //%}
7948 //
7949 //// Conditional move
7950 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7951 //%{
7952 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7953 //  ins_cost(250);
7954 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7955 //  opcode(0x0F,0x40);
7956 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7957 //  ins_pipe( pipe_cmov_mem );
7958 //%}
7959 
// CMoveL, register source, cmpOp/rFlagsReg condition.  Wide prefix
// encoder REX_reg_reg_wide (cmovq in the format text).
7960 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7961 %{
7962   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7963 
7964   ins_cost(200); // XXX
7965   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7966   opcode(0x0F, 0x40);
7967   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7968   ins_pipe(pipe_cmov_reg);  // XXX
7969 %}
7970 
// CMoveL whose second value is a LoadL folded in as a memory operand;
// cmpOp/rFlagsReg condition.  Note the cost is 200 here, whereas the int
// memory forms use 250.
7971 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7972 %{
7973   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7974 
7975   ins_cost(200); // XXX
7976   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7977   opcode(0x0F, 0x40);
7978   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7979   ins_pipe(pipe_cmov_mem);  // XXX
7980 %}
7981 
// CMoveL, register source, cmpOpU/rFlagsRegU condition.  Same encoding
// sequence as cmovL_reg.
7982 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7983 %{
7984   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7985 
7986   ins_cost(200); // XXX
7987   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7988   opcode(0x0F, 0x40);
7989   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
7990   ins_pipe(pipe_cmov_reg); // XXX
7991 %}
7992 
// CMoveL with cmpOpUCF/rFlagsRegUCF operands: expands to cmovL_regU.
7993 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
7994   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7995   ins_cost(200);
7996   expand %{
7997     cmovL_regU(cop, cr, dst, src);
7998   %}
7999 %}
8000 
// CMoveL with a folded LoadL memory operand and a cmpOpU/rFlagsRegU
// condition.  Same encoding sequence as cmovL_mem.
8001 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
8002 %{
8003   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8004 
8005   ins_cost(200); // XXX
8006   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
8007   opcode(0x0F, 0x40);
8008   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
8009   ins_pipe(pipe_cmov_mem); // XXX
8010 %}
8011 
// CMoveL with a folded LoadL and cmpOpUCF/rFlagsRegUCF operands: expands
// to cmovL_memU.
8012 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
8013   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
8014   ins_cost(200);
8015   expand %{
8016     cmovL_memU(cop, cr, dst, src);
8017   %}
8018 %}
8019 
// CMoveF with a cmpOp/rFlagsReg condition.  Per the format text this is
// not a cmov instruction but a branch on the negated condition around a
// movss; encoding is delegated to enc_cmovf_branch.
8020 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
8021 %{
8022   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8023 
8024   ins_cost(200); // XXX
8025   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8026             "movss     $dst, $src\n"
8027     "skip:" %}
8028   ins_encode(enc_cmovf_branch(cop, dst, src));
8029   ins_pipe(pipe_slow);
8030 %}
8031 
8032 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
8033 // %{
8034 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
8035 
8036 //   ins_cost(200); // XXX
8037 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
8038 //             "movss     $dst, $src\n"
8039 //     "skip:" %}
8040 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
8041 //   ins_pipe(pipe_slow);
8042 // %}
8043 
// CMoveF with a cmpOpU/rFlagsRegU condition.  Same branch-around-movss
// shape as cmovF_reg, encoded by enc_cmovf_branch.
8044 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
8045 %{
8046   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8047 
8048   ins_cost(200); // XXX
8049   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
8050             "movss     $dst, $src\n"
8051     "skip:" %}
8052   ins_encode(enc_cmovf_branch(cop, dst, src));
8053   ins_pipe(pipe_slow);
8054 %}
8055 
// CMoveF with cmpOpUCF/rFlagsRegUCF operands: expands to cmovF_regU.
8056 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
8057   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8058   ins_cost(200);
8059   expand %{
8060     cmovF_regU(cop, cr, dst, src);
8061   %}
8062 %}
8063 
// CMoveD with a cmpOp/rFlagsReg condition.  Per the format text this is a
// branch on the negated condition around a movsd; encoding is delegated
// to enc_cmovd_branch.
8064 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
8065 %{
8066   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8067 
8068   ins_cost(200); // XXX
8069   format %{ "jn$cop    skip\t# signed cmove double\n\t"
8070             "movsd     $dst, $src\n"
8071     "skip:" %}
8072   ins_encode(enc_cmovd_branch(cop, dst, src));
8073   ins_pipe(pipe_slow);
8074 %}
8075 
// CMoveD with a cmpOpU/rFlagsRegU condition.  Same branch-around-movsd
// shape as cmovD_reg, encoded by enc_cmovd_branch.
8076 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
8077 %{
8078   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8079 
8080   ins_cost(200); // XXX
8081   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
8082             "movsd     $dst, $src\n"
8083     "skip:" %}
8084   ins_encode(enc_cmovd_branch(cop, dst, src));
8085   ins_pipe(pipe_slow);
8086 %}
8087 
// CMoveD with cmpOpUCF/rFlagsRegUCF operands: expands to cmovD_regU.
8088 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
8089   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8090   ins_cost(200);
8091   expand %{
8092     cmovD_regU(cop, cr, dst, src);
8093   %}
8094 %}
8095 
8096 //----------Arithmetic Instructions--------------------------------------------
8097 //----------Addition Instructions----------------------------------------------
8098 
// AddI, register += register: addl (opcode 0x03), two-address form with
// dst as input and result.  Flags are killed.
8099 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8100 %{
8101   match(Set dst (AddI dst src));
8102   effect(KILL cr);
8103 
8104   format %{ "addl    $dst, $src\t# int" %}
8105   opcode(0x03);
8106   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8107   ins_pipe(ialu_reg_reg);
8108 %}
8109 
// AddI, register += immediate: opcode 0x81 /0.  OpcSErm and Con8or32
// emit the opcode and the constant (the names suggest an 8- or 32-bit
// immediate is chosen; the encoders are defined elsewhere).  Flags are
// killed.
8110 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8111 %{
8112   match(Set dst (AddI dst src));
8113   effect(KILL cr);
8114 
8115   format %{ "addl    $dst, $src\t# int" %}
8116   opcode(0x81, 0x00); /* /0 id */
8117   ins_encode(OpcSErm(dst, src), Con8or32(src));
8118   ins_pipe( ialu_reg );
8119 %}
8120 
// AddI, register += LoadI folded in as a memory operand: addl (opcode
// 0x03) with a memory source.  Flags are killed; cost 125.
8121 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8122 %{
8123   match(Set dst (AddI dst (LoadI src)));
8124   effect(KILL cr);
8125 
8126   ins_cost(125); // XXX
8127   format %{ "addl    $dst, $src\t# int" %}
8128   opcode(0x03);
8129   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8130   ins_pipe(ialu_reg_mem);
8131 %}
8132 
// Read-modify-write int add: StoreI dst (AddI (LoadI dst) src) collapsed
// into a single addl to memory (opcode 0x01 /r).  The encoders take
// (src, dst) because the register is the reg field and memory the r/m
// field.  Flags are killed; cost 150.
8133 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8134 %{
8135   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8136   effect(KILL cr);
8137 
8138   ins_cost(150); // XXX
8139   format %{ "addl    $dst, $src\t# int" %}
8140   opcode(0x01); /* Opcode 01 /r */
8141   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8142   ins_pipe(ialu_mem_reg);
8143 %}
8144 
// Read-modify-write int add of an immediate to memory: opcode 0x81 with
// /0 supplied through RM_opc_mem(0x00, dst).  Flags are killed; cost 125.
8145 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
8146 %{
8147   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8148   effect(KILL cr);
8149 
8150   ins_cost(125); // XXX
8151   format %{ "addl    $dst, $src\t# int" %}
8152   opcode(0x81); /* Opcode 81 /0 id */
8153   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8154   ins_pipe(ialu_mem_imm);
8155 %}
8156 
// AddI of an immI1 constant, available only when UseIncDec is set:
// incl (FF /0).  The src operand takes part in matching only; it is not
// passed to any encoder.  Flags are killed.
8157 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8158 %{
8159   predicate(UseIncDec);
8160   match(Set dst (AddI dst src));
8161   effect(KILL cr);
8162 
8163   format %{ "incl    $dst\t# int" %}
8164   opcode(0xFF, 0x00); // FF /0
8165   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8166   ins_pipe(ialu_reg);
8167 %}
8168 
// Read-modify-write increment of an int in memory (immI1 addend),
// available only when UseIncDec is set: incl (FF /0) with the /0 supplied
// through RM_opc_mem(0x00, dst).  Flags are killed; cost 125.
8169 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8170 %{
8171   predicate(UseIncDec);
8172   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8173   effect(KILL cr);
8174 
8175   ins_cost(125); // XXX
8176   format %{ "incl    $dst\t# int" %}
8177   opcode(0xFF); /* Opcode FF /0 */
8178   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
8179   ins_pipe(ialu_mem_imm);
8180 %}
8181 
8182 // XXX why does that use AddI
// Decrement expressed as AddI of an immI_M1 constant, available only when
// UseIncDec is set: decl (FF /1).  Flags are killed.
8183 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8184 %{
8185   predicate(UseIncDec);
8186   match(Set dst (AddI dst src));
8187   effect(KILL cr);
8188 
8189   format %{ "decl    $dst\t# int" %}
8190   opcode(0xFF, 0x01); // FF /1
8191   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8192   ins_pipe(ialu_reg);
8193 %}
8194 
8195 // XXX why does that use AddI
// Read-modify-write decrement of an int in memory (AddI of immI_M1),
// available only when UseIncDec is set: decl (FF /1) with the /1 supplied
// through RM_opc_mem(0x01, dst).  Flags are killed; cost 125.
8196 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8197 %{
8198   predicate(UseIncDec);
8199   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8200   effect(KILL cr);
8201 
8202   ins_cost(125); // XXX
8203   format %{ "decl    $dst\t# int" %}
8204   opcode(0xFF); /* Opcode FF /1 */
8205   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
8206   ins_pipe(ialu_mem_imm);
8207 %}
8208 
// Three-operand int add via lea: dst = src0 + src1 without overwriting
// src0.  No flags operand is declared.  A 0x67 prefix byte is emitted
// first ("addr32" in the format text), then leal (0x8D /r).  Cost 110.
8209 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8210 %{
8211   match(Set dst (AddI src0 src1));
8212 
8213   ins_cost(110);
8214   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8215   opcode(0x8D); /* 0x8D /r */
8216   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8217   ins_pipe(ialu_reg_reg);
8218 %}
8219 
// AddL, register += register: addq (opcode 0x03 behind the wide prefix
// encoder REX_reg_reg_wide).  Flags are killed.
8220 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8221 %{
8222   match(Set dst (AddL dst src));
8223   effect(KILL cr);
8224 
8225   format %{ "addq    $dst, $src\t# long" %}
8226   opcode(0x03);
8227   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8228   ins_pipe(ialu_reg_reg);
8229 %}
8230 
// AddL, register += immL32 constant: opcode 0x81 /0 through the wide
// encoder OpcSErm_wide, followed by the constant.  Flags are killed.
8231 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8232 %{
8233   match(Set dst (AddL dst src));
8234   effect(KILL cr);
8235 
8236   format %{ "addq    $dst, $src\t# long" %}
8237   opcode(0x81, 0x00); /* /0 id */
8238   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8239   ins_pipe( ialu_reg );
8240 %}
8241 
// AddL, register += LoadL folded in as a memory operand: addq (opcode
// 0x03) with a memory source.  Flags are killed; cost 125.
8242 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8243 %{
8244   match(Set dst (AddL dst (LoadL src)));
8245   effect(KILL cr);
8246 
8247   ins_cost(125); // XXX
8248   format %{ "addq    $dst, $src\t# long" %}
8249   opcode(0x03);
8250   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8251   ins_pipe(ialu_reg_mem);
8252 %}
8253 
// Read-modify-write long add: StoreL dst (AddL (LoadL dst) src) collapsed
// into a single addq to memory (opcode 0x01 /r).  Flags are killed;
// cost 150.
8254 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8255 %{
8256   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8257   effect(KILL cr);
8258 
8259   ins_cost(150); // XXX
8260   format %{ "addq    $dst, $src\t# long" %}
8261   opcode(0x01); /* Opcode 01 /r */
8262   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8263   ins_pipe(ialu_mem_reg);
8264 %}
8265 
// Read-modify-write long add of an immL32 constant to memory: opcode 0x81
// with /0 supplied through RM_opc_mem(0x00, dst).  Flags are killed;
// cost 125.
8266 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8267 %{
8268   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8269   effect(KILL cr);
8270 
8271   ins_cost(125); // XXX
8272   format %{ "addq    $dst, $src\t# long" %}
8273   opcode(0x81); /* Opcode 81 /0 id */
8274   ins_encode(REX_mem_wide(dst),
8275              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
8276   ins_pipe(ialu_mem_imm);
8277 %}
8278 
// AddL of an immL1 constant, available only when UseIncDec is set:
// incq (FF /0 behind the wide prefix encoder REX_reg_wide).
// dst is a long register operand (rRegL) so that the operand class agrees
// with the AddL match rule, with the wide encoding, and with the sibling
// decL_rReg; it was previously declared as the int class rRegI.
// Flags are killed.
8279 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8280 %{
8281   predicate(UseIncDec);
8282   match(Set dst (AddL dst src));
8283   effect(KILL cr);
8284 
8285   format %{ "incq    $dst\t# long" %}
8286   opcode(0xFF, 0x00); // FF /0
8287   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8288   ins_pipe(ialu_reg);
8289 %}
8290 
// Read-modify-write increment of a long in memory (immL1 addend),
// available only when UseIncDec is set: incq (FF /0) with the /0 supplied
// through RM_opc_mem(0x00, dst).  Flags are killed; cost 125.
8291 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8292 %{
8293   predicate(UseIncDec);
8294   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8295   effect(KILL cr);
8296 
8297   ins_cost(125); // XXX
8298   format %{ "incq    $dst\t# long" %}
8299   opcode(0xFF); /* Opcode FF /0 */
8300   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
8301   ins_pipe(ialu_mem_imm);
8302 %}
8303 
8304 // XXX why does that use AddL
// Decrement expressed as AddL of an immL_M1 constant, available only when
// UseIncDec is set: decq (FF /1 behind REX_reg_wide).  Flags are killed.
8305 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8306 %{
8307   predicate(UseIncDec);
8308   match(Set dst (AddL dst src));
8309   effect(KILL cr);
8310 
8311   format %{ "decq    $dst\t# long" %}
8312   opcode(0xFF, 0x01); // FF /1
8313   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8314   ins_pipe(ialu_reg);
8315 %}
8316 
8317 // XXX why does that use AddL
// Read-modify-write decrement of a long in memory (AddL of immL_M1),
// available only when UseIncDec is set: decq (FF /1) with the /1 supplied
// through RM_opc_mem(0x01, dst).  Flags are killed; cost 125.
8318 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8319 %{
8320   predicate(UseIncDec);
8321   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8322   effect(KILL cr);
8323 
8324   ins_cost(125); // XXX
8325   format %{ "decq    $dst\t# long" %}
8326   opcode(0xFF); /* Opcode FF /1 */
8327   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
8328   ins_pipe(ialu_mem_imm);
8329 %}
8330 
// Three-operand long add via lea: dst = src0 + src1 without overwriting
// src0.  No flags operand is declared.  leaq (0x8D /r behind
// REX_reg_reg_wide).  Cost 110.
8331 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8332 %{
8333   match(Set dst (AddL src0 src1));
8334 
8335   ins_cost(110);
8336   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8337   opcode(0x8D); /* 0x8D /r */
8338   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8339   ins_pipe(ialu_reg_reg);
8340 %}
8341 
// AddP, pointer += long register: addq (opcode 0x03 behind
// REX_reg_reg_wide).  Flags are killed.
8342 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8343 %{
8344   match(Set dst (AddP dst src));
8345   effect(KILL cr);
8346 
8347   format %{ "addq    $dst, $src\t# ptr" %}
8348   opcode(0x03);
8349   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8350   ins_pipe(ialu_reg_reg);
8351 %}
8352 
// AddP, pointer += immL32 constant: opcode 0x81 /0 through OpcSErm_wide,
// followed by the constant.  Flags are killed.
8353 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8354 %{
8355   match(Set dst (AddP dst src));
8356   effect(KILL cr);
8357 
8358   format %{ "addq    $dst, $src\t# ptr" %}
8359   opcode(0x81, 0x00); /* /0 id */
8360   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8361   ins_pipe( ialu_reg );
8362 %}
8363 
8364 // XXX addP mem ops ????
8365 
// Three-operand pointer add via lea: dst = src0 + src1 without
// overwriting src0.  No flags operand is declared.  leaq (0x8D /r behind
// REX_reg_reg_wide).  Cost 110.
8366 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8367 %{
8368   match(Set dst (AddP src0 src1));
8369 
8370   ins_cost(110);
8371   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8372   opcode(0x8D); /* 0x8D /r */
8373   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8374   ins_pipe(ialu_reg_reg);
8375 %}
8376 
// CheckCastPP on a pointer register: the node is matched in place
// (dst is input and result) and produces no machine code (size 0).
8377 instruct checkCastPP(rRegP dst)
8378 %{
8379   match(Set dst (CheckCastPP dst));
8380 
8381   size(0);
8382   format %{ "# checkcastPP of $dst" %}
8383   ins_encode(/* empty encoding */);
8384   ins_pipe(empty);
8385 %}
8386 
// CastPP on a pointer register: matched in place and produces no machine
// code (size 0).
8387 instruct castPP(rRegP dst)
8388 %{
8389   match(Set dst (CastPP dst));
8390 
8391   size(0);
8392   format %{ "# castPP of $dst" %}
8393   ins_encode(/* empty encoding */);
8394   ins_pipe(empty);
8395 %}
8396 
// CastII on an int register: matched in place and produces no machine
// code (size 0).  Unlike the pointer casts above, the cost is set to 0
// explicitly.
8397 instruct castII(rRegI dst)
8398 %{
8399   match(Set dst (CastII dst));
8400 
8401   size(0);
8402   format %{ "# castII of $dst" %}
8403   ins_encode(/* empty encoding */);
8404   ins_cost(0);
8405   ins_pipe(empty);
8406 %}
8407 
8408 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked: encoded as an ordinary 64-bit load, movq (opcode 0x8B
// behind REX_reg_mem_wide).  Cost 125.
8409 instruct loadPLocked(rRegP dst, memory mem)
8410 %{
8411   match(Set dst (LoadPLocked mem));
8412 
8413   ins_cost(125); // XXX
8414   format %{ "movq    $dst, $mem\t# ptr locked" %}
8415   opcode(0x8B);
8416   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8417   ins_pipe(ialu_reg_mem); // XXX
8418 %}
8419 
8420 // LoadL-locked - same as a regular LoadL when used with compare-swap
// LoadLLocked: encoded as an ordinary 64-bit load, movq (opcode 0x8B
// behind REX_reg_mem_wide).  Cost 125.
8421 instruct loadLLocked(rRegL dst, memory mem)
8422 %{
8423   match(Set dst (LoadLLocked mem));
8424 
8425   ins_cost(125); // XXX
8426   format %{ "movq    $dst, $mem\t# long locked" %}
8427   opcode(0x8B);
8428   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
8429   ins_pipe(ialu_reg_mem); // XXX
8430 %}
8431 
8432 // Conditional-store of the updated heap-top.
8433 // Used during allocation of the shared heap.
8434 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8435 
// StorePConditional: the result is the flags register itself (Set cr ...).
// Emits a lock prefix followed by the 64-bit cmpxchg (0x0F 0xB1) of
// newval against heap_top_ptr, with oldval constrained to the rax_RegP
// operand class.
// NOTE(review): unlike storeIConditional and storeLConditional, no
// KILL oldval effect is declared here -- confirm that is intentional.
8436 instruct storePConditional(memory heap_top_ptr,
8437                            rax_RegP oldval, rRegP newval,
8438                            rFlagsReg cr)
8439 %{
8440   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8441  
8442   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8443             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8444   opcode(0x0F, 0xB1);
8445   ins_encode(lock_prefix,
8446              REX_reg_mem_wide(newval, heap_top_ptr),
8447              OpcP, OpcS,
8448              reg_mem(newval, heap_top_ptr));
8449   ins_pipe(pipe_cmpxchg);
8450 %}
8451 
8452 // Conditional-store of an int value.
8453 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: the result is the flags register (Set cr ...).
// Emits a lock prefix followed by the 32-bit cmpxchg (0x0F 0xB1, non-wide
// REX_reg_mem).  oldval is constrained to rax_RegI and declared killed.
8454 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8455 %{
8456   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8457   effect(KILL oldval);
8458 
8459   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8460   opcode(0x0F, 0xB1);
8461   ins_encode(lock_prefix,
8462              REX_reg_mem(newval, mem),
8463              OpcP, OpcS,
8464              reg_mem(newval, mem));
8465   ins_pipe(pipe_cmpxchg);
8466 %}
8467 
8468 // Conditional-store of a long value.
8469 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: the result is the flags register (Set cr ...).
// Emits a lock prefix followed by the 64-bit cmpxchg (0x0F 0xB1 behind
// REX_reg_mem_wide).  oldval is constrained to rax_RegL and declared
// killed.
8470 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8471 %{
8472   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8473   effect(KILL oldval);
8474 
8475   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8476   opcode(0x0F, 0xB1);
8477   ins_encode(lock_prefix,
8478              REX_reg_mem_wide(newval, mem),
8479              OpcP, OpcS,
8480              reg_mem(newval, mem));
8481   ins_pipe(pipe_cmpxchg);
8482 %}
8483 
8484 
8485 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP producing an int result in res.  Three instructions are
// emitted back to back: lock + 64-bit cmpxchg (0x0F 0xB1) of newval
// against mem_ptr, then sete (0x0F 0x94) on res, then movzbl (0x0F 0xB6)
// of res onto itself.  Flags and oldval (rax_RegP) are killed.
8486 instruct compareAndSwapP(rRegI res,
8487                          memory mem_ptr,
8488                          rax_RegP oldval, rRegP newval,
8489                          rFlagsReg cr)
8490 %{
8491   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8492   effect(KILL cr, KILL oldval);
8493 
8494   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8495             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8496             "sete    $res\n\t"
8497             "movzbl  $res, $res" %}
8498   opcode(0x0F, 0xB1);
8499   ins_encode(lock_prefix,
8500              REX_reg_mem_wide(newval, mem_ptr),
8501              OpcP, OpcS,
8502              reg_mem(newval, mem_ptr),
8503              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8504              REX_reg_breg(res, res), // movzbl
8505              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8506   ins_pipe( pipe_cmpxchg );
8507 %}
8508 
// CompareAndSwapL producing an int result in res.  Same three-instruction
// sequence as compareAndSwapP: lock + 64-bit cmpxchg, sete on res, movzbl
// of res onto itself.  Flags and oldval (rax_RegL) are killed.
8509 instruct compareAndSwapL(rRegI res,
8510                          memory mem_ptr,
8511                          rax_RegL oldval, rRegL newval,
8512                          rFlagsReg cr)
8513 %{
8514   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8515   effect(KILL cr, KILL oldval);
8516 
8517   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8518             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8519             "sete    $res\n\t"
8520             "movzbl  $res, $res" %}
8521   opcode(0x0F, 0xB1);
8522   ins_encode(lock_prefix,
8523              REX_reg_mem_wide(newval, mem_ptr),
8524              OpcP, OpcS,
8525              reg_mem(newval, mem_ptr),
8526              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8527              REX_reg_breg(res, res), // movzbl
8528              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8529   ins_pipe( pipe_cmpxchg );
8530 %}
8531 
// CompareAndSwapI producing an int result in res.  lock + 32-bit cmpxchg
// (non-wide REX_reg_mem), then sete on res and movzbl of res onto itself.
// Flags and oldval (rax_RegI) are killed.
8532 instruct compareAndSwapI(rRegI res,
8533                          memory mem_ptr,
8534                          rax_RegI oldval, rRegI newval,
8535                          rFlagsReg cr)
8536 %{
8537   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8538   effect(KILL cr, KILL oldval);
8539 
8540   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8541             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8542             "sete    $res\n\t"
8543             "movzbl  $res, $res" %}
8544   opcode(0x0F, 0xB1);
8545   ins_encode(lock_prefix,
8546              REX_reg_mem(newval, mem_ptr),
8547              OpcP, OpcS,
8548              reg_mem(newval, mem_ptr),
8549              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8550              REX_reg_breg(res, res), // movzbl
8551              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8552   ins_pipe( pipe_cmpxchg );
8553 %}
8554 
8555 
// CompareAndSwapN (compressed pointer) producing an int result in res.
// Same sequence as compareAndSwapI: lock + 32-bit cmpxchg, sete on res,
// movzbl of res onto itself.  Flags and oldval (rax_RegN) are killed.
8556 instruct compareAndSwapN(rRegI res,
8557                           memory mem_ptr,
8558                           rax_RegN oldval, rRegN newval,
8559                           rFlagsReg cr) %{
8560   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8561   effect(KILL cr, KILL oldval);
8562 
8563   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8564             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8565             "sete    $res\n\t"
8566             "movzbl  $res, $res" %}
8567   opcode(0x0F, 0xB1);
8568   ins_encode(lock_prefix,
8569              REX_reg_mem(newval, mem_ptr),
8570              OpcP, OpcS,
8571              reg_mem(newval, mem_ptr),
8572              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8573              REX_reg_breg(res, res), // movzbl
8574              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8575   ins_pipe( pipe_cmpxchg );
8576 %}
8577 
8578 //----------Subtraction Instructions-------------------------------------------
8579 
8580 // Integer Subtraction Instructions
// SubI, register -= register: subl (opcode 0x2B), two-address form with
// dst as input and result.  Flags are killed.
8581 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8582 %{
8583   match(Set dst (SubI dst src));
8584   effect(KILL cr);
8585 
8586   format %{ "subl    $dst, $src\t# int" %}
8587   opcode(0x2B);
8588   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8589   ins_pipe(ialu_reg_reg);
8590 %}
8591 
// SubI, register -= immediate: opcode 0x81 /5 through OpcSErm, followed
// by the constant.  Flags are killed.
8592 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8593 %{
8594   match(Set dst (SubI dst src));
8595   effect(KILL cr);
8596 
8597   format %{ "subl    $dst, $src\t# int" %}
8598   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8599   ins_encode(OpcSErm(dst, src), Con8or32(src));
8600   ins_pipe(ialu_reg);
8601 %}
8602 
// SubI, register -= LoadI folded in as a memory operand: subl (opcode
// 0x2B) with a memory source.  Flags are killed; cost 125.
8603 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8604 %{
8605   match(Set dst (SubI dst (LoadI src)));
8606   effect(KILL cr);
8607 
8608   ins_cost(125);
8609   format %{ "subl    $dst, $src\t# int" %}
8610   opcode(0x2B);
8611   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8612   ins_pipe(ialu_reg_mem);
8613 %}
8614 
// Read-modify-write int subtract: StoreI dst (SubI (LoadI dst) src)
// collapsed into a single subl to memory (opcode 0x29 /r).  Flags are
// killed; cost 150.
8615 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8616 %{
8617   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8618   effect(KILL cr);
8619 
8620   ins_cost(150);
8621   format %{ "subl    $dst, $src\t# int" %}
8622   opcode(0x29); /* Opcode 29 /r */
8623   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8624   ins_pipe(ialu_mem_reg);
8625 %}
8626 
// Read-modify-write int subtract of an immediate from memory: opcode 0x81
// with /5 supplied through RM_opc_mem(0x05, dst).  Flags are killed;
// cost 125.
8627 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8628 %{
8629   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8630   effect(KILL cr);
8631 
8632   ins_cost(125); // XXX
8633   format %{ "subl    $dst, $src\t# int" %}
8634   opcode(0x81); /* Opcode 81 /5 id */
8635   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8636   ins_pipe(ialu_mem_imm);
8637 %}
8638 
// SubL, register -= register: subq (opcode 0x2B behind
// REX_reg_reg_wide).  Flags are killed.
8639 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8640 %{
8641   match(Set dst (SubL dst src));
8642   effect(KILL cr);
8643 
8644   format %{ "subq    $dst, $src\t# long" %}
8645   opcode(0x2B);
8646   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8647   ins_pipe(ialu_reg_reg);
8648 %}
8649 
// SubL, register -= immL32 constant: opcode 0x81 /5 through the wide
// encoder OpcSErm_wide, followed by the constant.
// dst is a long register operand (rRegL) so that the operand class agrees
// with the SubL match rule, with the wide encoding, and with the sibling
// addL_rReg_imm; it was previously declared as the int class rRegI.
// Flags are killed.
8650 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8651 %{
8652   match(Set dst (SubL dst src));
8653   effect(KILL cr);
8654 
8655   format %{ "subq    $dst, $src\t# long" %}
8656   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8657   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8658   ins_pipe(ialu_reg);
8659 %}
8660 
// Long subtract, register -= memory: matches (SubL dst (LoadL src)), folding
// the load into the "subq" (opcode 0x2B). Flags are declared killed.
8661 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8662 %{
8663   match(Set dst (SubL dst (LoadL src)));
8664   effect(KILL cr);
8665 
8666   ins_cost(125);
8667   format %{ "subq    $dst, $src\t# long" %}
8668   opcode(0x2B);
8669   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8670   ins_pipe(ialu_reg_mem);
8671 %}
8672 
// Long subtract, memory -= register: matches
// (StoreL dst (SubL (LoadL dst) src)) on a single memory operand.
// Flags are declared killed.
8673 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8674 %{
8675   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8676   effect(KILL cr);
8677 
8678   ins_cost(150);
8679   format %{ "subq    $dst, $src\t# long" %}
8680   opcode(0x29); /* Opcode 29 /r */
     // The register operand (src) is passed first, the memory operand second.
8681   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8682   ins_pipe(ialu_mem_reg);
8683 %}
8684 
// Long subtract, memory -= 32-bit immediate: matches
// (StoreL dst (SubL (LoadL dst) src)) with an immL32 source.
// Flags are declared killed.
8685 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8686 %{
8687   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8688   effect(KILL cr);
8689 
8690   ins_cost(125); // XXX
8691   format %{ "subq    $dst, $src\t# long" %}
8692   opcode(0x81); /* Opcode 81 /5 id */
     // The /5 opcode extension is passed literally (0x05) to RM_opc_mem.
8693   ins_encode(REX_mem_wide(dst),
8694              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8695   ins_pipe(ialu_mem_imm);
8696 %}
8697 
8698 // Subtract from a pointer
8699 // XXX hmpf???
// Matches a pointer plus a negated int, (AddP dst (SubI zero src)), and
// prints it as a single "subq". Flags are declared killed.
// NOTE(review): src is an int register (rRegI) but the encoding is the wide
// 64-bit form; whether the upper half of src is well defined here is not
// visible from this rule -- confirm before relying on it.
8700 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8701 %{
8702   match(Set dst (AddP dst (SubI zero src)));
8703   effect(KILL cr);
8704 
8705   format %{ "subq    $dst, $src\t# ptr - int" %}
8706   opcode(0x2B);
8707   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8708   ins_pipe(ialu_reg_reg);
8709 %}
8710 
// Int negate in a register: matches (SubI zero dst) with an immI0 operand
// and prints as "negl". Flags are declared killed.
8711 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8712 %{
8713   match(Set dst (SubI zero dst));
8714   effect(KILL cr);
8715 
8716   format %{ "negl    $dst\t# int" %}
8717   opcode(0xF7, 0x03);  // Opcode F7 /3
8718   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8719   ins_pipe(ialu_reg);
8720 %}
8721 
// Int negate in memory: matches (StoreI dst (SubI zero (LoadI dst))) and
// prints as "negl" on the memory operand. Flags are declared killed.
8722 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8723 %{
8724   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8725   effect(KILL cr);
8726 
8727   format %{ "negl    $dst\t# int" %}
8728   opcode(0xF7, 0x03);  // Opcode F7 /3
8729   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
     // NOTE(review): a register pipe class on a memory-destination form;
     // other *_mem forms in this file use ialu_mem_* classes -- confirm intent.
8730   ins_pipe(ialu_reg);
8731 %}
8732 
// Long negate in a register: matches (SubL zero dst) with an immL0 operand
// and prints as "negq". Flags are declared killed.
8733 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8734 %{
8735   match(Set dst (SubL zero dst));
8736   effect(KILL cr);
8737 
8738   format %{ "negq    $dst\t# long" %}
8739   opcode(0xF7, 0x03);  // Opcode F7 /3
8740   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8741   ins_pipe(ialu_reg);
8742 %}
8743 
// Long negate in memory: matches (StoreL dst (SubL zero (LoadL dst))) and
// prints as "negq" on the memory operand. Flags are declared killed.
8744 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8745 %{
8746   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8747   effect(KILL cr);
8748 
8749   format %{ "negq    $dst\t# long" %}
8750   opcode(0xF7, 0x03);  // Opcode F7 /3
8751   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
     // NOTE(review): a register pipe class on a memory-destination form;
     // other *_mem forms in this file use ialu_mem_* classes -- confirm intent.
8752   ins_pipe(ialu_reg);
8753 %}
8754 
8755 
8756 //----------Multiplication/Division Instructions-------------------------------
8757 // Integer Multiplication Instructions
8758 // Multiply Register
8759 
// Int multiply, register *= register: matches (MulI dst src) and prints as
// a two-operand "imull" (opcode bytes 0x0F 0xAF). Flags are declared killed.
8760 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8761 %{
8762   match(Set dst (MulI dst src));
8763   effect(KILL cr);
8764 
8765   ins_cost(300);
8766   format %{ "imull   $dst, $src\t# int" %}
8767   opcode(0x0F, 0xAF);
     // Both opcode bytes are emitted: OpcP (primary) then OpcS (secondary).
8768   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8769   ins_pipe(ialu_reg_reg_alu0);
8770 %}
8771 
// Int multiply by immediate, three-operand form: matches (MulI src imm) into
// a separate dst and prints as "imull $dst, $src, $imm".
// Flags are declared killed.
8772 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8773 %{
8774   match(Set dst (MulI src imm));
8775   effect(KILL cr);
8776 
8777   ins_cost(300);
8778   format %{ "imull   $dst, $src, $imm\t# int" %}
8779   opcode(0x69); /* 69 /r id */
8780   ins_encode(REX_reg_reg(dst, src),
8781              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8782   ins_pipe(ialu_reg_reg_alu0);
8783 %}
8784 
// Int multiply, register *= memory: matches (MulI dst (LoadI src)), folding
// the load into "imull" (opcode bytes 0x0F 0xAF). Flags are declared killed.
8785 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8786 %{
8787   match(Set dst (MulI dst (LoadI src)));
8788   effect(KILL cr);
8789 
8790   ins_cost(350);
8791   format %{ "imull   $dst, $src\t# int" %}
8792   opcode(0x0F, 0xAF);
8793   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8794   ins_pipe(ialu_reg_mem_alu0);
8795 %}
8796 
// Int multiply of a loaded value by an immediate: matches
// (MulI (LoadI src) imm) into dst and prints as "imull $dst, $src, $imm".
// Flags are declared killed.
8797 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8798 %{
8799   match(Set dst (MulI (LoadI src) imm));
8800   effect(KILL cr);
8801 
8802   ins_cost(300);
8803   format %{ "imull   $dst, $src, $imm\t# int" %}
8804   opcode(0x69); /* 69 /r id */
8805   ins_encode(REX_reg_mem(dst, src),
8806              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8807   ins_pipe(ialu_reg_mem_alu0);
8808 %}
8809 
// Long multiply, register *= register: matches (MulL dst src) and prints as
// a two-operand "imulq" (opcode bytes 0x0F 0xAF, wide REX class).
// Flags are declared killed.
8810 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8811 %{
8812   match(Set dst (MulL dst src));
8813   effect(KILL cr);
8814 
8815   ins_cost(300);
8816   format %{ "imulq   $dst, $src\t# long" %}
8817   opcode(0x0F, 0xAF);
8818   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8819   ins_pipe(ialu_reg_reg_alu0);
8820 %}
8821 
// Long multiply by a 32-bit immediate, three-operand form: matches
// (MulL src imm) into a separate dst and prints as "imulq $dst, $src, $imm".
// Flags are declared killed.
8822 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8823 %{
8824   match(Set dst (MulL src imm));
8825   effect(KILL cr);
8826 
8827   ins_cost(300);
8828   format %{ "imulq   $dst, $src, $imm\t# long" %}
8829   opcode(0x69); /* 69 /r id */
8830   ins_encode(REX_reg_reg_wide(dst, src),
8831              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8832   ins_pipe(ialu_reg_reg_alu0);
8833 %}
8834 
// Long multiply, register *= memory: matches (MulL dst (LoadL src)), folding
// the load into "imulq" (opcode bytes 0x0F 0xAF). Flags are declared killed.
8835 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8836 %{
8837   match(Set dst (MulL dst (LoadL src)));
8838   effect(KILL cr);
8839 
8840   ins_cost(350);
8841   format %{ "imulq   $dst, $src\t# long" %}
8842   opcode(0x0F, 0xAF);
8843   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8844   ins_pipe(ialu_reg_mem_alu0);
8845 %}
8846 
// Long multiply of a loaded value by a 32-bit immediate: matches
// (MulL (LoadL src) imm) into dst and prints as "imulq $dst, $src, $imm".
// Flags are declared killed.
8847 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8848 %{
8849   match(Set dst (MulL (LoadL src) imm));
8850   effect(KILL cr);
8851 
8852   ins_cost(300);
8853   format %{ "imulq   $dst, $src, $imm\t# long" %}
8854   opcode(0x69); /* 69 /r id */
8855   ins_encode(REX_reg_mem_wide(dst, src),
8856              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8857   ins_pipe(ialu_reg_mem_alu0);
8858 %}
8859 
// High half of a long multiply: matches (MulHiL src rax). The result operand
// is constrained to rdx_RegL, one input to rax_RegL (used and killed), and
// src to no_rax_RegL. Flags are declared killed.
8860 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8861 %{
8862   match(Set dst (MulHiL src rax));
8863   effect(USE_KILL rax, KILL cr);
8864 
8865   ins_cost(300);
8866   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8867   opcode(0xF7, 0x5); /* Opcode F7 /5 */
     // Only src appears in the encoding; rax and rdx are fixed by the operand
     // classes above rather than encoded.
8868   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8869   ins_pipe(ialu_reg_reg_alu0);
8870 %}
8871 
// Int divide: matches (DivI rax div) with the dividend and result in the
// rax_RegI operand; rdx and the flags are declared killed. The divisor is
// constrained to no_rax_rdx_RegI.
// The format string documents the emitted sequence: when rax is 0x80000000
// and the divisor is -1, the idivl is skipped (rdx is zeroed instead).
// NOTE(review): that guard is presumably emitted by cdql_enc -- confirm in
// its enc_class definition.
8872 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8873                    rFlagsReg cr)
8874 %{
8875   match(Set rax (DivI rax div));
8876   effect(KILL rdx, KILL cr);
8877 
8878   ins_cost(30*100+10*100); // XXX
8879   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8880             "jne,s   normal\n\t"
8881             "xorl    rdx, rdx\n\t"
8882             "cmpl    $div, -1\n\t"
8883             "je,s    done\n"
8884     "normal: cdql\n\t"
8885             "idivl   $div\n"
8886     "done:"        %}
8887   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8888   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8889   ins_pipe(ialu_reg_reg_alu0);
8890 %}
8891 
// Long divide: matches (DivL rax div) with the dividend and result in the
// rax_RegL operand; rdx and the flags are declared killed. The divisor is
// constrained to no_rax_rdx_RegL.
// The format string documents the emitted sequence: when rax is
// 0x8000000000000000 and the divisor is -1, the idivq is skipped.
// NOTE(review): that guard is presumably emitted by cdqq_enc -- confirm in
// its enc_class definition.
8892 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8893                    rFlagsReg cr)
8894 %{
8895   match(Set rax (DivL rax div));
8896   effect(KILL rdx, KILL cr);
8897 
8898   ins_cost(30*100+10*100); // XXX
8899   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8900             "cmpq    rax, rdx\n\t"
8901             "jne,s   normal\n\t"
8902             "xorl    rdx, rdx\n\t"
8903             "cmpq    $div, -1\n\t"
8904             "je,s    done\n"
8905     "normal: cdqq\n\t"
8906             "idivq   $div\n"
8907     "done:"        %}
8908   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8909   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8910   ins_pipe(ialu_reg_reg_alu0);
8911 %}
8912 
8913 // Integer DIVMOD with Register, both quotient and mod results
// Combined int divide/modulo: matches the DivModI node directly (there is no
// "Set" in the match rule) and declares only the flags as killed. Format
// string and encoding are the same as divI_rReg.
// NOTE(review): rdx is an operand but carries no effect here, presumably
// because it holds the remainder result -- confirm against DivModI handling.
8914 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8915                              rFlagsReg cr)
8916 %{
8917   match(DivModI rax div);
8918   effect(KILL cr);
8919 
8920   ins_cost(30*100+10*100); // XXX
8921   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8922             "jne,s   normal\n\t"
8923             "xorl    rdx, rdx\n\t"
8924             "cmpl    $div, -1\n\t"
8925             "je,s    done\n"
8926     "normal: cdql\n\t"
8927             "idivl   $div\n"
8928     "done:"        %}
8929   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8930   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8931   ins_pipe(pipe_slow);
8932 %}
8933 
8934 // Long DIVMOD with Register, both quotient and mod results
// Combined long divide/modulo: matches the DivModL node directly (there is
// no "Set" in the match rule) and declares only the flags as killed. Format
// string and encoding are the same as divL_rReg.
// NOTE(review): rdx is an operand but carries no effect here, presumably
// because it holds the remainder result -- confirm against DivModL handling.
8935 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8936                              rFlagsReg cr)
8937 %{
8938   match(DivModL rax div);
8939   effect(KILL cr);
8940 
8941   ins_cost(30*100+10*100); // XXX
8942   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8943             "cmpq    rax, rdx\n\t"
8944             "jne,s   normal\n\t"
8945             "xorl    rdx, rdx\n\t"
8946             "cmpq    $div, -1\n\t"
8947             "je,s    done\n"
8948     "normal: cdqq\n\t"
8949             "idivq   $div\n"
8950     "done:"        %}
8951   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8952   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8953   ins_pipe(pipe_slow);
8954 %}
8955 
8956 //----------- DivL-By-Constant-Expansions--------------------------------------
8957 // DivI cases are handled by the compiler
8958 
8959 // Magic constant, reciprocal of 10
// Loads the 64-bit constant 0x6666666666666667 into dst. There is no match
// rule (only DEF dst), so this instruct is reached only through an expand
// rule; divL_10 uses it for its divide-by-10 sequence.
8960 instruct loadConL_0x6666666666666667(rRegL dst)
8961 %{
8962   effect(DEF dst);
8963 
     // The printed constant carries all 16 hex digits so that it agrees with
     // the value passed to load_immL below.
8964   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8965   ins_encode(load_immL(dst, 0x6666666666666667));
8966   ins_pipe(ialu_reg);
8967 %}
8968 
// Expand-only helper (no match rule): defines dst (rdx_RegL) from src and
// rax, with rax used and killed and the flags killed. Prints as
// "imulq rdx:rax, rax, $src"; used by the divL_10 expansion.
8969 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8970 %{
8971   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8972 
8973   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8974   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8975   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8976   ins_pipe(ialu_reg_reg_alu0);
8977 %}
8978 
// Expand-only helper (no match rule): arithmetic shift right of dst by the
// fixed count 63 (0x3F), in place (USE_DEF dst). Flags are declared killed.
// Used by the divL_10 expansion.
8979 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8980 %{
8981   effect(USE_DEF dst, KILL cr);
8982 
8983   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8984   opcode(0xC1, 0x7); /* C1 /7 ib */
8985   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8986   ins_pipe(ialu_reg);
8987 %}
8988 
// Expand-only helper (no match rule): arithmetic shift right of dst by the
// fixed count 2, in place (USE_DEF dst). Flags are declared killed.
// Used by the divL_10 expansion.
8989 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8990 %{
8991   effect(USE_DEF dst, KILL cr);
8992 
8993   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8994   opcode(0xC1, 0x7); /* C1 /7 ib */
8995   ins_encode(reg_opc_imm_wide(dst, 0x2));
8996   ins_pipe(ialu_reg);
8997 %}
8998 
// Long divide by the constant 10 without an idiv: matches (DivL src div)
// where div is an immL10 operand, and expands into a multiply-high by
// 0x6666666666666667 followed by shifts and a subtract. The result operand
// is constrained to rdx_RegL and src to no_rax_RegL.
// NOTE(review): sarL_rReg_63 declares its operand USE_DEF, so the third step
// overwrites $src -- confirm the allocator treats src as clobbered here.
8999 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
9000 %{
9001   match(Set dst (DivL src div));
9002 
9003   ins_cost((5+8)*100);
9004   expand %{
9005     rax_RegL rax;                     // Killed temp
9006     rFlagsReg cr;                     // Killed
9007     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
9008     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
9009     sarL_rReg_63(src, cr);            // sarq  src, 63
9010     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
9011     subL_rReg(dst, src, cr);          // subq  rdx, src
9012   %}
9013 %}
9014 
9015 //-----------------------------------------------------------------------------
9016 
// Int remainder: matches (ModI rax div) with the result in the rdx_RegI
// operand; rax and the flags are declared killed. The format string and the
// encoding are the same guarded idivl sequence used by divI_rReg.
9017 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
9018                    rFlagsReg cr)
9019 %{
9020   match(Set rdx (ModI rax div));
9021   effect(KILL rax, KILL cr);
9022 
9023   ins_cost(300); // XXX
9024   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
9025             "jne,s   normal\n\t"
9026             "xorl    rdx, rdx\n\t"
9027             "cmpl    $div, -1\n\t"
9028             "je,s    done\n"
9029     "normal: cdql\n\t"
9030             "idivl   $div\n"
9031     "done:"        %}
9032   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9033   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
9034   ins_pipe(ialu_reg_reg_alu0);
9035 %}
9036 
// Long remainder: matches (ModL rax div) with the result in the rdx_RegL
// operand; rax and the flags are declared killed. The format string and the
// encoding are the same guarded idivq sequence used by divL_rReg.
9037 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
9038                    rFlagsReg cr)
9039 %{
9040   match(Set rdx (ModL rax div));
9041   effect(KILL rax, KILL cr);
9042 
9043   ins_cost(300); // XXX
9044   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
9045             "cmpq    rax, rdx\n\t"
9046             "jne,s   normal\n\t"
9047             "xorl    rdx, rdx\n\t"
9048             "cmpq    $div, -1\n\t"
9049             "je,s    done\n"
9050     "normal: cdqq\n\t"
9051             "idivq   $div\n"
9052     "done:"        %}
9053   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
9054   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
9055   ins_pipe(ialu_reg_reg_alu0);
9056 %}
9057 
9058 // Integer Shift Instructions
9059 // Shift Left by one
// Int register form: matches (LShiftI dst shift) with an immI1 shift operand
// and prints as "sall". Flags are declared killed.
9060 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9061 %{
9062   match(Set dst (LShiftI dst shift));
9063   effect(KILL cr);
9064 
9065   format %{ "sall    $dst, $shift" %}
9066   opcode(0xD1, 0x4); /* D1 /4 */
     // The shift operand is not passed to the encoding.
9067   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9068   ins_pipe(ialu_reg);
9069 %}
9070 
9071 // Shift Left by one
// Int memory form: matches (StoreI dst (LShiftI (LoadI dst) shift)) with an
// immI1 shift operand and prints as "sall". Flags are declared killed.
9072 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9073 %{
9074   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9075   effect(KILL cr);
9076 
9077   format %{ "sall    $dst, $shift\t" %}
9078   opcode(0xD1, 0x4); /* D1 /4 */
     // The shift operand is not passed to the encoding.
9079   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9080   ins_pipe(ialu_mem_imm);
9081 %}
9082 
9083 // Shift Left by 8-bit immediate
// Int register form: matches (LShiftI dst shift) with an immI8 shift count
// and prints as "sall". Flags are declared killed.
9084 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9085 %{
9086   match(Set dst (LShiftI dst shift));
9087   effect(KILL cr);
9088 
9089   format %{ "sall    $dst, $shift" %}
9090   opcode(0xC1, 0x4); /* C1 /4 ib */
9091   ins_encode(reg_opc_imm(dst, shift));
9092   ins_pipe(ialu_reg);
9093 %}
9094 
9095 // Shift Left by 8-bit immediate
// Int memory form: matches (StoreI dst (LShiftI (LoadI dst) shift)) with an
// immI8 shift count and prints as "sall". Flags are declared killed.
9096 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9097 %{
9098   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9099   effect(KILL cr);
9100 
9101   format %{ "sall    $dst, $shift" %}
9102   opcode(0xC1, 0x4); /* C1 /4 ib */
9103   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9104   ins_pipe(ialu_mem_imm);
9105 %}
9106 
9107 // Shift Left by variable
// Int register form with a variable count: matches (LShiftI dst shift) where
// shift is constrained to rcx_RegI. Prints as "sall"; flags declared killed.
9108 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9109 %{
9110   match(Set dst (LShiftI dst shift));
9111   effect(KILL cr);
9112 
9113   format %{ "sall    $dst, $shift" %}
9114   opcode(0xD3, 0x4); /* D3 /4 */
     // Only dst is encoded; the count register is fixed by the operand class.
9115   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9116   ins_pipe(ialu_reg_reg);
9117 %}
9118 
9119 // Shift Left by variable
// Int memory form with a variable count: matches
// (StoreI dst (LShiftI (LoadI dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "sall"; flags declared killed.
9120 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9121 %{
9122   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
9123   effect(KILL cr);
9124 
9125   format %{ "sall    $dst, $shift" %}
9126   opcode(0xD3, 0x4); /* D3 /4 */
     // Only dst is encoded; the count register is fixed by the operand class.
9127   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9128   ins_pipe(ialu_mem_reg);
9129 %}
9130 
9131 // Arithmetic shift right by one
// Int register form: matches (RShiftI dst shift) with an immI1 shift operand
// and prints as "sarl". Flags are declared killed.
9132 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9133 %{
9134   match(Set dst (RShiftI dst shift));
9135   effect(KILL cr);
9136 
9137   format %{ "sarl    $dst, $shift" %}
9138   opcode(0xD1, 0x7); /* D1 /7 */
     // The shift operand is not passed to the encoding.
9139   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9140   ins_pipe(ialu_reg);
9141 %}
9142 
9143 // Arithmetic shift right by one
// Int memory form: matches (StoreI dst (RShiftI (LoadI dst) shift)) with an
// immI1 shift operand and prints as "sarl". Flags are declared killed.
9144 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9145 %{
9146   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9147   effect(KILL cr);
9148 
9149   format %{ "sarl    $dst, $shift" %}
9150   opcode(0xD1, 0x7); /* D1 /7 */
     // The shift operand is not passed to the encoding.
9151   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9152   ins_pipe(ialu_mem_imm);
9153 %}
9154 
9155 // Arithmetic Shift Right by 8-bit immediate
// Int register form: matches (RShiftI dst shift) with an immI8 shift count
// and prints as "sarl". Flags are declared killed.
// Uses the register pipe class ialu_reg, as the sibling register/immediate
// shifts salI_rReg_imm and shrI_rReg_imm do; ialu_mem_imm is the class used
// by the memory-destination forms.
9156 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9157 %{
9158   match(Set dst (RShiftI dst shift));
9159   effect(KILL cr);
9160 
9161   format %{ "sarl    $dst, $shift" %}
9162   opcode(0xC1, 0x7); /* C1 /7 ib */
9163   ins_encode(reg_opc_imm(dst, shift));
9164   ins_pipe(ialu_reg);
9165 %}
9166 
9167 // Arithmetic Shift Right by 8-bit immediate
// Int memory form: matches (StoreI dst (RShiftI (LoadI dst) shift)) with an
// immI8 shift count and prints as "sarl". Flags are declared killed.
9168 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9169 %{
9170   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9171   effect(KILL cr);
9172 
9173   format %{ "sarl    $dst, $shift" %}
9174   opcode(0xC1, 0x7); /* C1 /7 ib */
9175   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9176   ins_pipe(ialu_mem_imm);
9177 %}
9178 
9179 // Arithmetic Shift Right by variable
// Int register form with a variable count: matches (RShiftI dst shift) where
// shift is constrained to rcx_RegI. Prints as "sarl"; flags declared killed.
9180 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9181 %{
9182   match(Set dst (RShiftI dst shift));
9183   effect(KILL cr);
9184 
9185   format %{ "sarl    $dst, $shift" %}
9186   opcode(0xD3, 0x7); /* D3 /7 */
     // Only dst is encoded; the count register is fixed by the operand class.
9187   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9188   ins_pipe(ialu_reg_reg);
9189 %}
9190 
9191 // Arithmetic Shift Right by variable
// Int memory form with a variable count: matches
// (StoreI dst (RShiftI (LoadI dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "sarl"; flags declared killed.
9192 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9193 %{
9194   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9195   effect(KILL cr);
9196 
9197   format %{ "sarl    $dst, $shift" %}
9198   opcode(0xD3, 0x7); /* D3 /7 */
     // Only dst is encoded; the count register is fixed by the operand class.
9199   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9200   ins_pipe(ialu_mem_reg);
9201 %}
9202 
9203 // Logical shift right by one
// Int register form: matches (URShiftI dst shift) with an immI1 shift
// operand and prints as "shrl". Flags are declared killed.
9204 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9205 %{
9206   match(Set dst (URShiftI dst shift));
9207   effect(KILL cr);
9208 
9209   format %{ "shrl    $dst, $shift" %}
9210   opcode(0xD1, 0x5); /* D1 /5 */
     // The shift operand is not passed to the encoding.
9211   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9212   ins_pipe(ialu_reg);
9213 %}
9214 
9215 // Logical shift right by one
// Int memory form: matches (StoreI dst (URShiftI (LoadI dst) shift)) with an
// immI1 shift operand and prints as "shrl". Flags are declared killed.
9216 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9217 %{
9218   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9219   effect(KILL cr);
9220 
9221   format %{ "shrl    $dst, $shift" %}
9222   opcode(0xD1, 0x5); /* D1 /5 */
     // The shift operand is not passed to the encoding.
9223   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9224   ins_pipe(ialu_mem_imm);
9225 %}
9226 
9227 // Logical Shift Right by 8-bit immediate
// Int register form: matches (URShiftI dst shift) with an immI8 shift count
// and prints as "shrl". Flags are declared killed.
9228 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9229 %{
9230   match(Set dst (URShiftI dst shift));
9231   effect(KILL cr);
9232 
9233   format %{ "shrl    $dst, $shift" %}
9234   opcode(0xC1, 0x5); /* C1 /5 ib */
9235   ins_encode(reg_opc_imm(dst, shift));
9236   ins_pipe(ialu_reg);
9237 %}
9238 
9239 // Logical Shift Right by 8-bit immediate
// Int memory form: matches (StoreI dst (URShiftI (LoadI dst) shift)) with an
// immI8 shift count and prints as "shrl". Flags are declared killed.
9240 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9241 %{
9242   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9243   effect(KILL cr);
9244 
9245   format %{ "shrl    $dst, $shift" %}
9246   opcode(0xC1, 0x5); /* C1 /5 ib */
9247   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9248   ins_pipe(ialu_mem_imm);
9249 %}
9250 
9251 // Logical Shift Right by variable
// Int register form with a variable count: matches (URShiftI dst shift)
// where shift is constrained to rcx_RegI. Prints as "shrl"; flags declared
// killed.
9252 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9253 %{
9254   match(Set dst (URShiftI dst shift));
9255   effect(KILL cr);
9256 
9257   format %{ "shrl    $dst, $shift" %}
9258   opcode(0xD3, 0x5); /* D3 /5 */
     // Only dst is encoded; the count register is fixed by the operand class.
9259   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9260   ins_pipe(ialu_reg_reg);
9261 %}
9262 
9263 // Logical Shift Right by variable
// Int memory form with a variable count: matches
// (StoreI dst (URShiftI (LoadI dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "shrl"; flags declared killed.
9264 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9265 %{
9266   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9267   effect(KILL cr);
9268 
9269   format %{ "shrl    $dst, $shift" %}
9270   opcode(0xD3, 0x5); /* D3 /5 */
     // Only dst is encoded; the count register is fixed by the operand class.
9271   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9272   ins_pipe(ialu_mem_reg);
9273 %}
9274 
9275 // Long Shift Instructions
9276 // Shift Left by one
// Long register form: matches (LShiftL dst shift) with an immI1 shift
// operand and prints as "salq". Flags are declared killed.
9277 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9278 %{
9279   match(Set dst (LShiftL dst shift));
9280   effect(KILL cr);
9281 
9282   format %{ "salq    $dst, $shift" %}
9283   opcode(0xD1, 0x4); /* D1 /4 */
     // The shift operand is not passed to the encoding.
9284   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9285   ins_pipe(ialu_reg);
9286 %}
9287 
9288 // Shift Left by one
// Long memory form: matches (StoreL dst (LShiftL (LoadL dst) shift)) with an
// immI1 shift operand and prints as "salq". Flags are declared killed.
9289 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9290 %{
9291   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9292   effect(KILL cr);
9293 
9294   format %{ "salq    $dst, $shift" %}
9295   opcode(0xD1, 0x4); /* D1 /4 */
     // The shift operand is not passed to the encoding.
9296   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9297   ins_pipe(ialu_mem_imm);
9298 %}
9299 
9300 // Shift Left by 8-bit immediate
// Long register form: matches (LShiftL dst shift) with an immI8 shift count
// and prints as "salq". Flags are declared killed.
9301 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9302 %{
9303   match(Set dst (LShiftL dst shift));
9304   effect(KILL cr);
9305 
9306   format %{ "salq    $dst, $shift" %}
9307   opcode(0xC1, 0x4); /* C1 /4 ib */
9308   ins_encode(reg_opc_imm_wide(dst, shift));
9309   ins_pipe(ialu_reg);
9310 %}
9311 
9312 // Shift Left by 8-bit immediate
// Long memory form: matches (StoreL dst (LShiftL (LoadL dst) shift)) with an
// immI8 shift count and prints as "salq". Flags are declared killed.
9313 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9314 %{
9315   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9316   effect(KILL cr);
9317 
9318   format %{ "salq    $dst, $shift" %}
9319   opcode(0xC1, 0x4); /* C1 /4 ib */
9320   ins_encode(REX_mem_wide(dst), OpcP,
9321              RM_opc_mem(secondary, dst), Con8or32(shift));
9322   ins_pipe(ialu_mem_imm);
9323 %}
9324 
9325 // Shift Left by variable
// Long register form with a variable count: matches (LShiftL dst shift)
// where shift is constrained to rcx_RegI. Prints as "salq"; flags declared
// killed.
9326 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9327 %{
9328   match(Set dst (LShiftL dst shift));
9329   effect(KILL cr);
9330 
9331   format %{ "salq    $dst, $shift" %}
9332   opcode(0xD3, 0x4); /* D3 /4 */
     // Only dst is encoded; the count register is fixed by the operand class.
9333   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9334   ins_pipe(ialu_reg_reg);
9335 %}
9336 
9337 // Shift Left by variable
// Long memory form with a variable count: matches
// (StoreL dst (LShiftL (LoadL dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "salq"; flags declared killed.
9338 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9339 %{
9340   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9341   effect(KILL cr);
9342 
9343   format %{ "salq    $dst, $shift" %}
9344   opcode(0xD3, 0x4); /* D3 /4 */
     // Only dst is encoded; the count register is fixed by the operand class.
9345   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9346   ins_pipe(ialu_mem_reg);
9347 %}
9348 
9349 // Arithmetic shift right by one
// Long register form: matches (RShiftL dst shift) with an immI1 shift
// operand and prints as "sarq". Flags are declared killed.
9350 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9351 %{
9352   match(Set dst (RShiftL dst shift));
9353   effect(KILL cr);
9354 
9355   format %{ "sarq    $dst, $shift" %}
9356   opcode(0xD1, 0x7); /* D1 /7 */
     // The shift operand is not passed to the encoding.
9357   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9358   ins_pipe(ialu_reg);
9359 %}
9360 
9361 // Arithmetic shift right by one
// Long memory form: matches (StoreL dst (RShiftL (LoadL dst) shift)) with an
// immI1 shift operand and prints as "sarq". Flags are declared killed.
9362 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9363 %{
9364   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9365   effect(KILL cr);
9366 
9367   format %{ "sarq    $dst, $shift" %}
9368   opcode(0xD1, 0x7); /* D1 /7 */
     // The shift operand is not passed to the encoding.
9369   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9370   ins_pipe(ialu_mem_imm);
9371 %}
9372 
9373 // Arithmetic Shift Right by 8-bit immediate
// Long register form: matches (RShiftL dst shift) with an immI8 shift count
// and prints as "sarq". Flags are declared killed.
// Uses the register pipe class ialu_reg, as the sibling register/immediate
// shifts salL_rReg_imm and shrL_rReg_imm do; ialu_mem_imm is the class used
// by the memory-destination forms.
9374 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9375 %{
9376   match(Set dst (RShiftL dst shift));
9377   effect(KILL cr);
9378 
9379   format %{ "sarq    $dst, $shift" %}
9380   opcode(0xC1, 0x7); /* C1 /7 ib */
9381   ins_encode(reg_opc_imm_wide(dst, shift));
9382   ins_pipe(ialu_reg);
9383 %}
9384 
9385 // Arithmetic Shift Right by 8-bit immediate
// Long memory form: matches (StoreL dst (RShiftL (LoadL dst) shift)) with an
// immI8 shift count and prints as "sarq". Flags are declared killed.
9386 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9387 %{
9388   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9389   effect(KILL cr);
9390 
9391   format %{ "sarq    $dst, $shift" %}
9392   opcode(0xC1, 0x7); /* C1 /7 ib */
9393   ins_encode(REX_mem_wide(dst), OpcP,
9394              RM_opc_mem(secondary, dst), Con8or32(shift));
9395   ins_pipe(ialu_mem_imm);
9396 %}
9397 
9398 // Arithmetic Shift Right by variable
// Long register form with a variable count: matches (RShiftL dst shift)
// where shift is constrained to rcx_RegI. Prints as "sarq"; flags declared
// killed.
9399 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9400 %{
9401   match(Set dst (RShiftL dst shift));
9402   effect(KILL cr);
9403 
9404   format %{ "sarq    $dst, $shift" %}
9405   opcode(0xD3, 0x7); /* D3 /7 */
     // Only dst is encoded; the count register is fixed by the operand class.
9406   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9407   ins_pipe(ialu_reg_reg);
9408 %}
9409 
9410 // Arithmetic Shift Right by variable
// Long memory form with a variable count: matches
// (StoreL dst (RShiftL (LoadL dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "sarq"; flags declared killed.
9411 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9412 %{
9413   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9414   effect(KILL cr);
9415 
9416   format %{ "sarq    $dst, $shift" %}
9417   opcode(0xD3, 0x7); /* D3 /7 */
     // Only dst is encoded; the count register is fixed by the operand class.
9418   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9419   ins_pipe(ialu_mem_reg);
9420 %}
9421 
9422 // Logical shift right by one
// Long register form: matches (URShiftL dst shift) with an immI1 shift
// operand and prints as "shrq". Flags are declared killed.
9423 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9424 %{
9425   match(Set dst (URShiftL dst shift));
9426   effect(KILL cr);
9427 
9428   format %{ "shrq    $dst, $shift" %}
9429   opcode(0xD1, 0x5); /* D1 /5 */
     // The shift operand is not passed to the encoding.
9430   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9431   ins_pipe(ialu_reg);
9432 %}
9433 
9434 // Logical shift right by one
// Long memory form: matches (StoreL dst (URShiftL (LoadL dst) shift)) with
// an immI1 shift operand and prints as "shrq". Flags are declared killed.
9435 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9436 %{
9437   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9438   effect(KILL cr);
9439 
9440   format %{ "shrq    $dst, $shift" %}
9441   opcode(0xD1, 0x5); /* D1 /5 */
     // The shift operand is not passed to the encoding.
9442   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9443   ins_pipe(ialu_mem_imm);
9444 %}
9445 
9446 // Logical Shift Right by 8-bit immediate
// Long register form: matches (URShiftL dst shift) with an immI8 shift count
// and prints as "shrq". Flags are declared killed.
9447 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9448 %{
9449   match(Set dst (URShiftL dst shift));
9450   effect(KILL cr);
9451 
9452   format %{ "shrq    $dst, $shift" %}
9453   opcode(0xC1, 0x5); /* C1 /5 ib */
9454   ins_encode(reg_opc_imm_wide(dst, shift));
9455   ins_pipe(ialu_reg);
9456 %}
9457 
9458 
9459 // Logical Shift Right by 8-bit immediate
// Long memory form: matches (StoreL dst (URShiftL (LoadL dst) shift)) with
// an immI8 shift count and prints as "shrq". Flags are declared killed.
9460 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9461 %{
9462   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9463   effect(KILL cr);
9464 
9465   format %{ "shrq    $dst, $shift" %}
9466   opcode(0xC1, 0x5); /* C1 /5 ib */
9467   ins_encode(REX_mem_wide(dst), OpcP,
9468              RM_opc_mem(secondary, dst), Con8or32(shift));
9469   ins_pipe(ialu_mem_imm);
9470 %}
9471 
9472 // Logical Shift Right by variable
// Long register form with a variable count: matches (URShiftL dst shift)
// where shift is constrained to rcx_RegI. Prints as "shrq"; flags declared
// killed.
9473 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9474 %{
9475   match(Set dst (URShiftL dst shift));
9476   effect(KILL cr);
9477 
9478   format %{ "shrq    $dst, $shift" %}
9479   opcode(0xD3, 0x5); /* D3 /5 */
     // Only dst is encoded; the count register is fixed by the operand class.
9480   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9481   ins_pipe(ialu_reg_reg);
9482 %}
9483 
9484 // Logical Shift Right by variable
// Long memory form with a variable count: matches
// (StoreL dst (URShiftL (LoadL dst) shift)) where shift is constrained to
// rcx_RegI. Prints as "shrq"; flags declared killed.
9485 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9486 %{
9487   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9488   effect(KILL cr);
9489 
9490   format %{ "shrq    $dst, $shift" %}
9491   opcode(0xD3, 0x5); /* D3 /5 */
     // Only dst is encoded; the count register is fixed by the operand class.
9492   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9493   ins_pipe(ialu_mem_reg);
9494 %}
9495 
9496 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9497 // This idiom is used by the compiler for the i2b bytecode.
// Matches (RShiftI (LShiftI src twentyfour) twentyfour), with both shift
// counts bound to the same immI_24 operand, and replaces the pair of shifts
// with a single instruction printed as "movsbl". No flags effect is
// declared.
9498 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9499 %{
9500   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9501 
9502   format %{ "movsbl  $dst, $src\t# i2b" %}
9503   opcode(0x0F, 0xBE);
     // Uses the REX_reg_breg class rather than REX_reg_reg as i2s does.
     // NOTE(review): presumably because src is read as a byte register --
     // confirm in the enc_class definition.
9504   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9505   ins_pipe(ialu_reg_reg);
9506 %}
9507 
9508 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9509 // This idiom is used by the compiler for the i2s bytecode.
// Matches (RShiftI (LShiftI src sixteen) sixteen), with both shift counts
// bound to the same immI_16 operand, and replaces the pair of shifts with a
// single instruction printed as "movswl". No flags effect is declared.
9510 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9511 %{
9512   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9513 
9514   format %{ "movswl  $dst, $src\t# i2s" %}
9515   opcode(0x0F, 0xBF);
9516   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9517   ins_pipe(ialu_reg_reg);
9518 %}
9519 
9520 // ROL/ROR instructions
9521 
9522 // ROL expand
// Expand-only helper (no match rule): in-place rotate of an int register,
// printed as "roll $dst" with opcode D1 /0. Flags are declared killed.
// Used by rolI_rReg_i1.
9523 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9524   effect(KILL cr, USE_DEF dst);
9525 
9526   format %{ "roll    $dst" %}
9527   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9528   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9529   ins_pipe(ialu_reg);
9530 %}
9531 
// Expand-only helper (no match rule): in-place rotate of an int register by
// an immI8 count, printed as "roll". Flags are declared killed.
// Used by rolI_rReg_i8.
9532 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9533   effect(USE_DEF dst, USE shift, KILL cr);
9534 
9535   format %{ "roll    $dst, $shift" %}
9536   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9537   ins_encode( reg_opc_imm(dst, shift) );
9538   ins_pipe(ialu_reg);
9539 %}
9540 
// Expand-only helper (no match rule): in-place rotate of an int register by
// a variable count. shift is constrained to rcx_RegI and dst to
// no_rcx_RegI. Printed as "roll"; flags declared killed.
// Used by rolI_rReg_Var_C0 and rolI_rReg_Var_C32.
9541 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9542 %{
9543   effect(USE_DEF dst, USE shift, KILL cr);
9544 
9545   format %{ "roll    $dst, $shift" %}
9546   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9547   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9548   ins_pipe(ialu_reg_reg);
9549 %}
9550 // end of ROL expand
9551 
9552 // Rotate Left by one
// Recognizes the shift/or rotate idiom
// (OrI (LShiftI dst lshift) (URShiftI dst rshift)) with lshift an immI1 and
// rshift an immI_M1 operand, and expands it to rolI_rReg_imm1.
9553 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9554 %{
9555   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9556 
9557   expand %{
9558     rolI_rReg_imm1(dst, cr);
9559   %}
9560 %}
9561 
9562 // Rotate Left by 8-bit immediate
// Recognizes (OrI (LShiftI dst lshift) (URShiftI dst rshift)) with two immI8
// counts and expands it to rolI_rReg_imm8 using the left-shift count.
// The predicate restricts the rule to count pairs whose sum is 0 modulo 32
// (sum & 0x1f == 0).
9563 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9564 %{
9565   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9566   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9567 
9568   expand %{
9569     rolI_rReg_imm8(dst, lshift, cr);
9570   %}
9571 %}
9572 
9573 // Rotate Left by variable
// Recognizes a variable rotate-left written as
// (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))), i.e. the right
// shift count is 0 - shift, and expands it to rolI_rReg_CL.
9574 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9575 %{
9576   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9577 
9578   expand %{
9579     rolI_rReg_CL(dst, shift, cr);
9580   %}
9581 %}
9582 
9583 // Rotate Left by variable
// Recognizes a variable rotate-left written as
// (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))), where c32 is an
// immI_32 operand, and expands it to rolI_rReg_CL.
9584 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9585 %{
9586   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9587 
9588   expand %{
9589     rolI_rReg_CL(dst, shift, cr);
9590   %}
9591 %}
9592 
9593 // ROR expand
// Expand-only helper (no match rule): in-place rotate of an int register,
// printed as "rorl $dst" with opcode D1 /1. Flags are declared killed.
// Used by rorI_rReg_i1.
9594 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9595 %{
9596   effect(USE_DEF dst, KILL cr);
9597 
9598   format %{ "rorl    $dst" %}
9599   opcode(0xD1, 0x1); /* D1 /1 */
9600   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9601   ins_pipe(ialu_reg);
9602 %}
9603 
// Expand-only helper (no match rule): in-place rotate of an int register by
// an immI8 count, printed as "rorl". Flags are declared killed.
// Used by rorI_rReg_i8.
9604 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9605 %{
9606   effect(USE_DEF dst, USE shift, KILL cr);
9607 
9608   format %{ "rorl    $dst, $shift" %}
9609   opcode(0xC1, 0x1); /* C1 /1 ib */
9610   ins_encode(reg_opc_imm(dst, shift));
9611   ins_pipe(ialu_reg);
9612 %}
9613 
// Expand-only helper (no match rule): in-place rotate of an int register by
// a variable count. shift is constrained to rcx_RegI and dst to
// no_rcx_RegI. Printed as "rorl"; flags declared killed.
// Used by rorI_rReg_Var_C0 and rorI_rReg_Var_C32.
9614 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9615 %{
9616   effect(USE_DEF dst, USE shift, KILL cr);
9617 
9618   format %{ "rorl    $dst, $shift" %}
9619   opcode(0xD3, 0x1); /* D3 /1 */
9620   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9621   ins_pipe(ialu_reg_reg);
9622 %}
9623 // end of ROR expand
9624 
9625 // Rotate Right by one
// Recognizes the shift/or rotate idiom
// (OrI (URShiftI dst rshift) (LShiftI dst lshift)) with rshift an immI1 and
// lshift an immI_M1 operand, and expands it to rorI_rReg_imm1.
9626 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9627 %{
9628   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9629 
9630   expand %{
9631     rorI_rReg_imm1(dst, cr);
9632   %}
9633 %}
9634 
9635 // Rotate Right by 8-bit immediate
// Recognizes (OrI (URShiftI dst rshift) (LShiftI dst lshift)) with two immI8
// counts and expands it to rorI_rReg_imm8 using the right-shift count.
// The predicate restricts the rule to count pairs whose sum is 0 modulo 32
// (sum & 0x1f == 0).
9636 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9637 %{
9638   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9639   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9640 
9641   expand %{
9642     rorI_rReg_imm8(dst, rshift, cr);
9643   %}
9644 %}
9645 
9646 // Rotate Right by variable: matches (dst >>> shift) | (dst << (0 - shift)) and expands to rorI_rReg_CL.
9647 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9648 %{ // 'zero' takes part in the match only; it is not forwarded to the expansion.
9649   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9650 
9651   expand %{
9652     rorI_rReg_CL(dst, shift, cr);
9653   %}
9654 %}
9655 
9656 // Rotate Right by variable: matches (dst >>> shift) | (dst << (c32 - shift)) and expands to rorI_rReg_CL.
9657 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9658 %{ // 'c32' takes part in the match only; it is not forwarded to the expansion.
9659   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9660 
9661   expand %{
9662     rorI_rReg_CL(dst, shift, cr);
9663   %}
9664 %}
9665 
9666 // Rotate instructions for long (64-bit) operands
9667 // ROL expand
9668 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{ // No match rule here; rolL_rReg_i1 expands to this.
9669   effect(USE_DEF dst, KILL cr); // dst is read and written in place; flags are declared killed
9670 
9671   format %{ "rolq    $dst" %}
9672   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9673   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the _wide REX variant is used for the 64-bit operand
9674   ins_pipe(ialu_reg);
9675 %}
9676 
9677 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{ // No match rule here; rolL_rReg_i8 expands to this.
9678   effect(USE_DEF dst, USE shift, KILL cr); // dst is read and written in place; flags are declared killed
9679 
9680   format %{ "rolq    $dst, $shift" %}
9681   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9682   ins_encode( reg_opc_imm_wide(dst, shift) );
9683   ins_pipe(ialu_reg);
9684 %}
9685 
9686 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9687 %{ // No match rule here; the rolL_rReg_Var_* instructs expand to this. Count is the rcx_RegI operand.
9688   effect(USE_DEF dst, USE shift, KILL cr); // dst is read and written in place; flags are declared killed
9689 
9690   format %{ "rolq    $dst, $shift" %}
9691   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9692   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded: only dst appears in the emitted bytes
9693   ins_pipe(ialu_reg_reg);
9694 %}
9695 // end of ROL expand
9696 
9697 // Rotate Left by one: matches (dst << lshift) | (dst >>> rshift) with lshift an immI1 and rshift an immI_M1.
9698 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9699 %{ // Neither constant is forwarded; the expansion uses the one-bit rotate form.
9700   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9701 
9702   expand %{
9703     rolL_rReg_imm1(dst, cr);
9704   %}
9705 %}
9706 
9707 // Rotate Left by 8-bit immediate: matches (dst << lshift) | (dst >>> rshift) for constant shift counts.
9708 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9709 %{ // Predicate: the two counts must sum to 0 mod 64, so the pair of shifts is a rotate by lshift.
9710   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9711   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9712 
9713   expand %{
9714     rolL_rReg_imm8(dst, lshift, cr);
9715   %}
9716 %}
9717 
9718 // Rotate Left by variable: matches (dst << shift) | (dst >>> (0 - shift)) and expands to rolL_rReg_CL.
9719 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9720 %{ // 'zero' takes part in the match only; it is not forwarded to the expansion.
9721   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9722 
9723   expand %{
9724     rolL_rReg_CL(dst, shift, cr);
9725   %}
9726 %}
9727 
9728 // Rotate Left by variable: matches (dst << shift) | (dst >>> (c64 - shift)) and expands to rolL_rReg_CL.
9729 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9730 %{ // 'c64' takes part in the match only; it is not forwarded to the expansion.
9731   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9732 
9733   expand %{
9734     rolL_rReg_CL(dst, shift, cr);
9735   %}
9736 %}
9737 
9738 // ROR expand
9739 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9740 %{ // No match rule here; rorL_rReg_i1 expands to this. Emits the one-bit form "rorq $dst".
9741   effect(USE_DEF dst, KILL cr); // dst is read and written in place; flags are declared killed
9742 
9743   format %{ "rorq    $dst" %}
9744   opcode(0xD1, 0x1); /* D1 /1 */
9745   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the _wide REX variant is used for the 64-bit operand
9746   ins_pipe(ialu_reg);
9747 %}
9748 
9749 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9750 %{ // No match rule here; rorL_rReg_i8 expands to this. Rotate right by an 8-bit immediate count.
9751   effect(USE_DEF dst, USE shift, KILL cr); // dst is read and written in place; flags are declared killed
9752 
9753   format %{ "rorq    $dst, $shift" %}
9754   opcode(0xC1, 0x1); /* C1 /1 ib */
9755   ins_encode(reg_opc_imm_wide(dst, shift));
9756   ins_pipe(ialu_reg);
9757 %}
9758 
9759 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9760 %{ // No match rule here; the rorL_rReg_Var_* instructs expand to this. Count is the rcx_RegI operand.
9761   effect(USE_DEF dst, USE shift, KILL cr); // dst is read and written in place; flags are declared killed
9762 
9763   format %{ "rorq    $dst, $shift" %}
9764   opcode(0xD3, 0x1); /* D3 /1 */
9765   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded: only dst appears in the emitted bytes
9766   ins_pipe(ialu_reg_reg);
9767 %}
9768 // end of ROR expand
9769 
9770 // Rotate Right by one: matches (dst >>> rshift) | (dst << lshift) with rshift an immI1 and lshift an immI_M1.
9771 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9772 %{ // Neither constant is forwarded; the expansion uses the one-bit rotate form.
9773   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9774 
9775   expand %{
9776     rorL_rReg_imm1(dst, cr);
9777   %}
9778 %}
9779 
9780 // Rotate Right by 8-bit immediate: matches (dst >>> rshift) | (dst << lshift) for constant shift counts.
9781 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9782 %{ // Predicate: the two counts must sum to 0 mod 64, so the pair of shifts is a rotate by rshift.
9783   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9784   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9785 
9786   expand %{
9787     rorL_rReg_imm8(dst, rshift, cr);
9788   %}
9789 %}
9790 
9791 // Rotate Right by variable: matches (dst >>> shift) | (dst << (0 - shift)) and expands to rorL_rReg_CL.
9792 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9793 %{ // 'zero' takes part in the match only; it is not forwarded to the expansion.
9794   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9795 
9796   expand %{
9797     rorL_rReg_CL(dst, shift, cr);
9798   %}
9799 %}
9800 
9801 // Rotate Right by variable: matches (dst >>> shift) | (dst << (c64 - shift)) and expands to rorL_rReg_CL.
9802 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9803 %{ // 'c64' takes part in the match only; it is not forwarded to the expansion.
9804   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9805 
9806   expand %{
9807     rorL_rReg_CL(dst, shift, cr);
9808   %}
9809 %}
9810 
9811 // Logical Instructions
9812 
9813 // Integer Logical Instructions
9814 
9815 // And Instructions
9816 // And Register with Register: dst = dst & src (int), emitted as "andl $dst, $src".
9817 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9818 %{
9819   match(Set dst (AndI dst src));
9820   effect(KILL cr); // flags are declared killed
9821 
9822   format %{ "andl    $dst, $src\t# int" %}
9823   opcode(0x23);
9824   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9825   ins_pipe(ialu_reg_reg);
9826 %}
9827 
9828 // And Register with Immediate 255: dst & 0xFF is emitted as a zero-extending byte move (movzbl) of dst onto itself.
9829 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9830 %{ // No rFlagsReg operand is declared for this form, unlike the andl variants.
9831   match(Set dst (AndI dst src));
9832 
9833   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9834   opcode(0x0F, 0xB6);
9835   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9836   ins_pipe(ialu_reg);
9837 %}
9838 
9839 // And Register with Immediate 255 and promote to long: (long)(src & 0xFF) as a single movzbl into dst.
9840 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9841 %{ // Folds the ConvI2L into the zero-extending move; no rFlagsReg operand is declared.
9842   match(Set dst (ConvI2L (AndI src mask)));
9843 
9844   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9845   opcode(0x0F, 0xB6);
9846   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the mask itself is never encoded
9847   ins_pipe(ialu_reg);
9848 %}
9849 
9850 // And Register with Immediate 65535: dst & 0xFFFF is emitted as a zero-extending word move (movzwl) of dst onto itself.
9851 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9852 %{ // No rFlagsReg operand is declared for this form, unlike the andl variants.
9853   match(Set dst (AndI dst src));
9854 
9855   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9856   opcode(0x0F, 0xB7);
9857   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
9858   ins_pipe(ialu_reg);
9859 %}
9860 
9861 // And Register with Immediate 65535 and promote to long: (long)(src & 0xFFFF) as a single movzwl into dst.
9862 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9863 %{ // Folds the ConvI2L into the zero-extending move; no rFlagsReg operand is declared.
9864   match(Set dst (ConvI2L (AndI src mask)));
9865 
9866   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9867   opcode(0x0F, 0xB7);
9868   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // the mask itself is never encoded
9869   ins_pipe(ialu_reg);
9870 %}
9871 
9872 // And Register with Immediate: dst = dst & src for a general int constant (opcode group 81 /4).
9873 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9874 %{
9875   match(Set dst (AndI dst src));
9876   effect(KILL cr); // flags are declared killed
9877 
9878   format %{ "andl    $dst, $src\t# int" %}
9879   opcode(0x81, 0x04); /* Opcode 81 /4 */
9880   ins_encode(OpcSErm(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
9881   ins_pipe(ialu_reg);
9882 %}
9883 
9884 // And Register with Memory: dst = dst & LoadI(src), folding the load into the andl.
9885 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9886 %{
9887   match(Set dst (AndI dst (LoadI src)));
9888   effect(KILL cr); // flags are declared killed
9889 
9890   ins_cost(125);
9891   format %{ "andl    $dst, $src\t# int" %}
9892   opcode(0x23);
9893   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9894   ins_pipe(ialu_reg_mem);
9895 %}
9896 
9897 // And Memory with Register: matches a StoreI to dst of (LoadI dst) & src, i.e. an in-memory read-modify-write.
9898 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9899 %{
9900   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9901   effect(KILL cr); // flags are declared killed
9902 
9903   ins_cost(150);
9904   format %{ "andl    $dst, $src\t# int" %}
9905   opcode(0x21); /* Opcode 21 /r */
9906   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
9907   ins_pipe(ialu_mem_reg);
9908 %}
9909 
9910 // And Memory with Immediate: matches a StoreI to dst of (LoadI dst) & constant, i.e. an in-memory read-modify-write.
9911 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9912 %{
9913   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9914   effect(KILL cr); // flags are declared killed
9915 
9916   ins_cost(125);
9917   format %{ "andl    $dst, $src\t# int" %}
9918   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9919   ins_encode(REX_mem(dst), OpcSE(src),
9920              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /4 opcode extension given above
9921   ins_pipe(ialu_mem_imm);
9922 %}
9923 
9924 // Or Instructions
9925 // Or Register with Register: dst = dst | src (int), emitted as "orl $dst, $src".
9926 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9927 %{
9928   match(Set dst (OrI dst src));
9929   effect(KILL cr); // flags are declared killed
9930 
9931   format %{ "orl     $dst, $src\t# int" %}
9932   opcode(0x0B);
9933   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9934   ins_pipe(ialu_reg_reg);
9935 %}
9936 
9937 // Or Register with Immediate: dst = dst | src for an int constant (opcode group 81 /1).
9938 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9939 %{
9940   match(Set dst (OrI dst src));
9941   effect(KILL cr); // flags are declared killed
9942 
9943   format %{ "orl     $dst, $src\t# int" %}
9944   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9945   ins_encode(OpcSErm(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
9946   ins_pipe(ialu_reg);
9947 %}
9948 
9949 // Or Register with Memory: dst = dst | LoadI(src), folding the load into the orl.
9950 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9951 %{
9952   match(Set dst (OrI dst (LoadI src)));
9953   effect(KILL cr); // flags are declared killed
9954 
9955   ins_cost(125);
9956   format %{ "orl     $dst, $src\t# int" %}
9957   opcode(0x0B);
9958   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9959   ins_pipe(ialu_reg_mem);
9960 %}
9961 
9962 // Or Memory with Register: matches a StoreI to dst of (LoadI dst) | src, i.e. an in-memory read-modify-write.
9963 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9964 %{
9965   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9966   effect(KILL cr); // flags are declared killed
9967 
9968   ins_cost(150);
9969   format %{ "orl     $dst, $src\t# int" %}
9970   opcode(0x09); /* Opcode 09 /r */
9971   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
9972   ins_pipe(ialu_mem_reg);
9973 %}
9974 
9975 // Or Memory with Immediate: matches a StoreI to dst of (LoadI dst) | constant, i.e. an in-memory read-modify-write.
9976 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9977 %{
9978   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9979   effect(KILL cr); // flags are declared killed
9980 
9981   ins_cost(125);
9982   format %{ "orl     $dst, $src\t# int" %}
9983   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9984   ins_encode(REX_mem(dst), OpcSE(src),
9985              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /1 opcode extension given above
9986   ins_pipe(ialu_mem_imm);
9987 %}
9988 
9989 // Xor Instructions
9990 // Xor Register with Register: dst = dst ^ src (int), emitted as "xorl $dst, $src".
9991 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9992 %{
9993   match(Set dst (XorI dst src));
9994   effect(KILL cr); // flags are declared killed
9995 
9996   format %{ "xorl    $dst, $src\t# int" %}
9997   opcode(0x33);
9998   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9999   ins_pipe(ialu_reg_reg);
10000 %}
10001 
10002 // Xor Register with Immediate -1: dst ^ -1 is emitted as a 32-bit NOT of dst; no rFlagsReg operand is declared.
10003 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10004   match(Set dst (XorI dst imm));
10005 
10006   format %{ "notl   $dst" %}
10007   ins_encode %{
10008      __ notl($dst$$Register);
10009   %}
10010   ins_pipe(ialu_reg);
10011 %}
10012 
10013 // Xor Register with Immediate: dst = dst ^ src for a general int constant (opcode group 81 /6).
10014 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10015 %{
10016   match(Set dst (XorI dst src));
10017   effect(KILL cr); // flags are declared killed
10018 
10019   format %{ "xorl    $dst, $src\t# int" %}
10020   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10021   ins_encode(OpcSErm(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
10022   ins_pipe(ialu_reg);
10023 %}
10024 
10025 // Xor Register with Memory: dst = dst ^ LoadI(src), folding the load into the xorl.
10026 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10027 %{
10028   match(Set dst (XorI dst (LoadI src)));
10029   effect(KILL cr); // flags are declared killed
10030 
10031   ins_cost(125);
10032   format %{ "xorl    $dst, $src\t# int" %}
10033   opcode(0x33);
10034   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
10035   ins_pipe(ialu_reg_mem);
10036 %}
10037 
10038 // Xor Memory with Register: matches a StoreI to dst of (LoadI dst) ^ src, i.e. an in-memory read-modify-write.
10039 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10040 %{
10041   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10042   effect(KILL cr); // flags are declared killed
10043 
10044   ins_cost(150);
10045   format %{ "xorl    $dst, $src\t# int" %}
10046   opcode(0x31); /* Opcode 31 /r */
10047   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
10048   ins_pipe(ialu_mem_reg);
10049 %}
10050 
10051 // Xor Memory with Immediate: matches a StoreI to dst of (LoadI dst) ^ constant, i.e. an in-memory read-modify-write.
10052 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10053 %{
10054   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10055   effect(KILL cr); // flags are declared killed
10056 
10057   ins_cost(125);
10058   format %{ "xorl    $dst, $src\t# int" %}
10059   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10060   ins_encode(REX_mem(dst), OpcSE(src),
10061              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /6 opcode extension given above
10062   ins_pipe(ialu_mem_imm);
10063 %}
10064 
10065 
10066 // Long Logical Instructions
10067 
10068 // And Instructions
10069 // And Register with Register: dst = dst & src (long), emitted as "andq $dst, $src".
10070 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10071 %{
10072   match(Set dst (AndL dst src));
10073   effect(KILL cr); // flags are declared killed
10074 
10075   format %{ "andq    $dst, $src\t# long" %}
10076   opcode(0x23);
10077   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as andI_rReg, with the _wide REX variant
10078   ins_pipe(ialu_reg_reg);
10079 %}
10080 
10081 // And Register with Immediate 255: dst & 0xFF (long) is emitted as a zero-extending byte move (movzbq) of dst onto itself.
10082 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10083 %{ // No rFlagsReg operand is declared for this form, unlike the andq variants.
10084   match(Set dst (AndL dst src));
10085 
10086   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10087   opcode(0x0F, 0xB6);
10088   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
10089   ins_pipe(ialu_reg);
10090 %}
10091 
10092 // And Register with Immediate 65535: dst & 0xFFFF (long) is emitted as a zero-extending word move (movzwq) of dst onto itself.
10093 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10094 %{ // No rFlagsReg operand is declared for this form, unlike the andq variants.
10095   match(Set dst (AndL dst src));
10096 
10097   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10098   opcode(0x0F, 0xB7);
10099   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // the immediate itself is never encoded
10100   ins_pipe(ialu_reg);
10101 %}
10102 
10103 // And Register with Immediate: dst = dst & src (long) for an immL32 constant (opcode group 81 /4).
10104 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10105 %{
10106   match(Set dst (AndL dst src));
10107   effect(KILL cr); // flags are declared killed
10108 
10109   format %{ "andq    $dst, $src\t# long" %}
10110   opcode(0x81, 0x04); /* Opcode 81 /4 */
10111   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
10112   ins_pipe(ialu_reg);
10113 %}
10114 
10115 // And Register with Memory: dst = dst & LoadL(src), folding the load into the andq.
10116 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10117 %{
10118   match(Set dst (AndL dst (LoadL src)));
10119   effect(KILL cr); // flags are declared killed
10120 
10121   ins_cost(125);
10122   format %{ "andq    $dst, $src\t# long" %}
10123   opcode(0x23);
10124   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10125   ins_pipe(ialu_reg_mem);
10126 %}
10127 
10128 // And Memory with Register: matches a StoreL to dst of (LoadL dst) & src, i.e. an in-memory read-modify-write.
10129 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10130 %{
10131   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10132   effect(KILL cr); // flags are declared killed
10133 
10134   ins_cost(150);
10135   format %{ "andq    $dst, $src\t# long" %}
10136   opcode(0x21); /* Opcode 21 /r */
10137   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
10138   ins_pipe(ialu_mem_reg);
10139 %}
10140 
10141 // And Memory with Immediate: matches a StoreL to dst of (LoadL dst) & immL32 constant, i.e. an in-memory read-modify-write.
10142 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10143 %{
10144   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10145   effect(KILL cr); // flags are declared killed
10146 
10147   ins_cost(125);
10148   format %{ "andq    $dst, $src\t# long" %}
10149   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10150   ins_encode(REX_mem_wide(dst), OpcSE(src),
10151              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /4 opcode extension given above
10152   ins_pipe(ialu_mem_imm);
10153 %}
10154 
10155 // Or Instructions
10156 // Or Register with Register: dst = dst | src (long), emitted as "orq $dst, $src".
10157 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10158 %{
10159   match(Set dst (OrL dst src));
10160   effect(KILL cr); // flags are declared killed
10161 
10162   format %{ "orq     $dst, $src\t# long" %}
10163   opcode(0x0B);
10164   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as orI_rReg, with the _wide REX variant
10165   ins_pipe(ialu_reg_reg);
10166 %}
10167 
10168 // Or Register with a pointer cast to long (CastP2X). Uses any_RegP so R15 (TLS register) can be matched without spilling.
10169 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10170   match(Set dst (OrL dst (CastP2X src)));
10171   effect(KILL cr); // flags are declared killed
10172 
10173   format %{ "orq     $dst, $src\t# long" %}
10174   opcode(0x0B);
10175   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // identical encoding to orL_rReg; the cast emits no code
10176   ins_pipe(ialu_reg_reg);
10177 %}
10178 
10179 
10180 // Or Register with Immediate: dst = dst | src (long) for an immL32 constant (opcode group 81 /1).
10181 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10182 %{
10183   match(Set dst (OrL dst src));
10184   effect(KILL cr); // flags are declared killed
10185 
10186   format %{ "orq     $dst, $src\t# long" %}
10187   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10188   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
10189   ins_pipe(ialu_reg);
10190 %}
10191 
10192 // Or Register with Memory: dst = dst | LoadL(src), folding the load into the orq.
10193 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10194 %{
10195   match(Set dst (OrL dst (LoadL src)));
10196   effect(KILL cr); // flags are declared killed
10197 
10198   ins_cost(125);
10199   format %{ "orq     $dst, $src\t# long" %}
10200   opcode(0x0B);
10201   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10202   ins_pipe(ialu_reg_mem);
10203 %}
10204 
10205 // Or Memory with Register: matches a StoreL to dst of (LoadL dst) | src, i.e. an in-memory read-modify-write.
10206 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10207 %{
10208   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10209   effect(KILL cr); // flags are declared killed
10210 
10211   ins_cost(150);
10212   format %{ "orq     $dst, $src\t# long" %}
10213   opcode(0x09); /* Opcode 09 /r */
10214   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
10215   ins_pipe(ialu_mem_reg);
10216 %}
10217 
10218 // Or Memory with Immediate: matches a StoreL to dst of (LoadL dst) | immL32 constant, i.e. an in-memory read-modify-write.
10219 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10220 %{
10221   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10222   effect(KILL cr); // flags are declared killed
10223 
10224   ins_cost(125);
10225   format %{ "orq     $dst, $src\t# long" %}
10226   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10227   ins_encode(REX_mem_wide(dst), OpcSE(src),
10228              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /1 opcode extension given above
10229   ins_pipe(ialu_mem_imm);
10230 %}
10231 
10232 // Xor Instructions
10233 // Xor Register with Register: dst = dst ^ src (long), emitted as "xorq $dst, $src".
10234 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10235 %{
10236   match(Set dst (XorL dst src));
10237   effect(KILL cr); // flags are declared killed
10238 
10239   format %{ "xorq    $dst, $src\t# long" %}
10240   opcode(0x33);
10241   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as xorI_rReg, with the _wide REX variant
10242   ins_pipe(ialu_reg_reg);
10243 %}
10244 
10245 // Xor Register with Immediate -1: dst ^ -1L is emitted as a 64-bit NOT of dst; no rFlagsReg operand is declared.
10246 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10247   match(Set dst (XorL dst imm));  
10248 
10249   format %{ "notq   $dst" %}  
10250   ins_encode %{
10251      __ notq($dst$$Register);
10252   %}
10253   ins_pipe(ialu_reg);
10254 %}
10255 
10256 // Xor Register with Immediate: dst = dst ^ src (long) for an immL32 constant (opcode group 81 /6).
10257 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10258 %{
10259   match(Set dst (XorL dst src));
10260   effect(KILL cr); // flags are declared killed
10261 
10262   format %{ "xorq    $dst, $src\t# long" %}
10263   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10264   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // encoders are defined elsewhere; presumably pick an 8- or 32-bit immediate form
10265   ins_pipe(ialu_reg);
10266 %}
10267 
10268 // Xor Register with Memory: dst = dst ^ LoadL(src), folding the load into the xorq.
10269 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10270 %{
10271   match(Set dst (XorL dst (LoadL src)));
10272   effect(KILL cr); // flags are declared killed
10273 
10274   ins_cost(125);
10275   format %{ "xorq    $dst, $src\t# long" %}
10276   opcode(0x33);
10277   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10278   ins_pipe(ialu_reg_mem);
10279 %}
10280 
10281 // Xor Memory with Register: matches a StoreL to dst of (LoadL dst) ^ src, i.e. an in-memory read-modify-write.
10282 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10283 %{
10284   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10285   effect(KILL cr); // flags are declared killed
10286 
10287   ins_cost(150);
10288   format %{ "xorq    $dst, $src\t# long" %}
10289   opcode(0x31); /* Opcode 31 /r */
10290   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand first, memory operand second
10291   ins_pipe(ialu_mem_reg);
10292 %}
10293 
10294 // Xor Memory with Immediate: matches a StoreL to dst of (LoadL dst) ^ immL32 constant, i.e. an in-memory read-modify-write.
10295 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10296 %{
10297   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10298   effect(KILL cr); // flags are declared killed
10299 
10300   ins_cost(125);
10301   format %{ "xorq    $dst, $src\t# long" %}
10302   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10303   ins_encode(REX_mem_wide(dst), OpcSE(src),
10304              RM_opc_mem(secondary, dst), Con8or32(src)); // 'secondary' is the /6 opcode extension given above
10305   ins_pipe(ialu_mem_imm);
10306 %}
10307 
10308 // Convert Int to Boolean: dst = (src != 0) ? 1 : 0 via testl / setnz / movzbl.
10309 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10310 %{
10311   match(Set dst (Conv2B src));
10312   effect(KILL cr); // the testl sets flags, so they are declared killed
10313 
10314   format %{ "testl   $src, $src\t# ci2b\n\t"
10315             "setnz   $dst\n\t"
10316             "movzbl  $dst, $dst" %}
10317   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10318              setNZ_reg(dst),
10319              REX_reg_breg(dst, dst), // movzbl
10320              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)); // widen the byte written by setnz to a full int
10321   ins_pipe(pipe_slow); // XXX
10322 %}
10323 
10324 // Convert Pointer to Boolean: dst = (src != NULL) ? 1 : 0 via testq / setnz / movzbl.
10325 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10326 %{
10327   match(Set dst (Conv2B src));
10328   effect(KILL cr); // the testq sets flags, so they are declared killed
10329 
10330   format %{ "testq   $src, $src\t# cp2b\n\t"
10331             "setnz   $dst\n\t"
10332             "movzbl  $dst, $dst" %}
10333   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10334              setNZ_reg(dst),
10335              REX_reg_breg(dst, dst), // movzbl
10336              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)); // widen the byte written by setnz to a full int
10337   ins_pipe(pipe_slow); // XXX
10338 %}
10339 
10340 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10341 %{ // CmpLTMask p q: per the format, dst is 0 or 1 after setlt/movzbl and the final negl turns that into 0 or -1.
10342   match(Set dst (CmpLTMask p q));
10343   effect(KILL cr); // the cmpl sets flags, so they are declared killed
10344 
10345   ins_cost(400); // XXX
10346   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10347             "setlt   $dst\n\t"
10348             "movzbl  $dst, $dst\n\t"
10349             "negl    $dst" %}
10350   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10351              setLT_reg(dst),
10352              REX_reg_breg(dst, dst), // movzbl
10353              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10354              neg_reg(dst)); // negl
10355   ins_pipe(pipe_slow);
10356 %}
10357 
10358 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10359 %{ // CmpLTMask against the immI0 operand: a single arithmetic shift right by 31 replicates dst's sign bit.
10360   match(Set dst (CmpLTMask dst zero));
10361   effect(KILL cr); // flags are declared killed
10362 
10363   ins_cost(100); // XXX
10364   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10365   opcode(0xC1, 0x7);  /* C1 /7 ib */
10366   ins_encode(reg_opc_imm(dst, 0x1F)); // 0x1F == 31, the shift count shown in the format
10367   ins_pipe(ialu_reg);
10368 %}
10369 
10370 
10371 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10372                          rRegI tmp,
10373                          rFlagsReg cr)
10374 %{ // Fused form of p = ((CmpLTMask p q) & y) + (p - q); tmp is a scratch register holding the mask.
10375   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10376   effect(TEMP tmp, KILL cr); // tmp is scratch; flags are declared killed
10377 
10378   ins_cost(400); // XXX
10379   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10380             "sbbl    $tmp, $tmp\n\t"
10381             "andl    $tmp, $y\n\t"
10382             "addl    $p, $tmp" %}
10383   ins_encode(enc_cmpLTP(p, q, y, tmp)); // NOTE(review): per the format the mask comes from the subl borrow (sbbl), an unsigned compare; confirm enc_cmpLTP matches CmpLTMask semantics
10384   ins_pipe(pipe_cmplt);
10385 %}
10386 
10387 /* If I enable this, I encourage spilling in the inner loop of compress.
10388 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10389 %{
10390   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10391   effect( TEMP tmp, KILL cr );
10392   ins_cost(400);
10393 
10394   format %{ "SUB    $p,$q\n\t"
10395             "SBB    RCX,RCX\n\t"
10396             "AND    RCX,$y\n\t"
10397             "ADD    $p,RCX" %}
10398   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10399 %}
10400 */
10401 
10402 //---------- FP Instructions------------------------------------------------
10403 
10404 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10405 %{ // Float compare into flags (rFlagsRegU): ucomiss followed by the cmpfp_fixup sequence listed in the format.
10406   match(Set cr (CmpF src1 src2));
10407 
10408   ins_cost(145);
10409   format %{ "ucomiss $src1, $src2\n\t"
10410             "jnp,s   exit\n\t"
10411             "pushfq\t# saw NaN, set CF\n\t"
10412             "andq    [rsp], #0xffffff2b\n\t"
10413             "popfq\n"
10414     "exit:   nop\t# avoid branch to branch" %}
10415   opcode(0x0F, 0x2E);
10416   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10417              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10418   ins_pipe(pipe_slow);
10419 %}
10420 
10421 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{ // Float compare into rFlagsRegUCF: bare ucomiss, no fixup sequence.
10422   match(Set cr (CmpF src1 src2));
10423 
10424   ins_cost(100); // no fixup is emitted, so use the same cost as the other *CF compare variants (was 145, the fixup-variant cost)
10425   format %{ "ucomiss $src1, $src2" %}
10426   ins_encode %{
10427     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10428   %}
10429   ins_pipe(pipe_slow);
10430 %}
10431 
10432 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10433 %{ // Float compare against a loaded value into flags (rFlagsRegU): ucomiss with a memory operand, then cmpfp_fixup.
10434   match(Set cr (CmpF src1 (LoadF src2)));
10435 
10436   ins_cost(145);
10437   format %{ "ucomiss $src1, $src2\n\t"
10438             "jnp,s   exit\n\t"
10439             "pushfq\t# saw NaN, set CF\n\t"
10440             "andq    [rsp], #0xffffff2b\n\t"
10441             "popfq\n"
10442     "exit:   nop\t# avoid branch to branch" %}
10443   opcode(0x0F, 0x2E);
10444   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10445              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10446   ins_pipe(pipe_slow);
10447 %}
10448 
10449 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{ // Float compare against a loaded value into rFlagsRegUCF: bare ucomiss, no fixup.
10450   match(Set cr (CmpF src1 (LoadF src2)));
10451 
10452   ins_cost(100);
10453   format %{ "ucomiss $src1, $src2" %}
10454   opcode(0x0F, 0x2E);
10455   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10456   ins_pipe(pipe_slow);
10457 %}
10458 
10459 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10460 %{ // Float compare against a constant into flags (rFlagsRegU): ucomiss via load_immF, then cmpfp_fixup.
10461   match(Set cr (CmpF src1 src2));
10462 
10463   ins_cost(145);
10464   format %{ "ucomiss $src1, [$src2]\n\t"
10465             "jnp,s   exit\n\t"
10466             "pushfq\t# saw NaN, set CF\n\t"
10467             "andq    [rsp], #0xffffff2b\n\t"
10468             "popfq\n"
10469     "exit:   nop\t# avoid branch to branch" %}
10470   opcode(0x0F, 0x2E);
10471   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10472              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10473   ins_pipe(pipe_slow);
10474 %}
10475 
10476 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{ // Float compare against a constant into rFlagsRegUCF: bare ucomiss, no fixup.
10477   match(Set cr (CmpF src1 src2));
10478 
10479   ins_cost(100);
10480   format %{ "ucomiss $src1, [$src2]" %}
10481   opcode(0x0F, 0x2E);
10482   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2)); // constant is encoded as a memory operand via load_immF
10483   ins_pipe(pipe_slow);
10484 %}
10485 
10486 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10487 %{ // Double compare into flags (rFlagsRegU): ucomisd followed by the cmpfp_fixup sequence listed in the format.
10488   match(Set cr (CmpD src1 src2));
10489 
10490   ins_cost(145);
10491   format %{ "ucomisd $src1, $src2\n\t"
10492             "jnp,s   exit\n\t"
10493             "pushfq\t# saw NaN, set CF\n\t"
10494             "andq    [rsp], #0xffffff2b\n\t"
10495             "popfq\n"
10496     "exit:   nop\t# avoid branch to branch" %}
10497   opcode(0x66, 0x0F, 0x2E);
10498   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), // the 0x66 byte (OpcP) is emitted before the REX encoder
10499              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10500   ins_pipe(pipe_slow);
10501 %}
10502 
10503 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{ // Double compare into rFlagsRegUCF: bare ucomisd, no fixup sequence.
10504   match(Set cr (CmpD src1 src2));
10505 
10506   ins_cost(100);
10507   format %{ "ucomisd $src1, $src2" %}
10508   ins_encode %{
10509     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10510   %}
10511   ins_pipe(pipe_slow);
10512 %}
10513 
10514 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10515 %{ // Double compare against a loaded value into flags (rFlagsRegU): ucomisd with a memory operand, then cmpfp_fixup.
10516   match(Set cr (CmpD src1 (LoadD src2)));
10517 
10518   ins_cost(145);
10519   format %{ "ucomisd $src1, $src2\n\t"
10520             "jnp,s   exit\n\t"
10521             "pushfq\t# saw NaN, set CF\n\t"
10522             "andq    [rsp], #0xffffff2b\n\t"
10523             "popfq\n"
10524     "exit:   nop\t# avoid branch to branch" %}
10525   opcode(0x66, 0x0F, 0x2E);
10526   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), // the 0x66 byte (OpcP) is emitted before the REX encoder
10527              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10528   ins_pipe(pipe_slow);
10529 %}
10530 
10531 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{ // Double compare against a loaded value into rFlagsRegUCF: bare ucomisd, no fixup.
10532   match(Set cr (CmpD src1 (LoadD src2)));
10533 
10534   ins_cost(100);
10535   format %{ "ucomisd $src1, $src2" %}
10536   opcode(0x66, 0x0F, 0x2E);
10537   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2)); // the 0x66 byte (OpcP) is emitted before the REX encoder
10538   ins_pipe(pipe_slow);
10539 %}
10540 
10541 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10542 %{ // Double compare against a constant into flags (rFlagsRegU): ucomisd via load_immD, then cmpfp_fixup.
10543   match(Set cr (CmpD src1 src2));
10544 
10545   ins_cost(145);
10546   format %{ "ucomisd $src1, [$src2]\n\t"
10547             "jnp,s   exit\n\t"
10548             "pushfq\t# saw NaN, set CF\n\t"
10549             "andq    [rsp], #0xffffff2b\n\t"
10550             "popfq\n"
10551     "exit:   nop\t# avoid branch to branch" %}
10552   opcode(0x66, 0x0F, 0x2E);
10553   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2), // constant is encoded as a memory operand via load_immD
10554              cmpfp_fixup); // per the format: on parity (NaN) the saved flags are masked with 0xffffff2b (clears bits 2, 4, 6, 7; bit 0 is kept)
10555   ins_pipe(pipe_slow);
10556 %}
10557 
10558 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{ // Double compare against a constant into rFlagsRegUCF: bare ucomisd, no fixup.
10559   match(Set cr (CmpD src1 src2));
10560 
10561   ins_cost(100);
10562   format %{ "ucomisd $src1, [$src2]" %}
10563   opcode(0x66, 0x0F, 0x2E);
10564   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2)); // constant is encoded as a memory operand via load_immD
10565   ins_pipe(pipe_slow);
10566 %}
10567 
10568 // Compare into -1,0,1 (CmpF3, register operands). Per the format: -1 on unordered or below, otherwise 0 if equal and 1 if not.
10569 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10570 %{
10571   match(Set dst (CmpF3 src1 src2));
10572   effect(KILL cr); // the ucomiss sets flags, so they are declared killed
10573 
10574   ins_cost(275);
10575   format %{ "ucomiss $src1, $src2\n\t"
10576             "movl    $dst, #-1\n\t"
10577             "jp,s    done\n\t"
10578             "jb,s    done\n\t"
10579             "setne   $dst\n\t"
10580             "movzbl  $dst, $dst\n"
10581     "done:" %}
10582 
10583   opcode(0x0F, 0x2E);
10584   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10585              cmpfp3(dst)); // cmpfp3 is defined elsewhere; the format lists the sequence it is expected to emit
10586   ins_pipe(pipe_slow);
10587 %}
10588 
10589 // Compare into -1,0,1 (CmpF3, second operand loaded from memory). Per the format: -1 on unordered or below, otherwise 0 if equal and 1 if not.
10590 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10591 %{
10592   match(Set dst (CmpF3 src1 (LoadF src2)));
10593   effect(KILL cr); // the ucomiss sets flags, so they are declared killed
10594 
10595   ins_cost(275);
10596   format %{ "ucomiss $src1, $src2\n\t"
10597             "movl    $dst, #-1\n\t"
10598             "jp,s    done\n\t"
10599             "jb,s    done\n\t"
10600             "setne   $dst\n\t"
10601             "movzbl  $dst, $dst\n"
10602     "done:" %}
10603 
10604   opcode(0x0F, 0x2E);
10605   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10606              cmpfp3(dst)); // cmpfp3 is defined elsewhere; the format lists the sequence it is expected to emit
10607   ins_pipe(pipe_slow);
10608 %}
10609 
10610 // Compare into -1,0,1 (CmpF3, constant second operand). Per the format: -1 on unordered or below, otherwise 0 if equal and 1 if not.
10611 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10612 %{
10613   match(Set dst (CmpF3 src1 src2));
10614   effect(KILL cr); // the ucomiss sets flags, so they are declared killed
10615 
10616   ins_cost(275);
10617   format %{ "ucomiss $src1, [$src2]\n\t"
10618             "movl    $dst, #-1\n\t"
10619             "jp,s    done\n\t"
10620             "jb,s    done\n\t"
10621             "setne   $dst\n\t"
10622             "movzbl  $dst, $dst\n"
10623     "done:" %}
10624 
10625   opcode(0x0F, 0x2E);
10626   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2), // constant is encoded as a memory operand via load_immF
10627              cmpfp3(dst)); // cmpfp3 is defined elsewhere; the format lists the sequence it is expected to emit
10628   ins_pipe(pipe_slow);
10629 %}
10630 
10631 // Compare into -1,0,1 (CmpD3, register operands). Per the format: -1 on unordered or below, otherwise 0 if equal and 1 if not.
10632 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10633 %{
10634   match(Set dst (CmpD3 src1 src2));
10635   effect(KILL cr); // the ucomisd sets flags, so they are declared killed
10636 
10637   ins_cost(275);
10638   format %{ "ucomisd $src1, $src2\n\t"
10639             "movl    $dst, #-1\n\t"
10640             "jp,s    done\n\t"
10641             "jb,s    done\n\t"
10642             "setne   $dst\n\t"
10643             "movzbl  $dst, $dst\n"
10644     "done:" %}
10645 
10646   opcode(0x66, 0x0F, 0x2E);
10647   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), // the 0x66 byte (OpcP) is emitted before the REX encoder
10648              cmpfp3(dst)); // cmpfp3 is defined elsewhere; the format lists the sequence it is expected to emit
10649   ins_pipe(pipe_slow);
10650 %}
10651 
10652 // Compare into -1,0,1
// Three-way double compare (CmpD3) where the second operand is a LoadD folded
// into the instruction as a memory operand. Result mapping per the format
// text: unordered/below -> -1, equal -> 0, above -> 1 (tail emitted by cmpfp3,
// defined elsewhere).
10653 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10654 %{
10655   match(Set dst (CmpD3 src1 (LoadD src2)));
10656   effect(KILL cr);  // the flags register is declared clobbered
10657 
10658   ins_cost(275);
10659   format %{ "ucomisd $src1, $src2\n\t"
10660             "movl    $dst, #-1\n\t"
10661             "jp,s    done\n\t"
10662             "jb,s    done\n\t"
10663             "setne   $dst\n\t"
10664             "movzbl  $dst, $dst\n"
10665     "done:" %}
10666 
10667   opcode(0x66, 0x0F, 0x2E);
10668   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10669              cmpfp3(dst));
10670   ins_pipe(pipe_slow);
10671 %}
10672 
10673 // Compare into -1,0,1
// Three-way double compare (CmpD3) against an immD constant. The format shows
// the constant as a memory operand ([$src2]); the operand bytes come from
// load_immD (defined elsewhere) -- presumably a constant-area reference,
// confirm against that enc_class. Result mapping per the format text:
// unordered/below -> -1, equal -> 0, above -> 1.
10674 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10675 %{
10676   match(Set dst (CmpD3 src1 src2));
10677   effect(KILL cr);  // the flags register is declared clobbered
10678 
10679   ins_cost(275);
10680   format %{ "ucomisd $src1, [$src2]\n\t"
10681             "movl    $dst, #-1\n\t"
10682             "jp,s    done\n\t"
10683             "jb,s    done\n\t"
10684             "setne   $dst\n\t"
10685             "movzbl  $dst, $dst\n"
10686     "done:" %}
10687 
10688   opcode(0x66, 0x0F, 0x2E);
10689   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10690              cmpfp3(dst));
10691   ins_pipe(pipe_slow);
10692 %}
10693 
// Float add, register source: dst = dst + src (AddF). dst is both an input
// and the result. Shown as addss in the format; opcode bytes F3 0F 58.
10694 instruct addF_reg(regF dst, regF src)
10695 %{
10696   match(Set dst (AddF dst src));
10697 
10698   format %{ "addss   $dst, $src" %}
10699   ins_cost(150); // XXX
10700   opcode(0xF3, 0x0F, 0x58);
10701   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10702   ins_pipe(pipe_slow);
10703 %}
10704 
// Float add with the second operand loaded from memory:
// dst = dst + LoadF(src). The load is folded into the addss memory operand.
10705 instruct addF_mem(regF dst, memory src)
10706 %{
10707   match(Set dst (AddF dst (LoadF src)));
10708 
10709   format %{ "addss   $dst, $src" %}
10710   ins_cost(150); // XXX
10711   opcode(0xF3, 0x0F, 0x58);
10712   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10713   ins_pipe(pipe_slow);
10714 %}
10715 
// Float add of an immF constant: dst = dst + src. The format shows the
// constant as a memory operand ([$src]); load_immF (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10716 instruct addF_imm(regF dst, immF src)
10717 %{
10718   match(Set dst (AddF dst src));
10719 
10720   format %{ "addss   $dst, [$src]" %}
10721   ins_cost(150); // XXX
10722   opcode(0xF3, 0x0F, 0x58);
10723   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10724   ins_pipe(pipe_slow);
10725 %}
10726 
// Double add, register source: dst = dst + src (AddD). dst is both an input
// and the result. Shown as addsd in the format; opcode bytes F2 0F 58.
10727 instruct addD_reg(regD dst, regD src)
10728 %{
10729   match(Set dst (AddD dst src));
10730 
10731   format %{ "addsd   $dst, $src" %}
10732   ins_cost(150); // XXX
10733   opcode(0xF2, 0x0F, 0x58);
10734   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10735   ins_pipe(pipe_slow);
10736 %}
10737 
// Double add with the second operand loaded from memory:
// dst = dst + LoadD(src). The load is folded into the addsd memory operand.
10738 instruct addD_mem(regD dst, memory src)
10739 %{
10740   match(Set dst (AddD dst (LoadD src)));
10741 
10742   format %{ "addsd   $dst, $src" %}
10743   ins_cost(150); // XXX
10744   opcode(0xF2, 0x0F, 0x58);
10745   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10746   ins_pipe(pipe_slow);
10747 %}
10748 
// Double add of an immD constant: dst = dst + src. The format shows the
// constant as a memory operand ([$src]); load_immD (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10749 instruct addD_imm(regD dst, immD src)
10750 %{
10751   match(Set dst (AddD dst src));
10752 
10753   format %{ "addsd   $dst, [$src]" %}
10754   ins_cost(150); // XXX
10755   opcode(0xF2, 0x0F, 0x58);
10756   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10757   ins_pipe(pipe_slow);
10758 %}
10759 
// Float subtract, register source: dst = dst - src (SubF). dst is both an
// input and the result. Shown as subss in the format; opcode bytes F3 0F 5C.
10760 instruct subF_reg(regF dst, regF src)
10761 %{
10762   match(Set dst (SubF dst src));
10763 
10764   format %{ "subss   $dst, $src" %}
10765   ins_cost(150); // XXX
10766   opcode(0xF3, 0x0F, 0x5C);
10767   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10768   ins_pipe(pipe_slow);
10769 %}
10770 
// Float subtract with the subtrahend loaded from memory:
// dst = dst - LoadF(src). The load is folded into the subss memory operand.
10771 instruct subF_mem(regF dst, memory src)
10772 %{
10773   match(Set dst (SubF dst (LoadF src)));
10774 
10775   format %{ "subss   $dst, $src" %}
10776   ins_cost(150); // XXX
10777   opcode(0xF3, 0x0F, 0x5C);
10778   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10779   ins_pipe(pipe_slow);
10780 %}
10781 
// Float subtract of an immF constant: dst = dst - src. The format shows the
// constant as a memory operand ([$src]); load_immF (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10782 instruct subF_imm(regF dst, immF src)
10783 %{
10784   match(Set dst (SubF dst src));
10785 
10786   format %{ "subss   $dst, [$src]" %}
10787   ins_cost(150); // XXX
10788   opcode(0xF3, 0x0F, 0x5C);
10789   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10790   ins_pipe(pipe_slow);
10791 %}
10792 
// Double subtract, register source: dst = dst - src (SubD). dst is both an
// input and the result. Shown as subsd in the format; opcode bytes F2 0F 5C.
10793 instruct subD_reg(regD dst, regD src)
10794 %{
10795   match(Set dst (SubD dst src));
10796 
10797   format %{ "subsd   $dst, $src" %}
10798   ins_cost(150); // XXX
10799   opcode(0xF2, 0x0F, 0x5C);
10800   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10801   ins_pipe(pipe_slow);
10802 %}
10803 
// Double subtract with the subtrahend loaded from memory:
// dst = dst - LoadD(src). The load is folded into the subsd memory operand.
10804 instruct subD_mem(regD dst, memory src)
10805 %{
10806   match(Set dst (SubD dst (LoadD src)));
10807 
10808   format %{ "subsd   $dst, $src" %}
10809   ins_cost(150); // XXX
10810   opcode(0xF2, 0x0F, 0x5C);
10811   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10812   ins_pipe(pipe_slow);
10813 %}
10814 
// Double subtract of an immD constant: dst = dst - src. The format shows the
// constant as a memory operand ([$src]); load_immD (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10815 instruct subD_imm(regD dst, immD src)
10816 %{
10817   match(Set dst (SubD dst src));
10818 
10819   format %{ "subsd   $dst, [$src]" %}
10820   ins_cost(150); // XXX
10821   opcode(0xF2, 0x0F, 0x5C);
10822   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10823   ins_pipe(pipe_slow);
10824 %}
10825 
// Float multiply, register source: dst = dst * src (MulF). dst is both an
// input and the result. Shown as mulss in the format; opcode bytes F3 0F 59.
10826 instruct mulF_reg(regF dst, regF src)
10827 %{
10828   match(Set dst (MulF dst src));
10829 
10830   format %{ "mulss   $dst, $src" %}
10831   ins_cost(150); // XXX
10832   opcode(0xF3, 0x0F, 0x59);
10833   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10834   ins_pipe(pipe_slow);
10835 %}
10836 
// Float multiply with the second operand loaded from memory:
// dst = dst * LoadF(src). The load is folded into the mulss memory operand.
10837 instruct mulF_mem(regF dst, memory src)
10838 %{
10839   match(Set dst (MulF dst (LoadF src)));
10840 
10841   format %{ "mulss   $dst, $src" %}
10842   ins_cost(150); // XXX
10843   opcode(0xF3, 0x0F, 0x59);
10844   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10845   ins_pipe(pipe_slow);
10846 %}
10847 
// Float multiply by an immF constant: dst = dst * src. The format shows the
// constant as a memory operand ([$src]); load_immF (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10848 instruct mulF_imm(regF dst, immF src)
10849 %{
10850   match(Set dst (MulF dst src));
10851 
10852   format %{ "mulss   $dst, [$src]" %}
10853   ins_cost(150); // XXX
10854   opcode(0xF3, 0x0F, 0x59);
10855   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10856   ins_pipe(pipe_slow);
10857 %}
10858 
// Double multiply, register source: dst = dst * src (MulD). dst is both an
// input and the result. Shown as mulsd in the format; opcode bytes F2 0F 59.
10859 instruct mulD_reg(regD dst, regD src)
10860 %{
10861   match(Set dst (MulD dst src));
10862 
10863   format %{ "mulsd   $dst, $src" %}
10864   ins_cost(150); // XXX
10865   opcode(0xF2, 0x0F, 0x59);
10866   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10867   ins_pipe(pipe_slow);
10868 %}
10869 
// Double multiply with the second operand loaded from memory:
// dst = dst * LoadD(src). The load is folded into the mulsd memory operand.
10870 instruct mulD_mem(regD dst, memory src)
10871 %{
10872   match(Set dst (MulD dst (LoadD src)));
10873 
10874   format %{ "mulsd   $dst, $src" %}
10875   ins_cost(150); // XXX
10876   opcode(0xF2, 0x0F, 0x59);
10877   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10878   ins_pipe(pipe_slow);
10879 %}
10880 
// Double multiply by an immD constant: dst = dst * src. The format shows the
// constant as a memory operand ([$src]); load_immD (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10881 instruct mulD_imm(regD dst, immD src)
10882 %{
10883   match(Set dst (MulD dst src));
10884 
10885   format %{ "mulsd   $dst, [$src]" %}
10886   ins_cost(150); // XXX
10887   opcode(0xF2, 0x0F, 0x59);
10888   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10889   ins_pipe(pipe_slow);
10890 %}
10891 
// Float divide, register source: dst = dst / src (DivF). dst is both an
// input and the result. Shown as divss in the format; opcode bytes F3 0F 5E.
10892 instruct divF_reg(regF dst, regF src)
10893 %{
10894   match(Set dst (DivF dst src));
10895 
10896   format %{ "divss   $dst, $src" %}
10897   ins_cost(150); // XXX
10898   opcode(0xF3, 0x0F, 0x5E);
10899   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10900   ins_pipe(pipe_slow);
10901 %}
10902 
// Float divide with the divisor loaded from memory:
// dst = dst / LoadF(src). The load is folded into the divss memory operand.
10903 instruct divF_mem(regF dst, memory src)
10904 %{
10905   match(Set dst (DivF dst (LoadF src)));
10906 
10907   format %{ "divss   $dst, $src" %}
10908   ins_cost(150); // XXX
10909   opcode(0xF3, 0x0F, 0x5E);
10910   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10911   ins_pipe(pipe_slow);
10912 %}
10913 
// Float divide by an immF constant: dst = dst / src. The format shows the
// constant as a memory operand ([$src]); load_immF (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10914 instruct divF_imm(regF dst, immF src)
10915 %{
10916   match(Set dst (DivF dst src));
10917 
10918   format %{ "divss   $dst, [$src]" %}
10919   ins_cost(150); // XXX
10920   opcode(0xF3, 0x0F, 0x5E);
10921   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10922   ins_pipe(pipe_slow);
10923 %}
10924 
// Double divide, register source: dst = dst / src (DivD). dst is both an
// input and the result. Shown as divsd in the format; opcode bytes F2 0F 5E.
10925 instruct divD_reg(regD dst, regD src)
10926 %{
10927   match(Set dst (DivD dst src));
10928 
10929   format %{ "divsd   $dst, $src" %}
10930   ins_cost(150); // XXX
10931   opcode(0xF2, 0x0F, 0x5E);
10932   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10933   ins_pipe(pipe_slow);
10934 %}
10935 
// Double divide with the divisor loaded from memory:
// dst = dst / LoadD(src). The load is folded into the divsd memory operand.
10936 instruct divD_mem(regD dst, memory src)
10937 %{
10938   match(Set dst (DivD dst (LoadD src)));
10939 
10940   format %{ "divsd   $dst, $src" %}
10941   ins_cost(150); // XXX
10942   opcode(0xF2, 0x0F, 0x5E);
10943   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10944   ins_pipe(pipe_slow);
10945 %}
10946 
// Double divide by an immD constant: dst = dst / src. The format shows the
// constant as a memory operand ([$src]); load_immD (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
10947 instruct divD_imm(regD dst, immD src)
10948 %{
10949   match(Set dst (DivD dst src));
10950 
10951   format %{ "divsd   $dst, [$src]" %}
10952   ins_cost(150); // XXX
10953   opcode(0xF2, 0x0F, 0x5E);
10954   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
10955   ins_pipe(pipe_slow);
10956 %}
10957 
// Single-precision square root. There is no float sqrt node in the match
// rule: it recognizes the whole tree ConvD2F(SqrtD(ConvF2D src)) -- widen to
// double, take the root, narrow back -- and replaces it with one sqrtss
// (per the format). Unlike the binary ops, dst is not also an input here.
10958 instruct sqrtF_reg(regF dst, regF src)
10959 %{
10960   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10961 
10962   format %{ "sqrtss  $dst, $src" %}
10963   ins_cost(150); // XXX
10964   opcode(0xF3, 0x0F, 0x51);
10965   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10966   ins_pipe(pipe_slow);
10967 %}
10968 
// Single-precision square root of a float loaded from memory. Matches
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and folds the load into the sqrtss
// memory operand (per the format).
10969 instruct sqrtF_mem(regF dst, memory src)
10970 %{
10971   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10972 
10973   format %{ "sqrtss  $dst, $src" %}
10974   ins_cost(150); // XXX
10975   opcode(0xF3, 0x0F, 0x51);
10976   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10977   ins_pipe(pipe_slow);
10978 %}
10979 
// Single-precision square root of an immF constant. Matches
// ConvD2F(SqrtD(ConvF2D src)) with a constant src; the format shows it as a
// memory operand ([$src]), encoded by load_immF (defined elsewhere) --
// presumably a constant-area reference, confirm there.
10980 instruct sqrtF_imm(regF dst, immF src)
10981 %{
10982   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10983 
10984   format %{ "sqrtss  $dst, [$src]" %}
10985   ins_cost(150); // XXX
10986   opcode(0xF3, 0x0F, 0x51);
10987   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src));
10988   ins_pipe(pipe_slow);
10989 %}
10990 
// Double-precision square root, register source: dst = SqrtD(src).
// Shown as sqrtsd in the format; opcode bytes F2 0F 51.
10991 instruct sqrtD_reg(regD dst, regD src)
10992 %{
10993   match(Set dst (SqrtD src));
10994 
10995   format %{ "sqrtsd  $dst, $src" %}
10996   ins_cost(150); // XXX
10997   opcode(0xF2, 0x0F, 0x51);
10998   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10999   ins_pipe(pipe_slow);
11000 %}
11001 
// Double-precision square root of a value loaded from memory:
// dst = SqrtD(LoadD(src)). The load is folded into the sqrtsd memory operand.
11002 instruct sqrtD_mem(regD dst, memory src)
11003 %{
11004   match(Set dst (SqrtD (LoadD src)));
11005 
11006   format %{ "sqrtsd  $dst, $src" %}
11007   ins_cost(150); // XXX
11008   opcode(0xF2, 0x0F, 0x51);
11009   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11010   ins_pipe(pipe_slow);
11011 %}
11012 
// Double-precision square root of an immD constant. The format shows the
// constant as a memory operand ([$src]); load_immD (defined elsewhere)
// encodes it -- presumably as a constant-area reference, confirm there.
11013 instruct sqrtD_imm(regD dst, immD src)
11014 %{
11015   match(Set dst (SqrtD src));
11016 
11017   format %{ "sqrtsd  $dst, [$src]" %}
11018   ins_cost(150); // XXX
11019   opcode(0xF2, 0x0F, 0x51);
11020   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src));
11021   ins_pipe(pipe_slow);
11022 %}
11023 
// Float absolute value in place: dst = AbsF(dst). The format describes it as
// an andps with a 0x7fffffff mask (clearing the sign bit); the actual bytes
// come from absF_encoding, defined elsewhere. No ins_cost is given here.
11024 instruct absF_reg(regF dst)
11025 %{
11026   match(Set dst (AbsF dst));
11027 
11028   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
11029   ins_encode(absF_encoding(dst));
11030   ins_pipe(pipe_slow);
11031 %}
11032 
// Double absolute value in place: dst = AbsD(dst). The format describes it
// as an andpd with a 0x7fffffffffffffff mask (clearing the sign bit); the
// actual bytes come from absD_encoding, defined elsewhere.
11033 instruct absD_reg(regD dst)
11034 %{
11035   match(Set dst (AbsD dst));
11036 
11037   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
11038             "# abs double by sign masking" %}
11039   ins_encode(absD_encoding(dst));
11040   ins_pipe(pipe_slow);
11041 %}
11042 
// Float negation in place: dst = NegF(dst). The format describes it as an
// xorps with a 0x80000000 mask (flipping the sign bit); the actual bytes
// come from negF_encoding, defined elsewhere.
11043 instruct negF_reg(regF dst)
11044 %{
11045   match(Set dst (NegF dst));
11046 
11047   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
11048   ins_encode(negF_encoding(dst));
11049   ins_pipe(pipe_slow);
11050 %}
11051 
// Double negation in place: dst = NegD(dst). The format describes it as an
// xorpd with a 0x8000000000000000 mask (flipping the sign bit); the actual
// bytes come from negD_encoding, defined elsewhere.
11052 instruct negD_reg(regD dst)
11053 %{
11054   match(Set dst (NegD dst));
11055 
11056   format %{ "xorpd   $dst, [0x8000000000000000]\t"
11057             "# neg double by sign flipping" %}
11058   ins_encode(negD_encoding(dst));
11059   ins_pipe(pipe_slow);
11060 %}
11061 
11062 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine in place: dst = CosD(dst). The encoding brackets the two
// opcode bytes D9 FF (x87 fcos) between Push_SrcXD(dst) and
// Push_ResultXD(dst). Those helpers are defined elsewhere -- presumably they
// move the value from the regD operand to the x87 stack and back; confirm.
11063 instruct cosD_reg(regD dst) %{
11064   match(Set dst (CosD dst));
11065 
11066   format %{ "dcos   $dst\n\t" %}
11067   opcode(0xD9, 0xFF);
11068   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11069   ins_pipe( pipe_slow );
11070 %}
11071 
// Double sine in place: dst = SinD(dst). The encoding brackets the two
// opcode bytes D9 FE (x87 fsin) between Push_SrcXD(dst) and
// Push_ResultXD(dst). Those helpers are defined elsewhere -- presumably they
// move the value from the regD operand to the x87 stack and back; confirm.
11072 instruct sinD_reg(regD dst) %{
11073   match(Set dst (SinD dst));
11074 
11075   format %{ "dsin   $dst\n\t" %}
11076   opcode(0xD9, 0xFE);
11077   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
11078   ins_pipe( pipe_slow );
11079 %}
11080 
// Double tangent in place: dst = TanD(dst). No opcode() is declared; the
// bytes are spelled out directly in ins_encode: fptan followed by an fstp
// (labelled as such by the inline comments), bracketed by Push_SrcXD /
// Push_ResultXD (defined elsewhere). The fstp presumably discards the extra
// value fptan leaves on the x87 stack -- confirm against the Intel manual.
11081 instruct tanD_reg(regD dst) %{
11082   match(Set dst (TanD dst));
11083 
11084   format %{ "dtan   $dst\n\t" %}
11085   ins_encode( Push_SrcXD(dst),
11086               Opcode(0xD9), Opcode(0xF2),   //fptan
11087               Opcode(0xDD), Opcode(0xD8),   //fstp st
11088               Push_ResultXD(dst) );
11089   ins_pipe( pipe_slow );
11090 %}
11091 
// Base-10 logarithm in place: dst = Log10D(dst). The constant is pushed
// first (fldlg2), then the operand (Push_SrcXD), so that fyl2x sees both on
// the x87 stack; Push_ResultXD (defined elsewhere) delivers the result to dst.
11092 instruct log10D_reg(regD dst) %{
11093   // The source and result Double operands in XMM registers
11094   match(Set dst (Log10D dst));
11095   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11096   // fyl2x        ; compute log_10(2) * log_2(x)
11097   format %{ "fldlg2\t\t\t#Log10\n\t"
11098             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11099          %}
11100    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11101               Push_SrcXD(dst),
11102               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11103               Push_ResultXD(dst));
11104 
11105   ins_pipe( pipe_slow );
11106 %}
11107 
// Natural logarithm in place: dst = LogD(dst). The constant is pushed first
// (fldln2), then the operand (Push_SrcXD), so that fyl2x sees both on the
// x87 stack; Push_ResultXD (defined elsewhere) delivers the result to dst.
11108 instruct logD_reg(regD dst) %{
11109   // The source and result Double operands in XMM registers
11110   match(Set dst (LogD dst));
11111   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11112   // fyl2x        ; compute log_e(2) * log_2(x)
11113   format %{ "fldln2\t\t\t#Log_e\n\t"
11114             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11115          %}
11116   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11117               Push_SrcXD(dst),
11118               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11119               Push_ResultXD(dst));
11120   ins_pipe( pipe_slow );
11121 %}
11122 
11123 
11124 
11125 //----------Arithmetic Conversion Instructions---------------------------------
11126 
// RoundFloat is matched but generates nothing: the encoding is empty, the
// cost is 0 and the pipeline class is `empty`. dst is both input and result.
11127 instruct roundFloat_nop(regF dst)
11128 %{
11129   match(Set dst (RoundFloat dst));
11130 
11131   ins_cost(0);
11132   ins_encode();
11133   ins_pipe(empty);
11134 %}
11135 
// RoundDouble is matched but generates nothing: the encoding is empty, the
// cost is 0 and the pipeline class is `empty`. dst is both input and result.
11136 instruct roundDouble_nop(regD dst)
11137 %{
11138   match(Set dst (RoundDouble dst));
11139 
11140   ins_cost(0);
11141   ins_encode();
11142   ins_pipe(empty);
11143 %}
11144 
// Float -> double conversion, register source (ConvF2D). Shown as cvtss2sd
// in the format; opcode bytes F3 0F 5A.
11145 instruct convF2D_reg_reg(regD dst, regF src)
11146 %{
11147   match(Set dst (ConvF2D src));
11148 
11149   format %{ "cvtss2sd $dst, $src" %}
11150   opcode(0xF3, 0x0F, 0x5A);
11151   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11152   ins_pipe(pipe_slow); // XXX
11153 %}
11154 
// Float -> double conversion of a float loaded from memory
// (ConvF2D(LoadF src)); the load is folded into the cvtss2sd memory operand.
11155 instruct convF2D_reg_mem(regD dst, memory src)
11156 %{
11157   match(Set dst (ConvF2D (LoadF src)));
11158 
11159   format %{ "cvtss2sd $dst, $src" %}
11160   opcode(0xF3, 0x0F, 0x5A);
11161   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11162   ins_pipe(pipe_slow); // XXX
11163 %}
11164 
// Double -> float conversion, register source (ConvD2F). Shown as cvtsd2ss
// in the format; opcode bytes F2 0F 5A.
11165 instruct convD2F_reg_reg(regF dst, regD src)
11166 %{
11167   match(Set dst (ConvD2F src));
11168 
11169   format %{ "cvtsd2ss $dst, $src" %}
11170   opcode(0xF2, 0x0F, 0x5A);
11171   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11172   ins_pipe(pipe_slow); // XXX
11173 %}
11174 
// Double -> float conversion of a double loaded from memory
// (ConvD2F(LoadD src)); the load is folded into the cvtsd2ss memory operand.
11175 instruct convD2F_reg_mem(regF dst, memory src)
11176 %{
11177   match(Set dst (ConvD2F (LoadD src)));
11178 
11179   format %{ "cvtsd2ss $dst, $src" %}
11180   opcode(0xF2, 0x0F, 0x5A);
11181   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11182   ins_pipe(pipe_slow); // XXX
11183 %}
11184 
11185 // XXX do mem variants
// Float -> int conversion (ConvF2I), shown as cvttss2si in the format.
// Per the format text, a result of 0x80000000 triggers a slow path: the
// source is stored to a fresh 8-byte stack slot, f2i_fixup is called, and the
// corrected result is popped into dst. That tail is emitted by the f2i_fixup
// encoding (defined elsewhere). The compare clobbers flags, hence KILL cr.
11186 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11187 %{
11188   match(Set dst (ConvF2I src));
11189   effect(KILL cr);
11190 
11191   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11192             "cmpl    $dst, #0x80000000\n\t"
11193             "jne,s   done\n\t"
11194             "subq    rsp, #8\n\t"
11195             "movss   [rsp], $src\n\t"
11196             "call    f2i_fixup\n\t"
11197             "popq    $dst\n"
11198     "done:   "%}
11199   opcode(0xF3, 0x0F, 0x2C);
11200   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11201              f2i_fixup(dst, src));
11202   ins_pipe(pipe_slow);
11203 %}
11204 
// Float -> long conversion (ConvF2L), shown as cvttss2siq in the format and
// encoded with the _wide REX variant. Per the format text, a result of
// 0x8000000000000000 triggers a slow path: the source is stored to a fresh
// 8-byte stack slot, f2l_fixup is called, and the corrected result is popped
// into dst. That tail is emitted by the f2l_fixup encoding (defined elsewhere).
11205 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11206 %{
11207   match(Set dst (ConvF2L src));
11208   effect(KILL cr);
11209 
11210   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11211             "cmpq    $dst, [0x8000000000000000]\n\t"
11212             "jne,s   done\n\t"
11213             "subq    rsp, #8\n\t"
11214             "movss   [rsp], $src\n\t"
11215             "call    f2l_fixup\n\t"
11216             "popq    $dst\n"
11217     "done:   "%}
11218   opcode(0xF3, 0x0F, 0x2C);
11219   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11220              f2l_fixup(dst, src));
11221   ins_pipe(pipe_slow);
11222 %}
11223 
// Double -> int conversion (ConvD2I), shown as cvttsd2si in the format.
// Per the format text, a result of 0x80000000 triggers a slow path: the
// source is stored to a fresh 8-byte stack slot, d2i_fixup is called, and the
// corrected result is popped into dst. That tail is emitted by the d2i_fixup
// encoding (defined elsewhere). The compare clobbers flags, hence KILL cr.
11224 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11225 %{
11226   match(Set dst (ConvD2I src));
11227   effect(KILL cr);
11228 
11229   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11230             "cmpl    $dst, #0x80000000\n\t"
11231             "jne,s   done\n\t"
11232             "subq    rsp, #8\n\t"
11233             "movsd   [rsp], $src\n\t"
11234             "call    d2i_fixup\n\t"
11235             "popq    $dst\n"
11236     "done:   "%}
11237   opcode(0xF2, 0x0F, 0x2C);
11238   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11239              d2i_fixup(dst, src));
11240   ins_pipe(pipe_slow);
11241 %}
11242 
// Double -> long conversion (ConvD2L), shown as cvttsd2siq in the format and
// encoded with the _wide REX variant. Per the format text, a result of
// 0x8000000000000000 triggers a slow path: the source is stored to a fresh
// 8-byte stack slot, d2l_fixup is called, and the corrected result is popped
// into dst. That tail is emitted by the d2l_fixup encoding (defined elsewhere).
11243 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11244 %{
11245   match(Set dst (ConvD2L src));
11246   effect(KILL cr);
11247 
11248   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11249             "cmpq    $dst, [0x8000000000000000]\n\t"
11250             "jne,s   done\n\t"
11251             "subq    rsp, #8\n\t"
11252             "movsd   [rsp], $src\n\t"
11253             "call    d2l_fixup\n\t"
11254             "popq    $dst\n"
11255     "done:   "%}
11256   opcode(0xF2, 0x0F, 0x2C);
11257   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11258              d2l_fixup(dst, src));
11259   ins_pipe(pipe_slow);
11260 %}
11261 
// Int -> float conversion from a general register (ConvI2F), shown as
// cvtsi2ssl in the format. Selected only when UseXmmI2F is off; convXI2F_reg
// handles the same ideal node when the flag is on.
11262 instruct convI2F_reg_reg(regF dst, rRegI src)
11263 %{
11264   predicate(!UseXmmI2F);
11265   match(Set dst (ConvI2F src));
11266 
11267   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11268   opcode(0xF3, 0x0F, 0x2A);
11269   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11270   ins_pipe(pipe_slow); // XXX
11271 %}
11272 
// Int -> float conversion of an int loaded from memory (ConvI2F(LoadI src));
// the load is folded into the cvtsi2ssl memory operand. This rule carries no
// predicate, so it applies regardless of UseXmmI2F.
11273 instruct convI2F_reg_mem(regF dst, memory src)
11274 %{
11275   match(Set dst (ConvI2F (LoadI src)));
11276 
11277   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11278   opcode(0xF3, 0x0F, 0x2A);
11279   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11280   ins_pipe(pipe_slow); // XXX
11281 %}
11282 
// Int -> double conversion from a general register (ConvI2D), shown as
// cvtsi2sdl in the format. Selected only when UseXmmI2D is off; convXI2D_reg
// handles the same ideal node when the flag is on.
11283 instruct convI2D_reg_reg(regD dst, rRegI src)
11284 %{
11285   predicate(!UseXmmI2D);
11286   match(Set dst (ConvI2D src));
11287 
11288   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11289   opcode(0xF2, 0x0F, 0x2A);
11290   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11291   ins_pipe(pipe_slow); // XXX
11292 %}
11293 
// Int -> double conversion of an int loaded from memory (ConvI2D(LoadI src));
// the load is folded into the cvtsi2sdl memory operand. This rule carries no
// predicate, so it applies regardless of UseXmmI2D.
11294 instruct convI2D_reg_mem(regD dst, memory src)
11295 %{
11296   match(Set dst (ConvI2D (LoadI src)));
11297 
11298   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11299   opcode(0xF2, 0x0F, 0x2A);
11300   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11301   ins_pipe(pipe_slow); // XXX
11302 %}
11303 
// Int -> float conversion used when UseXmmI2F is on (ConvI2F). Two steps via
// the macro assembler: movdl copies the int register into dst, then cvtdq2ps
// converts dst in place.
11304 instruct convXI2F_reg(regF dst, rRegI src)
11305 %{
11306   predicate(UseXmmI2F);
11307   match(Set dst (ConvI2F src));
11308 
11309   format %{ "movdl $dst, $src\n\t"
11310             "cvtdq2psl $dst, $dst\t# i2f" %}
11311   ins_encode %{
11312     __ movdl($dst$$XMMRegister, $src$$Register);
11313     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11314   %}
11315   ins_pipe(pipe_slow); // XXX
11316 %}
11317 
// Int -> double conversion used when UseXmmI2D is on (ConvI2D). Two steps via
// the macro assembler: movdl copies the int register into dst, then cvtdq2pd
// converts dst in place.
11318 instruct convXI2D_reg(regD dst, rRegI src)
11319 %{
11320   predicate(UseXmmI2D);
11321   match(Set dst (ConvI2D src));
11322 
11323   format %{ "movdl $dst, $src\n\t"
11324             "cvtdq2pdl $dst, $dst\t# i2d" %}
11325   ins_encode %{
11326     __ movdl($dst$$XMMRegister, $src$$Register);
11327     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11328   %}
11329   ins_pipe(pipe_slow); // XXX
11330 %}
11331 
// Long -> float conversion from a general register (ConvL2F), shown as
// cvtsi2ssq in the format. Same opcode bytes as the int form (F3 0F 2A);
// the difference is the _wide REX encoding used for the 64-bit source.
11332 instruct convL2F_reg_reg(regF dst, rRegL src)
11333 %{
11334   match(Set dst (ConvL2F src));
11335 
11336   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11337   opcode(0xF3, 0x0F, 0x2A);
11338   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11339   ins_pipe(pipe_slow); // XXX
11340 %}
11341 
// Long -> float conversion of a long loaded from memory
// (ConvL2F(LoadL src)); the load is folded into the cvtsi2ssq memory operand,
// encoded with the _wide REX variant.
11342 instruct convL2F_reg_mem(regF dst, memory src)
11343 %{
11344   match(Set dst (ConvL2F (LoadL src)));
11345 
11346   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11347   opcode(0xF3, 0x0F, 0x2A);
11348   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11349   ins_pipe(pipe_slow); // XXX
11350 %}
11351 
// Long -> double conversion from a general register (ConvL2D), shown as
// cvtsi2sdq in the format. Same opcode bytes as the int form (F2 0F 2A);
// the difference is the _wide REX encoding used for the 64-bit source.
11352 instruct convL2D_reg_reg(regD dst, rRegL src)
11353 %{
11354   match(Set dst (ConvL2D src));
11355 
11356   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11357   opcode(0xF2, 0x0F, 0x2A);
11358   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11359   ins_pipe(pipe_slow); // XXX
11360 %}
11361 
// Long -> double conversion of a long loaded from memory
// (ConvL2D(LoadL src)); the load is folded into the cvtsi2sdq memory operand,
// encoded with the _wide REX variant.
11362 instruct convL2D_reg_mem(regD dst, memory src)
11363 %{
11364   match(Set dst (ConvL2D (LoadL src)));
11365 
11366   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11367   opcode(0xF2, 0x0F, 0x2A);
11368   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11369   ins_pipe(pipe_slow); // XXX
11370 %}
11371 
// Int -> long conversion (ConvI2L) emitted as a single movslq through the
// macro assembler, i.e. a sign-extending move as shown in the format.
11372 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11373 %{
11374   match(Set dst (ConvI2L src));
11375 
11376   ins_cost(125);
11377   format %{ "movslq  $dst, $src\t# i2l" %}
11378   ins_encode %{
11379     __ movslq($dst$$Register, $src$$Register);
11380   %}
11381   ins_pipe(ialu_reg_reg);
11382 %}
11383 
11384 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11385 // %{
11386 //   match(Set dst (ConvI2L src));
11387 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11388 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11389 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11390 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11391 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11392 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11393 
11394 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11395 //   ins_encode(enc_copy(dst, src));
11396 // //   opcode(0x63); // needs REX.W
11397 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11398 //   ins_pipe(ialu_reg_reg);
11399 // %}
11400 
11401 // Zero-extend convert int to long
// Matches AndL(ConvI2L src, mask) where mask is an immL_32bits operand, and
// replaces the sign-extend + mask pair with one movl (per the format).
// NOTE(review): this uses enc_copy while zerox_long_reg_reg uses
// enc_copy_always; both are defined elsewhere -- confirm enc_copy still
// zero-extends when dst and src are the same register.
11402 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11403 %{
11404   match(Set dst (AndL (ConvI2L src) mask));
11405 
11406   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11407   ins_encode(enc_copy(dst, src));
11408   ins_pipe(ialu_reg_reg);
11409 %}
11410 
11411 // Zero-extend convert int to long
// Memory form: matches AndL(ConvI2L(LoadI src), mask) with an immL_32bits
// mask and replaces load + sign-extend + mask with a single movl load
// (opcode 0x8B, non-wide REX), as shown in the format.
11412 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11413 %{
11414   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11415 
11416   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11417   opcode(0x8B);
11418   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11419   ins_pipe(ialu_reg_mem);
11420 %}
11421 
// Long masked with an immL_32bits operand (AndL src mask), implemented as a
// movl per the format. Uses enc_copy_always (defined elsewhere) -- by its
// name the move is emitted even when dst and src coincide; confirm there.
11422 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11423 %{
11424   match(Set dst (AndL src mask));
11425 
11426   format %{ "movl    $dst, $src\t# zero-extend long" %}
11427   ins_encode(enc_copy_always(dst, src));
11428   ins_pipe(ialu_reg_reg);
11429 %}
11430 
// Long -> int conversion (ConvL2I), implemented as a movl per the format and
// encoded with enc_copy_always (defined elsewhere).
11431 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11432 %{
11433   match(Set dst (ConvL2I src));
11434 
11435   format %{ "movl    $dst, $src\t# l2i" %}
11436   ins_encode(enc_copy_always(dst, src));
11437   ins_pipe(ialu_reg_reg);
11438 %}
11439 
11440 
// MoveF2I from a float stack slot into an int register: a plain 32-bit load
// (movl, opcode 0x8B). No conversion instruction is involved; the operand is
// simply read back through an integer load.
11441 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11442   match(Set dst (MoveF2I src));
11443   effect(DEF dst, USE src);
11444 
11445   ins_cost(125);
11446   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11447   opcode(0x8B);
11448   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11449   ins_pipe(ialu_reg_mem);
11450 %}
11451 
// MoveI2F from an int stack slot into a float register: a movss load
// (opcode bytes F3 0F 10) with no conversion instruction.
11452 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11453   match(Set dst (MoveI2F src));
11454   effect(DEF dst, USE src);
11455 
11456   ins_cost(125);
11457   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11458   opcode(0xF3, 0x0F, 0x10);
11459   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11460   ins_pipe(pipe_slow);
11461 %}
11462 
// MoveD2L from a double stack slot into a long register: a 64-bit load
// (movq, opcode 0x8B with the _wide REX encoding), no conversion instruction.
11463 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11464   match(Set dst (MoveD2L src));
11465   effect(DEF dst, USE src);
11466 
11467   ins_cost(125);
11468   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11469   opcode(0x8B);
11470   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11471   ins_pipe(ialu_reg_mem);
11472 %}
11473 
// MoveL2D from a long stack slot into a double register, variant chosen when
// UseXmmLoadAndClearUpper is off: loads with movlpd (opcode bytes 66 0F 12).
// MoveL2D_stack_reg covers the flag-on case with movsd. The format tag reads
// "MoveL2D_stack_reg" for both variants.
11474 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11475   predicate(!UseXmmLoadAndClearUpper);
11476   match(Set dst (MoveL2D src));
11477   effect(DEF dst, USE src);
11478 
11479   ins_cost(125);
11480   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11481   opcode(0x66, 0x0F, 0x12);
11482   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11483   ins_pipe(pipe_slow);
11484 %}
11485 
// MoveL2D from a long stack slot into a double register, variant chosen when
// UseXmmLoadAndClearUpper is on: loads with movsd (opcode bytes F2 0F 10).
// MoveL2D_stack_reg_partial covers the flag-off case with movlpd.
11486 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11487   predicate(UseXmmLoadAndClearUpper);
11488   match(Set dst (MoveL2D src));
11489   effect(DEF dst, USE src);
11490 
11491   ins_cost(125);
11492   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11493   opcode(0xF2, 0x0F, 0x10);
11494   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11495   ins_pipe(pipe_slow);
11496 %}
11497 
11498 
// MoveF2I from a float register into an int stack slot: a movss store
// (opcode bytes F3 0F 11). The encoding helpers take (src, dst) here because
// the register is the reg field and the stack slot is the memory operand.
11499 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11500   match(Set dst (MoveF2I src));
11501   effect(DEF dst, USE src);
11502 
11503   ins_cost(95); // XXX
11504   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11505   opcode(0xF3, 0x0F, 0x11);
11506   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11507   ins_pipe(pipe_slow);
11508 %}
11509 
// MoveI2F from an int register into a float stack slot: a 32-bit store
// (movl, opcode 0x89). The encoding helpers take (src, dst) here because the
// register is the reg field and the stack slot is the memory operand.
11510 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11511   match(Set dst (MoveI2F src));
11512   effect(DEF dst, USE src);
11513 
11514   ins_cost(100);
11515   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11516   opcode(0x89);
11517   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11518   ins_pipe( ialu_mem_reg );
11519 %}
11520 
// MoveD2L from a double register into a long stack slot: a movsd store
// (opcode bytes F2 0F 11). The encoding helpers take (src, dst) here because
// the register is the reg field and the stack slot is the memory operand.
// The format tag names this instruct (it previously read "MoveL2D_reg_stack",
// which mislabelled it as its sibling in disassembly listings).
11521 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11522   match(Set dst (MoveD2L src));
11523   effect(DEF dst, USE src);
11524 
11525   ins_cost(95); // XXX
11526   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11527   opcode(0xF2, 0x0F, 0x11);
11528   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11529   ins_pipe(pipe_slow);
11530 %}
11531 
// MoveL2D from a long register into a double stack slot: a 64-bit store
// (movq, opcode 0x89 with the _wide REX encoding). The encoding helpers take
// (src, dst) because the register is the reg field of the store.
11532 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11533   match(Set dst (MoveL2D src));
11534   effect(DEF dst, USE src);
11535 
11536   ins_cost(100);
11537   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11538   opcode(0x89);
11539   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11540   ins_pipe(ialu_mem_reg);
11541 %}
11542 
// MoveF2I directly between registers: movdl from the XMM source into the
// general-purpose destination, with no trip through the stack. Its cost (85)
// is below that of the stack-based MoveF2I forms in this file (95 and 125).
11543 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11544   match(Set dst (MoveF2I src));
11545   effect(DEF dst, USE src);
11546   ins_cost(85);
11547   format %{ "movd    $dst,$src\t# MoveF2I" %}
11548   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11549   ins_pipe( pipe_slow );
11550 %}
11551 
// MoveD2L directly between registers: movdq from the XMM source into the
// general-purpose destination. Note the format prints the mnemonic as "movd"
// while the assembler call is movdq.
11552 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11553   match(Set dst (MoveD2L src));
11554   effect(DEF dst, USE src);
11555   ins_cost(85);
11556   format %{ "movd    $dst,$src\t# MoveD2L" %}
11557   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F directly between registers: movdl from the general-purpose source
// into the XMM destination. Cost 300 is well above the stack-based MoveI2F
// forms in this file (100 and 125).
11562 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11563   match(Set dst (MoveI2F src));
11564   effect(DEF dst, USE src);
11565   ins_cost(300);
11566   format %{ "movd    $dst,$src\t# MoveI2F" %}
11567   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11568   ins_pipe( pipe_slow );
11569 %}
11570 
// MoveL2D directly between registers: movdq from the general-purpose source
// into the XMM destination. Cost 300 is well above the stack-based MoveL2D
// forms in this file (100 and 125). The format prints the mnemonic as "movd"
// while the assembler call is movdq.
11571 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11572   match(Set dst (MoveL2D src));
11573   effect(DEF dst, USE src);
11574   ins_cost(300);
11575   format %{ "movd    $dst,$src\t# MoveL2D" %}
11576   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11577   ins_pipe( pipe_slow );
11578 %}
11579 
11580 // Replicate scalar to packed byte (1 byte) values in xmm
// Source already lives in a regD operand. The format lists a three-step
// sequence (MOVDQA copy, PUNPCKLBW, PSHUFLW with selector 0x00); the bytes
// are produced by the pshufd_8x8 encoding, defined elsewhere.
11581 instruct Repl8B_reg(regD dst, regD src) %{
11582   match(Set dst (Replicate8B src));
11583   format %{ "MOVDQA  $dst,$src\n\t"
11584             "PUNPCKLBW $dst,$dst\n\t"
11585             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11586   ins_encode( pshufd_8x8(dst, src));
11587   ins_pipe( pipe_slow );
11588 %}
11589 
11590 // Replicate scalar to packed byte (1 byte) values in xmm
// Source is a general-purpose int register: mov_i2x first copies it into
// dst, then pshufd_8x8 runs with dst as both operands. Both encodings are
// defined elsewhere; the format lists MOVD, PUNPCKLBW, PSHUFLW.
11591 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11592   match(Set dst (Replicate8B src));
11593   format %{ "MOVD    $dst,$src\n\t"
11594             "PUNPCKLBW $dst,$dst\n\t"
11595             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11596   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11597   ins_pipe( pipe_slow );
11598 %}
11599 
11600 // Replicate scalar zero to packed byte (1 byte) values in xmm
// The zero operand is matched but never passed to the encoding: the pxor
// encoding is invoked with dst as both operands (shown as PXOR dst,dst).
11601 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11602   match(Set dst (Replicate8B zero));
11603   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11604   ins_encode( pxor(dst, dst));
11605   ins_pipe( fpu_reg_reg );
11606 %}
11607 
11608 // Replicate scalar to packed short (2 byte) values in xmm
// Source already lives in a regD operand. The format shows a single PSHUFLW
// with selector 0x00; the bytes come from the pshufd_4x16 encoding, defined
// elsewhere.
11609 instruct Repl4S_reg(regD dst, regD src) %{
11610   match(Set dst (Replicate4S src));
11611   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11612   ins_encode( pshufd_4x16(dst, src));
11613   ins_pipe( fpu_reg_reg );
11614 %}
11615 
11616 // Replicate scalar to packed short (2 byte) values in xmm
      // Variant whose source is an int general register: mov_i2x moves $src
      // into $dst, then pshufd_4x16 is applied with $dst as both operands.
11617 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11618   match(Set dst (Replicate4S src));
11619   format %{ "MOVD    $dst,$src\n\t"
11620             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11621   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11622   ins_pipe( fpu_reg_reg );
11623 %}
11624 
11625 // Replicate scalar zero to packed short (2 byte) values in xmm
      // Matches only the constant 0 (immI0); XOR-ing $dst with itself clears
      // the whole register, so no shuffle is required.
11626 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11627   match(Set dst (Replicate4S zero));
11628   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11629   ins_encode( pxor(dst, dst));
11630   ins_pipe( fpu_reg_reg );
11631 %}
11632 
11633 // Replicate scalar to packed char (2 byte) values in xmm
      // Variant whose source is already in an XMM register.  Uses the same
      // pshufd_4x16 encoding class as the short variant Repl4S_reg.
11634 instruct Repl4C_reg(regD dst, regD src) %{
11635   match(Set dst (Replicate4C src));
11636   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11637   ins_encode( pshufd_4x16(dst, src));
11638   ins_pipe( fpu_reg_reg );
11639 %}
11640 
11641 // Replicate scalar to packed char (2 byte) values in xmm
      // Variant whose source is an int general register: mov_i2x moves $src
      // into $dst, then pshufd_4x16 is applied with $dst as both operands.
11642 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11643   match(Set dst (Replicate4C src));
11644   format %{ "MOVD    $dst,$src\n\t"
11645             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11646   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11647   ins_pipe( fpu_reg_reg );
11648 %}
11649 
11650 // Replicate scalar zero to packed char (2 byte) values in xmm
      // Matches only the constant 0 (immI0); XOR-ing $dst with itself clears
      // the whole register, so no shuffle is required.
11651 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11652   match(Set dst (Replicate4C zero));
11653   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11654   ins_encode( pxor(dst, dst));
11655   ins_pipe( fpu_reg_reg );
11656 %}
11657 
11658 // Replicate scalar to packed integer (4 byte) values in xmm
      // Variant whose source is already in an XMM register.  Shuffle immediate
      // 0x00 makes every result dword a copy of source dword 0 (PSHUFD
      // immediate semantics).
11659 instruct Repl2I_reg(regD dst, regD src) %{
11660   match(Set dst (Replicate2I src));
11661   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11662   ins_encode( pshufd(dst, src, 0x00));
11663   ins_pipe( fpu_reg_reg );
11664 %}
11665 
11666 // Replicate scalar to packed integer (4 byte) values in xmm
      // Variant whose source is an int general register: mov_i2x moves $src
      // into $dst, then pshufd with immediate 0x00 replicates dword 0 in place.
11667 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11668   match(Set dst (Replicate2I src));
11669   format %{ "MOVD   $dst,$src\n\t"
11670             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11671   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11672   ins_pipe( fpu_reg_reg );
11673 %}
11674 
11675 // Replicate scalar zero to packed integer (4 byte) values in xmm
      // Matches only the constant 0 (immI0); XOR-ing $dst with itself clears
      // the whole register, so no shuffle is required.
11676 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11677   match(Set dst (Replicate2I zero));
11678   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11679   ins_encode( pxor(dst, dst));
11680   ins_pipe( fpu_reg_reg );
11681 %}
11682 
11683 // Replicate scalar to packed single precision floating point values in xmm
      // Variant whose source operand is declared regD.  Shuffle immediate 0xe0
      // picks source dword 0 for the two low result dwords and keeps dwords 2
      // and 3 in their own positions (PSHUFD immediate semantics).
11684 instruct Repl2F_reg(regD dst, regD src) %{
11685   match(Set dst (Replicate2F src));
11686   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11687   ins_encode( pshufd(dst, src, 0xe0));
11688   ins_pipe( fpu_reg_reg );
11689 %}
11690 
11691 // Replicate scalar to packed single precision floating point values in xmm
      // Same encoding as Repl2F_reg; the only difference is that the source
      // operand is declared regF instead of regD.
11692 instruct Repl2F_regF(regD dst, regF src) %{
11693   match(Set dst (Replicate2F src));
11694   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11695   ins_encode( pshufd(dst, src, 0xe0));
11696   ins_pipe( fpu_reg_reg );
11697 %}
11698 
11699 // Replicate scalar zero to packed single precision floating point values in xmm
      // Matches only the immF0 operand; XOR-ing $dst with itself clears the
      // whole register, so no shuffle is required.
11700 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11701   match(Set dst (Replicate2F zero));
11702   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11703   ins_encode( pxor(dst, dst));
11704   ins_pipe( fpu_reg_reg );
11705 %}
11706 
11707 
11708 // =======================================================================
11709 // fast clearing of an array
      // Implements ClearArray with "xorl eax,eax; rep stosq".  The operands are
      // pinned to the registers the string instruction uses implicitly: count
      // in rcx, destination in rdi, stored value in rax.  cnt and base are
      // USE_KILL because the instruction consumes and modifies them; rax and
      // the flags are only clobbered.  `dummy` is the placeholder result of the
      // match rule.
      // NOTE(review): the store is stosq (8 bytes per iteration), so cnt is
      // presumably a count of 8-byte words -- confirm against ClearArray.
11710 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11711                   rFlagsReg cr)
11712 %{
11713   match(Set dummy (ClearArray cnt base));
11714   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11715 
11716   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11717             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11718   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11719              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11720   ins_pipe(pipe_slow);
11721 %}
11722 
11723 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11724                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11725 %{
      // Inlined StrComp.  All work is delegated to the macro assembler's
      // string_compare(); this rule only fixes the register assignment
      // (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rbx, result=rax) and declares
      // that both strings, both counts and the flags are destroyed and that
      // two XMM temporaries are needed.
11726   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11727   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11728 
11729   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11730   ins_encode %{
11731     __ string_compare($str1$$Register, $str2$$Register,
11732                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11733                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11734   %}
11735   ins_pipe( pipe_slow );
11736 %}
11737 
11738 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11739                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11740 %{
      // Inlined StrIndexOf, only available when UseSSE42Intrinsics is set.
      // All work is delegated to the macro assembler's string_indexof(); this
      // rule fixes the register assignment (str1=rdi, cnt1=rdx, str2=rsi,
      // cnt2=rax, result=rbx) and declares that the inputs, rcx (tmp2) and the
      // flags are destroyed and that one XMM temporary is needed.
11741   predicate(UseSSE42Intrinsics);
11742   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11743   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11744 
11745   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11746   ins_encode %{
11747     __ string_indexof($str1$$Register, $str2$$Register,
11748                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11749                       $tmp1$$XMMRegister, $tmp2$$Register);
11750   %}
11751   ins_pipe( pipe_slow );
11752 %}
11753 
11754 // fast string equals
      // Inlined StrEquals.  Delegates to the macro assembler's
      // char_arrays_equals() with its first argument set to false (array_equals
      // below passes true).  Register assignment: str1=rdi, str2=rsi, cnt=rcx,
      // result=rax; rbx (tmp3) and the flags are clobbered, and two XMM
      // temporaries are needed.
11755 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11756                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11757 %{
11758   match(Set result (StrEquals (Binary str1 str2) cnt));
11759   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11760 
11761   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11762   ins_encode %{
11763     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11764                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11765                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11766   %}
11767   ins_pipe( pipe_slow );
11768 %}
11769 
11770 // fast array equals
      // Inlined AryEq.  Delegates to the macro assembler's char_arrays_equals()
      // with its first argument set to true (string_equals above passes false).
      // The match rule carries no length operand, so rcx (tmp3) is handed to
      // the helper in the position where string_equals passes its count.
      // Register assignment: ary1=rdi, ary2=rsi, result=rax; rcx, rbx and the
      // flags are clobbered, and two XMM temporaries are needed.
11771 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11772                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11773 %{
11774   match(Set result (AryEq ary1 ary2));
11775   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11776   //ins_cost(300);
11777 
11778   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11779   ins_encode %{
11780     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11781                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11782                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11783   %}
11784   ins_pipe( pipe_slow );
11785 %}
11786 
11787 //----------Control Flow Instructions------------------------------------------
11788 // Signed compare Instructions
11789 
11790 // XXX more variants!!
11791 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11792 %{
      // Signed 32-bit compare of two int registers; the only result is the
      // flags register.  Opcode 0x3B is CMP r32, r/m32.  This is also the
      // compare that minI_rReg and maxI_rReg expand to.
11793   match(Set cr (CmpI op1 op2));
11794   effect(DEF cr, USE op1, USE op2);
11795 
11796   format %{ "cmpl    $op1, $op2" %}
11797   opcode(0x3B);  /* Opcode 3B /r */
11798   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11799   ins_pipe(ialu_cr_reg_reg);
11800 %}
11801 
11802 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11803 %{
      // Signed 32-bit compare of an int register with an int constant.
      // Opcode 0x81 /7 is CMP r/m32, imm32.
      // NOTE(review): the OpcSErm / Con8or32 encoding classes (defined outside
      // this section) presumably switch to the sign-extended imm8 form when the
      // constant fits in a byte -- confirm in their definitions.
11804   match(Set cr (CmpI op1 op2));
11805 
11806   format %{ "cmpl    $op1, $op2" %}
11807   opcode(0x81, 0x07); /* Opcode 81 /7 */
11808   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11809   ins_pipe(ialu_cr_reg_imm);
11810 %}
11811 
11812 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11813 %{
      // Signed 32-bit compare of an int register with an int loaded from
      // memory; the LoadI is folded into the CMP (opcode 0x3B, CMP r32, r/m32).
      // The cost of 500 is flagged "XXX" by the original author.
11814   match(Set cr (CmpI op1 (LoadI op2)));
11815 
11816   ins_cost(500); // XXX
11817   format %{ "cmpl    $op1, $op2" %}
11818   opcode(0x3B); /* Opcode 3B /r */
11819   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11820   ins_pipe(ialu_cr_reg_mem);
11821 %}
11822 
11823 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11824 %{
      // Compare of an int register with the constant 0, implemented as
      // "testl src, src" (opcode 0x85, TEST r/m32, r32).  The zero operand
      // only appears in the match rule; it is not encoded.
11825   match(Set cr (CmpI src zero));
11826 
11827   format %{ "testl   $src, $src" %}
11828   opcode(0x85);
11829   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11830   ins_pipe(ialu_cr_reg_imm);
11831 %}
11832 
11833 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11834 %{
      // Matches "(src & con) compared with 0" and folds the AndI into a single
      // TEST r/m32, imm32 (opcode 0xF7 /0), so the AND result is never written
      // to a register.  The constant is always emitted as 32 bits (Con32).
11835   match(Set cr (CmpI (AndI src con) zero));
11836 
11837   format %{ "testl   $src, $con" %}
11838   opcode(0xF7, 0x00);
11839   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11840   ins_pipe(ialu_cr_reg_imm);
11841 %}
11842 
11843 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11844 %{
      // Matches "(src & [mem]) compared with 0" and folds both the LoadI and
      // the AndI into a single TEST with a memory operand (opcode 0x85).
11845   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11846 
11847   format %{ "testl   $src, $mem" %}
11848   opcode(0x85);
11849   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11850   ins_pipe(ialu_cr_reg_mem);
11851 %}
11852 
11853 // Unsigned compare Instructions; really, same as signed except they
11854 // produce an rFlagsRegU instead of rFlagsReg.
      // Register-register form: same CMP r32, r/m32 (opcode 0x3B) bytes as
      // compI_rReg, but matching CmpU and defining rFlagsRegU.
11855 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11856 %{
11857   match(Set cr (CmpU op1 op2));
11858 
11859   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11860   opcode(0x3B); /* Opcode 3B /r */
11861   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11862   ins_pipe(ialu_cr_reg_reg);
11863 %}
11864 
11865 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11866 %{
      // Unsigned 32-bit compare of an int register with an int constant.
      // Same encoding as compI_rReg_imm (CMP r/m32, imm, opcode 0x81 /7); only
      // the matched node (CmpU) and the flags class (rFlagsRegU) differ.
11867   match(Set cr (CmpU op1 op2));
11868 
11869   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11870   opcode(0x81,0x07); /* Opcode 81 /7 */
11871   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11872   ins_pipe(ialu_cr_reg_imm);
11873 %}
11874 
11875 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11876 %{
      // Unsigned 32-bit compare of an int register with an int loaded from
      // memory; the LoadI is folded into the CMP (opcode 0x3B).  Same encoding
      // as compI_rReg_mem, but defining rFlagsRegU.
11877   match(Set cr (CmpU op1 (LoadI op2)));
11878 
11879   ins_cost(500); // XXX
11880   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11881   opcode(0x3B); /* Opcode 3B /r */
11882   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11883   ins_pipe(ialu_cr_reg_mem);
11884 %}
11885 
11886 // // // Cisc-spilled version of cmpU_rReg
11887 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11888 // //%{
11889 // //  match(Set cr (CmpU (LoadI op1) op2));
11890 // //
11891 // //  format %{ "CMPu   $op1,$op2" %}
11892 // //  ins_cost(500);
11893 // //  opcode(0x39);  /* Opcode 39 /r */
11894 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11895 // //%}
11896 
11897 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11898 %{
      // Unsigned compare of an int register with the constant 0, implemented
      // as "testl src, src" (opcode 0x85).  Same encoding as testI_reg, but
      // matching CmpU and defining rFlagsRegU.
11899   match(Set cr (CmpU src zero));
11900 
11901   format %{ "testl  $src, $src\t# unsigned" %}
11902   opcode(0x85);
11903   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11904   ins_pipe(ialu_cr_reg_imm);
11905 %}
11906 
11907 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11908 %{
      // Compare of two pointer registers.  Uses the "_wide" REX encoding class,
      // i.e. a 64-bit CMP (opcode 0x3B), and defines the unsigned flags class.
11909   match(Set cr (CmpP op1 op2));
11910 
11911   format %{ "cmpq    $op1, $op2\t# ptr" %}
11912   opcode(0x3B); /* Opcode 3B /r */
11913   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11914   ins_pipe(ialu_cr_reg_reg);
11915 %}
11916 
11917 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11918 %{
      // Compare of a pointer register with a pointer loaded from memory; the
      // LoadP is folded into a 64-bit CMP (opcode 0x3B).  Matches the same
      // tree as compP_mem_rReg below, but without a predicate and with the
      // higher cost of 500.
11919   match(Set cr (CmpP op1 (LoadP op2)));
11920 
11921   ins_cost(500); // XXX
11922   format %{ "cmpq    $op1, $op2\t# ptr" %}
11923   opcode(0x3B); /* Opcode 3B /r */
11924   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11925   ins_pipe(ialu_cr_reg_mem);
11926 %}
11927 
11928 // // // Cisc-spilled version of cmpP_rReg
11929 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11930 // //%{
11931 // //  match(Set cr (CmpP (LoadP op1) op2));
11932 // //
11933 // //  format %{ "CMPu   $op1,$op2" %}
11934 // //  ins_cost(500);
11935 // //  opcode(0x39);  /* Opcode 39 /r */
11936 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11937 // //%}
11938 
11939 // XXX this is generalized by compP_rReg_mem???
11940 // Compare raw pointer (used in out-of-heap check).
11941 // Only works because non-oop pointers must be raw pointers
11942 // and raw pointers have no anti-dependencies.
      // Same match tree and same encoding as compP_rReg_mem, but restricted by
      // the predicate and left at the default cost instead of 500.
      // NOTE(review): n->in(2) is the LoadP operand of the CmpP; its in(2) is
      // presumably the load's address input, so the predicate accepts only
      // loads through a non-oop address -- confirm against the node layout.
11943 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11944 %{
11945   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11946   match(Set cr (CmpP op1 (LoadP op2)));
11947 
11948   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11949   opcode(0x3B); /* Opcode 3B /r */
11950   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11951   ins_pipe(ialu_cr_reg_mem);
11952 %}
11953 
11954 // This will generate a signed flags result. This should be OK since
11955 // any compare to a zero should be eq/neq.
      // Null test of a pointer register, implemented as a 64-bit
      // "testq src, src" (opcode 0x85 with the wide REX encoding class).
11956 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11957 %{
11958   match(Set cr (CmpP src zero));
11959 
11960   format %{ "testq   $src, $src\t# ptr" %}
11961   opcode(0x85);
11962   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11963   ins_pipe(ialu_cr_reg_imm);
11964 %}
11965 
11966 // This will generate a signed flags result. This should be OK since
11967 // any compare to a zero should be eq/neq.
      // Null test of a pointer loaded from memory: TEST r/m64, imm32 (opcode
      // 0xF7 /0).  The 32-bit immediate 0xFFFFFFFF is sign-extended by the
      // 64-bit operation, which is why the format string shows all 64 bits
      // set.  Selected when compressed oops are off or the narrow-oop base is
      // non-NULL; testP_mem_reg0 handles the remaining case.
11968 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11969 %{
11970   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11971   match(Set cr (CmpP (LoadP op) zero));
11972 
11973   ins_cost(500); // XXX
11974   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11975   opcode(0xF7); /* Opcode F7 /0 */
11976   ins_encode(REX_mem_wide(op),
11977              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11978   ins_pipe(ialu_cr_reg_imm);
11979 %}
11980 
11981 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11982 %{
      // Null test of a pointer loaded from memory for the configuration where
      // compressed oops are on and the narrow-oop base is NULL.  Per the
      // format string, R12 (the heap-base register) then holds 0, so the test
      // is done as "cmpq r12, [mem]" with no immediate to encode.
11983   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11984   match(Set cr (CmpP (LoadP mem) zero));
11985 
11986   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11987   ins_encode %{
11988     __ cmpq(r12, $mem$$Address);
11989   %}
11990   ins_pipe(ialu_cr_reg_mem);
11991 %}
11992 
11993 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11994 %{
      // Compare of two compressed-pointer registers with a 32-bit cmpl;
      // defines the unsigned flags class.
11995   match(Set cr (CmpN op1 op2));
11996 
11997   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11998   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11999   ins_pipe(ialu_cr_reg_reg);
12000 %}
12001 
12002 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12003 %{
      // Compare of a compressed-pointer register with a compressed pointer
      // loaded from memory; the LoadN is folded into a 32-bit cmpl.
12004   match(Set cr (CmpN src (LoadN mem)));
12005 
12006   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12007   ins_encode %{
12008     __ cmpl($src$$Register, $mem$$Address);
12009   %}
12010   ins_pipe(ialu_cr_reg_mem);
12011 %}
12012 
12013 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
      // Compare of a compressed-pointer register with a compressed-pointer
      // constant.  The constant is handed to the macro assembler's
      // cmp_narrow_oop() as a jobject.
12014   match(Set cr (CmpN op1 op2));
12015 
12016   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12017   ins_encode %{
12018     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12019   %}
12020   ins_pipe(ialu_cr_reg_imm);
12021 %}
12022 
12023 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12024 %{
      // Compare of a compressed-pointer constant with a compressed pointer
      // loaded from memory; both the LoadN and the constant are folded into
      // one cmp_narrow_oop() on the memory operand.
      // NOTE(review): the match rule has the constant as the left operand
      // (CmpN src (LoadN mem)) while the emitted compare is "[mem] vs src".
      // That is equivalent only for equal / not-equal tests -- confirm that
      // CmpN results are never consumed by an ordered condition.
12025   match(Set cr (CmpN src (LoadN mem)));
12026 
12027   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12028   ins_encode %{
12029     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12030   %}
12031   ins_pipe(ialu_cr_reg_mem);
12032 %}
12033 
12034 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
      // Null test of a compressed-pointer register, implemented as a 32-bit
      // "testl src, src".  The zero operand is not encoded.
12035   match(Set cr (CmpN src zero));
12036 
12037   format %{ "testl   $src, $src\t# compressed ptr" %}
12038   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12039   ins_pipe(ialu_cr_reg_imm);
12040 %}
12041 
12042 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12043 %{
      // Null test of a compressed pointer loaded from memory, used when the
      // narrow-oop base is non-NULL (testN_mem_reg0 covers the zero-base case).
      // The flags must describe "[mem] compared with 0", so the compare is
      // against 0.  Do NOT compare against 0xFFFFFFFF here: cmpl with an
      // all-ones immediate sets ZF only when the word is all ones, which is
      // not a null test.  "cmpl [mem], 0" produces the same ZF/SF/CF/OF as
      // "testl [mem], 0xffffffff" and uses the cmpl(Address, imm32) assembler
      // form.
12044   predicate(Universe::narrow_oop_base() != NULL);
12045   match(Set cr (CmpN (LoadN mem) zero));
12046 
12047   ins_cost(500); // XXX
12048   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
12049   ins_encode %{
12050     __ cmpl($mem$$Address, (int)0);
12051   %}
12052   ins_pipe(ialu_cr_reg_mem);
12053 %}
12054 
12055 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12056 %{
      // Null test of a compressed pointer loaded from memory for the
      // configuration where the narrow-oop base is NULL.  Per the format
      // string, R12 (the heap-base register) then holds 0, so the test is
      // done as a 32-bit "cmpl r12, [mem]" with no immediate to encode.
12057   predicate(Universe::narrow_oop_base() == NULL);
12058   match(Set cr (CmpN (LoadN mem) zero));
12059 
12060   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12061   ins_encode %{
12062     __ cmpl(r12, $mem$$Address);
12063   %}
12064   ins_pipe(ialu_cr_reg_mem);
12065 %}
12066 
12067 // Yanked all unsigned pointer compare operations.
12068 // Pointer compares are done with CmpP which is already unsigned.
12069 
12070 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12071 %{
      // Signed 64-bit compare of two long registers: CMP (opcode 0x3B) with
      // the wide REX encoding class.
12072   match(Set cr (CmpL op1 op2));
12073 
12074   format %{ "cmpq    $op1, $op2" %}
12075   opcode(0x3B);  /* Opcode 3B /r */
12076   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
12077   ins_pipe(ialu_cr_reg_reg);
12078 %}
12079 
12080 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12081 %{
      // Signed 64-bit compare of a long register with a constant.  The operand
      // class immL32 restricts the rule to constants the 64-bit CMP r/m, imm
      // form (opcode 0x81 /7) can carry as an immediate.
12082   match(Set cr (CmpL op1 op2));
12083 
12084   format %{ "cmpq    $op1, $op2" %}
12085   opcode(0x81, 0x07); /* Opcode 81 /7 */
12086   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12087   ins_pipe(ialu_cr_reg_imm);
12088 %}
12089 
12090 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12091 %{
      // Signed 64-bit compare of a long register with a long loaded from
      // memory; the LoadL is folded into the CMP (opcode 0x3B, wide REX).
      // Unlike the int and pointer memory forms, no explicit ins_cost is set.
12092   match(Set cr (CmpL op1 (LoadL op2)));
12093 
12094   format %{ "cmpq    $op1, $op2" %}
12095   opcode(0x3B); /* Opcode 3B /r */
12096   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12097   ins_pipe(ialu_cr_reg_mem);
12098 %}
12099 
12100 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12101 %{
      // Compare of a long register with the constant 0, implemented as a
      // 64-bit "testq src, src" (opcode 0x85, wide REX).  The zero operand is
      // not encoded.
12102   match(Set cr (CmpL src zero));
12103 
12104   format %{ "testq   $src, $src" %}
12105   opcode(0x85);
12106   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12107   ins_pipe(ialu_cr_reg_imm);
12108 %}
12109 
12110 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12111 %{
      // Matches "(src & con) compared with 0" for longs and folds the AndL
      // into a single 64-bit TEST r/m64, imm32 (opcode 0xF7 /0), so the AND
      // result is never written to a register.
12112   match(Set cr (CmpL (AndL src con) zero));
12113 
12114   format %{ "testq   $src, $con\t# long" %}
12115   opcode(0xF7, 0x00);
12116   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12117   ins_pipe(ialu_cr_reg_imm);
12118 %}
12119 
12120 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12121 %{
      // Matches "(src & [mem]) compared with 0" for longs and folds both the
      // LoadL and the AndL into a single 64-bit TEST with a memory operand
      // (opcode 0x85, wide REX).
12122   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12123 
12124   format %{ "testq   $src, $mem" %}
12125   opcode(0x85);
12126   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12127   ins_pipe(ialu_cr_reg_mem);
12128 %}
12129 
12130 // Manifest a CmpL result in an integer register.  Very painful.
12131 // This is the test to avoid.
      // Per the format string: compare, preload -1, skip the rest if
      // src1 < src2; otherwise setne/movzbl leave 0 (equal) or 1 (greater) in
      // $dst.  The flags are clobbered (KILL flags).  The bytes come from the
      // cmpl3_flag encoding class defined outside this section.
12132 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12133 %{
12134   match(Set dst (CmpL3 src1 src2));
12135   effect(KILL flags);
12136 
12137   ins_cost(275); // XXX
12138   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12139             "movl    $dst, -1\n\t"
12140             "jl,s    done\n\t"
12141             "setne   $dst\n\t"
12142             "movzbl  $dst, $dst\n\t"
12143     "done:" %}
12144   ins_encode(cmpl3_flag(src1, src2, dst));
12145   ins_pipe(pipe_slow);
12146 %}
12147 
12148 //----------Max and Min--------------------------------------------------------
12149 // Min Instructions
12150 
12151 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12152 %{
      // Helper with no match rule; it is only instantiated from the expand
      // block of minI_rReg.  Opcode 0x0F 0x4F is CMOVG: after "cmp dst, src"
      // it replaces $dst with $src when dst > src, leaving the minimum.
12153   effect(USE_DEF dst, USE src, USE cr);
12154 
12155   format %{ "cmovlgt $dst, $src\t# min" %}
12156   opcode(0x0F, 0x4F);
12157   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12158   ins_pipe(pipe_cmov_reg);
12159 %}
12160 
12161 
12162 instruct minI_rReg(rRegI dst, rRegI src)
12163 %{
      // Integer minimum, computed in place in $dst.  Emits nothing itself: it
      // expands into compI_rReg followed by cmovI_reg_g, connected through a
      // freshly declared flags operand.
12164   match(Set dst (MinI dst src));
12165 
12166   ins_cost(200);
12167   expand %{
12168     rFlagsReg cr;
12169     compI_rReg(cr, dst, src);
12170     cmovI_reg_g(dst, src, cr);
12171   %}
12172 %}
12173 
12174 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12175 %{
      // Helper with no match rule; it is only instantiated from the expand
      // block of maxI_rReg.  Opcode 0x0F 0x4C is CMOVL: after "cmp dst, src"
      // it replaces $dst with $src when dst < src, leaving the maximum.
12176   effect(USE_DEF dst, USE src, USE cr);
12177 
12178   format %{ "cmovllt $dst, $src\t# max" %}
12179   opcode(0x0F, 0x4C);
12180   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12181   ins_pipe(pipe_cmov_reg);
12182 %}
12183 
12184 
12185 instruct maxI_rReg(rRegI dst, rRegI src)
12186 %{
      // Integer maximum, computed in place in $dst.  Emits nothing itself: it
      // expands into compI_rReg followed by cmovI_reg_l, connected through a
      // freshly declared flags operand.
12187   match(Set dst (MaxI dst src));
12188 
12189   ins_cost(200);
12190   expand %{
12191     rFlagsReg cr;
12192     compI_rReg(cr, dst, src);
12193     cmovI_reg_l(dst, src, cr);
12194   %}
12195 %}
12196 
12197 // ============================================================================
12198 // Branch Instructions
12199 
12200 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional branch: opcode 0xE9 (JMP rel32), i.e.
      // one opcode byte plus a 4-byte displacement, hence size(5).
      // jmpDir_short is the 2-byte replacement for nearby targets.
12201 instruct jmpDir(label labl)
12202 %{
12203   match(Goto);
12204   effect(USE labl);
12205 
12206   ins_cost(300);
12207   format %{ "jmp     $labl" %}
12208   size(5);
12209   opcode(0xE9);
12210   ins_encode(OpcP, Lbl(labl));
12211   ins_pipe(pipe_jmp);
12212   ins_pc_relative(1);
12213 %}
12214 
12215 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the conditional branch on signed flags.  0x0F 0x80 is the
      // base of the two-byte Jcc rel32 opcodes; two opcode bytes plus a 4-byte
      // displacement give size(6).  The condition from $cop is combined with
      // the opcode inside the Jcc encoding class (defined outside this
      // section).
12216 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12217 %{
12218   match(If cop cr);
12219   effect(USE labl);
12220 
12221   ins_cost(300);
12222   format %{ "j$cop     $labl" %}
12223   size(6);
12224   opcode(0x0F, 0x80);
12225   ins_encode(Jcc(cop, labl));
12226   ins_pipe(pipe_jcc);
12227   ins_pc_relative(1);
12228 %}
12229 
12230 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same 6-byte Jcc rel32 encoding as jmpCon, but matching the
      // CountedLoopEnd node (the back-branch of a counted loop) on signed
      // flags.
12231 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12232 %{
12233   match(CountedLoopEnd cop cr);
12234   effect(USE labl);
12235 
12236   ins_cost(300);
12237   format %{ "j$cop     $labl\t# loop end" %}
12238   size(6);
12239   opcode(0x0F, 0x80);
12240   ins_encode(Jcc(cop, labl));
12241   ins_pipe(pipe_jcc);
12242   ins_pc_relative(1);
12243 %}
12244 
12245 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd branch on unsigned flags (cmpOpU / rFlagsRegU); same
      // 6-byte Jcc rel32 encoding as jmpLoopEnd.
12246 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12247   match(CountedLoopEnd cop cmp);
12248   effect(USE labl);
12249 
12250   ins_cost(300);
12251   format %{ "j$cop,u   $labl\t# loop end" %}
12252   size(6);
12253   opcode(0x0F, 0x80);
12254   ins_encode(Jcc(cop, labl));
12255   ins_pipe(pipe_jcc);
12256   ins_pc_relative(1);
12257 %}
12258 
12259 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
      // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand classes
      // (both defined outside this section).  Same 6-byte Jcc rel32 encoding
      // as jmpLoopEndU, but with the lower cost of 200.
12260   match(CountedLoopEnd cop cmp);
12261   effect(USE labl);
12262 
12263   ins_cost(200);
12264   format %{ "j$cop,u   $labl\t# loop end" %}
12265   size(6);
12266   opcode(0x0F, 0x80);
12267   ins_encode(Jcc(cop, labl));
12268   ins_pipe(pipe_jcc);
12269   ins_pc_relative(1);
12270 %}
12271 
12272 // Jump Direct Conditional - using unsigned comparison
      // Long form (6-byte Jcc rel32) of the If branch on unsigned flags
      // (cmpOpU / rFlagsRegU).
12273 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12274   match(If cop cmp);
12275   effect(USE labl);
12276 
12277   ins_cost(300);
12278   format %{ "j$cop,u  $labl" %}
12279   size(6);
12280   opcode(0x0F, 0x80);
12281   ins_encode(Jcc(cop, labl));
12282   ins_pipe(pipe_jcc);
12283   ins_pc_relative(1);
12284 %}
12285 
12286 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
      // If branch for the cmpOpUCF / rFlagsRegUCF operand classes (both
      // defined outside this section).  Same 6-byte Jcc rel32 encoding as
      // jmpConU, but with the lower cost of 200.
12287   match(If cop cmp);
12288   effect(USE labl);
12289 
12290   ins_cost(200);
12291   format %{ "j$cop,u  $labl" %}
12292   size(6);
12293   opcode(0x0F, 0x80);
12294   ins_encode(Jcc(cop, labl));
12295   ins_pipe(pipe_jcc);
12296   ins_pc_relative(1);
12297 %}
12298 
12299 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
      // If branch that needs two jumps: a jump on the parity flag followed by
      // the jump on $cop.  Only equal and notEqual are supported (anything
      // else hits ShouldNotReachHere).
      //   notEqual: both jumps target $labl.
      //   equal:    the parity jump skips over the second jump (displacement
      //             6 == size of one Jcc rel32), so $labl is reached only when
      //             parity is clear and the condition holds.
      // Two 6-byte Jcc rel32 instructions give size(12).  Each displacement is
      // computed as label position minus the address just past its own 4-byte
      // displacement field; 0 is emitted when no label is bound yet.
12300   match(If cop cmp);
12301   effect(USE labl);
12302 
12303   ins_cost(200);
12304   format %{ $$template
12305     if ($cop$$cmpcode == Assembler::notEqual) {
12306       $$emit$$"jp,u   $labl\n\t"
12307       $$emit$$"j$cop,u   $labl"
12308     } else {
12309       $$emit$$"jp,u   done\n\t"
12310       $$emit$$"j$cop,u   $labl\n\t"
12311       $$emit$$"done:"
12312     }
12313   %}
12314   size(12);
12315   opcode(0x0F, 0x80);
12316   ins_encode %{
12317     Label* l = $labl$$label;
12318     $$$emit8$primary;
12319     emit_cc(cbuf, $secondary, Assembler::parity);
12320     int parity_disp = -1;
12321     if ($cop$$cmpcode == Assembler::notEqual) {
12322        // the two jumps 6 bytes apart so the jump distances are too
12323        parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12324     } else if ($cop$$cmpcode == Assembler::equal) {
12325        parity_disp = 6;
12326     } else {
12327        ShouldNotReachHere();
12328     }
12329     emit_d32(cbuf, parity_disp);
12330     $$$emit8$primary;
12331     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12332     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12333     emit_d32(cbuf, disp);
12334   %}
12335   ins_pipe(pipe_jcc);
12336   ins_pc_relative(1);
12337 %}
12338 
12339 // ============================================================================
12340 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12341 // superklass array for an instance of the superklass.  Set a hidden
12342 // internal cache on a hit (cache is checked with exposed code in
12343 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12344 // encoding ALSO sets flags.
12345 
      // Value-producing form: the result is delivered in rdi (zero on a hit,
      // non-zero on a miss).  sub is pinned to rsi and super to rax; rcx is
      // used as the scan counter and is killed together with the flags.
      // opcode(0x1) is the switch that tells the shared
      // enc_PartialSubtypeCheck encoding class to emit the final XOR of rdi;
      // partialSubtypeCheck_vs_Zero passes 0x0 to omit it.  The format string
      // is debug output only.
12346 instruct partialSubtypeCheck(rdi_RegP result,
12347                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12348                              rFlagsReg cr)
12349 %{
12350   match(Set result (PartialSubtypeCheck sub super));
12351   effect(KILL rcx, KILL cr);
12352 
12353   ins_cost(1100);  // slightly larger than the next version
12354   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12355             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12356             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12357             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12358             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12359             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12360             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12361     "miss:\t" %}
12362 
12363   opcode(0x1); // Force a XOR of RDI
12364   ins_encode(enc_PartialSubtypeCheck());
12365   ins_pipe(pipe_slow);
12366 %}
12367 
12368 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12369                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12370                                      immP0 zero,
12371                                      rdi_RegP result)
12372 %{
      // Flags-producing form: matches a PartialSubtypeCheck whose result is
      // immediately compared with NULL, so only the flags are defined and rdi
      // is merely a killed scratch register.  Slightly cheaper (1000) than
      // the value-producing partialSubtypeCheck (1100).  opcode(0x0) tells the
      // shared enc_PartialSubtypeCheck encoding class to skip the final XOR of
      // rdi.  The format string is debug output only.
12373   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12374   effect(KILL rcx, KILL result);
12375 
12376   ins_cost(1000);
12377   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12378             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12379             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12380             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12381             "jne,s   miss\t\t# Missed: flags nz\n\t"
12382             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12383     "miss:\t" %}
12384 
12385   opcode(0x0); // No need to XOR RDI
12386   ins_encode(enc_PartialSubtypeCheck());
12387   ins_pipe(pipe_slow);
12388 %}
12389 
12390 // ============================================================================
12391 // Branch Instructions -- short offset versions
12392 //
12393 // These instructions are used to replace jumps of a long offset (the default
12394 // match) with jumps of a shorter offset.  These instructions are all tagged
12395 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12396 // match rules in general matching.  Instead, the ADLC generates a conversion
12397 // method in the MachNode which can be used to do in-place replacement of the
12398 // long variant with the shorter variant.  The compiler will determine if a
12399 // branch can be taken by the is_short_branch_offset() predicate in the machine
12400 // specific code section of the file.
12401 
12402 // Jump Direct - Label defines a relative address from JMP+1
      // Short replacement for jmpDir: opcode 0xEB (JMP rel8), one opcode byte
      // plus a 1-byte displacement, hence size(2).  ins_short_branch(1) marks
      // it as a short-branch variant.
12403 instruct jmpDir_short(label labl) %{
12404   match(Goto);
12405   effect(USE labl);
12406 
12407   ins_cost(300);
12408   format %{ "jmp,s   $labl" %}
12409   size(2);
12410   opcode(0xEB);
12411   ins_encode(OpcP, LblShort(labl));
12412   ins_pipe(pipe_jmp);
12413   ins_pc_relative(1);
12414   ins_short_branch(1);
12415 %}
12416 
12417 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short replacement for jmpCon.  0x70 is the base of the one-byte
      // Jcc rel8 opcodes; opcode plus a 1-byte displacement give size(2).
12418 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12419   match(If cop cr);
12420   effect(USE labl);
12421 
12422   ins_cost(300);
12423   format %{ "j$cop,s   $labl" %}
12424   size(2);
12425   opcode(0x70);
12426   ins_encode(JccShort(cop, labl));
12427   ins_pipe(pipe_jcc);
12428   ins_pc_relative(1);
12429   ins_short_branch(1);
12430 %}
12431 
12432 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short replacement for jmpLoopEnd: 2-byte Jcc rel8 (opcode base 0x70)
      // for a CountedLoopEnd branch on signed flags.
12433 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12434   match(CountedLoopEnd cop cr);
12435   effect(USE labl);
12436 
12437   ins_cost(300);
12438   format %{ "j$cop,s   $labl\t# loop end" %}
12439   size(2);
12440   opcode(0x70);
12441   ins_encode(JccShort(cop, labl));
12442   ins_pipe(pipe_jcc);
12443   ins_pc_relative(1);
12444   ins_short_branch(1);
12445 %}
12446 
12447 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short replacement for jmpLoopEndU: 2-byte Jcc rel8 (opcode base 0x70)
      // for a CountedLoopEnd branch on unsigned flags.
12448 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12449   match(CountedLoopEnd cop cmp);
12450   effect(USE labl);
12451 
12452   ins_cost(300);
12453   format %{ "j$cop,us  $labl\t# loop end" %}
12454   size(2);
12455   opcode(0x70);
12456   ins_encode(JccShort(cop, labl));
12457   ins_pipe(pipe_jcc);
12458   ins_pc_relative(1);
12459   ins_short_branch(1);
12460 %}
12461 
12462 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
      // Short replacement for jmpLoopEndUCF: 2-byte Jcc rel8 (opcode base
      // 0x70).  Note the cost here is 300 whereas the long form uses 200.
12463   match(CountedLoopEnd cop cmp);
12464   effect(USE labl);
12465 
12466   ins_cost(300);
12467   format %{ "j$cop,us  $labl\t# loop end" %}
12468   size(2);
12469   opcode(0x70);
12470   ins_encode(JccShort(cop, labl));
12471   ins_pipe(pipe_jcc);
12472   ins_pc_relative(1);
12473   ins_short_branch(1);
12474 %}
12475 
12476 // Jump Direct Conditional - using unsigned comparison
      // Short replacement for jmpConU: 2-byte Jcc rel8 (opcode base 0x70) for
      // an If branch on unsigned flags.
12477 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12478   match(If cop cmp);
12479   effect(USE labl);
12480 
12481   ins_cost(300);
12482   format %{ "j$cop,us  $labl" %}
12483   size(2);
12484   opcode(0x70);
12485   ins_encode(JccShort(cop, labl));
12486   ins_pipe(pipe_jcc);
12487   ins_pc_relative(1);
12488   ins_short_branch(1);
12489 %}
12490 
12491 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
      // Short replacement for jmpConUCF: 2-byte Jcc rel8 (opcode base 0x70).
      // Note the cost here is 300 whereas the long form uses 200.
12492   match(If cop cmp);
12493   effect(USE labl);
12494 
12495   ins_cost(300);
12496   format %{ "j$cop,us  $labl" %}
12497   size(2);
12498   opcode(0x70);
12499   ins_encode(JccShort(cop, labl));
12500   ins_pipe(pipe_jcc);
12501   ins_pc_relative(1);
12502   ins_short_branch(1);
12503 %}
12504 
12505 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short conditional branch for If on a cmpOpUCF2 condition. The
        // encoding emits two consecutive short jumps (opcode byte + 8-bit
        // displacement each, 4 bytes total): first a jump on parity, then a
        // jump on the condition held in cop. Only the equal and notEqual
        // condition codes are handled:
        //  - notEqual: both jumps target labl.
        //  - equal:    the parity jump gets displacement 2, i.e. it skips the
        //              2-byte jump that follows (the done label in the format).
12506   match(If cop cmp);
12507   effect(USE labl);  // the branch target label is only read
12508
12509   ins_cost(300);
12510   format %{ $$template
12511     if ($cop$$cmpcode == Assembler::notEqual) {
12512       $$emit$$"jp,u,s   $labl\n\t"
12513       $$emit$$"j$cop,u,s   $labl"
12514     } else {
12515       $$emit$$"jp,u,s   done\n\t"
12516       $$emit$$"j$cop,u,s  $labl\n\t"
12517       $$emit$$"done:"
12518     }
12519   %}
12520   size(4);  // two 2-byte short jumps
12521   opcode(0x70);
12522   ins_encode %{
12523     Label* l = $labl$$label;
          // First jump: primary opcode combined with the parity condition.
12524     emit_cc(cbuf, $primary, Assembler::parity);
          // Placeholder value; every supported case below assigns it.
12525     int parity_disp = -1;
12526     if ($cop$$cmpcode == Assembler::notEqual) {
            // Label position relative to the end of the displacement byte that
            // is about to be emitted; 0 when no label is available.
12527       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12528     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip over the second (2-byte) jump emitted below.
12529       parity_disp = 2;
12530     } else {
12531       ShouldNotReachHere();
12532     }
12533     emit_d8(cbuf, parity_disp);
          // Second jump: primary opcode combined with the condition in cop.
12534     emit_cc(cbuf, $primary, $cop$$cmpcode);
          // Computed the same way as above, but measured from the second jump.
12535     int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12536     emit_d8(cbuf, disp);
          // Both displacements were emitted as 8-bit values; check that they fit.
12537     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12538     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12539   %}
12540   ins_pipe(pipe_jcc);
12541   ins_pc_relative(1);
12542   ins_short_branch(1);  // identifies this rule as a short-branch variant
12543 %}
12544 
12545 // ============================================================================
12546 // inlined locking and unlocking
12547 
12548 instruct cmpFastLock(rFlagsReg cr,
12549                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12550 %{
        // Inlined lock: sets the flags register cr from FastLock(object, box).
        // The code itself comes from the Fast_Lock encoding class, which is
        // defined outside this chunk. tmp (rax_RegI operand class) and scr are
        // declared as temporaries for that encoding.
12551   match(Set cr (FastLock object box));
12552   effect(TEMP tmp, TEMP scr);
12553
12554   ins_cost(300);
12555   format %{ "fastlock $object,$box,$tmp,$scr" %}
12556   ins_encode(Fast_Lock(object, box, tmp, scr));
12557   ins_pipe(pipe_slow);
12558   ins_pc_relative(1);
12559 %}
12560 
12561 instruct cmpFastUnlock(rFlagsReg cr,
12562                        rRegP object, rax_RegP box, rRegP tmp)
12563 %{
        // Inlined unlock: sets the flags register cr from
        // FastUnlock(object, box). The code itself comes from the Fast_Unlock
        // encoding class, which is defined outside this chunk. box uses the
        // rax_RegP operand class; tmp is declared as a temporary.
12564   match(Set cr (FastUnlock object box));
12565   effect(TEMP tmp);
12566
12567   ins_cost(300);
12568   format %{ "fastunlock $object, $box, $tmp" %}
12569   ins_encode(Fast_Unlock(object, box, tmp));
12570   ins_pipe(pipe_slow);
12571   ins_pc_relative(1);
12572 %}
12573 
12574 
12575 // ============================================================================
12576 // Safepoint Instructions
12577 instruct safePoint_poll(rFlagsReg cr)
12578 %{
        // Safepoint poll: per the format string this is a testl of rax against
        // the poll page, addressed rip-relative. The instruction produces no
        // value; the flags register is declared killed. The bytes are produced
        // by enc_safepoint_poll, which is defined outside this chunk.
12579   match(SafePoint);
12580   effect(KILL cr);
12581
12582   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12583             "# Safepoint: poll for GC" %}
12584   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12585   ins_cost(125);
12586   ins_encode(enc_safepoint_poll);
12587   ins_pipe(ialu_reg_mem);
12588 %}
12589 
12590 // ============================================================================
12591 // Procedure Call/Return Instructions
12592 // Call Java Static Instruction
12593 // Note: If this code changes, the corresponding ret_addr_offset() and
12594 //       compute_padding() functions will have to be adjusted.
      // Selected for CallStaticJava nodes that are NOT method-handle invokes
      // (see the predicate); the method-handle case has its own rule.
12595 instruct CallStaticJavaDirect(method meth) %{
12596   match(CallStaticJava);
12597   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12598   effect(USE meth);
12599
12600   ins_cost(300);
12601   format %{ "call,static " %}
12602   opcode(0xE8); /* E8 cd */
12603   ins_encode(Java_Static_Call(meth), call_epilog);  // call, then the shared call epilog encoding
12604   ins_pipe(pipe_slow);
12605   ins_pc_relative(1);
12606   ins_alignment(4);
12607 %}
12608 
12609 // Call Java Static Instruction (method handle version)
12610 // Note: If this code changes, the corresponding ret_addr_offset() and
12611 //       compute_padding() functions will have to be adjusted.
      // Selected for CallStaticJava nodes that ARE method-handle invokes (see
      // the predicate). Compared with the direct rule, the call is bracketed
      // by the preserve_SP and restore_SP encodings.
12612 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp) %{
12613   match(CallStaticJava);
12614   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12615   effect(USE meth);
12616   // RBP is saved by all callees (for interpreter stack correction).
12617   // We use it here for a similar purpose, in {preserve,restore}_SP.
12618
12619   ins_cost(300);
12620   format %{ "call,static/MethodHandle " %}
12621   opcode(0xE8); /* E8 cd */
        // Emission order: save SP, perform the call, restore SP, call epilog.
12622   ins_encode(preserve_SP,
12623              Java_Static_Call(meth),
12624              restore_SP,
12625              call_epilog);
12626   ins_pipe(pipe_slow);
12627   ins_pc_relative(1);
12628   ins_alignment(4);
12629 %}
12630 
12631 // Call Java Dynamic Instruction
12632 // Note: If this code changes, the corresponding ret_addr_offset() and
12633 //       compute_padding() functions will have to be adjusted.
      // Matches CallDynamicJava. Per the format string, the call is preceded by
      // a movq that loads Universe::non_oop_word() into rax; both come from
      // the Java_Dynamic_Call encoding (defined outside this chunk).
12634 instruct CallDynamicJavaDirect(method meth)
12635 %{
12636   match(CallDynamicJava);
12637   effect(USE meth);
12638
12639   ins_cost(300);
12640   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12641             "call,dynamic " %}
12642   opcode(0xE8); /* E8 cd */
12643   ins_encode(Java_Dynamic_Call(meth), call_epilog);  // call, then the shared call epilog encoding
12644   ins_pipe(pipe_slow);
12645   ins_pc_relative(1);
12646   ins_alignment(4);
12647 %}
12648 
12649 // Call Runtime Instruction
      // Matches the CallRuntime ideal node and encodes it with Java_To_Runtime.
      // Unlike the Java call rules in this file, no call_epilog and no
      // ins_alignment are specified here.
12650 instruct CallRuntimeDirect(method meth)
12651 %{
12652   match(CallRuntime);
12653   effect(USE meth);
12654
12655   ins_cost(300);
12656   format %{ "call,runtime " %}
12657   opcode(0xE8); /* E8 cd */
12658   ins_encode(Java_To_Runtime(meth));
12659   ins_pipe(pipe_slow);
12660   ins_pc_relative(1);
12661 %}
12662 
12663 // Call runtime without safepoint
      // Matches the CallLeaf ideal node. Uses the same Java_To_Runtime
      // encoding and opcode as CallRuntimeDirect; only the matched node and
      // the format text differ.
12664 instruct CallLeafDirect(method meth)
12665 %{
12666   match(CallLeaf);
12667   effect(USE meth);
12668
12669   ins_cost(300);
12670   format %{ "call_leaf,runtime " %}
12671   opcode(0xE8); /* E8 cd */
12672   ins_encode(Java_To_Runtime(meth));
12673   ins_pipe(pipe_slow);
12674   ins_pc_relative(1);
12675 %}
12676 
12677 // Call runtime without safepoint
      // Matches the CallLeafNoFP ideal node. Uses the same Java_To_Runtime
      // encoding and opcode as CallLeafDirect; only the matched node and the
      // format text differ.
12678 instruct CallLeafNoFPDirect(method meth)
12679 %{
12680   match(CallLeafNoFP);
12681   effect(USE meth);
12682
12683   ins_cost(300);
12684   format %{ "call_leaf_nofp,runtime " %}
12685   opcode(0xE8); /* E8 cd */
12686   ins_encode(Java_To_Runtime(meth));
12687   ins_pipe(pipe_slow);
12688   ins_pc_relative(1);
12689 %}
12690 
12691 // Return Instruction
12692 // Remove the return address & jump to it.
12693 // Notice: We always emit a nop after a ret to make sure there is room
12694 // for safepoint patching
      // NOTE(review): the encoding listed in this rule is only OpcP with opcode
      // 0xC3; no nop is visible here. Confirm whether the notice above is
      // still accurate (OpcP is defined outside this chunk).
12695 instruct Ret()
12696 %{
12697   match(Return);
12698
12699   format %{ "ret" %}
12700   opcode(0xC3);
12701   ins_encode(OpcP);
12702   ins_pipe(pipe_jmp);
12703 %}
12704 
12705 // Tail Call; Jump from runtime stub to Java code.
12706 // Also known as an 'interprocedural jump'.
12707 // Target of jump will eventually return to caller.
12708 // TailJump below removes the return address.
      // Encoded as an indirect jump through the jump_target register
      // (opcode FF /4). jump_target uses the no_rbp_RegP operand class;
      // method_oop uses rbx_RegP and appears only in the match rule, not in
      // the encoding.
12709 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12710 %{
12711   match(TailCall jump_target method_oop);
12712
12713   ins_cost(300);
12714   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12715   opcode(0xFF, 0x4); /* Opcode FF /4 */
12716   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12717   ins_pipe(pipe_jmp);
12718 %}
12719 
12720 // Tail Jump; remove the return address; jump to target.
12721 // TailCall above leaves the return address around.
      // The encoding first emits byte 0x5a (popq rdx, which pops the return
      // address into rdx) and then the same indirect jump sequence as
      // TailCalljmpInd (opcode FF /4). ex_oop uses the rax_RegP operand class
      // and appears only in the match rule.
12722 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12723 %{
12724   match(TailJump jump_target ex_oop);
12725
12726   ins_cost(300);
12727   format %{ "popq    rdx\t# pop return address\n\t"
12728             "jmp     $jump_target" %}
12729   opcode(0xFF, 0x4); /* Opcode FF /4 */
12730   ins_encode(Opcode(0x5a), // popq rdx
12731              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12732   ins_pipe(pipe_jmp);
12733 %}
12734 
12735 // Create exception oop: created by stack-crawling runtime code.
12736 // Created exception is now available to this handler, and is setup
12737 // just prior to jumping to this handler.  No code emitted.
      // The rule only binds the CreateEx result to the rax_RegP operand:
      // size is 0, the encoding list is empty and the pipeline class is empty.
12738 instruct CreateException(rax_RegP ex_oop)
12739 %{
12740   match(Set ex_oop (CreateEx));
12741
12742   size(0);
12743   // use the following format syntax
12744   format %{ "# exception oop is in rax; no code emitted" %}
12745   ins_encode();
12746   ins_pipe(empty);
12747 %}
12748 
12749 // Rethrow exception:
12750 // The exception oop will come in the first argument position.
12751 // Then JUMP (not call) to the rethrow stub code.
      // Takes no operands; matches the Rethrow ideal node. The jump is produced
      // by the enc_rethrow encoding, which is defined outside this chunk.
12752 instruct RethrowException()
12753 %{
12754   match(Rethrow);
12755
12756   // use the following format syntax
12757   format %{ "jmp     rethrow_stub" %}
12758   ins_encode(enc_rethrow);
12759   ins_pipe(pipe_jmp);
12760 %}
12761 
12762 
12763 //----------PEEPHOLE RULES-----------------------------------------------------
12764 // These must follow all instruction definitions as they use the names
12765 // defined in the instructions definitions.
12766 //
12767 // peepmatch ( root_instr_name [preceding_instruction]* );
12768 //
12769 // peepconstraint %{
12770 // (instruction_number.operand_name relational_op instruction_number.operand_name
12771 //  [, ...] );
12772 // // instruction numbers are zero-based using left to right order in peepmatch
12773 //
12774 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12775 // // provide an instruction_number.operand_name for each operand that appears
12776 // // in the replacement instruction's match rule
12777 //
12778 // ---------VM FLAGS---------------------------------------------------------
12779 //
12780 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12781 //
12782 // Each peephole rule is given an identifying number starting with zero and
12783 // increasing by one in the order seen by the parser.  An individual peephole
12784 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12785 // on the command-line.
12786 //
12787 // ---------CURRENT LIMITATIONS----------------------------------------------
12788 //
12789 // Only match adjacent instructions in same basic block
12790 // Only equality constraints
12791 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12792 // Only one replacement instruction
12793 //
12794 // ---------EXAMPLE----------------------------------------------------------
12795 //
12796 // // pertinent parts of existing instructions in architecture description
12797 // instruct movI(rRegI dst, rRegI src)
12798 // %{
12799 //   match(Set dst (CopyI src));
12800 // %}
12801 //
12802 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12803 // %{
12804 //   match(Set dst (AddI dst src));
12805 //   effect(KILL cr);
12806 // %}
12807 //
12808 // // Change (inc mov) to lea
12809 // peephole %{
12810 //   // increment preceded by register-register move
12811 //   peepmatch ( incI_rReg movI );
12812 //   // require that the destination register of the increment
12813 //   // match the destination register of the move
12814 //   peepconstraint ( 0.dst == 1.dst );
12815 //   // construct a replacement instruction that sets
12816 //   // the destination to ( move's source register + one )
12817 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12818 // %}
12819 //
12820 
12821 // Implementation no longer uses movX instructions since
12822 // machine-independent system no longer uses CopyX nodes.
12823 //
12824 // peephole
12825 // %{
12826 //   peepmatch (incI_rReg movI);
12827 //   peepconstraint (0.dst == 1.dst);
12828 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12829 // %}
12830 
12831 // peephole
12832 // %{
12833 //   peepmatch (decI_rReg movI);
12834 //   peepconstraint (0.dst == 1.dst);
12835 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12836 // %}
12837 
12838 // peephole
12839 // %{
12840 //   peepmatch (addI_rReg_imm movI);
12841 //   peepconstraint (0.dst == 1.dst);
12842 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12843 // %}
12844 
12845 // peephole
12846 // %{
12847 //   peepmatch (incL_rReg movL);
12848 //   peepconstraint (0.dst == 1.dst);
12849 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12850 // %}
12851 
12852 // peephole
12853 // %{
12854 //   peepmatch (decL_rReg movL);
12855 //   peepconstraint (0.dst == 1.dst);
12856 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12857 // %}
12858 
12859 // peephole
12860 // %{
12861 //   peepmatch (addL_rReg_imm movL);
12862 //   peepconstraint (0.dst == 1.dst);
12863 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12864 // %}
12865 
12866 // peephole
12867 // %{
12868 //   peepmatch (addP_rReg_imm movP);
12869 //   peepconstraint (0.dst == 1.dst);
12870 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12871 // %}
12872 
12873 // // Change load of spilled value to only a spill
12874 // instruct storeI(memory mem, rRegI src)
12875 // %{
12876 //   match(Set mem (StoreI mem src));
12877 // %}
12878 //
12879 // instruct loadI(rRegI dst, memory mem)
12880 // %{
12881 //   match(Set dst (LoadI mem));
12882 // %}
12883 //
12884 
12885 peephole
12886 %{
        // Instruction 0 is loadI (the root); instruction 1 is the storeI that
        // precedes it. When the load's destination register is the register
        // that was just stored (1.src == 0.dst) and both use the same memory
        // operand (1.mem == 0.mem), the pair is replaced by the store alone.
12887   peepmatch (loadI storeI);
12888   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
12889   peepreplace (storeI(1.mem 1.mem 1.src));
12890 %}
12891 
12892 peephole
12893 %{
        // Long variant of the rule above. Instruction 0 is loadL (the root);
        // instruction 1 is the storeL that precedes it. When the load's
        // destination register is the register that was just stored
        // (1.src == 0.dst) and both use the same memory operand
        // (1.mem == 0.mem), the pair is replaced by the store alone.
12894   peepmatch (loadL storeL);
12895   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
12896   peepreplace (storeL(1.mem 1.mem 1.src));
12897 %}
12898 
12899 //----------SMARTSPILL RULES---------------------------------------------------
12900 // These must follow all instruction definitions as they use the names
12901 // defined in the instructions definitions.