1 //
   2 // Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP, R12 and R15)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP, R12 and R15)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
// Short aliases for the Assembler relocation-format constants that are
// passed as the `format` argument of the emit_*_reloc helpers below.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so a local
// MacroAssembler named _masm must be in scope wherever it is used.
#define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
 614 #ifndef PRODUCT
 615 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 616 {
 617   st->print("INT3");
 618 }
 619 #endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 623   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 624   cbuf.insts()->emit_int8(c);
 625 }
 626 
 627 // EMIT_CC()
 628 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 629   unsigned char c = (unsigned char) (f1 | f2);
 630   cbuf.insts()->emit_int8(c);
 631 }
 632 
 633 // EMIT_OPCODE()
 634 void emit_opcode(CodeBuffer &cbuf, int code) {
 635   cbuf.insts()->emit_int8((unsigned char) code);
 636 }
 637 
 638 // EMIT_OPCODE() w/ relocation information
 639 void emit_opcode(CodeBuffer &cbuf,
 640                  int code, relocInfo::relocType reloc, int offset, int format)
 641 {
 642   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 643   emit_opcode(cbuf, code);
 644 }
 645 
 646 // EMIT_D8()
 647 void emit_d8(CodeBuffer &cbuf, int d8) {
 648   cbuf.insts()->emit_int8((unsigned char) d8);
 649 }
 650 
 651 // EMIT_D16()
 652 void emit_d16(CodeBuffer &cbuf, int d16) {
 653   cbuf.insts()->emit_int16(d16);
 654 }
 655 
 656 // EMIT_D32()
 657 void emit_d32(CodeBuffer &cbuf, int d32) {
 658   cbuf.insts()->emit_int32(d32);
 659 }
 660 
 661 // EMIT_D64()
 662 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 663   cbuf.insts()->emit_int64(d64);
 664 }
 665 
 666 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 667 void emit_d32_reloc(CodeBuffer& cbuf,
 668                     int d32,
 669                     relocInfo::relocType reloc,
 670                     int format)
 671 {
 672   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 673   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 674   cbuf.insts()->emit_int32(d32);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 679 #ifdef ASSERT
 680   if (rspec.reloc()->type() == relocInfo::oop_type &&
 681       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 682     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 683   }
 684 #endif
 685   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 686   cbuf.insts()->emit_int32(d32);
 687 }
 688 
 689 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 690   address next_ip = cbuf.insts_end() + 4;
 691   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 692                  external_word_Relocation::spec(addr),
 693                  RELOC_DISP32);
 694 }
 695 
 696 
 697 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 698 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 699   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 700   cbuf.insts()->emit_int64(d64);
 701 }
 702 
 703 // emit 64 bit value and construct relocation entry from RelocationHolder
 704 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 705 #ifdef ASSERT
 706   if (rspec.reloc()->type() == relocInfo::oop_type &&
 707       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 708     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 709            "cannot embed scavengable oops in code");
 710   }
 711 #endif
 712   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 713   cbuf.insts()->emit_int64(d64);
 714 }
 715 
 716 // Access stack slot for load or store
 717 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 718 {
 719   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 720   if (-0x80 <= disp && disp < 0x80) {
 721     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 722     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 723     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 724   } else {
 725     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 726     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 727     emit_d32(cbuf, disp);     // Displacement // R/M byte
 728   }
 729 }
 730 
 731    // rRegI ereg, memory mem) %{    // emit_reg_mem
 732 void encode_RegMem(CodeBuffer &cbuf,
 733                    int reg,
 734                    int base, int index, int scale, int disp, bool disp_is_oop)
 735 {
 736   assert(!disp_is_oop, "cannot have disp");
 737   int regenc = reg & 7;
 738   int baseenc = base & 7;
 739   int indexenc = index & 7;
 740 
 741   // There is no index & no scale, use form without SIB byte
 742   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 743     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 744     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 745       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 746     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 747       // If 8-bit displacement, mode 0x1
 748       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 749       emit_d8(cbuf, disp);
 750     } else {
 751       // If 32-bit displacement
 752       if (base == -1) { // Special flag for absolute address
 753         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 754         if (disp_is_oop) {
 755           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 756         } else {
 757           emit_d32(cbuf, disp);
 758         }
 759       } else {
 760         // Normal base + offset
 761         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 762         if (disp_is_oop) {
 763           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 764         } else {
 765           emit_d32(cbuf, disp);
 766         }
 767       }
 768     }
 769   } else {
 770     // Else, encode with the SIB byte
 771     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 772     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 773       // If no displacement
 774       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 775       emit_rm(cbuf, scale, indexenc, baseenc);
 776     } else {
 777       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 778         // If 8-bit displacement, mode 0x1
 779         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 780         emit_rm(cbuf, scale, indexenc, baseenc);
 781         emit_d8(cbuf, disp);
 782       } else {
 783         // If 32-bit displacement
 784         if (base == 0x04 ) {
 785           emit_rm(cbuf, 0x2, regenc, 0x4);
 786           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 787         } else {
 788           emit_rm(cbuf, 0x2, regenc, 0x4);
 789           emit_rm(cbuf, scale, indexenc, baseenc); // *
 790         }
 791         if (disp_is_oop) {
 792           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 793         } else {
 794           emit_d32(cbuf, disp);
 795         }
 796       }
 797     }
 798   }
 799 }
 800 
 801 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 802 {
 803   if (dstenc != srcenc) {
 804     if (dstenc < 8) {
 805       if (srcenc >= 8) {
 806         emit_opcode(cbuf, Assembler::REX_B);
 807         srcenc -= 8;
 808       }
 809     } else {
 810       if (srcenc < 8) {
 811         emit_opcode(cbuf, Assembler::REX_R);
 812       } else {
 813         emit_opcode(cbuf, Assembler::REX_RB);
 814         srcenc -= 8;
 815       }
 816       dstenc -= 8;
 817     }
 818 
 819     emit_opcode(cbuf, 0x8B);
 820     emit_rm(cbuf, 0x3, dstenc, srcenc);
 821   }
 822 }
 823 
 824 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 825   if( dst_encoding == src_encoding ) {
 826     // reg-reg copy, use an empty encoding
 827   } else {
 828     MacroAssembler _masm(&cbuf);
 829 
 830     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 831   }
 832 }
 833 
 834 
 835 //=============================================================================
 836 #ifndef PRODUCT
 837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 838 {
 839   Compile* C = ra_->C;
 840 
 841   int framesize = C->frame_slots() << LogBytesPerInt;
 842   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 843   // Remove wordSize for return adr already pushed
 844   // and another for the RBP we are going to save
 845   framesize -= 2*wordSize;
 846   bool need_nop = true;
 847 
 848   // Calls to C2R adapters often do not accept exceptional returns.
 849   // We require that their callers must bang for them.  But be
 850   // careful, because some VM calls (such as call site linkage) can
 851   // use several kilobytes of stack.  But the stack safety zone should
 852   // account for that.  See bugs 4446381, 4468289, 4497237.
 853   if (C->need_stack_bang(framesize)) {
 854     st->print_cr("# stack bang"); st->print("\t");
 855     need_nop = false;
 856   }
 857   st->print_cr("pushq   rbp"); st->print("\t");
 858 
 859   if (VerifyStackAtCalls) {
 860     // Majik cookie to verify stack depth
 861     st->print_cr("pushq   0xffffffffbadb100d"
 862                   "\t# Majik cookie for stack depth check");
 863     st->print("\t");
 864     framesize -= wordSize; // Remove 2 for cookie
 865     need_nop = false;
 866   }
 867 
 868   if (framesize) {
 869     st->print("subq    rsp, #%d\t# Create frame", framesize);
 870     if (framesize < 0x80 && need_nop) {
 871       st->print("\n\tnop\t# nop for patch_verified_entry");
 872     }
 873   }
 874 }
 875 #endif
 876 
 877 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 878 {
 879   Compile* C = ra_->C;
 880 
 881   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 882   // NativeJump::patch_verified_entry will be able to patch out the entry
 883   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 884   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 885   // 6 bytes. So if we don't do the fldcw or the push then we must
 886   // use the 6 byte frame allocation even if we have no frame. :-(
 887   // If method sets FPU control word do it now
 888 
 889   int framesize = C->frame_slots() << LogBytesPerInt;
 890   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 891   // Remove wordSize for return adr already pushed
 892   // and another for the RBP we are going to save
 893   framesize -= 2*wordSize;
 894   bool need_nop = true;
 895 
 896   // Calls to C2R adapters often do not accept exceptional returns.
 897   // We require that their callers must bang for them.  But be
 898   // careful, because some VM calls (such as call site linkage) can
 899   // use several kilobytes of stack.  But the stack safety zone should
 900   // account for that.  See bugs 4446381, 4468289, 4497237.
 901   if (C->need_stack_bang(framesize)) {
 902     MacroAssembler masm(&cbuf);
 903     masm.generate_stack_overflow_check(framesize);
 904     need_nop = false;
 905   }
 906 
 907   // We always push rbp so that on return to interpreter rbp will be
 908   // restored correctly and we can correct the stack.
 909   emit_opcode(cbuf, 0x50 | RBP_enc);
 910 
 911   if (VerifyStackAtCalls) {
 912     // Majik cookie to verify stack depth
 913     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 914     emit_d32(cbuf, 0xbadb100d);
 915     framesize -= wordSize; // Remove 2 for cookie
 916     need_nop = false;
 917   }
 918 
 919   if (framesize) {
 920     emit_opcode(cbuf, Assembler::REX_W);
 921     if (framesize < 0x80) {
 922       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 923       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 924       emit_d8(cbuf, framesize);
 925       if (need_nop) {
 926         emit_opcode(cbuf, 0x90); // nop
 927       }
 928     } else {
 929       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 930       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 931       emit_d32(cbuf, framesize);
 932     }
 933   }
 934 
 935   C->set_frame_complete(cbuf.insts_size());
 936 
 937 #ifdef ASSERT
 938   if (VerifyStackAtCalls) {
 939     Label L;
 940     MacroAssembler masm(&cbuf);
 941     masm.push(rax);
 942     masm.mov(rax, rsp);
 943     masm.andptr(rax, StackAlignmentInBytes-1);
 944     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 945     masm.pop(rax);
 946     masm.jcc(Assembler::equal, L);
 947     masm.stop("Stack is not properly aligned!");
 948     masm.bind(L);
 949   }
 950 #endif
 951 }
 952 
 953 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 954 {
 955   return MachNode::size(ra_); // too many variables; just compute it
 956                               // the hard way
 957 }
 958 
 959 int MachPrologNode::reloc() const
 960 {
 961   return 0; // a large enough number
 962 }
 963 
 964 //=============================================================================
 965 #ifndef PRODUCT
 966 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 967 {
 968   Compile* C = ra_->C;
 969   int framesize = C->frame_slots() << LogBytesPerInt;
 970   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 971   // Remove word for return adr already pushed
 972   // and RBP
 973   framesize -= 2*wordSize;
 974 
 975   if (framesize) {
 976     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 977     st->print("\t");
 978   }
 979 
 980   st->print_cr("popq\trbp");
 981   if (do_polling() && C->is_method_compilation()) {
 982     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
 983                   "# Safepoint: poll for GC");
 984     st->print("\t");
 985   }
 986 }
 987 #endif
 988 
 989 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 990 {
 991   Compile* C = ra_->C;
 992   int framesize = C->frame_slots() << LogBytesPerInt;
 993   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 994   // Remove word for return adr already pushed
 995   // and RBP
 996   framesize -= 2*wordSize;
 997 
 998   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 999 
1000   if (framesize) {
1001     emit_opcode(cbuf, Assembler::REX_W);
1002     if (framesize < 0x80) {
1003       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1004       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1005       emit_d8(cbuf, framesize);
1006     } else {
1007       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1008       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1009       emit_d32(cbuf, framesize);
1010     }
1011   }
1012 
1013   // popq rbp
1014   emit_opcode(cbuf, 0x58 | RBP_enc);
1015 
1016   if (do_polling() && C->is_method_compilation()) {
1017     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1018     // XXX reg_mem doesn't support RIP-relative addressing yet
1019     cbuf.set_insts_mark();
1020     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_return_type, 0); // XXX
1021     emit_opcode(cbuf, 0x85); // testl
1022     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1023     // cbuf.insts_mark() is beginning of instruction
1024     emit_d32_reloc(cbuf, os::get_polling_page());
1025 //                    relocInfo::poll_return_type,
1026   }
1027 }
1028 
1029 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1030 {
1031   Compile* C = ra_->C;
1032   int framesize = C->frame_slots() << LogBytesPerInt;
1033   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1034   // Remove word for return adr already pushed
1035   // and RBP
1036   framesize -= 2*wordSize;
1037 
1038   uint size = 0;
1039 
1040   if (do_polling() && C->is_method_compilation()) {
1041     size += 6;
1042   }
1043 
1044   // count popq rbp
1045   size++;
1046 
1047   if (framesize) {
1048     if (framesize < 0x80) {
1049       size += 4;
1050     } else if (framesize) {
1051       size += 7;
1052     }
1053   }
1054 
1055   return size;
1056 }
1057 
1058 int MachEpilogNode::reloc() const
1059 {
1060   return 2; // a large enough number
1061 }
1062 
1063 const Pipeline* MachEpilogNode::pipeline() const
1064 {
1065   return MachNode::pipeline_class();
1066 }
1067 
1068 int MachEpilogNode::safepoint_offset() const
1069 {
1070   return 0;
1071 }
1072 
1073 //=============================================================================
1074 
// Register class of a spill-copy operand, as returned by rc_class().
enum RC {
  rc_bad   = 0, // operand is not a valid OptoReg
  rc_int   = 1, // general purpose register
  rc_float = 2, // XMM register
  rc_stack = 3  // stack slot
};
1081 
1082 static enum RC rc_class(OptoReg::Name reg)
1083 {
1084   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1085 
1086   if (OptoReg::is_stack(reg)) return rc_stack;
1087 
1088   VMReg r = OptoReg::as_VMReg(reg);
1089 
1090   if (r->is_Register()) return rc_int;
1091 
1092   assert(r->is_XMMRegister(), "must be");
1093   return rc_float;
1094 }
1095 
1096 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1097                                        PhaseRegAlloc* ra_,
1098                                        bool do_size,
1099                                        outputStream* st) const
1100 {
1101 
1102   // Get registers to move
1103   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1104   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1105   OptoReg::Name dst_second = ra_->get_reg_second(this);
1106   OptoReg::Name dst_first = ra_->get_reg_first(this);
1107 
1108   enum RC src_second_rc = rc_class(src_second);
1109   enum RC src_first_rc = rc_class(src_first);
1110   enum RC dst_second_rc = rc_class(dst_second);
1111   enum RC dst_first_rc = rc_class(dst_first);
1112 
1113   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1114          "must move at least 1 register" );
1115 
1116   if (src_first == dst_first && src_second == dst_second) {
1117     // Self copy, no move
1118     return 0;
1119   } else if (src_first_rc == rc_stack) {
1120     // mem ->
1121     if (dst_first_rc == rc_stack) {
1122       // mem -> mem
1123       assert(src_second != dst_first, "overlap");
1124       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1125           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1126         // 64-bit
1127         int src_offset = ra_->reg2offset(src_first);
1128         int dst_offset = ra_->reg2offset(dst_first);
1129         if (cbuf) {
1130           emit_opcode(*cbuf, 0xFF);
1131           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1132 
1133           emit_opcode(*cbuf, 0x8F);
1134           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1135 
1136 #ifndef PRODUCT
1137         } else if (!do_size) {
1138           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1139                      "popq    [rsp + #%d]",
1140                      src_offset,
1141                      dst_offset);
1142 #endif
1143         }
1144         return
1145           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1146           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1147       } else {
1148         // 32-bit
1149         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1150         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1151         // No pushl/popl, so:
1152         int src_offset = ra_->reg2offset(src_first);
1153         int dst_offset = ra_->reg2offset(dst_first);
1154         if (cbuf) {
1155           emit_opcode(*cbuf, Assembler::REX_W);
1156           emit_opcode(*cbuf, 0x89);
1157           emit_opcode(*cbuf, 0x44);
1158           emit_opcode(*cbuf, 0x24);
1159           emit_opcode(*cbuf, 0xF8);
1160 
1161           emit_opcode(*cbuf, 0x8B);
1162           encode_RegMem(*cbuf,
1163                         RAX_enc,
1164                         RSP_enc, 0x4, 0, src_offset,
1165                         false);
1166 
1167           emit_opcode(*cbuf, 0x89);
1168           encode_RegMem(*cbuf,
1169                         RAX_enc,
1170                         RSP_enc, 0x4, 0, dst_offset,
1171                         false);
1172 
1173           emit_opcode(*cbuf, Assembler::REX_W);
1174           emit_opcode(*cbuf, 0x8B);
1175           emit_opcode(*cbuf, 0x44);
1176           emit_opcode(*cbuf, 0x24);
1177           emit_opcode(*cbuf, 0xF8);
1178 
1179 #ifndef PRODUCT
1180         } else if (!do_size) {
1181           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1182                      "movl    rax, [rsp + #%d]\n\t"
1183                      "movl    [rsp + #%d], rax\n\t"
1184                      "movq    rax, [rsp - #8]",
1185                      src_offset,
1186                      dst_offset);
1187 #endif
1188         }
1189         return
1190           5 + // movq
1191           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1192           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1193           5; // movq
1194       }
1195     } else if (dst_first_rc == rc_int) {
1196       // mem -> gpr
1197       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1198           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1199         // 64-bit
1200         int offset = ra_->reg2offset(src_first);
1201         if (cbuf) {
1202           if (Matcher::_regEncode[dst_first] < 8) {
1203             emit_opcode(*cbuf, Assembler::REX_W);
1204           } else {
1205             emit_opcode(*cbuf, Assembler::REX_WR);
1206           }
1207           emit_opcode(*cbuf, 0x8B);
1208           encode_RegMem(*cbuf,
1209                         Matcher::_regEncode[dst_first],
1210                         RSP_enc, 0x4, 0, offset,
1211                         false);
1212 #ifndef PRODUCT
1213         } else if (!do_size) {
1214           st->print("movq    %s, [rsp + #%d]\t# spill",
1215                      Matcher::regName[dst_first],
1216                      offset);
1217 #endif
1218         }
1219         return
1220           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1221       } else {
1222         // 32-bit
1223         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1224         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1225         int offset = ra_->reg2offset(src_first);
1226         if (cbuf) {
1227           if (Matcher::_regEncode[dst_first] >= 8) {
1228             emit_opcode(*cbuf, Assembler::REX_R);
1229           }
1230           emit_opcode(*cbuf, 0x8B);
1231           encode_RegMem(*cbuf,
1232                         Matcher::_regEncode[dst_first],
1233                         RSP_enc, 0x4, 0, offset,
1234                         false);
1235 #ifndef PRODUCT
1236         } else if (!do_size) {
1237           st->print("movl    %s, [rsp + #%d]\t# spill",
1238                      Matcher::regName[dst_first],
1239                      offset);
1240 #endif
1241         }
1242         return
1243           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1244           ((Matcher::_regEncode[dst_first] < 8)
1245            ? 3
1246            : 4); // REX
1247       }
1248     } else if (dst_first_rc == rc_float) {
1249       // mem-> xmm
1250       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1251           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1252         // 64-bit
1253         int offset = ra_->reg2offset(src_first);
1254         if (cbuf) {
1255           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1256           if (Matcher::_regEncode[dst_first] >= 8) {
1257             emit_opcode(*cbuf, Assembler::REX_R);
1258           }
1259           emit_opcode(*cbuf, 0x0F);
1260           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1261           encode_RegMem(*cbuf,
1262                         Matcher::_regEncode[dst_first],
1263                         RSP_enc, 0x4, 0, offset,
1264                         false);
1265 #ifndef PRODUCT
1266         } else if (!do_size) {
1267           st->print("%s  %s, [rsp + #%d]\t# spill",
1268                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1269                      Matcher::regName[dst_first],
1270                      offset);
1271 #endif
1272         }
1273         return
1274           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1275           ((Matcher::_regEncode[dst_first] < 8)
1276            ? 5
1277            : 6); // REX
1278       } else {
1279         // 32-bit
1280         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1281         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1282         int offset = ra_->reg2offset(src_first);
1283         if (cbuf) {
1284           emit_opcode(*cbuf, 0xF3);
1285           if (Matcher::_regEncode[dst_first] >= 8) {
1286             emit_opcode(*cbuf, Assembler::REX_R);
1287           }
1288           emit_opcode(*cbuf, 0x0F);
1289           emit_opcode(*cbuf, 0x10);
1290           encode_RegMem(*cbuf,
1291                         Matcher::_regEncode[dst_first],
1292                         RSP_enc, 0x4, 0, offset,
1293                         false);
1294 #ifndef PRODUCT
1295         } else if (!do_size) {
1296           st->print("movss   %s, [rsp + #%d]\t# spill",
1297                      Matcher::regName[dst_first],
1298                      offset);
1299 #endif
1300         }
1301         return
1302           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1303           ((Matcher::_regEncode[dst_first] < 8)
1304            ? 5
1305            : 6); // REX
1306       }
1307     }
1308   } else if (src_first_rc == rc_int) {
1309     // gpr ->
1310     if (dst_first_rc == rc_stack) {
1311       // gpr -> mem
1312       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1313           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1314         // 64-bit
1315         int offset = ra_->reg2offset(dst_first);
1316         if (cbuf) {
1317           if (Matcher::_regEncode[src_first] < 8) {
1318             emit_opcode(*cbuf, Assembler::REX_W);
1319           } else {
1320             emit_opcode(*cbuf, Assembler::REX_WR);
1321           }
1322           emit_opcode(*cbuf, 0x89);
1323           encode_RegMem(*cbuf,
1324                         Matcher::_regEncode[src_first],
1325                         RSP_enc, 0x4, 0, offset,
1326                         false);
1327 #ifndef PRODUCT
1328         } else if (!do_size) {
1329           st->print("movq    [rsp + #%d], %s\t# spill",
1330                      offset,
1331                      Matcher::regName[src_first]);
1332 #endif
1333         }
1334         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1335       } else {
1336         // 32-bit
1337         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1338         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1339         int offset = ra_->reg2offset(dst_first);
1340         if (cbuf) {
1341           if (Matcher::_regEncode[src_first] >= 8) {
1342             emit_opcode(*cbuf, Assembler::REX_R);
1343           }
1344           emit_opcode(*cbuf, 0x89);
1345           encode_RegMem(*cbuf,
1346                         Matcher::_regEncode[src_first],
1347                         RSP_enc, 0x4, 0, offset,
1348                         false);
1349 #ifndef PRODUCT
1350         } else if (!do_size) {
1351           st->print("movl    [rsp + #%d], %s\t# spill",
1352                      offset,
1353                      Matcher::regName[src_first]);
1354 #endif
1355         }
1356         return
1357           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1358           ((Matcher::_regEncode[src_first] < 8)
1359            ? 3
1360            : 4); // REX
1361       }
1362     } else if (dst_first_rc == rc_int) {
1363       // gpr -> gpr
1364       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1365           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1366         // 64-bit
1367         if (cbuf) {
1368           if (Matcher::_regEncode[dst_first] < 8) {
1369             if (Matcher::_regEncode[src_first] < 8) {
1370               emit_opcode(*cbuf, Assembler::REX_W);
1371             } else {
1372               emit_opcode(*cbuf, Assembler::REX_WB);
1373             }
1374           } else {
1375             if (Matcher::_regEncode[src_first] < 8) {
1376               emit_opcode(*cbuf, Assembler::REX_WR);
1377             } else {
1378               emit_opcode(*cbuf, Assembler::REX_WRB);
1379             }
1380           }
1381           emit_opcode(*cbuf, 0x8B);
1382           emit_rm(*cbuf, 0x3,
1383                   Matcher::_regEncode[dst_first] & 7,
1384                   Matcher::_regEncode[src_first] & 7);
1385 #ifndef PRODUCT
1386         } else if (!do_size) {
1387           st->print("movq    %s, %s\t# spill",
1388                      Matcher::regName[dst_first],
1389                      Matcher::regName[src_first]);
1390 #endif
1391         }
1392         return 3; // REX
1393       } else {
1394         // 32-bit
1395         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1396         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1397         if (cbuf) {
1398           if (Matcher::_regEncode[dst_first] < 8) {
1399             if (Matcher::_regEncode[src_first] >= 8) {
1400               emit_opcode(*cbuf, Assembler::REX_B);
1401             }
1402           } else {
1403             if (Matcher::_regEncode[src_first] < 8) {
1404               emit_opcode(*cbuf, Assembler::REX_R);
1405             } else {
1406               emit_opcode(*cbuf, Assembler::REX_RB);
1407             }
1408           }
1409           emit_opcode(*cbuf, 0x8B);
1410           emit_rm(*cbuf, 0x3,
1411                   Matcher::_regEncode[dst_first] & 7,
1412                   Matcher::_regEncode[src_first] & 7);
1413 #ifndef PRODUCT
1414         } else if (!do_size) {
1415           st->print("movl    %s, %s\t# spill",
1416                      Matcher::regName[dst_first],
1417                      Matcher::regName[src_first]);
1418 #endif
1419         }
1420         return
1421           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1422           ? 2
1423           : 3; // REX
1424       }
1425     } else if (dst_first_rc == rc_float) {
1426       // gpr -> xmm
1427       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1428           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1429         // 64-bit
1430         if (cbuf) {
1431           emit_opcode(*cbuf, 0x66);
1432           if (Matcher::_regEncode[dst_first] < 8) {
1433             if (Matcher::_regEncode[src_first] < 8) {
1434               emit_opcode(*cbuf, Assembler::REX_W);
1435             } else {
1436               emit_opcode(*cbuf, Assembler::REX_WB);
1437             }
1438           } else {
1439             if (Matcher::_regEncode[src_first] < 8) {
1440               emit_opcode(*cbuf, Assembler::REX_WR);
1441             } else {
1442               emit_opcode(*cbuf, Assembler::REX_WRB);
1443             }
1444           }
1445           emit_opcode(*cbuf, 0x0F);
1446           emit_opcode(*cbuf, 0x6E);
1447           emit_rm(*cbuf, 0x3,
1448                   Matcher::_regEncode[dst_first] & 7,
1449                   Matcher::_regEncode[src_first] & 7);
1450 #ifndef PRODUCT
1451         } else if (!do_size) {
1452           st->print("movdq   %s, %s\t# spill",
1453                      Matcher::regName[dst_first],
1454                      Matcher::regName[src_first]);
1455 #endif
1456         }
1457         return 5; // REX
1458       } else {
1459         // 32-bit
1460         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1461         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1462         if (cbuf) {
1463           emit_opcode(*cbuf, 0x66);
1464           if (Matcher::_regEncode[dst_first] < 8) {
1465             if (Matcher::_regEncode[src_first] >= 8) {
1466               emit_opcode(*cbuf, Assembler::REX_B);
1467             }
1468           } else {
1469             if (Matcher::_regEncode[src_first] < 8) {
1470               emit_opcode(*cbuf, Assembler::REX_R);
1471             } else {
1472               emit_opcode(*cbuf, Assembler::REX_RB);
1473             }
1474           }
1475           emit_opcode(*cbuf, 0x0F);
1476           emit_opcode(*cbuf, 0x6E);
1477           emit_rm(*cbuf, 0x3,
1478                   Matcher::_regEncode[dst_first] & 7,
1479                   Matcher::_regEncode[src_first] & 7);
1480 #ifndef PRODUCT
1481         } else if (!do_size) {
1482           st->print("movdl   %s, %s\t# spill",
1483                      Matcher::regName[dst_first],
1484                      Matcher::regName[src_first]);
1485 #endif
1486         }
1487         return
1488           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1489           ? 4
1490           : 5; // REX
1491       }
1492     }
1493   } else if (src_first_rc == rc_float) {
1494     // xmm ->
1495     if (dst_first_rc == rc_stack) {
1496       // xmm -> mem
1497       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1498           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1499         // 64-bit
1500         int offset = ra_->reg2offset(dst_first);
1501         if (cbuf) {
1502           emit_opcode(*cbuf, 0xF2);
1503           if (Matcher::_regEncode[src_first] >= 8) {
1504               emit_opcode(*cbuf, Assembler::REX_R);
1505           }
1506           emit_opcode(*cbuf, 0x0F);
1507           emit_opcode(*cbuf, 0x11);
1508           encode_RegMem(*cbuf,
1509                         Matcher::_regEncode[src_first],
1510                         RSP_enc, 0x4, 0, offset,
1511                         false);
1512 #ifndef PRODUCT
1513         } else if (!do_size) {
1514           st->print("movsd   [rsp + #%d], %s\t# spill",
1515                      offset,
1516                      Matcher::regName[src_first]);
1517 #endif
1518         }
1519         return
1520           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1521           ((Matcher::_regEncode[src_first] < 8)
1522            ? 5
1523            : 6); // REX
1524       } else {
1525         // 32-bit
1526         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1527         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1528         int offset = ra_->reg2offset(dst_first);
1529         if (cbuf) {
1530           emit_opcode(*cbuf, 0xF3);
1531           if (Matcher::_regEncode[src_first] >= 8) {
1532               emit_opcode(*cbuf, Assembler::REX_R);
1533           }
1534           emit_opcode(*cbuf, 0x0F);
1535           emit_opcode(*cbuf, 0x11);
1536           encode_RegMem(*cbuf,
1537                         Matcher::_regEncode[src_first],
1538                         RSP_enc, 0x4, 0, offset,
1539                         false);
1540 #ifndef PRODUCT
1541         } else if (!do_size) {
1542           st->print("movss   [rsp + #%d], %s\t# spill",
1543                      offset,
1544                      Matcher::regName[src_first]);
1545 #endif
1546         }
1547         return
1548           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1549           ((Matcher::_regEncode[src_first] < 8)
1550            ? 5
1551            : 6); // REX
1552       }
1553     } else if (dst_first_rc == rc_int) {
1554       // xmm -> gpr
1555       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1556           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1557         // 64-bit
1558         if (cbuf) {
1559           emit_opcode(*cbuf, 0x66);
1560           if (Matcher::_regEncode[dst_first] < 8) {
1561             if (Matcher::_regEncode[src_first] < 8) {
1562               emit_opcode(*cbuf, Assembler::REX_W);
1563             } else {
1564               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1565             }
1566           } else {
1567             if (Matcher::_regEncode[src_first] < 8) {
1568               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1569             } else {
1570               emit_opcode(*cbuf, Assembler::REX_WRB);
1571             }
1572           }
1573           emit_opcode(*cbuf, 0x0F);
1574           emit_opcode(*cbuf, 0x7E);
1575           emit_rm(*cbuf, 0x3,
1576                   Matcher::_regEncode[src_first] & 7,
1577                   Matcher::_regEncode[dst_first] & 7);
1578 #ifndef PRODUCT
1579         } else if (!do_size) {
1580           st->print("movdq   %s, %s\t# spill",
1581                      Matcher::regName[dst_first],
1582                      Matcher::regName[src_first]);
1583 #endif
1584         }
1585         return 5; // REX
1586       } else {
1587         // 32-bit
1588         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1589         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1590         if (cbuf) {
1591           emit_opcode(*cbuf, 0x66);
1592           if (Matcher::_regEncode[dst_first] < 8) {
1593             if (Matcher::_regEncode[src_first] >= 8) {
1594               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1595             }
1596           } else {
1597             if (Matcher::_regEncode[src_first] < 8) {
1598               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1599             } else {
1600               emit_opcode(*cbuf, Assembler::REX_RB);
1601             }
1602           }
1603           emit_opcode(*cbuf, 0x0F);
1604           emit_opcode(*cbuf, 0x7E);
1605           emit_rm(*cbuf, 0x3,
1606                   Matcher::_regEncode[src_first] & 7,
1607                   Matcher::_regEncode[dst_first] & 7);
1608 #ifndef PRODUCT
1609         } else if (!do_size) {
1610           st->print("movdl   %s, %s\t# spill",
1611                      Matcher::regName[dst_first],
1612                      Matcher::regName[src_first]);
1613 #endif
1614         }
1615         return
1616           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1617           ? 4
1618           : 5; // REX
1619       }
1620     } else if (dst_first_rc == rc_float) {
1621       // xmm -> xmm
1622       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1623           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1624         // 64-bit
1625         if (cbuf) {
1626           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1627           if (Matcher::_regEncode[dst_first] < 8) {
1628             if (Matcher::_regEncode[src_first] >= 8) {
1629               emit_opcode(*cbuf, Assembler::REX_B);
1630             }
1631           } else {
1632             if (Matcher::_regEncode[src_first] < 8) {
1633               emit_opcode(*cbuf, Assembler::REX_R);
1634             } else {
1635               emit_opcode(*cbuf, Assembler::REX_RB);
1636             }
1637           }
1638           emit_opcode(*cbuf, 0x0F);
1639           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1640           emit_rm(*cbuf, 0x3,
1641                   Matcher::_regEncode[dst_first] & 7,
1642                   Matcher::_regEncode[src_first] & 7);
1643 #ifndef PRODUCT
1644         } else if (!do_size) {
1645           st->print("%s  %s, %s\t# spill",
1646                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1647                      Matcher::regName[dst_first],
1648                      Matcher::regName[src_first]);
1649 #endif
1650         }
1651         return
1652           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1653           ? 4
1654           : 5; // REX
1655       } else {
1656         // 32-bit
1657         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1658         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1659         if (cbuf) {
1660           if (!UseXmmRegToRegMoveAll)
1661             emit_opcode(*cbuf, 0xF3);
1662           if (Matcher::_regEncode[dst_first] < 8) {
1663             if (Matcher::_regEncode[src_first] >= 8) {
1664               emit_opcode(*cbuf, Assembler::REX_B);
1665             }
1666           } else {
1667             if (Matcher::_regEncode[src_first] < 8) {
1668               emit_opcode(*cbuf, Assembler::REX_R);
1669             } else {
1670               emit_opcode(*cbuf, Assembler::REX_RB);
1671             }
1672           }
1673           emit_opcode(*cbuf, 0x0F);
1674           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1675           emit_rm(*cbuf, 0x3,
1676                   Matcher::_regEncode[dst_first] & 7,
1677                   Matcher::_regEncode[src_first] & 7);
1678 #ifndef PRODUCT
1679         } else if (!do_size) {
1680           st->print("%s  %s, %s\t# spill",
1681                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1682                      Matcher::regName[dst_first],
1683                      Matcher::regName[src_first]);
1684 #endif
1685         }
1686         return
1687           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1688           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1689           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1690       }
1691     }
1692   }
1693 
1694   assert(0," foo ");
1695   Unimplemented();
1696 
1697   return 0;
1698 }
1699 
1700 #ifndef PRODUCT
1701 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1702 {
1703   implementation(NULL, ra_, false, st);
1704 }
1705 #endif
1706 
1707 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1708 {
1709   implementation(&cbuf, ra_, false, NULL);
1710 }
1711 
1712 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1713 {
1714   return implementation(NULL, ra_, true, NULL);
1715 }
1716 
1717 //=============================================================================
#ifndef PRODUCT
// Print the padding nop together with _count, the number of pad bytes.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
{
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit the padding by asking the assembler for nop(_count).
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
{
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// The size of the node is exactly _count; the register allocator argument is
// not consulted.
uint MachNopNode::size(PhaseRegAlloc*) const
{
  return _count;
}
1735 
1736 
1737 //=============================================================================
1738 #ifndef PRODUCT
1739 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1740 {
1741   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1742   int reg = ra_->get_reg_first(this);
1743   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1744             Matcher::regName[reg], offset);
1745 }
1746 #endif
1747 
1748 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1749 {
1750   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1751   int reg = ra_->get_encode(this);
1752   if (offset >= 0x80) {
1753     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1754     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1755     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1756     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1757     emit_d32(cbuf, offset);
1758   } else {
1759     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1760     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1761     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1762     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1763     emit_d8(cbuf, offset);
1764   }
1765 }
1766 
1767 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1768 {
1769   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1770   return (offset < 0x80) ? 5 : 8; // REX
1771 }
1772 
1773 //=============================================================================
1774 
1775 // emit call stub, compiled java to interpreter
1776 void emit_java_to_interp(CodeBuffer& cbuf)
1777 {
1778   // Stub is fixed up when the corresponding call is converted from
1779   // calling compiled code to calling interpreted code.
1780   // movq rbx, 0
1781   // jmp -5 # to self
1782 
1783   address mark = cbuf.insts_mark();  // get mark within main instrs section
1784 
1785   // Note that the code buffer's insts_mark is always relative to insts.
1786   // That's why we must use the macroassembler to generate a stub.
1787   MacroAssembler _masm(&cbuf);
1788 
1789   address base =
1790   __ start_a_stub(Compile::MAX_stubs_size);
1791   if (base == NULL)  return;  // CodeBuffer::expand failed
1792   // static stub relocation stores the instruction address of the call
1793   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1794   // static stub relocation also tags the methodOop in the code-stream.
1795   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1796   // This is recognized as unresolved by relocs/nativeinst/ic code
1797   __ jump(RuntimeAddress(__ pc()));
1798 
1799   // Update current stubs pointer and restore insts_end.
1800   __ end_a_stub();
1801 }
1802 
1803 // size of call stub, compiled java to interpretor
1804 uint size_java_to_interp()
1805 {
1806   return 15;  // movq (1+1+8); jmp (1+4)
1807 }
1808 
1809 // relocation entries for call stub, compiled java to interpretor
1810 uint reloc_java_to_interp()
1811 {
1812   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1813 }
1814 
1815 //=============================================================================
1816 #ifndef PRODUCT
1817 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1818 {
1819   if (UseCompressedOops) {
1820     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1821     if (Universe::narrow_oop_shift() != 0) {
1822       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1823     }
1824     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1825   } else {
1826     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1827                  "# Inline cache check");
1828   }
1829   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1830   st->print_cr("\tnop\t# nops to align entry point");
1831 }
1832 #endif
1833 
1834 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1835 {
1836   MacroAssembler masm(&cbuf);
1837   uint insts_size = cbuf.insts_size();
1838   if (UseCompressedOops) {
1839     masm.load_klass(rscratch1, j_rarg0);
1840     masm.cmpptr(rax, rscratch1);
1841   } else {
1842     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1843   }
1844 
1845   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1846 
1847   /* WARNING these NOPs are critical so that verified entry point is properly
1848      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1849   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1850   if (OptoBreakpoint) {
1851     // Leave space for int3
1852     nops_cnt -= 1;
1853   }
1854   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1855   if (nops_cnt > 0)
1856     masm.nop(nops_cnt);
1857 }
1858 
// Size of the unverified entry point.  Delegates to MachNode::size() rather
// than returning a hand-computed constant.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1864 
1865 
1866 //=============================================================================
// Number of bytes reserved for the exception handler stub.
uint size_exception_handler()
{
  // NativeCall instruction size is the same as NativeJump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1874 
1875 // Emit exception handler code.
1876 int emit_exception_handler(CodeBuffer& cbuf)
1877 {
1878 
1879   // Note that the code buffer's insts_mark is always relative to insts.
1880   // That's why we must use the macroassembler to generate a handler.
1881   MacroAssembler _masm(&cbuf);
1882   address base =
1883   __ start_a_stub(size_exception_handler());
1884   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1885   int offset = __ offset();
1886   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1887   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1888   __ end_a_stub();
1889   return offset;
1890 }
1891 
1892 uint size_deopt_handler()
1893 {
1894   // three 5 byte instructions
1895   return 15;
1896 }
1897 
1898 // Emit deopt handler code.
1899 int emit_deopt_handler(CodeBuffer& cbuf)
1900 {
1901 
1902   // Note that the code buffer's insts_mark is always relative to insts.
1903   // That's why we must use the macroassembler to generate a handler.
1904   MacroAssembler _masm(&cbuf);
1905   address base =
1906   __ start_a_stub(size_deopt_handler());
1907   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1908   int offset = __ offset();
1909   address the_pc = (address) __ pc();
1910   Label next;
1911   // push a "the_pc" on the stack without destroying any registers
1912   // as they all may be live.
1913 
1914   // push address of "next"
1915   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1916   __ bind(next);
1917   // adjust it so it matches "the_pc"
1918   __ subptr(Address(rsp, 0), __ offset() - offset);
1919   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1920   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1921   __ end_a_stub();
1922   return offset;
1923 }
1924 
1925 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1926   int mark = cbuf.insts()->mark_off();
1927   MacroAssembler _masm(&cbuf);
1928   address double_address = __ double_constant(x);
1929   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1930   emit_d32_reloc(cbuf,
1931                  (int) (double_address - cbuf.insts_end() - 4),
1932                  internal_word_Relocation::spec(double_address),
1933                  RELOC_DISP32);
1934 }
1935 
1936 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1937   int mark = cbuf.insts()->mark_off();
1938   MacroAssembler _masm(&cbuf);
1939   address float_address = __ float_constant(x);
1940   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1941   emit_d32_reloc(cbuf,
1942                  (int) (float_address - cbuf.insts_end() - 4),
1943                  internal_word_Relocation::spec(float_address),
1944                  RELOC_DISP32);
1945 }
1946 
1947 
1948 const bool Matcher::match_rule_supported(int opcode) {
1949   if (!has_match_rule(opcode))
1950     return false;
1951 
1952   return true;  // Per default match rules are supported.
1953 }
1954 
1955 int Matcher::regnum_to_fpu_offset(int regnum)
1956 {
1957   return regnum - 32; // The FP registers are in the second chunk
1958 }
1959 
// This query is UltraSparc specific; returning true here just means we have
// a fast l2f conversion.
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes (fixed at 8).
const uint Matcher::vector_width_in_bytes(void) {
  return 8;
}

// Ideal register type used for vectors (Op_RegD).
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1974 
1975 // Is this branch offset short enough that a short branch can be used?
1976 //
1977 // NOTE: If the platform does not provide any short branch variants, then
1978 //       this method should return false for offset 0.
1979 bool Matcher::is_short_branch_offset(int rule, int offset) {
1980   // the short version of jmpConUCF2 contains multiple branches,
1981   // making the reach slightly less
1982   if (rule == jmpConUCF2_rule)
1983     return (-126 <= offset && offset <= 125);
1984   return (-128 <= offset && offset <= 127);
1985 }
1986 
// Reports whether a 64-bit constant counts as "simple".  The answer is
// unconditionally true; 'value' is not inspected.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
  // Alternative that was considered:
  //return value == (int) value;  // Cf. storeImmL and immL32.

  // Probably always true, even if a temp register is required.
  return true;
}

// The ecx parameter to rep stosq for the ClearArray node is in words.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Only meaningful with compressed oops (asserted).  True when
// LogMinObjAlignmentInBytes is at most 3.
bool Matcher::narrow_oop_use_complex_address() {
  assert(UseCompressedOops, "only for compressed oops code");
  return (LogMinObjAlignmentInBytes <= 3);
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// No-op on amd64
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x64 they are stored without conversion, so normal access can be used.
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = true;
2037 
2038 // Return whether or not this register is ever used as an argument.
2039 // This function is used on startup to build the trampoline stubs in
2040 // generateOptoStub.  Registers not mentioned will be killed by the VM
2041 // call in the trampoline, and arguments in those registers not be
2042 // available to the callee.
2043 bool Matcher::can_be_java_arg(int reg)
2044 {
2045   return
2046     reg ==  RDI_num || reg ==  RDI_H_num ||
2047     reg ==  RSI_num || reg ==  RSI_H_num ||
2048     reg ==  RDX_num || reg ==  RDX_H_num ||
2049     reg ==  RCX_num || reg ==  RCX_H_num ||
2050     reg ==   R8_num || reg ==   R8_H_num ||
2051     reg ==   R9_num || reg ==   R9_H_num ||
2052     reg ==  R12_num || reg ==  R12_H_num ||
2053     reg == XMM0_num || reg == XMM0_H_num ||
2054     reg == XMM1_num || reg == XMM1_H_num ||
2055     reg == XMM2_num || reg == XMM2_H_num ||
2056     reg == XMM3_num || reg == XMM3_H_num ||
2057     reg == XMM4_num || reg == XMM4_H_num ||
2058     reg == XMM5_num || reg == XMM5_H_num ||
2059     reg == XMM6_num || reg == XMM6_H_num ||
2060     reg == XMM7_num || reg == XMM7_H_num;
2061 }
2062 
// A register is a spillable argument exactly when it can be a Java argument.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Always false; 'divisor' is not inspected.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64 bit mode, code which uses multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
  return false;
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask;
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask;
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask;
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask;
}

// Register mask used to save SP across a method handle invoke (RBP).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return PTR_RBP_REG_mask;
}
2098 
2099 static Address build_address(int b, int i, int s, int d) {
2100   Register index = as_Register(i);
2101   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2102   if (index == rsp) {
2103     index = noreg;
2104     scale = Address::no_scale;
2105   }
2106   Address addr(as_Register(b), index, scale, d);
2107   return addr;
2108 }
2109 
2110 %}
2111 
2112 //----------ENCODING BLOCK-----------------------------------------------------
2113 // This block specifies the encoding classes used by the compiler to
2114 // output byte streams.  Encoding classes are parameterized macros
2115 // used by Machine Instruction Nodes in order to generate the bit
2116 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2120 // which returns its register number when queried.  CONST_INTER causes
2121 // an operand to generate a function which returns the value of the
2122 // constant when queried.  MEMORY_INTER causes an operand to generate
2123 // four functions which return the Base Register, the Index Register,
2124 // the Scale Value, and the Offset Value of the operand when queried.
2125 // COND_INTER causes an operand to generate six functions which return
2126 // the encoding code (ie - encoding bits for the instruction)
2127 // associated with each basic boolean condition for a conditional
2128 // instruction.
2129 //
2130 // Instructions specify two basic values for encoding.  Again, a
2131 // function is available to check if the constant displacement is an
2132 // oop. They use the ins_encode keyword to specify their encoding
2133 // classes (which must be a sequence of enc_class names, and their
2134 // parameters, specified in the encoding block), and they use the
2135 // opcode keyword to specify, in order, their primary, secondary, and
2136 // tertiary opcode.  Only the opcode sections which a particular
2137 // instruction needs for encoding need to be specified.
2138 encode %{
2139   // Build emit functions for each basic byte or larger field in the
2140   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2141   // from C++ code in the enc_class source block.  Emit functions will
2142   // live in the main source block for now.  In future, we can
2143   // generalize this by adding a syntax that specifies the sizes of
2144   // fields in an order, so that the adlc can build the emit functions
2145   // automagically
2146 
  // Emit primary opcode
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit tertiary opcode
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}

  // Emit opcode directly (the byte is taken from the constant operand)
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit size prefix (0x66)
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}

  // Emit a register-direct r/m byte (mod = 0x3) with a zero middle field and
  // the low three bits of $reg in the last field.
  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}

  // Emit a register-direct r/m byte (mod = 0x3) from the low three bits of
  // $dst (middle field) and $src (last field).  No REX prefix is emitted here.
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}

  // Emit the given opcode byte followed by the same r/m byte as reg_reg.
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
2192 
  // Flag fix-up emitted after a floating point compare.  When the parity
  // flag is clear the whole sequence is skipped; otherwise the saved flags
  // are masked on the stack and reloaded.
  enc_class cmpfp_fixup()
  %{
    // jnp,s exit
    // Displacement 0x0A skips pushfq (1) + andq (8) + popfq (1) and lands on
    // the trailing nop.
    emit_opcode(cbuf, 0x7B);
    emit_d8(cbuf, 0x0A);

    // pushfq
    emit_opcode(cbuf, 0x9C);

    // andq $0xffffff2b, (%rsp)
    // The mask clears bits 2, 4, 6 and 7 of the saved flags word and leaves
    // bit 0 untouched.
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x81);
    emit_opcode(cbuf, 0x24);
    emit_opcode(cbuf, 0x24);
    emit_d32(cbuf, 0xffffff2b);

    // popfq
    emit_opcode(cbuf, 0x9D);

    // nop (target for branch to avoid branch to branch)
    emit_opcode(cbuf, 0x90);
  %}
2215 
  // Materialize a three-way compare result in $dst from the current flags:
  // $dst is preloaded with -1 and kept on parity or below; otherwise it is
  // replaced by the zero-extended result of setne.
  enc_class cmpfp3(rRegI dst)
  %{
    int dstenc = $dst$$reg;

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jp,s done
    // Skips jb (2) + setne (3) + movzbl (3) = 8 bytes; setne and movzbl each
    // grow by one prefix byte when dstenc >= 4, giving 10.
    emit_opcode(cbuf, 0x7A);
    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);

    // jb,s done
    // Same as above minus the 2 bytes of this jb itself.
    emit_opcode(cbuf, 0x72);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    // Encodings 4 and above get a prefix: plain REX for 4..7, REX_B for 8+.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    // $dst appears in both r/m byte fields, hence REX_RB for encodings 8+.
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}
2251 
  // Prologue for 32-bit signed division.  Emits everything up to and
  // including cltd; the idivl itself is emitted by the user of this rule.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // One byte further when the cmp below carries a REX_B prefix.
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // Skips cltd plus the idiv emitted by the user of this rule, which is one
    // byte longer when it carries a REX_B prefix.
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
2315 
  // Prologue for 64-bit signed division.  Emits everything up to and
  // including cqto; the idivq itself is emitted by the user of this rule.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    // A REX prefix is always present here (REX_W or REX_WB), so the branch
    // displacements in this sequence do not depend on $div.
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2386 
2387   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2388   enc_class OpcSE(immI imm)
2389   %{
2390     // Emit primary opcode and set sign-extend bit
2391     // Check for 8-bit immediate, and set sign extend bit in opcode
2392     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2393       emit_opcode(cbuf, $primary | 0x02);
2394     } else {
2395       // 32-bit immediate
2396       emit_opcode(cbuf, $primary);
2397     }
2398   %}
2399 
2400   enc_class OpcSErm(rRegI dst, immI imm)
2401   %{
2402     // OpcSEr/m
2403     int dstenc = $dst$$reg;
2404     if (dstenc >= 8) {
2405       emit_opcode(cbuf, Assembler::REX_B);
2406       dstenc -= 8;
2407     }
2408     // Emit primary opcode and set sign-extend bit
2409     // Check for 8-bit immediate, and set sign extend bit in opcode
2410     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2411       emit_opcode(cbuf, $primary | 0x02);
2412     } else {
2413       // 32-bit immediate
2414       emit_opcode(cbuf, $primary);
2415     }
2416     // Emit r/m byte with secondary opcode, after primary opcode.
2417     emit_rm(cbuf, 0x3, $secondary, dstenc);
2418   %}
2419 
2420   enc_class OpcSErm_wide(rRegL dst, immI imm)
2421   %{
2422     // OpcSEr/m
2423     int dstenc = $dst$$reg;
2424     if (dstenc < 8) {
2425       emit_opcode(cbuf, Assembler::REX_W);
2426     } else {
2427       emit_opcode(cbuf, Assembler::REX_WB);
2428       dstenc -= 8;
2429     }
2430     // Emit primary opcode and set sign-extend bit
2431     // Check for 8-bit immediate, and set sign extend bit in opcode
2432     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2433       emit_opcode(cbuf, $primary | 0x02);
2434     } else {
2435       // 32-bit immediate
2436       emit_opcode(cbuf, $primary);
2437     }
2438     // Emit r/m byte with secondary opcode, after primary opcode.
2439     emit_rm(cbuf, 0x3, $secondary, dstenc);
2440   %}
2441 
2442   enc_class Con8or32(immI imm)
2443   %{
2444     // Check for 8-bit immediate, and set sign extend bit in opcode
2445     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2446       $$$emit8$imm$$constant;
2447     } else {
2448       // 32-bit immediate
2449       $$$emit32$imm$$constant;
2450     }
2451   %}
2452 
2453   enc_class Lbl(label labl)
2454   %{
2455     // JMP, CALL
2456     Label* l = $labl$$label;
2457     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2458   %}
2459 
2460   enc_class LblShort(label labl)
2461   %{
2462     // JMP, CALL
2463     Label* l = $labl$$label;
2464     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2465     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2466     emit_d8(cbuf, disp);
2467   %}
2468 
  // Combine the secondary opcode with $dst through emit_cc.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}

  // Combine the tertiary opcode with $dst through emit_cc.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}

  // Register-direct r/m byte (mod = 0x3) with the secondary opcode in the
  // middle field and the low three bits of $div in the last field.
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
2486 
2487   enc_class Jcc(cmpOp cop, label labl)
2488   %{
2489     // JCC
2490     Label* l = $labl$$label;
2491     $$$emit8$primary;
2492     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2493     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2494   %}
2495 
2496   enc_class JccShort (cmpOp cop, label labl)
2497   %{
2498   // JCC
2499     Label *l = $labl$$label;
2500     emit_cc(cbuf, $primary, $cop$$cmpcode);
2501     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2502     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2503     emit_d8(cbuf, disp);
2504   %}
2505 
  // CMOV opcode bytes: the primary opcode byte, followed by the secondary
  // opcode combined with the condition code.  Operand bytes are not emitted
  // here.
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
2512 
2513   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2514   %{
2515     // Invert sense of branch from sense of cmov
2516     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2517     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2518                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2519                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2520     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2521     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2522     if ($dst$$reg < 8) {
2523       if ($src$$reg >= 8) {
2524         emit_opcode(cbuf, Assembler::REX_B);
2525       }
2526     } else {
2527       if ($src$$reg < 8) {
2528         emit_opcode(cbuf, Assembler::REX_R);
2529       } else {
2530         emit_opcode(cbuf, Assembler::REX_RB);
2531       }
2532     }
2533     emit_opcode(cbuf, 0x0F);
2534     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2535     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2536   %}
2537 
2538   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2539   %{
2540     // Invert sense of branch from sense of cmov
2541     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2542     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2543 
2544     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2545     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2546     if ($dst$$reg < 8) {
2547       if ($src$$reg >= 8) {
2548         emit_opcode(cbuf, Assembler::REX_B);
2549       }
2550     } else {
2551       if ($src$$reg < 8) {
2552         emit_opcode(cbuf, Assembler::REX_R);
2553       } else {
2554         emit_opcode(cbuf, Assembler::REX_RB);
2555       }
2556     }
2557     emit_opcode(cbuf, 0x0F);
2558     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2559     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2560   %}
2561 
2562   enc_class enc_PartialSubtypeCheck()
2563   %{
2564     Register Rrdi = as_Register(RDI_enc); // result register
2565     Register Rrax = as_Register(RAX_enc); // super class
2566     Register Rrcx = as_Register(RCX_enc); // killed
2567     Register Rrsi = as_Register(RSI_enc); // sub class
2568     Label miss;
2569     const bool set_cond_codes = true;
2570 
2571     MacroAssembler _masm(&cbuf);
2572     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2573                                      NULL, &miss,
2574                                      /*set_cond_codes:*/ true);
2575     if ($primary) {
2576       __ xorptr(Rrdi, Rrdi);
2577     }
2578     __ bind(miss);
2579   %}
2580 
2581   enc_class Java_To_Interpreter(method meth)
2582   %{
2583     // CALL Java_To_Interpreter
2584     // This is the instruction starting address for relocation info.
2585     cbuf.set_insts_mark();
2586     $$$emit8$primary;
2587     // CALL directly to the runtime
2588     emit_d32_reloc(cbuf,
2589                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2590                    runtime_call_Relocation::spec(),
2591                    RELOC_DISP32);
2592   %}
2593 
  // Copies RSP into rbp_mh_SP_save so that restore_SP can put it back later.
  // In debug builds the number of bytes emitted is checked against
  // preserve_SP_size().
  enc_class preserve_SP %{
    // Buffer offset before the move (debug builds only).
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    // Buffer offset after the move (debug builds only).
    debug_only(int off1 = cbuf.insts_size());
    // The emitted size must match the size predicted by preserve_SP_size().
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}
2603 
  // Counterpart of preserve_SP: copies rbp_mh_SP_save back into RSP.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}
2608 
2609   enc_class Java_Static_Call(method meth)
2610   %{
2611     // JAVA STATIC CALL
2612     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2613     // determine who we intended to call.
2614     cbuf.set_insts_mark();
2615     $$$emit8$primary;
2616 
2617     if (!_method) {
2618       emit_d32_reloc(cbuf,
2619                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2620                      runtime_call_Relocation::spec(),
2621                      RELOC_DISP32);
2622     } else if (_optimized_virtual) {
2623       emit_d32_reloc(cbuf,
2624                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2625                      opt_virtual_call_Relocation::spec(),
2626                      RELOC_DISP32);
2627     } else {
2628       emit_d32_reloc(cbuf,
2629                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2630                      static_call_Relocation::spec(),
2631                      RELOC_DISP32);
2632     }
2633     if (_method) {
2634       // Emit stub for static call
2635       emit_java_to_interp(cbuf);
2636     }
2637   %}
2638 
2639   enc_class Java_Dynamic_Call(method meth)
2640   %{
2641     // JAVA DYNAMIC CALL
2642     // !!!!!
2643     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2644     // emit_call_dynamic_prologue( cbuf );
2645     cbuf.set_insts_mark();
2646 
2647     // movq rax, -1
2648     emit_opcode(cbuf, Assembler::REX_W);
2649     emit_opcode(cbuf, 0xB8 | RAX_enc);
2650     emit_d64_reloc(cbuf,
2651                    (int64_t) Universe::non_oop_word(),
2652                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2653     address virtual_call_oop_addr = cbuf.insts_mark();
2654     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2655     // who we intended to call.
2656     cbuf.set_insts_mark();
2657     $$$emit8$primary;
2658     emit_d32_reloc(cbuf,
2659                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2660                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2661                    RELOC_DISP32);
2662   %}
2663 
2664   enc_class Java_Compiled_Call(method meth)
2665   %{
2666     // JAVA COMPILED CALL
2667     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2668 
2669     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2670     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2671 
2672     // callq *disp(%rax)
2673     cbuf.set_insts_mark();
2674     $$$emit8$primary;
2675     if (disp < 0x80) {
2676       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2677       emit_d8(cbuf, disp); // Displacement
2678     } else {
2679       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2680       emit_d32(cbuf, disp); // Displacement
2681     }
2682   %}
2683 
2684   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2685   %{
2686     // SAL, SAR, SHR
2687     int dstenc = $dst$$reg;
2688     if (dstenc >= 8) {
2689       emit_opcode(cbuf, Assembler::REX_B);
2690       dstenc -= 8;
2691     }
2692     $$$emit8$primary;
2693     emit_rm(cbuf, 0x3, $secondary, dstenc);
2694     $$$emit8$shift$$constant;
2695   %}
2696 
2697   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2698   %{
2699     // SAL, SAR, SHR
2700     int dstenc = $dst$$reg;
2701     if (dstenc < 8) {
2702       emit_opcode(cbuf, Assembler::REX_W);
2703     } else {
2704       emit_opcode(cbuf, Assembler::REX_WB);
2705       dstenc -= 8;
2706     }
2707     $$$emit8$primary;
2708     emit_rm(cbuf, 0x3, $secondary, dstenc);
2709     $$$emit8$shift$$constant;
2710   %}
2711 
2712   enc_class load_immI(rRegI dst, immI src)
2713   %{
2714     int dstenc = $dst$$reg;
2715     if (dstenc >= 8) {
2716       emit_opcode(cbuf, Assembler::REX_B);
2717       dstenc -= 8;
2718     }
2719     emit_opcode(cbuf, 0xB8 | dstenc);
2720     $$$emit32$src$$constant;
2721   %}
2722 
2723   enc_class load_immL(rRegL dst, immL src)
2724   %{
2725     int dstenc = $dst$$reg;
2726     if (dstenc < 8) {
2727       emit_opcode(cbuf, Assembler::REX_W);
2728     } else {
2729       emit_opcode(cbuf, Assembler::REX_WB);
2730       dstenc -= 8;
2731     }
2732     emit_opcode(cbuf, 0xB8 | dstenc);
2733     emit_d64(cbuf, $src$$constant);
2734   %}
2735 
2736   enc_class load_immUL32(rRegL dst, immUL32 src)
2737   %{
2738     // same as load_immI, but this time we care about zeroes in the high word
2739     int dstenc = $dst$$reg;
2740     if (dstenc >= 8) {
2741       emit_opcode(cbuf, Assembler::REX_B);
2742       dstenc -= 8;
2743     }
2744     emit_opcode(cbuf, 0xB8 | dstenc);
2745     $$$emit32$src$$constant;
2746   %}
2747 
2748   enc_class load_immL32(rRegL dst, immL32 src)
2749   %{
2750     int dstenc = $dst$$reg;
2751     if (dstenc < 8) {
2752       emit_opcode(cbuf, Assembler::REX_W);
2753     } else {
2754       emit_opcode(cbuf, Assembler::REX_WB);
2755       dstenc -= 8;
2756     }
2757     emit_opcode(cbuf, 0xC7);
2758     emit_rm(cbuf, 0x03, 0x00, dstenc);
2759     $$$emit32$src$$constant;
2760   %}
2761 
2762   enc_class load_immP31(rRegP dst, immP32 src)
2763   %{
2764     // same as load_immI, but this time we care about zeroes in the high word
2765     int dstenc = $dst$$reg;
2766     if (dstenc >= 8) {
2767       emit_opcode(cbuf, Assembler::REX_B);
2768       dstenc -= 8;
2769     }
2770     emit_opcode(cbuf, 0xB8 | dstenc);
2771     $$$emit32$src$$constant;
2772   %}
2773 
2774   enc_class load_immP(rRegP dst, immP src)
2775   %{
2776     int dstenc = $dst$$reg;
2777     if (dstenc < 8) {
2778       emit_opcode(cbuf, Assembler::REX_W);
2779     } else {
2780       emit_opcode(cbuf, Assembler::REX_WB);
2781       dstenc -= 8;
2782     }
2783     emit_opcode(cbuf, 0xB8 | dstenc);
2784     // This next line should be generated from ADLC
2785     if ($src->constant_is_oop()) {
2786       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2787     } else {
2788       emit_d64(cbuf, $src$$constant);
2789     }
2790   %}
2791 
2792   enc_class load_immF(regF dst, immF con)
2793   %{
2794     // XXX reg_mem doesn't support RIP-relative addressing yet
2795     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2796     emit_float_constant(cbuf, $con$$constant);
2797   %}
2798 
2799   enc_class load_immD(regD dst, immD con)
2800   %{
2801     // XXX reg_mem doesn't support RIP-relative addressing yet
2802     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2803     emit_double_constant(cbuf, $con$$constant);
2804   %}
2805 
2806   enc_class load_conF (regF dst, immF con) %{    // Load float constant
2807     emit_opcode(cbuf, 0xF3);
2808     if ($dst$$reg >= 8) {
2809       emit_opcode(cbuf, Assembler::REX_R);
2810     }
2811     emit_opcode(cbuf, 0x0F);
2812     emit_opcode(cbuf, 0x10);
2813     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2814     emit_float_constant(cbuf, $con$$constant);
2815   %}
2816 
2817   enc_class load_conD (regD dst, immD con) %{    // Load double constant
2818     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2819     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2820     if ($dst$$reg >= 8) {
2821       emit_opcode(cbuf, Assembler::REX_R);
2822     }
2823     emit_opcode(cbuf, 0x0F);
2824     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2825     emit_rm(cbuf, 0x0, $dst$$reg & 7, 0x5); // 00 reg 101
2826     emit_double_constant(cbuf, $con$$constant);
2827   %}
2828 
2829   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2830   enc_class enc_copy(rRegI dst, rRegI src)
2831   %{
2832     encode_copy(cbuf, $dst$$reg, $src$$reg);
2833   %}
2834 
2835   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2836   enc_class enc_CopyXD( RegD dst, RegD src ) %{
2837     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2838   %}
2839 
2840   enc_class enc_copy_always(rRegI dst, rRegI src)
2841   %{
2842     int srcenc = $src$$reg;
2843     int dstenc = $dst$$reg;
2844 
2845     if (dstenc < 8) {
2846       if (srcenc >= 8) {
2847         emit_opcode(cbuf, Assembler::REX_B);
2848         srcenc -= 8;
2849       }
2850     } else {
2851       if (srcenc < 8) {
2852         emit_opcode(cbuf, Assembler::REX_R);
2853       } else {
2854         emit_opcode(cbuf, Assembler::REX_RB);
2855         srcenc -= 8;
2856       }
2857       dstenc -= 8;
2858     }
2859 
2860     emit_opcode(cbuf, 0x8B);
2861     emit_rm(cbuf, 0x3, dstenc, srcenc);
2862   %}
2863 
2864   enc_class enc_copy_wide(rRegL dst, rRegL src)
2865   %{
2866     int srcenc = $src$$reg;
2867     int dstenc = $dst$$reg;
2868 
2869     if (dstenc != srcenc) {
2870       if (dstenc < 8) {
2871         if (srcenc < 8) {
2872           emit_opcode(cbuf, Assembler::REX_W);
2873         } else {
2874           emit_opcode(cbuf, Assembler::REX_WB);
2875           srcenc -= 8;
2876         }
2877       } else {
2878         if (srcenc < 8) {
2879           emit_opcode(cbuf, Assembler::REX_WR);
2880         } else {
2881           emit_opcode(cbuf, Assembler::REX_WRB);
2882           srcenc -= 8;
2883         }
2884         dstenc -= 8;
2885       }
2886       emit_opcode(cbuf, 0x8B);
2887       emit_rm(cbuf, 0x3, dstenc, srcenc);
2888     }
2889   %}
2890 
  // Emits the constant of src as a 32-bit immediate through the ADLC
  // emit32 directive.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2896 
2897   enc_class Con64(immL src)
2898   %{
2899     // Output immediate
2900     emit_d64($src$$constant);
2901   %}
2902 
2903   enc_class Con32F_as_bits(immF src)
2904   %{
2905     // Output Float immediate bits
2906     jfloat jf = $src$$constant;
2907     jint jf_as_bits = jint_cast(jf);
2908     emit_d32(cbuf, jf_as_bits);
2909   %}
2910 
  // Emits the constant of src as a 16-bit immediate through the ADLC
  // emit16 directive.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2916 
2917   // How is this different from Con32??? XXX
2918   enc_class Con_d32(immI src)
2919   %{
2920     emit_d32(cbuf,$src$$constant);
2921   %}
2922 
2923   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2924     // Output immediate memory reference
2925     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2926     emit_d32(cbuf, 0x00);
2927   %}
2928 
2929   enc_class jump_enc(rRegL switch_val, rRegI dest) %{
2930     MacroAssembler masm(&cbuf);
2931 
2932     Register switch_reg = as_Register($switch_val$$reg);
2933     Register dest_reg   = as_Register($dest$$reg);
2934     address table_base  = masm.address_table_constant(_index2label);
2935 
2936     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2937     // to do that and the compiler is using that register as one it can allocate.
2938     // So we build it all by hand.
2939     // Address index(noreg, switch_reg, Address::times_1);
2940     // ArrayAddress dispatch(table, index);
2941 
2942     Address dispatch(dest_reg, switch_reg, Address::times_1);
2943 
2944     masm.lea(dest_reg, InternalAddress(table_base));
2945     masm.jmp(dispatch);
2946   %}
2947 
2948   enc_class jump_enc_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
2949     MacroAssembler masm(&cbuf);
2950 
2951     Register switch_reg = as_Register($switch_val$$reg);
2952     Register dest_reg   = as_Register($dest$$reg);
2953     address table_base  = masm.address_table_constant(_index2label);
2954 
2955     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2956     // to do that and the compiler is using that register as one it can allocate.
2957     // So we build it all by hand.
2958     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2959     // ArrayAddress dispatch(table, index);
2960 
2961     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant, (int)$offset$$constant);
2962 
2963     masm.lea(dest_reg, InternalAddress(table_base));
2964     masm.jmp(dispatch);
2965   %}
2966 
2967   enc_class jump_enc_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
2968     MacroAssembler masm(&cbuf);
2969 
2970     Register switch_reg = as_Register($switch_val$$reg);
2971     Register dest_reg   = as_Register($dest$$reg);
2972     address table_base  = masm.address_table_constant(_index2label);
2973 
2974     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
2975     // to do that and the compiler is using that register as one it can allocate.
2976     // So we build it all by hand.
2977     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2978     // ArrayAddress dispatch(table, index);
2979 
2980     Address dispatch(dest_reg, switch_reg, (Address::ScaleFactor)$shift$$constant);
2981     masm.lea(dest_reg, InternalAddress(table_base));
2982     masm.jmp(dispatch);
2983 
2984   %}
2985 
2986   enc_class lock_prefix()
2987   %{
2988     if (os::is_MP()) {
2989       emit_opcode(cbuf, 0xF0); // lock
2990     }
2991   %}
2992 
2993   enc_class REX_mem(memory mem)
2994   %{
2995     if ($mem$$base >= 8) {
2996       if ($mem$$index < 8) {
2997         emit_opcode(cbuf, Assembler::REX_B);
2998       } else {
2999         emit_opcode(cbuf, Assembler::REX_XB);
3000       }
3001     } else {
3002       if ($mem$$index >= 8) {
3003         emit_opcode(cbuf, Assembler::REX_X);
3004       }
3005     }
3006   %}
3007 
3008   enc_class REX_mem_wide(memory mem)
3009   %{
3010     if ($mem$$base >= 8) {
3011       if ($mem$$index < 8) {
3012         emit_opcode(cbuf, Assembler::REX_WB);
3013       } else {
3014         emit_opcode(cbuf, Assembler::REX_WXB);
3015       }
3016     } else {
3017       if ($mem$$index < 8) {
3018         emit_opcode(cbuf, Assembler::REX_W);
3019       } else {
3020         emit_opcode(cbuf, Assembler::REX_WX);
3021       }
3022     }
3023   %}
3024 
3025   // for byte regs
3026   enc_class REX_breg(rRegI reg)
3027   %{
3028     if ($reg$$reg >= 4) {
3029       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3030     }
3031   %}
3032 
3033   // for byte regs
3034   enc_class REX_reg_breg(rRegI dst, rRegI src)
3035   %{
3036     if ($dst$$reg < 8) {
3037       if ($src$$reg >= 4) {
3038         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
3039       }
3040     } else {
3041       if ($src$$reg < 8) {
3042         emit_opcode(cbuf, Assembler::REX_R);
3043       } else {
3044         emit_opcode(cbuf, Assembler::REX_RB);
3045       }
3046     }
3047   %}
3048 
3049   // for byte regs
3050   enc_class REX_breg_mem(rRegI reg, memory mem)
3051   %{
3052     if ($reg$$reg < 8) {
3053       if ($mem$$base < 8) {
3054         if ($mem$$index >= 8) {
3055           emit_opcode(cbuf, Assembler::REX_X);
3056         } else if ($reg$$reg >= 4) {
3057           emit_opcode(cbuf, Assembler::REX);
3058         }
3059       } else {
3060         if ($mem$$index < 8) {
3061           emit_opcode(cbuf, Assembler::REX_B);
3062         } else {
3063           emit_opcode(cbuf, Assembler::REX_XB);
3064         }
3065       }
3066     } else {
3067       if ($mem$$base < 8) {
3068         if ($mem$$index < 8) {
3069           emit_opcode(cbuf, Assembler::REX_R);
3070         } else {
3071           emit_opcode(cbuf, Assembler::REX_RX);
3072         }
3073       } else {
3074         if ($mem$$index < 8) {
3075           emit_opcode(cbuf, Assembler::REX_RB);
3076         } else {
3077           emit_opcode(cbuf, Assembler::REX_RXB);
3078         }
3079       }
3080     }
3081   %}
3082 
3083   enc_class REX_reg(rRegI reg)
3084   %{
3085     if ($reg$$reg >= 8) {
3086       emit_opcode(cbuf, Assembler::REX_B);
3087     }
3088   %}
3089 
3090   enc_class REX_reg_wide(rRegI reg)
3091   %{
3092     if ($reg$$reg < 8) {
3093       emit_opcode(cbuf, Assembler::REX_W);
3094     } else {
3095       emit_opcode(cbuf, Assembler::REX_WB);
3096     }
3097   %}
3098 
3099   enc_class REX_reg_reg(rRegI dst, rRegI src)
3100   %{
3101     if ($dst$$reg < 8) {
3102       if ($src$$reg >= 8) {
3103         emit_opcode(cbuf, Assembler::REX_B);
3104       }
3105     } else {
3106       if ($src$$reg < 8) {
3107         emit_opcode(cbuf, Assembler::REX_R);
3108       } else {
3109         emit_opcode(cbuf, Assembler::REX_RB);
3110       }
3111     }
3112   %}
3113 
3114   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3115   %{
3116     if ($dst$$reg < 8) {
3117       if ($src$$reg < 8) {
3118         emit_opcode(cbuf, Assembler::REX_W);
3119       } else {
3120         emit_opcode(cbuf, Assembler::REX_WB);
3121       }
3122     } else {
3123       if ($src$$reg < 8) {
3124         emit_opcode(cbuf, Assembler::REX_WR);
3125       } else {
3126         emit_opcode(cbuf, Assembler::REX_WRB);
3127       }
3128     }
3129   %}
3130 
3131   enc_class REX_reg_mem(rRegI reg, memory mem)
3132   %{
3133     if ($reg$$reg < 8) {
3134       if ($mem$$base < 8) {
3135         if ($mem$$index >= 8) {
3136           emit_opcode(cbuf, Assembler::REX_X);
3137         }
3138       } else {
3139         if ($mem$$index < 8) {
3140           emit_opcode(cbuf, Assembler::REX_B);
3141         } else {
3142           emit_opcode(cbuf, Assembler::REX_XB);
3143         }
3144       }
3145     } else {
3146       if ($mem$$base < 8) {
3147         if ($mem$$index < 8) {
3148           emit_opcode(cbuf, Assembler::REX_R);
3149         } else {
3150           emit_opcode(cbuf, Assembler::REX_RX);
3151         }
3152       } else {
3153         if ($mem$$index < 8) {
3154           emit_opcode(cbuf, Assembler::REX_RB);
3155         } else {
3156           emit_opcode(cbuf, Assembler::REX_RXB);
3157         }
3158       }
3159     }
3160   %}
3161 
3162   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3163   %{
3164     if ($reg$$reg < 8) {
3165       if ($mem$$base < 8) {
3166         if ($mem$$index < 8) {
3167           emit_opcode(cbuf, Assembler::REX_W);
3168         } else {
3169           emit_opcode(cbuf, Assembler::REX_WX);
3170         }
3171       } else {
3172         if ($mem$$index < 8) {
3173           emit_opcode(cbuf, Assembler::REX_WB);
3174         } else {
3175           emit_opcode(cbuf, Assembler::REX_WXB);
3176         }
3177       }
3178     } else {
3179       if ($mem$$base < 8) {
3180         if ($mem$$index < 8) {
3181           emit_opcode(cbuf, Assembler::REX_WR);
3182         } else {
3183           emit_opcode(cbuf, Assembler::REX_WRX);
3184         }
3185       } else {
3186         if ($mem$$index < 8) {
3187           emit_opcode(cbuf, Assembler::REX_WRB);
3188         } else {
3189           emit_opcode(cbuf, Assembler::REX_WRXB);
3190         }
3191       }
3192     }
3193   %}
3194 
3195   enc_class reg_mem(rRegI ereg, memory mem)
3196   %{
3197     // High registers handle in encode_RegMem
3198     int reg = $ereg$$reg;
3199     int base = $mem$$base;
3200     int index = $mem$$index;
3201     int scale = $mem$$scale;
3202     int disp = $mem$$disp;
3203     bool disp_is_oop = $mem->disp_is_oop();
3204 
3205     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3206   %}
3207 
3208   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3209   %{
3210     int rm_byte_opcode = $rm_opcode$$constant;
3211 
3212     // High registers handle in encode_RegMem
3213     int base = $mem$$base;
3214     int index = $mem$$index;
3215     int scale = $mem$$scale;
3216     int displace = $mem$$disp;
3217 
3218     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3219                                             // working with static
3220                                             // globals
3221     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3222                   disp_is_oop);
3223   %}
3224 
3225   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3226   %{
3227     int reg_encoding = $dst$$reg;
3228     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3229     int index        = 0x04;            // 0x04 indicates no index
3230     int scale        = 0x00;            // 0x00 indicates no scale
3231     int displace     = $src1$$constant; // 0x00 indicates no displacement
3232     bool disp_is_oop = false;
3233     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3234                   disp_is_oop);
3235   %}
3236 
3237   enc_class neg_reg(rRegI dst)
3238   %{
3239     int dstenc = $dst$$reg;
3240     if (dstenc >= 8) {
3241       emit_opcode(cbuf, Assembler::REX_B);
3242       dstenc -= 8;
3243     }
3244     // NEG $dst
3245     emit_opcode(cbuf, 0xF7);
3246     emit_rm(cbuf, 0x3, 0x03, dstenc);
3247   %}
3248 
3249   enc_class neg_reg_wide(rRegI dst)
3250   %{
3251     int dstenc = $dst$$reg;
3252     if (dstenc < 8) {
3253       emit_opcode(cbuf, Assembler::REX_W);
3254     } else {
3255       emit_opcode(cbuf, Assembler::REX_WB);
3256       dstenc -= 8;
3257     }
3258     // NEG $dst
3259     emit_opcode(cbuf, 0xF7);
3260     emit_rm(cbuf, 0x3, 0x03, dstenc);
3261   %}
3262 
3263   enc_class setLT_reg(rRegI dst)
3264   %{
3265     int dstenc = $dst$$reg;
3266     if (dstenc >= 8) {
3267       emit_opcode(cbuf, Assembler::REX_B);
3268       dstenc -= 8;
3269     } else if (dstenc >= 4) {
3270       emit_opcode(cbuf, Assembler::REX);
3271     }
3272     // SETLT $dst
3273     emit_opcode(cbuf, 0x0F);
3274     emit_opcode(cbuf, 0x9C);
3275     emit_rm(cbuf, 0x3, 0x0, dstenc);
3276   %}
3277 
3278   enc_class setNZ_reg(rRegI dst)
3279   %{
3280     int dstenc = $dst$$reg;
3281     if (dstenc >= 8) {
3282       emit_opcode(cbuf, Assembler::REX_B);
3283       dstenc -= 8;
3284     } else if (dstenc >= 4) {
3285       emit_opcode(cbuf, Assembler::REX);
3286     }
3287     // SETNZ $dst
3288     emit_opcode(cbuf, 0x0F);
3289     emit_opcode(cbuf, 0x95);
3290     emit_rm(cbuf, 0x3, 0x0, dstenc);
3291   %}
3292 
3293   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3294                        rcx_RegI tmp)
3295   %{
3296     // cadd_cmpLT
3297 
3298     int tmpReg = $tmp$$reg;
3299 
3300     int penc = $p$$reg;
3301     int qenc = $q$$reg;
3302     int yenc = $y$$reg;
3303 
3304     // subl $p,$q
3305     if (penc < 8) {
3306       if (qenc >= 8) {
3307         emit_opcode(cbuf, Assembler::REX_B);
3308       }
3309     } else {
3310       if (qenc < 8) {
3311         emit_opcode(cbuf, Assembler::REX_R);
3312       } else {
3313         emit_opcode(cbuf, Assembler::REX_RB);
3314       }
3315     }
3316     emit_opcode(cbuf, 0x2B);
3317     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3318 
3319     // sbbl $tmp, $tmp
3320     emit_opcode(cbuf, 0x1B);
3321     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3322 
3323     // andl $tmp, $y
3324     if (yenc >= 8) {
3325       emit_opcode(cbuf, Assembler::REX_B);
3326     }
3327     emit_opcode(cbuf, 0x23);
3328     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3329 
3330     // addl $p,$tmp
3331     if (penc >= 8) {
3332         emit_opcode(cbuf, Assembler::REX_R);
3333     }
3334     emit_opcode(cbuf, 0x03);
3335     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3336   %}
3337 
3338   // Compare the lonogs and set -1, 0, or 1 into dst
3339   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3340   %{
3341     int src1enc = $src1$$reg;
3342     int src2enc = $src2$$reg;
3343     int dstenc = $dst$$reg;
3344 
3345     // cmpq $src1, $src2
3346     if (src1enc < 8) {
3347       if (src2enc < 8) {
3348         emit_opcode(cbuf, Assembler::REX_W);
3349       } else {
3350         emit_opcode(cbuf, Assembler::REX_WB);
3351       }
3352     } else {
3353       if (src2enc < 8) {
3354         emit_opcode(cbuf, Assembler::REX_WR);
3355       } else {
3356         emit_opcode(cbuf, Assembler::REX_WRB);
3357       }
3358     }
3359     emit_opcode(cbuf, 0x3B);
3360     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3361 
3362     // movl $dst, -1
3363     if (dstenc >= 8) {
3364       emit_opcode(cbuf, Assembler::REX_B);
3365     }
3366     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3367     emit_d32(cbuf, -1);
3368 
3369     // jl,s done
3370     emit_opcode(cbuf, 0x7C);
3371     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3372 
3373     // setne $dst
3374     if (dstenc >= 4) {
3375       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3376     }
3377     emit_opcode(cbuf, 0x0F);
3378     emit_opcode(cbuf, 0x95);
3379     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3380 
3381     // movzbl $dst, $dst
3382     if (dstenc >= 4) {
3383       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3384     }
3385     emit_opcode(cbuf, 0x0F);
3386     emit_opcode(cbuf, 0xB6);
3387     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3388   %}
3389 
3390   enc_class Push_ResultXD(regD dst) %{
3391     int dstenc = $dst$$reg;
3392 
3393     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3394 
3395     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3396     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3397     if (dstenc >= 8) {
3398       emit_opcode(cbuf, Assembler::REX_R);
3399     }
3400     emit_opcode  (cbuf, 0x0F );
3401     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3402     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3403 
3404     // add rsp,8
3405     emit_opcode(cbuf, Assembler::REX_W);
3406     emit_opcode(cbuf,0x83);
3407     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3408     emit_d8(cbuf,0x08);
3409   %}
3410 
3411   enc_class Push_SrcXD(regD src) %{
3412     int srcenc = $src$$reg;
3413 
3414     // subq rsp,#8
3415     emit_opcode(cbuf, Assembler::REX_W);
3416     emit_opcode(cbuf, 0x83);
3417     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3418     emit_d8(cbuf, 0x8);
3419 
3420     // movsd [rsp],src
3421     emit_opcode(cbuf, 0xF2);
3422     if (srcenc >= 8) {
3423       emit_opcode(cbuf, Assembler::REX_R);
3424     }
3425     emit_opcode(cbuf, 0x0F);
3426     emit_opcode(cbuf, 0x11);
3427     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3428 
3429     // fldd [rsp]
3430     emit_opcode(cbuf, 0x66);
3431     emit_opcode(cbuf, 0xDD);
3432     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3433   %}
3434 
3435 
3436   enc_class movq_ld(regD dst, memory mem) %{
3437     MacroAssembler _masm(&cbuf);
3438     __ movq($dst$$XMMRegister, $mem$$Address);
3439   %}
3440 
3441   enc_class movq_st(memory mem, regD src) %{
3442     MacroAssembler _masm(&cbuf);
3443     __ movq($mem$$Address, $src$$XMMRegister);
3444   %}
3445 
3446   enc_class pshufd_8x8(regF dst, regF src) %{
3447     MacroAssembler _masm(&cbuf);
3448 
3449     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3450     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3451     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3452   %}
3453 
3454   enc_class pshufd_4x16(regF dst, regF src) %{
3455     MacroAssembler _masm(&cbuf);
3456 
3457     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3458   %}
3459 
3460   enc_class pshufd(regD dst, regD src, int mode) %{
3461     MacroAssembler _masm(&cbuf);
3462 
3463     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3464   %}
3465 
3466   enc_class pxor(regD dst, regD src) %{
3467     MacroAssembler _masm(&cbuf);
3468 
3469     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3470   %}
3471 
3472   enc_class mov_i2x(regD dst, rRegI src) %{
3473     MacroAssembler _masm(&cbuf);
3474 
3475     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3476   %}
3477 
3478   // obj: object to lock
3479   // box: box address (header location) -- killed
3480   // tmp: rax -- killed
3481   // scr: rbx -- killed
3482   //
3483   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3484   // from i486.ad.  See that file for comments.
3485   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3486   // use the shorter encoding.  (Movl clears the high-order 32-bits).
       //
       // The emitted sequence is selected at code-generation time by EmitSync:
       //   EmitSync & 1 : store unused_mark() into the box and compare rsp
       //                  against NULL_WORD, nothing else.
       //   EmitSync & 2 : optional biased-locking entry, then a CAS of the box
       //                  address into the mark word with a recursive-lock check.
       //   otherwise    : triage on the 0x02 bit of the mark word, handling the
       //                  stack-locked/neutral/biased and the inflated case.
       // The outcome is left in the condition codes.  NOTE(review): Fast_Unlock's
       // comments state ZF=1 means success; presumably the same convention
       // applies here -- confirm against the instruct that uses this encoding.
3487 
3488 
3489   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3490   %{
3491     Register objReg = as_Register((int)$obj$$reg);
3492     Register boxReg = as_Register((int)$box$$reg);
3493     Register tmpReg = as_Register($tmp$$reg);
3494     Register scrReg = as_Register($scr$$reg);
3495     MacroAssembler masm(&cbuf);
3496 
3497     // Verify uniqueness of register assignments -- necessary but not sufficient
3498     assert (objReg != boxReg && objReg != tmpReg &&
3499             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3500 
3501     if (_counters != NULL) {
3502       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3503     }
3504     if (EmitSync & 1) {
3505         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3506         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3507         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3508     } else
3509     if (EmitSync & 2) {
3510         Label DONE_LABEL;
3511         if (UseBiasedLocking) {
3512            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3513           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3514         }
3515         // QQQ was movl...
             // tmpReg = mark word | 1; save it in the box as the displaced header.
3516         masm.movptr(tmpReg, 0x1);
3517         masm.orptr(tmpReg, Address(objReg, 0));
3518         masm.movptr(Address(boxReg, 0), tmpReg);
3519         if (os::is_MP()) {
3520           masm.lock();
3521         }
3522         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3523         masm.jcc(Assembler::equal, DONE_LABEL);
3524 
3525         // Recursive locking
             // (observed mark - rsp) masked with (7 - page size) is stored in the box;
             // the andptr also sets ZF according to whether that result is zero.
3526         masm.subptr(tmpReg, rsp);
3527         masm.andptr(tmpReg, 7 - os::vm_page_size());
3528         masm.movptr(Address(boxReg, 0), tmpReg);
3529 
3530         masm.bind(DONE_LABEL);
3531         masm.nop(); // avoid branch to branch
3532     } else {
             // The original also declared a third label, Egress, that was never
             // bound or branched to; it has been dropped.
3533         Label DONE_LABEL, IsInflated;
3534 
3535         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3536         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3537         masm.jcc   (Assembler::notZero, IsInflated) ; 
3538          
3539         // it's stack-locked, biased or neutral
3540         // TODO: optimize markword triage order to reduce the number of
3541         // conditional branches in the most common cases.
3542         // Beware -- there's a subtle invariant that fetch of the markword
3543         // at [FETCH], below, will never observe a biased encoding (*101b).
3544         // If this invariant is not held we'll suffer exclusion (safety) failure.
3545 
3546         if (UseBiasedLocking && !UseOptoBiasInlining) {
3547           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3548           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3549         }
3550 
3551         // was q will it destroy high?
3552         masm.orl   (tmpReg, 1) ; 
3553         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3554         if (os::is_MP()) { masm.lock(); } 
3555         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3556         if (_counters != NULL) {
3557            masm.cond_inc32(Assembler::equal,
3558                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3559         }
3560         masm.jcc   (Assembler::equal, DONE_LABEL);
3561 
3562         // Recursive locking
             // Same computation as in the EmitSync & 2 variant: the masked
             // difference to rsp is stored in the box and ZF reflects whether it is 0.
3563         masm.subptr(tmpReg, rsp);
3564         masm.andptr(tmpReg, 7 - os::vm_page_size());
3565         masm.movptr(Address(boxReg, 0), tmpReg);
3566         if (_counters != NULL) {
3567            masm.cond_inc32(Assembler::equal,
3568                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3569         }
3570         masm.jmp   (DONE_LABEL) ;
3571 
3572         masm.bind  (IsInflated) ;
3573         // It's inflated
3574 
3575         // TODO: someday avoid the ST-before-CAS penalty by
3576         // relocating (deferring) the following ST.
3577         // We should also think about trying a CAS without having
3578         // fetched _owner.  If the CAS is successful we may
3579         // avoid an RTO->RTS upgrade on the $line.
3580         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3581         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3582 
             // tmpReg still holds the mark word with the 0x02 bit set (tested above),
             // which is why every ObjectMonitor field offset below is reduced by 2.
             // boxReg is reused to keep that tagged value while tmpReg (rax) becomes
             // the compare value for the cmpxchg.
3583         masm.mov    (boxReg, tmpReg) ; 
3584         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3585         masm.testptr(tmpReg, tmpReg) ;   
3586         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3587 
3588         // It's inflated and appears unlocked
3589         if (os::is_MP()) { masm.lock(); } 
3590         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3591         // Intentional fall-through into DONE_LABEL ...
3592 
3593         masm.bind  (DONE_LABEL) ;
3594         masm.nop   () ;                 // avoid jmp to jmp
3595     }
3596   %}
3597 
3598   // obj: object to unlock
3599   // box: box address (displaced header location), killed
3600   // RBX: killed tmp; cannot be obj nor box
       //
       // The emitted sequence is selected at code-generation time by EmitSync:
       //   EmitSync & 4 : only compare rsp against 0.
       //   EmitSync & 8 : optional biased-locking exit, recursive check on the
       //                  displaced header, then a CAS restoring the mark word.
       //   otherwise    : full version handling recursive, inflated and
       //                  stack-locked cases; EmitSync & 65536 drops the
       //                  successor check and EmitSync & 32768 appends a nop.
       // The result is reported in ZF (see the "set ICC.ZF" comments below).
3601   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3602   %{
3603 
3604     Register objReg = as_Register($obj$$reg);
3605     Register boxReg = as_Register($box$$reg);
3606     Register tmpReg = as_Register($tmp$$reg);
3607     MacroAssembler masm(&cbuf);
3608 
3609     if (EmitSync & 4) { 
3610        masm.cmpptr(rsp, 0) ; 
3611     } else
3612     if (EmitSync & 8) {
3613        Label DONE_LABEL;
3614        if (UseBiasedLocking) {
3615          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3616        }
3617 
3618        // Check whether the displaced header is 0
3619        //(=> recursive unlock)
3620        masm.movptr(tmpReg, Address(boxReg, 0));
3621        masm.testptr(tmpReg, tmpReg);
3622        masm.jcc(Assembler::zero, DONE_LABEL);
3623 
3624        // If not recursive lock, reset the header to displaced header
3625        if (os::is_MP()) {
3626          masm.lock();
3627        }
3628        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3629        masm.bind(DONE_LABEL);
3630        masm.nop(); // avoid branch to branch
3631     } else {
3632        Label DONE_LABEL, Stacked, CheckSucc ;
3633 
3634        if (UseBiasedLocking && !UseOptoBiasInlining) {
3635          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3636        }
3637         
            // tmpReg = mark word.  A zero displaced header in the box exits
            // immediately with ZF=1; otherwise bit 0x02 of the mark selects the
            // inflated path (set) or the Stacked path (clear).
3638        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3639        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3640        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3641        masm.testl (tmpReg, 0x02) ; 
3642        masm.jcc   (Assembler::zero, Stacked) ; 
3643         
3644        // It's inflated
            // tmpReg still has the 0x02 bit set, hence the "-2" on every
            // ObjectMonitor field offset.  (owner ^ current thread) | recursions
            // is non-zero unless this thread is the owner with no recursions;
            // in that case we leave with ZF=0.
3645        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3646        masm.xorptr(boxReg, r15_thread) ; 
3647        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3648        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // cxq | EntryList non-zero => go to CheckSucc; otherwise clear the
            // owner field and finish (movptr/jmp leave the ZF=1 from the orptr).
3649        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3650        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3651        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3652        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3653        masm.jmp   (DONE_LABEL) ; 
3654         
3655        if ((EmitSync & 65536) == 0) { 
3656          Label LSuccess, LGoSlowPath ;
3657          masm.bind  (CheckSucc) ;
3658          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3659          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3660 
3661          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3662          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3663          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3664          // are all faster when the write buffer is populated.
3665          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3666          if (os::is_MP()) {
3667             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3668          }
              // Re-check succ after the store+fence; if it is still set we are done.
3669          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3670          masm.jcc   (Assembler::notZero, LSuccess) ;
3671 
              // Otherwise try to CAS the owner field from NULL (rax) back to the
              // current thread; a failed CAS branches to LSuccess.
3672          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3673          if (os::is_MP()) { masm.lock(); }
3674          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3675          masm.jcc   (Assembler::notEqual, LSuccess) ;
3676          // Intentional fall-through into slow-path
3677 
3678          masm.bind  (LGoSlowPath) ;
3679          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3680          masm.jmp   (DONE_LABEL) ;
3681 
3682          masm.bind  (LSuccess) ;
3683          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3684          masm.jmp   (DONE_LABEL) ;
3685        }
3686 
3687        masm.bind  (Stacked) ; 
3688        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3689        if (os::is_MP()) { masm.lock(); } 
3690        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3691 
            // With the successor check compiled out, CheckSucc is bound here so
            // the jcc above still has a target.
3692        if (EmitSync & 65536) {
3693           masm.bind (CheckSucc) ;
3694        }
3695        masm.bind(DONE_LABEL);
3696        if (EmitSync & 32768) {
3697           masm.nop();                      // avoid branch to branch
3698        }
3699     }
3700   %}
3701 
3702 
3703   enc_class enc_rethrow()
3704   %{
         // Emits a 5-byte "jmp rel32" (opcode 0xE9) to OptoRuntime::rethrow_stub().
         // The displacement is taken relative to the end of the instruction
         // (insts_end() after the opcode byte, plus the 4 displacement bytes) and
         // is recorded with a runtime-call relocation.
3705     cbuf.set_insts_mark();
3706     emit_opcode(cbuf, 0xE9); // jmp entry
3707     emit_d32_reloc(cbuf,
3708                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3709                    runtime_call_Relocation::spec(),
3710                    RELOC_DISP32);
3711   %}
3712 
3713   enc_class absF_encoding(regF dst)
3714   %{
         // Hand-encodes [REX.R] 0F 54 /r (the ANDPS opcode) with $dst as the
         // register operand and a mod=00, r/m=101 memory operand whose 32-bit
         // displacement is relocated against StubRoutines::x86::float_sign_mask().
3715     int dstenc = $dst$$reg;
3716     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3717 
3718     cbuf.set_insts_mark();
         // Registers 8..15 need the REX.R prefix; only the low 3 bits go in ModRM.
3719     if (dstenc >= 8) {
3720       emit_opcode(cbuf, Assembler::REX_R);
3721       dstenc -= 8;
3722     }
3723     // XXX reg_mem doesn't support RIP-relative addressing yet
3724     emit_opcode(cbuf, 0x0F);
3725     emit_opcode(cbuf, 0x54);
3726     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3727     emit_d32_reloc(cbuf, signmask_address);
3728   %}
3729 
3730   enc_class absD_encoding(regD dst)
3731   %{
         // Hand-encodes 66 [REX.R] 0F 54 /r (the ANDPD opcode) with $dst as the
         // register operand and a mod=00, r/m=101 memory operand whose 32-bit
         // displacement is relocated against StubRoutines::x86::double_sign_mask().
3732     int dstenc = $dst$$reg;
3733     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3734 
3735     cbuf.set_insts_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
3736     emit_opcode(cbuf, 0x66);
3737     if (dstenc >= 8) {
3738       emit_opcode(cbuf, Assembler::REX_R);
3739       dstenc -= 8;
3740     }
3741     // XXX reg_mem doesn't support RIP-relative addressing yet
3742     emit_opcode(cbuf, 0x0F);
3743     emit_opcode(cbuf, 0x54);
3744     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3745     emit_d32_reloc(cbuf, signmask_address);
3746   %}
3747 
3748   enc_class negF_encoding(regF dst)
3749   %{
         // Hand-encodes [REX.R] 0F 57 /r (the XORPS opcode) with $dst as the
         // register operand and a mod=00, r/m=101 memory operand whose 32-bit
         // displacement is relocated against StubRoutines::x86::float_sign_flip().
3750     int dstenc = $dst$$reg;
3751     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3752 
3753     cbuf.set_insts_mark();
         // Registers 8..15 need the REX.R prefix; only the low 3 bits go in ModRM.
3754     if (dstenc >= 8) {
3755       emit_opcode(cbuf, Assembler::REX_R);
3756       dstenc -= 8;
3757     }
3758     // XXX reg_mem doesn't support RIP-relative addressing yet
3759     emit_opcode(cbuf, 0x0F);
3760     emit_opcode(cbuf, 0x57);
3761     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3762     emit_d32_reloc(cbuf, signflip_address);
3763   %}
3764 
3765   enc_class negD_encoding(regD dst)
3766   %{
         // Hand-encodes 66 [REX.R] 0F 57 /r (the XORPD opcode) with $dst as the
         // register operand and a mod=00, r/m=101 memory operand whose 32-bit
         // displacement is relocated against StubRoutines::x86::double_sign_flip().
3767     int dstenc = $dst$$reg;
3768     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3769 
3770     cbuf.set_insts_mark();
         // The 0x66 operand-size prefix is emitted before any REX prefix.
3771     emit_opcode(cbuf, 0x66);
3772     if (dstenc >= 8) {
3773       emit_opcode(cbuf, Assembler::REX_R);
3774       dstenc -= 8;
3775     }
3776     // XXX reg_mem doesn't support RIP-relative addressing yet
3777     emit_opcode(cbuf, 0x0F);
3778     emit_opcode(cbuf, 0x57);
3779     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3780     emit_d32_reloc(cbuf, signflip_address);
3781   %}
3782 
3783   enc_class f2i_fixup(rRegI dst, regF src)
3784   %{
         // Emits a guarded call to StubRoutines::x86::f2i_fixup():
         //   cmpl dst, 0x80000000 ; jne done ;
         //   subq rsp, 8 ; movss [rsp], src ; call stub ; popq dst ; done:
         // NOTE(review): 0x80000000 is presumably the value a preceding
         // float->int conversion leaves in $dst when it could not convert --
         // confirm against the instruct that uses this encoding.
3785     int dstenc = $dst$$reg;
3786     int srcenc = $src$$reg;
3787 
3788     // cmpl $dst, #0x80000000
3789     if (dstenc >= 8) {
3790       emit_opcode(cbuf, Assembler::REX_B);
3791     }
3792     emit_opcode(cbuf, 0x81);
3793     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3794     emit_d32(cbuf, 0x80000000);
3795 
3796     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted below:
         // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
         // each of src/dst that needs a REX prefix.  Keep it in sync with any
         // change to the instructions that follow.
3797     emit_opcode(cbuf, 0x75);
3798     if (srcenc < 8 && dstenc < 8) {
3799       emit_d8(cbuf, 0xF);
3800     } else if (srcenc >= 8 && dstenc >= 8) {
3801       emit_d8(cbuf, 0x11);
3802     } else {
3803       emit_d8(cbuf, 0x10);
3804     }
3805 
3806     // subq rsp, #8
3807     emit_opcode(cbuf, Assembler::REX_W);
3808     emit_opcode(cbuf, 0x83);
3809     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3810     emit_d8(cbuf, 8);
3811 
3812     // movss [rsp], $src
3813     emit_opcode(cbuf, 0xF3);
3814     if (srcenc >= 8) {
3815       emit_opcode(cbuf, Assembler::REX_R);
3816     }
3817     emit_opcode(cbuf, 0x0F);
3818     emit_opcode(cbuf, 0x11);
3819     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3820 
3821     // call f2i_fixup
3822     cbuf.set_insts_mark();
3823     emit_opcode(cbuf, 0xE8);
3824     emit_d32_reloc(cbuf,
3825                    (int)
3826                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3827                    runtime_call_Relocation::spec(),
3828                    RELOC_DISP32);
3829 
3830     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3831     if (dstenc >= 8) {
3832       emit_opcode(cbuf, Assembler::REX_B);
3833     }
3834     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3835 
3836     // done:
3837   %}
3838 
3839   enc_class f2l_fixup(rRegL dst, regF src)
3840   %{
         // Emits a guarded call to StubRoutines::x86::f2l_fixup():
         //   cmpq dst, [double_sign_flip constant] ; jne done ;
         //   subq rsp, 8 ; movss [rsp], src ; call stub ; popq dst ; done:
         // The compare goes through memory (a relocated 32-bit displacement to
         // StubRoutines::x86::double_sign_flip()) rather than an immediate.
3841     int dstenc = $dst$$reg;
3842     int srcenc = $src$$reg;
3843     address const_address = (address) StubRoutines::x86::double_sign_flip();
3844 
3845     // cmpq $dst, [0x8000000000000000]
3846     cbuf.set_insts_mark();
3847     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3848     emit_opcode(cbuf, 0x39);
3849     // XXX reg_mem doesn't support RIP-relative addressing yet
3850     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3851     emit_d32_reloc(cbuf, const_address);
3852 
3853 
3854     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted below:
         // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for
         // each of src/dst that needs a REX prefix.  Keep it in sync with any
         // change to the instructions that follow.
3855     emit_opcode(cbuf, 0x75);
3856     if (srcenc < 8 && dstenc < 8) {
3857       emit_d8(cbuf, 0xF);
3858     } else if (srcenc >= 8 && dstenc >= 8) {
3859       emit_d8(cbuf, 0x11);
3860     } else {
3861       emit_d8(cbuf, 0x10);
3862     }
3863 
3864     // subq rsp, #8
3865     emit_opcode(cbuf, Assembler::REX_W);
3866     emit_opcode(cbuf, 0x83);
3867     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3868     emit_d8(cbuf, 8);
3869 
3870     // movss [rsp], $src
3871     emit_opcode(cbuf, 0xF3);
3872     if (srcenc >= 8) {
3873       emit_opcode(cbuf, Assembler::REX_R);
3874     }
3875     emit_opcode(cbuf, 0x0F);
3876     emit_opcode(cbuf, 0x11);
3877     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3878 
3879     // call f2l_fixup
3880     cbuf.set_insts_mark();
3881     emit_opcode(cbuf, 0xE8);
3882     emit_d32_reloc(cbuf,
3883                    (int)
3884                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3885                    runtime_call_Relocation::spec(),
3886                    RELOC_DISP32);
3887 
3888     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3889     if (dstenc >= 8) {
3890       emit_opcode(cbuf, Assembler::REX_B);
3891     }
3892     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3893 
3894     // done:
3895   %}
3896 
3897   enc_class d2i_fixup(rRegI dst, regD src)
3898   %{
         // Emits a guarded call to StubRoutines::x86::d2i_fixup():
         //   cmpl dst, 0x80000000 ; jne done ;
         //   subq rsp, 8 ; movsd [rsp], src ; call stub ; popq dst ; done:
         // NOTE(review): 0x80000000 is presumably the value a preceding
         // double->int conversion leaves in $dst when it could not convert --
         // confirm against the instruct that uses this encoding.
3899     int dstenc = $dst$$reg;
3900     int srcenc = $src$$reg;
3901 
3902     // cmpl $dst, #0x80000000
3903     if (dstenc >= 8) {
3904       emit_opcode(cbuf, Assembler::REX_B);
3905     }
3906     emit_opcode(cbuf, 0x81);
3907     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3908     emit_d32(cbuf, 0x80000000);
3909 
3910     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted below:
         // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
         // each of src/dst that needs a REX prefix.  Keep it in sync with any
         // change to the instructions that follow.
3911     emit_opcode(cbuf, 0x75);
3912     if (srcenc < 8 && dstenc < 8) {
3913       emit_d8(cbuf, 0xF);
3914     } else if (srcenc >= 8 && dstenc >= 8) {
3915       emit_d8(cbuf, 0x11);
3916     } else {
3917       emit_d8(cbuf, 0x10);
3918     }
3919 
3920     // subq rsp, #8
3921     emit_opcode(cbuf, Assembler::REX_W);
3922     emit_opcode(cbuf, 0x83);
3923     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3924     emit_d8(cbuf, 8);
3925 
3926     // movsd [rsp], $src
3927     emit_opcode(cbuf, 0xF2);
3928     if (srcenc >= 8) {
3929       emit_opcode(cbuf, Assembler::REX_R);
3930     }
3931     emit_opcode(cbuf, 0x0F);
3932     emit_opcode(cbuf, 0x11);
3933     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3934 
3935     // call d2i_fixup
3936     cbuf.set_insts_mark();
3937     emit_opcode(cbuf, 0xE8);
3938     emit_d32_reloc(cbuf,
3939                    (int)
3940                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3941                    runtime_call_Relocation::spec(),
3942                    RELOC_DISP32);
3943 
3944     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
3945     if (dstenc >= 8) {
3946       emit_opcode(cbuf, Assembler::REX_B);
3947     }
3948     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3949 
3950     // done:
3951   %}
3952 
3953   enc_class d2l_fixup(rRegL dst, regD src)
3954   %{
         // Emits a guarded call to StubRoutines::x86::d2l_fixup():
         //   cmpq dst, [double_sign_flip constant] ; jne done ;
         //   subq rsp, 8 ; movsd [rsp], src ; call stub ; popq dst ; done:
         // The compare goes through memory (a relocated 32-bit displacement to
         // StubRoutines::x86::double_sign_flip()) rather than an immediate.
3955     int dstenc = $dst$$reg;
3956     int srcenc = $src$$reg;
3957     address const_address = (address) StubRoutines::x86::double_sign_flip();
3958 
3959     // cmpq $dst, [0x8000000000000000]
3960     cbuf.set_insts_mark();
3961     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3962     emit_opcode(cbuf, 0x39);
3963     // XXX reg_mem doesn't support RIP-relative addressing yet
3964     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3965     emit_d32_reloc(cbuf, const_address);
3966 
3967 
3968     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted below:
         // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for
         // each of src/dst that needs a REX prefix.  Keep it in sync with any
         // change to the instructions that follow.
3969     emit_opcode(cbuf, 0x75);
3970     if (srcenc < 8 && dstenc < 8) {
3971       emit_d8(cbuf, 0xF);
3972     } else if (srcenc >= 8 && dstenc >= 8) {
3973       emit_d8(cbuf, 0x11);
3974     } else {
3975       emit_d8(cbuf, 0x10);
3976     }
3977 
3978     // subq rsp, #8
3979     emit_opcode(cbuf, Assembler::REX_W);
3980     emit_opcode(cbuf, 0x83);
3981     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3982     emit_d8(cbuf, 8);
3983 
3984     // movsd [rsp], $src
3985     emit_opcode(cbuf, 0xF2);
3986     if (srcenc >= 8) {
3987       emit_opcode(cbuf, Assembler::REX_R);
3988     }
3989     emit_opcode(cbuf, 0x0F);
3990     emit_opcode(cbuf, 0x11);
3991     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3992 
3993     // call d2l_fixup
3994     cbuf.set_insts_mark();
3995     emit_opcode(cbuf, 0xE8);
3996     emit_d32_reloc(cbuf,
3997                    (int)
3998                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3999                    runtime_call_Relocation::spec(),
4000                    RELOC_DISP32);
4001 
4002     // popq $dst
         // Pops the 8-byte slot reserved by the subq above into $dst.
4003     if (dstenc >= 8) {
4004       emit_opcode(cbuf, Assembler::REX_B);
4005     }
4006     emit_opcode(cbuf, 0x58 | (dstenc & 7));
4007 
4008     // done:
4009   %}
4010 
4011   // Safepoint Poll.  This polls the safepoint page, and causes an
4012   // exception if it is not readable. Unfortunately, it kills
4013   // RFLAGS in the process.
       //
       // The instruction start is marked and tagged with a poll_type relocation
       // before any byte is emitted; the 32-bit displacement is then emitted
       // relative to os::get_polling_page().
4014   enc_class enc_safepoint_poll
4015   %{
4016     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
4017     // XXX reg_mem doesn't support RIP-relative addressing yet
4018     cbuf.set_insts_mark();
4019     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); // XXX
4020     emit_opcode(cbuf, 0x85); // testl
4021     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
4022     // cbuf.insts_mark() is beginning of instruction
4023     emit_d32_reloc(cbuf, os::get_polling_page());
4024 //                    relocInfo::poll_type,
4025   %}
4026 %}
4027 
4028 
4029 
4030 //----------FRAME--------------------------------------------------------------
4031 // Definition of frame structure and management information.
4032 //
4033 //  S T A C K   L A Y O U T    Allocators stack-slot number
4034 //                             |   (to get allocators register number
4035 //  G  Owned by    |        |  v    add OptoReg::stack0())
4036 //  r   CALLER     |        |
4037 //  o     |        +--------+      pad to even-align allocators stack-slot
4038 //  w     V        |  pad0  |        numbers; owned by CALLER
4039 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4040 //  h     ^        |   in   |  5
4041 //        |        |  args  |  4   Holes in incoming args owned by SELF
4042 //  |     |        |        |  3
4043 //  |     |        +--------+
4044 //  V     |        | old out|      Empty on Intel, window on Sparc
4045 //        |    old |preserve|      Must be even aligned.
4046 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4047 //        |        |   in   |  3   area for Intel ret address
4048 //     Owned by    |preserve|      Empty on Sparc.
4049 //       SELF      +--------+
4050 //        |        |  pad2  |  2   pad to align old SP
4051 //        |        +--------+  1
4052 //        |        | locks  |  0
4053 //        |        +--------+----> OptoReg::stack0(), even aligned
4054 //        |        |  pad1  | 11   pad to align new SP
4055 //        |        +--------+
4056 //        |        |        | 10
4057 //        |        | spills |  9   spills
4058 //        V        |        |  8   (pad0 slot for callee)
4059 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4060 //        ^        |  out   |  7
4061 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4062 //     Owned by    +--------+
4063 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4064 //        |    new |preserve|      Must be even-aligned.
4065 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4066 //        |        |        |
4067 //
4068 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4069 //         known from SELF's arguments and the Java calling convention.
4070 //         Region 6-7 is determined per call site.
4071 // Note 2: If the calling convention leaves holes in the incoming argument
4072 //         area, those holes are owned by SELF.  Holes in the outgoing area
4073 //         are owned by the CALLEE.  Holes should not be necessary in the
4074 //         incoming area, as the Java calling convention is completely under
4075 //         the control of the AD file.  Doubles can be sorted and packed to
4076 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4077 //         varargs C calling conventions.
4078 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4079 //         even aligned with pad0 as needed.
4080 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4081 //         region 6-11 is even aligned; it may be padded out more so that
4082 //         the region from SP to FP meets the minimum stack alignment.
4083 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4084 //         alignment.  Region 11, pad1, may be dynamically extended so that
4085 //         SP meets the minimum alignment.
4086 
4087 frame
4088 %{
       // Frame description for compiled code: stack direction, the registers
       // used by the compiled/interpreter calling convention, stack-slot
       // bookkeeping, and the Java / C calling-convention and return-value hooks.

4089   // What direction does stack grow in (assumed to be same for C & Java)
4090   stack_direction(TOWARDS_LOW);
4091 
4092   // These three registers define part of the calling convention
4093   // between compiled code and the interpreter.
4094   inline_cache_reg(RAX);                // Inline Cache Register
4095   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4096                                         // calling interpreter
4097 
4098   // Optional: name the operand used by cisc-spilling to access
4099   // [stack_pointer + offset]
4100   cisc_spilling_operand_name(indOffset32);
4101 
4102   // Number of stack slots consumed by locking an object
4103   sync_stack_slots(2);
4104 
4105   // Compiled code's Frame Pointer
4106   frame_pointer(RSP);
4107 
4108   // Interpreter stores its frame pointer in a register which is
4109   // stored to the stack by I2CAdaptors.
4110   // I2CAdaptors convert from interpreted java to compiled java.
4111   interpreter_frame_pointer(RBP);
4112 
4113   // Stack alignment requirement
4114   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4115 
4116   // Number of stack slots between incoming argument block and the start of
4117   // a new frame.  The PROLOG must add this many slots to the stack.  The
4118   // EPILOG must remove this many slots.  amd64 needs two slots for
4119   // return address.
       // NOTE(review): the base value below is 4 slots, not 2; presumably the
       // other two hold the saved frame pointer -- confirm.  Two more slots are
       // added when VerifyStackAtCalls is set.
4120   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4121 
4122   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4123   // for calls to C.  Supports the var-args backing area for register parms.
4124   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4125 
4126   // The after-PROLOG location of the return address.  Location of
4127   // return address specifies a type (REG or STACK) and a number
4128   // representing the register number (i.e. - use a register name) or
4129   // stack slot.
4130   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4131   // Otherwise, it is above the locks and verification slot and alignment word
4132   return_addr(STACK - 2 +
4133               round_to(2 + 2 * VerifyStackAtCalls +
4134                        Compile::current()->fixed_slots(),
4135                        WordsPerLong * 2));
4136 
4137   // Body of function which returns an integer array locating
4138   // arguments either in registers or in stack slots.  Passed an array
4139   // of ideal registers called "sig" and a "length" count.  Stack-slot
4140   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4141   // arguments for a CALLEE.  Incoming stack arguments are
4142   // automatically biased by the preserve_stack_slots field above.
4143 
4144   calling_convention
4145   %{
4146     // No difference between ingoing/outgoing just pass false
4147     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4148   %}
4149 
4150   c_calling_convention
4151   %{
4152     // This is obviously always outgoing
4153     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4154   %}
4155 
4156   // Location of compiled Java return values.  Same as C for now.
4157   return_value
4158   %{
4159     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4160            "only return normal values");
4161 
         // lo[] and hi[] are indexed by ideal_reg and give the two halves of the
         // returned OptoRegPair; hi is OptoReg::Bad where the type has no upper
         // half.  The two leading 0 entries pad the slots below Op_RegN.
4162     static const int lo[Op_RegL + 1] = {
4163       0,
4164       0,
4165       RAX_num,  // Op_RegN
4166       RAX_num,  // Op_RegI
4167       RAX_num,  // Op_RegP
4168       XMM0_num, // Op_RegF
4169       XMM0_num, // Op_RegD
4170       RAX_num   // Op_RegL
4171     };
4172     static const int hi[Op_RegL + 1] = {
4173       0,
4174       0,
4175       OptoReg::Bad, // Op_RegN
4176       OptoReg::Bad, // Op_RegI
4177       RAX_H_num,    // Op_RegP
4178       OptoReg::Bad, // Op_RegF
4179       XMM0_H_num,   // Op_RegD
4180       RAX_H_num     // Op_RegL
4181     };
         // Guards against a new ideal register type being added without
         // extending the tables above.
4182     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4183     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4184   %}
4185 %}
4186 
4187 //----------ATTRIBUTES---------------------------------------------------------
     // Each declaration names an attribute and gives a value in parentheses.
     // NOTE(review): that value is presumably the default used when an operand
     // or instruction does not set the attribute itself (operands below set
     // op_cost explicitly) -- confirm against the ADLC documentation.
4188 //----------Operand Attributes-------------------------------------------------
4189 op_attrib op_cost(0);        // Required cost attribute
4190 
4191 //----------Instruction Attributes---------------------------------------------
4192 ins_attrib ins_cost(100);       // Required cost attribute
4193 ins_attrib ins_size(8);         // Required size attribute (in bits)
4194 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4195 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4196                                 // a non-matching short branch variant
4197                                 // of some long branch?
4198 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4199                                 // be a power of 2) specifies the
4200                                 // alignment that some part of the
4201                                 // instruction (not necessarily the
4202                                 // start) requires.  If > 1, a
4203                                 // compute_padding() function must be
4204                                 // provided for the instruction
4205 
4206 //----------OPERANDS-----------------------------------------------------------
4207 // Operand definitions must precede instruction definitions for correct parsing
4208 // in the ADLC because operands constitute user defined types which are used in
4209 // instruction definitions.
4210 
4211 //----------Simple Operands----------------------------------------------------
4212 // Immediate Operands
4213 // Integer Immediate
     // Matches every ConI node (no predicate); op_cost 10.
4214 operand immI()
4215 %{
4216   match(ConI);
4217 
4218   op_cost(10);
4219   format %{ %}
4220   interface(CONST_INTER);
4221 %}
4222 
4223 // Constant for test vs zero
     // Matches a ConI node whose value is exactly 0; op_cost 0.
4224 operand immI0()
4225 %{
4226   predicate(n->get_int() == 0);
4227   match(ConI);
4228 
4229   op_cost(0);
4230   format %{ %}
4231   interface(CONST_INTER);
4232 %}
4233 
4234 // Constant for increment
     // Matches a ConI node whose value is exactly 1; op_cost 0.
4235 operand immI1()
4236 %{
4237   predicate(n->get_int() == 1);
4238   match(ConI);
4239 
4240   op_cost(0);
4241   format %{ %}
4242   interface(CONST_INTER);
4243 %}
4244 
4245 // Constant for decrement
     // Matches a ConI node whose value is exactly -1; op_cost 0.
4246 operand immI_M1()
4247 %{
4248   predicate(n->get_int() == -1);
4249   match(ConI);
4250 
4251   op_cost(0);
4252   format %{ %}
4253   interface(CONST_INTER);
4254 %}
4255 
4256 // Valid scale values for addressing modes
     // Matches a ConI node whose value lies in 0..3 inclusive.  No op_cost is
     // set in this operand.
4257 operand immI2()
4258 %{
4259   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4260   match(ConI);
4261 
4262   format %{ %}
4263   interface(CONST_INTER);
4264 %}
4265 
4266 operand immI8()
4267 %{
       // Integer immediate that fits in a signed 8-bit value: matches a ConI
       // node whose value lies in -0x80 .. 0x7F inclusive; op_cost 5.
4268   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4269   match(ConI);
4270 
4271   op_cost(5);
4272   format %{ %}
4273   interface(CONST_INTER);
4274 %}
4275 
4276 operand immI16()
4277 %{
       // Integer immediate that fits in a signed 16-bit value: matches a ConI
       // node whose value lies in -32768 .. 32767 inclusive; op_cost 10.
4278   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4279   match(ConI);
4280 
4281   op_cost(10);
4282   format %{ %}
4283   interface(CONST_INTER);
4284 %}
4285 
4286 // Constant for long shifts
     // Matches a ConI node whose value is exactly 32; op_cost 0.
4287 operand immI_32()
4288 %{
4289   predicate( n->get_int() == 32 );
4290   match(ConI);
4291 
4292   op_cost(0);
4293   format %{ %}
4294   interface(CONST_INTER);
4295 %}
4296 
4297 // Constant for long shifts
     // Matches a ConI node whose value is exactly 64; op_cost 0.
4298 operand immI_64()
4299 %{
4300   predicate( n->get_int() == 64 );
4301   match(ConI);
4302 
4303   op_cost(0);
4304   format %{ %}
4305   interface(CONST_INTER);
4306 %}
4307 
4308 // Pointer Immediate
     // Matches every ConP node (no predicate); op_cost 10.
4309 operand immP()
4310 %{
4311   match(ConP);
4312 
4313   op_cost(10);
4314   format %{ %}
4315   interface(CONST_INTER);
4316 %}
4317 
4318 // NULL Pointer Immediate
     // Matches a ConP node whose get_ptr() value is 0; op_cost 5.
4319 operand immP0()
4320 %{
4321   predicate(n->get_ptr() == 0);
4322   match(ConP);
4323 
4324   op_cost(5);
4325   format %{ %}
4326   interface(CONST_INTER);
4327 %}
4328 
4329 // Narrow Pointer Immediate
     // Matches every ConN node (no predicate); op_cost 10.
4330 operand immN() %{
4331   match(ConN);
4332 
4333   op_cost(10);
4334   format %{ %}
4335   interface(CONST_INTER);
4336 %}
4337 
4338 // NULL Narrow Pointer Immediate
     // Matches a ConN node whose get_narrowcon() value is 0; op_cost 5.
4339 operand immN0() %{
4340   predicate(n->get_narrowcon() == 0);
4341   match(ConN);
4342 
4343   op_cost(5);
4344   format %{ %}
4345   interface(CONST_INTER);
4346 %}
4347 
4348 operand immP31()
4349 %{
       // Pointer immediate limited to 31 bits: matches a ConP node whose type is
       // not an oop pointer and whose get_ptr() value has no bits set at or
       // above bit 31; op_cost 5.
4350   predicate(!n->as_Type()->type()->isa_oopptr()
4351             && (n->get_ptr() >> 31) == 0);
4352   match(ConP);
4353 
4354   op_cost(5);
4355   format %{ %}
4356   interface(CONST_INTER);
4357 %}
4358 
4359 
4360 // Long Immediate: matches any ConL (no predicate)
4361 operand immL()
4362 %{
4363   match(ConL);
4364 
4365   op_cost(20);
4366   format %{ %}
4367   interface(CONST_INTER);
4368 %}
4369 
4370 // Long Immediate 8-bit: a ConL in the signed range [-0x80, 0x80)
4371 operand immL8()
4372 %{
4373   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4374   match(ConL);
4375 
4376   op_cost(5);
4377   format %{ %}
4378   interface(CONST_INTER);
4379 %}
4380 
4381 // Long Immediate 32-bit unsigned: a ConL unchanged by a cast to unsigned int
4382 operand immUL32()
4383 %{
4384   predicate(n->get_long() == (unsigned int) (n->get_long()));
4385   match(ConL);
4386 
4387   op_cost(10);
4388   format %{ %}
4389   interface(CONST_INTER);
4390 %}
4391 
4392 // Long Immediate 32-bit signed: a ConL unchanged by a cast to int
4393 operand immL32()
4394 %{
4395   predicate(n->get_long() == (int) (n->get_long()));
4396   match(ConL);
4397 
4398   op_cost(15);
4399   format %{ %}
4400   interface(CONST_INTER);
4401 %}
4402 
4403 // Long Immediate zero: a ConL equal to 0
4404 operand immL0()
4405 %{
4406   predicate(n->get_long() == 0L);
4407   match(ConL);
4408 
4409   op_cost(10);
4410   format %{ %}
4411   interface(CONST_INTER);
4412 %}
4413 
4414 // Constant for increment: the long value 1 (no explicit op_cost is declared)
4415 operand immL1()
4416 %{
4417   predicate(n->get_long() == 1);
4418   match(ConL);
4419 
4420   format %{ %}
4421   interface(CONST_INTER);
4422 %}
4423 
4424 // Constant for decrement: the long value -1 (no explicit op_cost is declared)
4425 operand immL_M1()
4426 %{
4427   predicate(n->get_long() == -1);
4428   match(ConL);
4429 
4430   format %{ %}
4431   interface(CONST_INTER);
4432 %}
4433 
4434 // Long Immediate: the value 10 (no explicit op_cost is declared)
4435 operand immL10()
4436 %{
4437   predicate(n->get_long() == 10);
4438   match(ConL);
4439 
4440   format %{ %}
4441   interface(CONST_INTER);
4442 %}
4443 
4444 // Long immediate from 0 to 127 (the predicate accepts [0, 0x80)).
4445 // Used for a shorter form of long mul by 10.
4446 operand immL_127()
4447 %{
4448   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4449   match(ConL);
4450 
4451   op_cost(10);
4452   format %{ %}
4453   interface(CONST_INTER);
4454 %}
4455 
4456 // Long Immediate: low 32-bit mask (exactly the value 0xFFFFFFFF)
4457 operand immL_32bits()
4458 %{
4459   predicate(n->get_long() == 0xFFFFFFFFL);
4460   match(ConL);
4461   op_cost(20);
4462 
4463   format %{ %}
4464   interface(CONST_INTER);
4465 %}
4466 
4467 // Float Immediate zero: a ConF for which jint_cast(value) == 0
4468 operand immF0()
4469 %{
4470   predicate(jint_cast(n->getf()) == 0);  // NOTE(review): presumably a bit-pattern test (so -0.0f would not match) - confirm
4471   match(ConF);
4472 
4473   op_cost(5);
4474   format %{ %}
4475   interface(CONST_INTER);
4476 %}
4477 
4478 // Float Immediate: matches any ConF (no predicate)
4479 operand immF()
4480 %{
4481   match(ConF);
4482 
4483   op_cost(15);
4484   format %{ %}
4485   interface(CONST_INTER);
4486 %}
4487 
4488 // Double Immediate zero: a ConD for which jlong_cast(value) == 0
4489 operand immD0()
4490 %{
4491   predicate(jlong_cast(n->getd()) == 0);  // NOTE(review): presumably a bit-pattern test (so -0.0 would not match) - confirm
4492   match(ConD);
4493 
4494   op_cost(5);
4495   format %{ %}
4496   interface(CONST_INTER);
4497 %}
4498 
4499 // Double Immediate: matches any ConD (no predicate)
4500 operand immD()
4501 %{
4502   match(ConD);
4503 
4504   op_cost(15);
4505   format %{ %}
4506   interface(CONST_INTER);
4507 %}
4508 
4509 // Immediates for special shifts (sign extend)
4510 
4511 // Int immediate: the value 16 (one of the special sign-extend shift counts)
4512 operand immI_16()
4513 %{
4514   predicate(n->get_int() == 16);
4515   match(ConI);
4516 
4517   format %{ %}
4518   interface(CONST_INTER);
4519 %}
4520 
4521 operand immI_24()  // Int immediate: the value 24
4522 %{
4523   predicate(n->get_int() == 24);
4524   match(ConI);
4525 
4526   format %{ %}
4527   interface(CONST_INTER);
4528 %}
4529 
4530 // Constant for byte-wide masking: the int value 255
4531 operand immI_255()
4532 %{
4533   predicate(n->get_int() == 255);
4534   match(ConI);
4535 
4536   format %{ %}
4537   interface(CONST_INTER);
4538 %}
4539 
4540 // Constant for short-wide masking: the int value 65535
4541 operand immI_65535()
4542 %{
4543   predicate(n->get_int() == 65535);
4544   match(ConI);
4545 
4546   format %{ %}
4547   interface(CONST_INTER);
4548 %}
4549 
4550 // Constant for byte-wide masking: the long value 255
4551 operand immL_255()
4552 %{
4553   predicate(n->get_long() == 255);
4554   match(ConL);
4555 
4556   format %{ %}
4557   interface(CONST_INTER);
4558 %}
4559 
4560 // Constant for short-wide masking: the long value 65535
4561 operand immL_65535()
4562 %{
4563   predicate(n->get_long() == 65535);
4564   match(ConL);
4565 
4566   format %{ %}
4567   interface(CONST_INTER);
4568 %}
4569 
4570 // Register Operands
4571 // Integer Register: RegI allocated from int_reg; the fixed-register int operands named here also match
4572 operand rRegI()
4573 %{
4574   constraint(ALLOC_IN_RC(int_reg));
4575   match(RegI);
4576 
4577   match(rax_RegI);
4578   match(rbx_RegI);
4579   match(rcx_RegI);
4580   match(rdx_RegI);
4581   match(rdi_RegI);
4582 
4583   format %{ %}
4584   interface(REG_INTER);
4585 %}
4586 
4587 // Special Registers: RegI constrained to register class int_rax_reg
4588 operand rax_RegI()
4589 %{
4590   constraint(ALLOC_IN_RC(int_rax_reg));
4591   match(RegI);
4592   match(rRegI);
4593 
4594   format %{ "RAX" %}
4595   interface(REG_INTER);
4596 %}
4597 
4598 // Special Registers: RegI constrained to register class int_rbx_reg
4599 operand rbx_RegI()
4600 %{
4601   constraint(ALLOC_IN_RC(int_rbx_reg));
4602   match(RegI);
4603   match(rRegI);
4604 
4605   format %{ "RBX" %}
4606   interface(REG_INTER);
4607 %}
4608 
4609 operand rcx_RegI()  // RegI constrained to register class int_rcx_reg
4610 %{
4611   constraint(ALLOC_IN_RC(int_rcx_reg));
4612   match(RegI);
4613   match(rRegI);
4614 
4615   format %{ "RCX" %}
4616   interface(REG_INTER);
4617 %}
4618 
4619 operand rdx_RegI()  // RegI constrained to register class int_rdx_reg
4620 %{
4621   constraint(ALLOC_IN_RC(int_rdx_reg));
4622   match(RegI);
4623   match(rRegI);
4624 
4625   format %{ "RDX" %}
4626   interface(REG_INTER);
4627 %}
4628 
4629 operand rdi_RegI()  // RegI constrained to register class int_rdi_reg
4630 %{
4631   constraint(ALLOC_IN_RC(int_rdi_reg));
4632   match(RegI);
4633   match(rRegI);
4634 
4635   format %{ "RDI" %}
4636   interface(REG_INTER);
4637 %}
4638 
4639 operand no_rcx_RegI()  // RegI allocated from int_no_rcx_reg; rcx_RegI is not among its matches
4640 %{
4641   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4642   match(RegI);
4643   match(rax_RegI);
4644   match(rbx_RegI);
4645   match(rdx_RegI);
4646   match(rdi_RegI);
4647 
4648   format %{ %}
4649   interface(REG_INTER);
4650 %}
4651 
4652 operand no_rax_rdx_RegI()  // RegI allocated from int_no_rax_rdx_reg; rax_RegI and rdx_RegI are not among its matches
4653 %{
4654   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4655   match(RegI);
4656   match(rbx_RegI);
4657   match(rcx_RegI);
4658   match(rdi_RegI);
4659 
4660   format %{ %}
4661   interface(REG_INTER);
4662 %}
4663 
4664 // Pointer Register: RegP allocated from any_reg; the fixed-register pointer operands and rRegP also match
4665 operand any_RegP()
4666 %{
4667   constraint(ALLOC_IN_RC(any_reg));
4668   match(RegP);
4669   match(rax_RegP);
4670   match(rbx_RegP);
4671   match(rdi_RegP);
4672   match(rsi_RegP);
4673   match(rbp_RegP);
4674   match(r15_RegP);
4675   match(rRegP);
4676 
4677   format %{ %}
4678   interface(REG_INTER);
4679 %}
4680 
4681 operand rRegP()  // RegP allocated from ptr_reg; the fixed-register pointer operands named here also match
4682 %{
4683   constraint(ALLOC_IN_RC(ptr_reg));
4684   match(RegP);
4685   match(rax_RegP);
4686   match(rbx_RegP);
4687   match(rdi_RegP);
4688   match(rsi_RegP);
4689   match(rbp_RegP);
4690   match(r15_RegP);  // See Q&A below about r15_RegP.
4691 
4692   format %{ %}
4693   interface(REG_INTER);
4694 %}
4695 
4696 operand rRegN() %{  // RegN value; note that it is allocated from the int register class int_reg
4697   constraint(ALLOC_IN_RC(int_reg));
4698   match(RegN);
4699 
4700   format %{ %}
4701   interface(REG_INTER);
4702 %}
4703 
4704 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4705 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4706 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4707 // The output of an instruction is controlled by the allocator, which respects
4708 // register class masks, not match rules.  Unless an instruction mentions
4709 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4710 // by the allocator as an input.
4711 
4712 operand no_rax_RegP()  // RegP allocated from ptr_no_rax_reg; rax_RegP is not among its matches
4713 %{
4714   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4715   match(RegP);
4716   match(rbx_RegP);
4717   match(rsi_RegP);
4718   match(rdi_RegP);
4719 
4720   format %{ %}
4721   interface(REG_INTER);
4722 %}
4723 
4724 operand no_rbp_RegP()  // RegP allocated from ptr_no_rbp_reg; rbp_RegP is not among its matches
4725 %{
4726   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4727   match(RegP);
4728   match(rbx_RegP);
4729   match(rsi_RegP);
4730   match(rdi_RegP);
4731 
4732   format %{ %}
4733   interface(REG_INTER);
4734 %}
4735 
4736 operand no_rax_rbx_RegP()  // RegP allocated from ptr_no_rax_rbx_reg; only rsi_RegP and rdi_RegP are listed as matches
4737 %{
4738   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4739   match(RegP);
4740   match(rsi_RegP);
4741   match(rdi_RegP);
4742 
4743   format %{ %}
4744   interface(REG_INTER);
4745 %}
4746 
4747 // Special Registers
4748 // Return a pointer value: RegP constrained to register class ptr_rax_reg
4749 operand rax_RegP()
4750 %{
4751   constraint(ALLOC_IN_RC(ptr_rax_reg));
4752   match(RegP);
4753   match(rRegP);
4754 
4755   format %{ %}
4756   interface(REG_INTER);
4757 %}
4758 
4759 // Special Registers
4760 // Return a compressed pointer value: RegN constrained to the int class int_rax_reg
4761 operand rax_RegN()
4762 %{
4763   constraint(ALLOC_IN_RC(int_rax_reg));
4764   match(RegN);
4765   match(rRegN);
4766 
4767   format %{ %}
4768   interface(REG_INTER);
4769 %}
4770 
4771 // Used in AtomicAdd: RegP constrained to register class ptr_rbx_reg
4772 operand rbx_RegP()
4773 %{
4774   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4775   match(RegP);
4776   match(rRegP);
4777 
4778   format %{ %}
4779   interface(REG_INTER);
4780 %}
4781 
4782 operand rsi_RegP()  // RegP constrained to register class ptr_rsi_reg
4783 %{
4784   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4785   match(RegP);
4786   match(rRegP);
4787 
4788   format %{ %}
4789   interface(REG_INTER);
4790 %}
4791 
4792 // Used in rep stosq: RegP constrained to register class ptr_rdi_reg
4793 operand rdi_RegP()
4794 %{
4795   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4796   match(RegP);
4797   match(rRegP);
4798 
4799   format %{ %}
4800   interface(REG_INTER);
4801 %}
4802 
4803 operand rbp_RegP()  // RegP constrained to register class ptr_rbp_reg
4804 %{
4805   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4806   match(RegP);
4807   match(rRegP);
4808 
4809   format %{ %}
4810   interface(REG_INTER);
4811 %}
4812 
4813 operand r15_RegP()  // RegP constrained to register class ptr_r15_reg (r15 is described in this file as the read-only TLS register)
4814 %{
4815   constraint(ALLOC_IN_RC(ptr_r15_reg));
4816   match(RegP);
4817   match(rRegP);
4818 
4819   format %{ %}
4820   interface(REG_INTER);
4821 %}
4822 
4823 operand rRegL()  // RegL allocated from long_reg; rax_RegL and rdx_RegL also match
4824 %{
4825   constraint(ALLOC_IN_RC(long_reg));
4826   match(RegL);
4827   match(rax_RegL);
4828   match(rdx_RegL);
4829 
4830   format %{ %}
4831   interface(REG_INTER);
4832 %}
4833 
4834 // Special Registers: RegL allocated from long_no_rax_rdx_reg
4835 operand no_rax_rdx_RegL()
4836 %{
4837   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4838   match(RegL);
4839   match(rRegL);
4840 
4841   format %{ %}
4842   interface(REG_INTER);
4843 %}
4844 
4845 operand no_rax_RegL()  // RegL operand that additionally lists rdx_RegL as a match
4846 %{
4847   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL even though rdx_RegL is matched below - confirm this is intentional
4848   match(RegL);
4849   match(rRegL);
4850   match(rdx_RegL);
4851 
4852   format %{ %}
4853   interface(REG_INTER);
4854 %}
4855 
4856 operand no_rcx_RegL()  // RegL allocated from long_no_rcx_reg
4857 %{
4858   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4859   match(RegL);
4860   match(rRegL);
4861 
4862   format %{ %}
4863   interface(REG_INTER);
4864 %}
4865 
4866 operand rax_RegL()  // RegL constrained to register class long_rax_reg; the only fixed long operand with a non-empty format ("RAX")
4867 %{
4868   constraint(ALLOC_IN_RC(long_rax_reg));
4869   match(RegL);
4870   match(rRegL);
4871 
4872   format %{ "RAX" %}
4873   interface(REG_INTER);
4874 %}
4875 
4876 operand rcx_RegL()  // RegL constrained to register class long_rcx_reg
4877 %{
4878   constraint(ALLOC_IN_RC(long_rcx_reg));
4879   match(RegL);
4880   match(rRegL);
4881 
4882   format %{ %}
4883   interface(REG_INTER);
4884 %}
4885 
4886 operand rdx_RegL()  // RegL constrained to register class long_rdx_reg
4887 %{
4888   constraint(ALLOC_IN_RC(long_rdx_reg));
4889   match(RegL);
4890   match(rRegL);
4891 
4892   format %{ %}
4893   interface(REG_INTER);
4894 %}
4895 
4896 // Flags register, used as output of compare instructions (class int_flags, printed as "RFLAGS")
4897 operand rFlagsReg()
4898 %{
4899   constraint(ALLOC_IN_RC(int_flags));
4900   match(RegFlags);
4901 
4902   format %{ "RFLAGS" %}
4903   interface(REG_INTER);
4904 %}
4905 
4906 // Flags register, used as output of FLOATING POINT compare instructions (same class int_flags, printed as "RFLAGS_U")
4907 operand rFlagsRegU()
4908 %{
4909   constraint(ALLOC_IN_RC(int_flags));
4910   match(RegFlags);
4911 
4912   format %{ "RFLAGS_U" %}
4913   interface(REG_INTER);
4914 %}
4915 
4916 operand rFlagsRegUCF() %{  // Flags operand in class int_flags, printed as "RFLAGS_U_CF"
4917   constraint(ALLOC_IN_RC(int_flags));
4918   match(RegFlags);
4919   predicate(false);  // NOTE(review): a constant-false predicate; presumably keeps the matcher from choosing this operand on its own - confirm
4920 
4921   format %{ "RFLAGS_U_CF" %}
4922   interface(REG_INTER);
4923 %}
4924 
4925 // Float register operands: RegF allocated from float_reg
4926 operand regF()
4927 %{
4928   constraint(ALLOC_IN_RC(float_reg));
4929   match(RegF);
4930 
4931   format %{ %}
4932   interface(REG_INTER);
4933 %}
4934 
4935 // Double register operands: RegD allocated from double_reg
4936 operand regD() 
4937 %{
4938   constraint(ALLOC_IN_RC(double_reg));
4939   match(RegD);
4940 
4941   format %{ %}
4942   interface(REG_INTER);
4943 %}
4944 
4945 
4946 //----------Memory Operands----------------------------------------------------
4947 // Direct Memory Operand
4948 // operand direct(immP addr)
4949 // %{
4950 //   match(addr);
4951 
4952 //   format %{ "[$addr]" %}
4953 //   interface(MEMORY_INTER) %{
4954 //     base(0xFFFFFFFF);
4955 //     index(0x4);
4956 //     scale(0x0);
4957 //     disp($addr);
4958 //   %}
4959 // %}
4960 
4961 // Indirect Memory Operand: the address is the base register alone
4962 operand indirect(any_RegP reg)
4963 %{
4964   constraint(ALLOC_IN_RC(ptr_reg));
4965   match(reg);
4966 
4967   format %{ "[$reg]" %}
4968   interface(MEMORY_INTER) %{
4969     base($reg);
4970     index(0x4);  // No Index
4971     scale(0x0);
4972     disp(0x0);
4973   %}
4974 %}
4975 
4976 // Indirect Memory Plus Short Offset Operand: base register plus an immL8 displacement
4977 operand indOffset8(any_RegP reg, immL8 off)
4978 %{
4979   constraint(ALLOC_IN_RC(ptr_reg));
4980   match(AddP reg off);
4981 
4982   format %{ "[$reg + $off (8-bit)]" %}
4983   interface(MEMORY_INTER) %{
4984     base($reg);
4985     index(0x4);  // No Index
4986     scale(0x0);
4987     disp($off);
4988   %}
4989 %}
4990 
4991 // Indirect Memory Plus Long Offset Operand: base register plus an immL32 displacement
4992 operand indOffset32(any_RegP reg, immL32 off)
4993 %{
4994   constraint(ALLOC_IN_RC(ptr_reg));
4995   match(AddP reg off);
4996 
4997   format %{ "[$reg + $off (32-bit)]" %}
4998   interface(MEMORY_INTER) %{
4999     base($reg);
5000     index(0x4);  // No Index
5001     scale(0x0);
5002     disp($off);
5003   %}
5004 %}
5005 
5006 // Indirect Memory Plus Index Register Plus Offset Operand (unscaled long index, immL32 displacement)
5007 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5008 %{
5009   constraint(ALLOC_IN_RC(ptr_reg));
5010   match(AddP (AddP reg lreg) off);
5011 
5012   op_cost(10);
5013   format %{"[$reg + $off + $lreg]" %}
5014   interface(MEMORY_INTER) %{
5015     base($reg);
5016     index($lreg);
5017     scale(0x0);
5018     disp($off);
5019   %}
5020 %}
5021 
5022 // Indirect Memory Plus Index Register Operand (unscaled long index, no displacement)
5023 operand indIndex(any_RegP reg, rRegL lreg)
5024 %{
5025   constraint(ALLOC_IN_RC(ptr_reg));
5026   match(AddP reg lreg);
5027 
5028   op_cost(10);
5029   format %{"[$reg + $lreg]" %}
5030   interface(MEMORY_INTER) %{
5031     base($reg);
5032     index($lreg);
5033     scale(0x0);
5034     disp(0x0);
5035   %}
5036 %}
5037 
5038 // Indirect Memory Times Scale Plus Index Register: base plus (long index << immI2 scale), no displacement
5039 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5040 %{
5041   constraint(ALLOC_IN_RC(ptr_reg));
5042   match(AddP reg (LShiftL lreg scale));
5043 
5044   op_cost(10);
5045   format %{"[$reg + $lreg << $scale]" %}
5046   interface(MEMORY_INTER) %{
5047     base($reg);
5048     index($lreg);
5049     scale($scale);
5050     disp(0x0);
5051   %}
5052 %}
5053 
5054 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (immI2 scale, immL32 displacement)
5055 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5056 %{
5057   constraint(ALLOC_IN_RC(ptr_reg));
5058   match(AddP (AddP reg (LShiftL lreg scale)) off);
5059 
5060   op_cost(10);
5061   format %{"[$reg + $off + $lreg << $scale]" %}
5062   interface(MEMORY_INTER) %{
5063     base($reg);
5064     index($lreg);
5065     scale($scale);
5066     disp($off);
5067   %}
5068 %}
5069 
5070 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (int index widened by ConvI2L)
5071 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5072 %{
5073   constraint(ALLOC_IN_RC(ptr_reg));
5074   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound must be >= 0; presumably the ConvI2L node - confirm the in() path
5075   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5076 
5077   op_cost(10);
5078   format %{"[$reg + $off + $idx << $scale]" %}
5079   interface(MEMORY_INTER) %{
5080     base($reg);
5081     index($idx);
5082     scale($scale);
5083     disp($off);
5084   %}
5085 %}
5086 
5087 // Indirect Narrow Oop Plus Offset Operand
5088 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
5089 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
5090 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5091   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only when compressed oops are on and the shift equals times_8
5092   constraint(ALLOC_IN_RC(ptr_reg));
5093   match(AddP (DecodeN reg) off);
5094 
5095   op_cost(10);
5096   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5097   interface(MEMORY_INTER) %{
5098     base(0xc); // R12
5099     index($reg);
5100     scale(0x3);
5101     disp($off);
5102   %}
5103 %}
5104 
5105 // Indirect Memory Operand through a DecodeN, matched only when narrow_oop_shift() is 0
5106 operand indirectNarrow(rRegN reg)
5107 %{
5108   predicate(Universe::narrow_oop_shift() == 0);
5109   constraint(ALLOC_IN_RC(ptr_reg));
5110   match(DecodeN reg);
5111 
5112   format %{ "[$reg]" %}
5113   interface(MEMORY_INTER) %{
5114     base($reg);
5115     index(0x4);  // No Index
5116     scale(0x0);
5117     disp(0x0);
5118   %}
5119 %}
5120 
5121 // Indirect Memory Plus Short Offset Operand with a DecodeN base, matched only when narrow_oop_shift() is 0
5122 operand indOffset8Narrow(rRegN reg, immL8 off)
5123 %{
5124   predicate(Universe::narrow_oop_shift() == 0);
5125   constraint(ALLOC_IN_RC(ptr_reg));
5126   match(AddP (DecodeN reg) off);
5127 
5128   format %{ "[$reg + $off (8-bit)]" %}
5129   interface(MEMORY_INTER) %{
5130     base($reg);
5131     index(0x4);  // No Index
5132     scale(0x0);
5133     disp($off);
5134   %}
5135 %}
5136 
5137 // Indirect Memory Plus Long Offset Operand with a DecodeN base, matched only when narrow_oop_shift() is 0
5138 operand indOffset32Narrow(rRegN reg, immL32 off)
5139 %{
5140   predicate(Universe::narrow_oop_shift() == 0);
5141   constraint(ALLOC_IN_RC(ptr_reg));
5142   match(AddP (DecodeN reg) off);
5143 
5144   format %{ "[$reg + $off (32-bit)]" %}
5145   interface(MEMORY_INTER) %{
5146     base($reg);
5147     index(0x4);  // No Index
5148     scale(0x0);
5149     disp($off);
5150   %}
5151 %}
5152 
5153 // Indirect Memory Plus Index Register Plus Offset Operand with a DecodeN base (narrow_oop_shift() must be 0)
5154 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5155 %{
5156   predicate(Universe::narrow_oop_shift() == 0);
5157   constraint(ALLOC_IN_RC(ptr_reg));
5158   match(AddP (AddP (DecodeN reg) lreg) off);
5159 
5160   op_cost(10);
5161   format %{"[$reg + $off + $lreg]" %}
5162   interface(MEMORY_INTER) %{
5163     base($reg);
5164     index($lreg);
5165     scale(0x0);
5166     disp($off);
5167   %}
5168 %}
5169 
5170 // Indirect Memory Plus Index Register Operand with a DecodeN base, no displacement (narrow_oop_shift() must be 0)
5171 operand indIndexNarrow(rRegN reg, rRegL lreg)
5172 %{
5173   predicate(Universe::narrow_oop_shift() == 0);
5174   constraint(ALLOC_IN_RC(ptr_reg));
5175   match(AddP (DecodeN reg) lreg);
5176 
5177   op_cost(10);
5178   format %{"[$reg + $lreg]" %}
5179   interface(MEMORY_INTER) %{
5180     base($reg);
5181     index($lreg);
5182     scale(0x0);
5183     disp(0x0);
5184   %}
5185 %}
5186 
5187 // Indirect Memory Times Scale Plus Index Register with a DecodeN base (narrow_oop_shift() must be 0)
5188 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5189 %{
5190   predicate(Universe::narrow_oop_shift() == 0);
5191   constraint(ALLOC_IN_RC(ptr_reg));
5192   match(AddP (DecodeN reg) (LShiftL lreg scale));
5193 
5194   op_cost(10);
5195   format %{"[$reg + $lreg << $scale]" %}
5196   interface(MEMORY_INTER) %{
5197     base($reg);
5198     index($lreg);
5199     scale($scale);
5200     disp(0x0);
5201   %}
5202 %}
5203 
5204 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand with a DecodeN base (narrow_oop_shift() must be 0)
5205 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5206 %{
5207   predicate(Universe::narrow_oop_shift() == 0);
5208   constraint(ALLOC_IN_RC(ptr_reg));
5209   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5210 
5211   op_cost(10);
5212   format %{"[$reg + $off + $lreg << $scale]" %}
5213   interface(MEMORY_INTER) %{
5214     base($reg);
5215     index($lreg);
5216     scale($scale);
5217     disp($off);
5218   %}
5219 %}
5220 
5221 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand with a DecodeN base
5222 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5223 %{
5224   constraint(ALLOC_IN_RC(ptr_reg));
5225   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // shift must be 0 and the long type's lower bound >= 0; presumably the ConvI2L node - confirm the in() path
5226   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5227 
5228   op_cost(10);
5229   format %{"[$reg + $off + $idx << $scale]" %}
5230   interface(MEMORY_INTER) %{
5231     base($reg);
5232     index($idx);
5233     scale($scale);
5234     disp($off);
5235   %}
5236 %}
5237 
5238 
5239 //----------Special Memory Operands--------------------------------------------
5240 // Stack Slot Operand - This operand is used for loading and storing temporary
5241 //                      values on the stack where a match requires a value to
5242 //                      flow through memory.
5243 operand stackSlotP(sRegP reg)  // Stack slot for an sRegP value: RSP base, $reg as the stack offset
5244 %{
5245   constraint(ALLOC_IN_RC(stack_slots));
5246   // No match rule because this operand is only generated in matching
5247 
5248   format %{ "[$reg]" %}
5249   interface(MEMORY_INTER) %{
5250     base(0x4);   // RSP
5251     index(0x4);  // No Index
5252     scale(0x0);  // No Scale
5253     disp($reg);  // Stack Offset
5254   %}
5255 %}
5256 
5257 operand stackSlotI(sRegI reg)  // Stack slot for an sRegI value: RSP base, $reg as the stack offset
5258 %{
5259   constraint(ALLOC_IN_RC(stack_slots));
5260   // No match rule because this operand is only generated in matching
5261 
5262   format %{ "[$reg]" %}
5263   interface(MEMORY_INTER) %{
5264     base(0x4);   // RSP
5265     index(0x4);  // No Index
5266     scale(0x0);  // No Scale
5267     disp($reg);  // Stack Offset
5268   %}
5269 %}
5270 
5271 operand stackSlotF(sRegF reg)  // Stack slot for an sRegF value: RSP base, $reg as the stack offset
5272 %{
5273   constraint(ALLOC_IN_RC(stack_slots));
5274   // No match rule because this operand is only generated in matching
5275 
5276   format %{ "[$reg]" %}
5277   interface(MEMORY_INTER) %{
5278     base(0x4);   // RSP
5279     index(0x4);  // No Index
5280     scale(0x0);  // No Scale
5281     disp($reg);  // Stack Offset
5282   %}
5283 %}
5284 
5285 operand stackSlotD(sRegD reg)  // Stack slot for an sRegD value: RSP base, $reg as the stack offset
5286 %{
5287   constraint(ALLOC_IN_RC(stack_slots));
5288   // No match rule because this operand is only generated in matching
5289 
5290   format %{ "[$reg]" %}
5291   interface(MEMORY_INTER) %{
5292     base(0x4);   // RSP
5293     index(0x4);  // No Index
5294     scale(0x0);  // No Scale
5295     disp($reg);  // Stack Offset
5296   %}
5297 %}
5298 operand stackSlotL(sRegL reg)  // Stack slot for an sRegL value: RSP base, $reg as the stack offset
5299 %{
5300   constraint(ALLOC_IN_RC(stack_slots));
5301   // No match rule because this operand is only generated in matching
5302 
5303   format %{ "[$reg]" %}
5304   interface(MEMORY_INTER) %{
5305     base(0x4);   // RSP
5306     index(0x4);  // No Index
5307     scale(0x0);  // No Scale
5308     disp($reg);  // Stack Offset
5309   %}
5310 %}
5311 
5312 //----------Conditional Branch Operands----------------------------------------
5313 // Comparison Op  - This is the operation of the comparison, and is limited to
5314 //                  the following set of codes:
5315 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5316 //
5317 // Other attributes of the comparison, such as unsignedness, are specified
5318 // by the comparison instruction that sets a condition code flags register.
5319 // That result is represented by a flags operand whose subtype is appropriate
5320 // to the unsignedness (etc.) of the comparison.
5321 //
5322 // Later, the instruction which matches both the Comparison Op (a Bool) and
5323 // the flags (produced by the Cmp) specifies the coding of the comparison op
5324 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5325 
5326 // Comparison Code: matches any Bool and maps it to the e/ne/l/ge/le/g condition encodings
5327 operand cmpOp()
5328 %{
5329   match(Bool);
5330 
5331   format %{ "" %}
5332   interface(COND_INTER) %{
5333     equal(0x4, "e");
5334     not_equal(0x5, "ne");
5335     less(0xC, "l");
5336     greater_equal(0xD, "ge");
5337     less_equal(0xE, "le");
5338     greater(0xF, "g");
5339   %}
5340 %}
5341 
5342 // Comparison Code, unsigned compare (e/ne/b/nb/be/nbe encodings).  Used by FP also, with
5343 // C2 (unordered) turned into GT or LT already.  The other bits
5344 // C0 and C3 are turned into Carry & Zero flags.
5345 operand cmpOpU()
5346 %{
5347   match(Bool);
5348 
5349   format %{ "" %}
5350   interface(COND_INTER) %{
5351     equal(0x4, "e");
5352     not_equal(0x5, "ne");
5353     less(0x2, "b");
5354     greater_equal(0x3, "nb");
5355     less_equal(0x6, "be");
5356     greater(0x7, "nbe");
5357   %}
5358 %}
5359 
5360 
5361 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le and gt tests only)
5362 operand cmpOpUCF() %{
5363   match(Bool);
5364   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5365             n->as_Bool()->_test._test == BoolTest::ge ||
5366             n->as_Bool()->_test._test == BoolTest::le ||
5367             n->as_Bool()->_test._test == BoolTest::gt);
5368   format %{ "" %}
5369   interface(COND_INTER) %{
5370     equal(0x4, "e");
5371     not_equal(0x5, "ne");
5372     less(0x2, "b");
5373     greater_equal(0x3, "nb");
5374     less_equal(0x6, "be");
5375     greater(0x7, "nbe");
5376   %}
5377 %}
5378 
5379 
5380 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq tests only)
5381 operand cmpOpUCF2() %{
5382   match(Bool);
5383   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5384             n->as_Bool()->_test._test == BoolTest::eq);
5385   format %{ "" %}
5386   interface(COND_INTER) %{
5387     equal(0x4, "e");
5388     not_equal(0x5, "ne");
5389     less(0x2, "b");
5390     greater_equal(0x3, "nb");
5391     less_equal(0x6, "be");
5392     greater(0x7, "nbe");
5393   %}
5394 %}
5395 
5396 
5397 //----------OPERAND CLASSES----------------------------------------------------
5398 // Operand Classes are groups of operands that are used to simplify
5399 // instruction definitions by not requiring the AD writer to specify separate
5400 // instructions for every form of operand when the instruction accepts
5401 // multiple operand types with the same basic encoding and format.  The classic
5402 // case of this is memory operands.
5403 
5404 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5405                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5406                indCompressedOopOffset,
5407                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5408                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5409                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // 17 addressing-mode operands; the stackSlot* operands are not members
5410 
5411 //----------PIPELINE-----------------------------------------------------------
5412 // Rules which define the behavior of the target architecture's pipeline.
5413 pipeline %{
5414 
5415 //----------ATTRIBUTES---------------------------------------------------------
5416 attributes %{
5417   variable_size_instructions;        // Instructions are variable size
5418   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5419   instruction_unit_size = 1;         // Instruction sizes are counted in units of 1 byte
5420   instruction_fetch_unit_size = 16;  // The processor fetches one line
5421   instruction_fetch_units = 1;       // of 16 bytes
5422 
5423   // List of nop instructions
5424   nops( MachNop );
5425 %}
5426 
5427 //----------RESOURCES----------------------------------------------------------
5428 // Resources are the functional units available to the machine
5429 
5430 // Generic P2/P3 pipeline
5431 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5432 // 3 instructions decoded per cycle.
5433 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5434 // 3 ALU op, only ALU0 handles mul instructions.
5435 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5436            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5437            BR, FPU,
5438            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // NOTE(review): three MS units are declared although the comment above says 2 load/store ops per cycle - confirm
5439 
5440 //----------PIPELINE DESCRIPTION-----------------------------------------------
5441 // Pipeline Description specifies the stages in the machine's pipeline
5442 
5443 // Generic P2/P3 pipeline: six stages named S0 through S5
5444 pipe_desc(S0, S1, S2, S3, S4, S5);
5445 
5446 //----------PIPELINE CLASSES---------------------------------------------------
5447 // Pipeline Classes describe the stages in which input and output are
5448 // referenced by the hardware pipeline.
5449 
5450 // Naming convention: ialu or fpu
5451 // Then: _reg
5452 // Then: _reg if there is a 2nd register
5453 // Then: _long if it's a pair of instructions implementing a long
5454 // Then: _fat if it requires the big decoder
5455 //   Or: _mem if it requires the big decoder and a memory unit.
5456 
5457 // Integer ALU reg operation: dst is read in S3 and written in S4
5458 pipe_class ialu_reg(rRegI dst)
5459 %{
5460     single_instruction;
5461     dst    : S4(write);
5462     dst    : S3(read);
5463     DECODE : S0;        // any decoder
5464     ALU    : S3;        // any alu
5465 %}
5466 
5467 // Long ALU reg operation: counted as 2 instructions; dst is read in S3 and written in S4
5468 pipe_class ialu_reg_long(rRegL dst)
5469 %{
5470     instruction_count(2);
5471     dst    : S4(write);
5472     dst    : S3(read);
5473     DECODE : S0(2);     // any 2 decoders
5474     ALU    : S3(2);     // both alus
5475 %}
5476 
5477 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
5478 pipe_class ialu_reg_fat(rRegI dst)
5479 %{
5480     single_instruction;
5481     dst    : S4(write);
5482     dst    : S3(read);
5483     D0     : S0;        // big decoder only
5484     ALU    : S3;        // any alu
5485 %}
5486 
5487 // Long ALU reg operation using big decoder: counted as 2 instructions, both decoded on D0
5488 pipe_class ialu_reg_long_fat(rRegL dst)
5489 %{
5490     instruction_count(2);
5491     dst    : S4(write);
5492     dst    : S3(read);
5493     D0     : S0(2);     // big decoder only; twice
5494     ALU    : S3(2);     // any 2 alus
5495 %}
5496 
5497 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
5498 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5499 %{
5500     single_instruction;
5501     dst    : S4(write);
5502     src    : S3(read);
5503     DECODE : S0;        // any decoder
5504     ALU    : S3;        // any alu
5505 %}
5506 
5507 // Long ALU reg-reg operation: counted as 2 instructions; src is read in S3, dst is written in S4
5508 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5509 %{
5510     instruction_count(2);
5511     dst    : S4(write);
5512     src    : S3(read);
5513     DECODE : S0(2);     // any 2 decoders
5514     ALU    : S3(2);     // both alus
5515 %}
5516 
5517 // Integer ALU operation using big decoder; note that src is declared as a memory operand, not a register
5518 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5519 %{
5520     single_instruction;
5521     dst    : S4(write);
5522     src    : S3(read);
5523     D0     : S0;        // big decoder only
5524     ALU    : S3;        // any alu
5525 %}
5526 
5527 // Long ALU reg-reg operation using big decoder: counted as 2 instructions, both decoded on D0
5528 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5529 %{
5530     instruction_count(2);
5531     dst    : S4(write);
5532     src    : S3(read);
5533     D0     : S0(2);     // big decoder only; twice
5534     ALU    : S3(2);     // both alus
5535 %}
5536 
5537 // Integer ALU reg-mem operation: mem is read in S3, the ALU is used in S4, dst is written in S5
5538 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5539 %{
5540     single_instruction;
5541     dst    : S5(write);
5542     mem    : S3(read);
5543     D0     : S0;        // big decoder only
5544     ALU    : S4;        // any alu
5545     MEM    : S3;        // any mem
5546 %}
5547 
5548 // Integer mem operation (prefetch): reads mem in S3 and reserves no ALU
5549 pipe_class ialu_mem(memory mem)
5550 %{
5551     single_instruction;
5552     mem    : S3(read);
5553     D0     : S0;        // big decoder only
5554     MEM    : S3;        // any mem
5555 %}
5556 
5557 // Integer Store to Memory: mem operand is read in S3, the src register in S5
5558 pipe_class ialu_mem_reg(memory mem, rRegI src)
5559 %{
5560     single_instruction;
5561     mem    : S3(read);
5562     src    : S5(read);
5563     D0     : S0;        // big decoder only
5564     ALU    : S4;        // any alu
5565     MEM    : S3;        // any mem
5566 %}
5567 
5568 // // Long Store to Memory
5569 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5570 // %{
5571 //     instruction_count(2);
5572 //     mem    : S3(read);
5573 //     src    : S5(read);
5574 //     D0     : S0(2);          // big decoder only; twice
5575 //     ALU    : S4(2);     // any 2 alus
5576 //     MEM    : S3(2);  // Both mems
5577 // %}
5578 
5579 // Integer Store to Memory (no register source operand)
     // Same resources as ialu_mem_reg; judging by the name the stored value is
     // an immediate, so only the memory operand appears in the signature.
5580 pipe_class ialu_mem_imm(memory mem)
5581 %{
5582     single_instruction;
5583     mem    : S3(read);
5584     D0     : S0;        // big decoder only
5585     ALU    : S4;        // any alu
5586     MEM    : S3;        // any mem
5587 %}
5588 
5589 // Integer ALU0 reg-reg operation
     // Like a plain reg-reg ALU class, but pinned to the ALU0 resource and to
     // the big decoder.
5590 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5591 %{
5592     single_instruction;
5593     dst    : S4(write);
5594     src    : S3(read);
5595     D0     : S0;        // Big decoder only
5596     ALU0   : S3;        // only alu0
5597 %}
5598 
5599 // Integer ALU0 reg-mem operation
     // Same timing as ialu_reg_mem (mem read in S3, dst written in S5), with
     // the ALU stage pinned to ALU0.
5600 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5601 %{
5602     single_instruction;
5603     dst    : S5(write);
5604     mem    : S3(read);
5605     D0     : S0;        // big decoder only
5606     ALU0   : S4;        // ALU0 only
5607     MEM    : S3;        // any mem
5608 %}
5609 
5610 // Integer ALU reg-reg operation whose result is the flags register
     // Both sources are read in S3; the flags operand cr is written in S4.
5611 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5612 %{
5613     single_instruction;
5614     cr     : S4(write);
5615     src1   : S3(read);
5616     src2   : S3(read);
5617     DECODE : S0;        // any decoder
5618     ALU    : S3;        // any alu
5619 %}
5620 
5621 // Integer ALU reg-imm operation whose result is the flags register
     // Only the register source takes part in scheduling; cr is written in S4.
5622 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5623 %{
5624     single_instruction;
5625     cr     : S4(write);
5626     src1   : S3(read);
5627     DECODE : S0;        // any decoder
5628     ALU    : S3;        // any alu
5629 %}
5630 
5631 // Integer ALU reg-mem operation whose result is the flags register
     // The second source is a memory operand, so a MEM resource is claimed in
     // S3 and the big decoder is required.
5632 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5633 %{
5634     single_instruction;
5635     cr     : S4(write);
5636     src1   : S3(read);
5637     src2   : S3(read);
5638     D0     : S0;        // big decoder only
5639     ALU    : S4;        // any alu
5640     MEM    : S3;        // any mem
5641 %}
5642 
5643 // Four-instruction sequence over three integer registers
     // NOTE(review): the previous header said "Conditional move reg-reg",
     // apparently copied from pipe_cmov_reg; the name suggests a
     // compare-less-than idiom -- confirm at the use sites.
     // All three operands are declared as reads; only decoders are claimed.
5644 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5645 %{
5646     instruction_count(4);
5647     y      : S4(read);
5648     q      : S3(read);
5649     p      : S3(read);
5650     DECODE : S0(4);     // any decoder
5651 %}
5652 
5653 // Conditional move reg-reg
     // Reads the source register and the flags in S3 and writes dst in S4.
5654 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5655 %{
5656     single_instruction;
5657     dst    : S4(write);
5658     src    : S3(read);
5659     cr     : S3(read);
5660     DECODE : S0;        // any decoder
5661 %}
5662 
5663 // Conditional move reg-mem
     // Same as pipe_cmov_reg, but the source is a memory operand and a MEM
     // resource is claimed in S3. Note the flags operand comes first here.
5664 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5665 %{
5666     single_instruction;
5667     dst    : S4(write);
5668     src    : S3(read);
5669     cr     : S3(read);
5670     DECODE : S0;        // any decoder
5671     MEM    : S3;        // any mem
5672 %}
5673 
5674 // Conditional move reg-reg long
     // Declared as a single instruction, yet two decoders are claimed.
5675 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5676 %{
5677     single_instruction;
5678     dst    : S4(write);
5679     src    : S3(read);
5680     cr     : S3(read);
5681     DECODE : S0(2);     // any 2 decoders
5682 %}
5683 
5684 // XXX
5685 // // Conditional move double reg-reg
5686 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5687 // %{
5688 //     single_instruction;
5689 //     dst    : S4(write);
5690 //     src    : S3(read);
5691 //     cr     : S3(read);
5692 //     DECODE : S0;     // any decoder
5693 // %}
5694 
5695 // Float single-register operation
     // The only operand, dst, is declared as a read in S3; one FPU resource
     // and two decoders are claimed.
5696 pipe_class fpu_reg(regD dst)
5697 %{
5698     instruction_count(2);
5699     dst    : S3(read);
5700     DECODE : S0(2);     // any 2 decoders
5701     FPU    : S3;
5702 %}
5703 
5704 // Float reg-reg operation
     // Declared as 2 instructions: src is read in S3 and dst written in S4.
5705 pipe_class fpu_reg_reg(regD dst, regD src)
5706 %{
5707     instruction_count(2);
5708     dst    : S4(write);
5709     src    : S3(read);
5710     DECODE : S0(2);     // any 2 decoders
5711     FPU    : S3;
5712 %}
5713 
5714 // Float reg-reg-reg operation (two register sources)
     // Declared as 3 instructions; claims three decoders and the FPU twice.
5715 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5716 %{
5717     instruction_count(3);
5718     dst    : S4(write);
5719     src1   : S3(read);
5720     src2   : S3(read);
5721     DECODE : S0(3);     // any 3 decoders
5722     FPU    : S3(2);
5723 %}
5724 
5725 // Float operation with three register sources
     // Declared as 4 instructions; claims four decoders and the FPU twice.
5726 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5727 %{
5728     instruction_count(4);
5729     dst    : S4(write);
5730     src1   : S3(read);
5731     src2   : S3(read);
5732     src3   : S3(read);
5733     DECODE : S0(4);     // any 4 decoders
5734     FPU    : S3(2);
5735 %}
5736 
5737 // Float operation with one memory source and two register sources
     // Declared as 4 instructions: the big decoder is used in S0, three more
     // decoders in S1, and one MEM resource in S3.
5738 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5739 %{
5740     instruction_count(4);
5741     dst    : S4(write);
5742     src1   : S3(read);
5743     src2   : S3(read);
5744     src3   : S3(read);
5745     DECODE : S1(3);     // any 3 decoders
5746     D0     : S0;        // Big decoder only
5747     FPU    : S3(2);
5748     MEM    : S3;        // any mem
5749 %}
5750 
5751 // Float reg-mem operation
     // Declared as 2 instructions: the memory operand is read in S3, the FPU
     // is used in S4 and dst is written in S5.
5752 pipe_class fpu_reg_mem(regD dst, memory mem)
5753 %{
5754     instruction_count(2);
5755     dst    : S5(write);
5756     mem    : S3(read);
5757     D0     : S0;        // big decoder only
5758     DECODE : S1;        // any decoder for FPU POP
5759     FPU    : S4;
5760     MEM    : S3;        // any mem
5761 %}
5762 
5763 // Float reg-reg-mem operation (one register and one memory source)
     // Declared as 3 instructions; otherwise the same stage layout as
     // fpu_reg_mem with one extra decoder in S1.
5764 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5765 %{
5766     instruction_count(3);
5767     dst    : S5(write);
5768     src1   : S3(read);
5769     mem    : S3(read);
5770     D0     : S0;        // big decoder only
5771     DECODE : S1(2);     // any decoder for FPU POP
5772     FPU    : S4;
5773     MEM    : S3;        // any mem
5774 %}
5775 
5776 // Float mem-reg operation
     // Declared as 2 instructions. Decoder order is reversed relative to
     // fpu_reg_mem: any decoder in S0 first, then the big decoder in S1.
5777 pipe_class fpu_mem_reg(memory mem, regD src)
5778 %{
5779     instruction_count(2);
5780     src    : S5(read);
5781     mem    : S3(read);
5782     DECODE : S0;        // any decoder for FPU PUSH
5783     D0     : S1;        // big decoder only
5784     FPU    : S4;
5785     MEM    : S3;        // any mem
5786 %}
5787 
5788 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5789 %{
     // Float operation with a memory operand and two register sources.
     // Declared as 3 instructions; every operand is a read in S3.
5790     instruction_count(3);
5791     src1   : S3(read);
5792     src2   : S3(read);
5793     mem    : S3(read);
5794     DECODE : S0(2);     // any decoder for FPU PUSH
5795     D0     : S1;        // big decoder only
5796     FPU    : S4;
5797     MEM    : S3;        // any mem
5798 %}
5799 
5800 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5801 %{
     // Float operation with two memory operands and one register source.
     // Declared as 3 instructions; two MEM resources are claimed in S3.
5802     instruction_count(3);
5803     src1   : S3(read);
5804     src2   : S3(read);
5805     mem    : S4(read);
5806     DECODE : S0;        // any decoder for FPU PUSH
5807     D0     : S0(2);     // big decoder only
5808     FPU    : S4;
5809     MEM    : S3(2);     // any mem
5810 %}
5811 
5812 pipe_class fpu_mem_mem(memory dst, memory src1)
5813 %{
     // Memory-to-memory float operation. Declared as 2 instructions; both
     // operands are declared as reads and no FPU resource is claimed.
5814     instruction_count(2);
5815     src1   : S3(read);
5816     dst    : S4(read);
5817     D0     : S0(2);     // big decoder only
5818     MEM    : S3(2);     // any mem
5819 %}
5820 
5821 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5822 %{
     // Float operation on three memory operands. Declared as 3 instructions;
     // claims the big decoder three times and three MEM resources.
5823     instruction_count(3);
5824     src1   : S3(read);
5825     src2   : S3(read);
5826     dst    : S4(read);
5827     D0     : S0(3);     // big decoder only
5828     FPU    : S4;
5829     MEM    : S3(3);     // any mem
5830 %}
5831 
5832 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5833 %{
     // Float operation with a memory operand and one register source.
     // Declared as 3 instructions. Two MEM resources are claimed; judging by
     // the name the second one is for a constant load -- confirm at use sites.
5834     instruction_count(3);
5835     src1   : S4(read);
5836     mem    : S4(read);
5837     DECODE : S0;        // any decoder for FPU PUSH
5838     D0     : S0(2);     // big decoder only
5839     FPU    : S4;
5840     MEM    : S3(2);     // any mem
5841 %}
5842 
5843 // Float load constant
     // Declared as 2 instructions. There is no source operand: the constant
     // is fetched through a MEM resource in S3 and dst is written in S5.
5844 pipe_class fpu_reg_con(regD dst)
5845 %{
5846     instruction_count(2);
5847     dst    : S5(write);
5848     D0     : S0;        // big decoder only for the load
5849     DECODE : S1;        // any decoder for FPU POP
5850     FPU    : S4;
5851     MEM    : S3;        // any mem
5852 %}
5853 
5854 // Float load constant combined with a register source
     // Declared as 3 instructions: src is read in S3, dst written in S5.
5855 pipe_class fpu_reg_reg_con(regD dst, regD src)
5856 %{
5857     instruction_count(3);
5858     dst    : S5(write);
5859     src    : S3(read);
5860     D0     : S0;        // big decoder only for the load
5861     DECODE : S1(2);     // any decoder for FPU POP
5862     FPU    : S4;
5863     MEM    : S3;        // any mem
5864 %}
5865 
5866 // Unconditional branch
     // Claims only the BR resource in S3; the label operand has no timing.
5867 pipe_class pipe_jmp(label labl)
5868 %{
5869     single_instruction;
5870     BR   : S3;
5871 %}
5872 
5873 // Conditional branch
     // Reads the flags register in S1 and claims the BR resource in S3.
5874 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5875 %{
5876     single_instruction;
5877     cr    : S1(read);
5878     BR    : S3;
5879 %}
5880 
5881 // Allocation idiom
     // Serializing class with a fixed latency of 6: heap_ptr is read in S3
     // and dst is written in S5. Claims decoders, MEM, two ALUs and BR.
5882 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5883 %{
5884     instruction_count(1); force_serialization;
5885     fixed_latency(6);
5886     heap_ptr : S3(read);
5887     DECODE   : S0(3);
5888     D0       : S2;
5889     MEM      : S3;
5890     ALU      : S3(2);
5891     dst      : S5(write);
5892     BR       : S5;
5893 %}
5894 
5895 // Generic big/slow expanded idiom
     // Catch-all class: 10 instructions spanning multiple bundles, serializing,
     // with a fixed latency of 100. The float, double and packed loads in this
     // file use it.
5896 pipe_class pipe_slow()
5897 %{
5898     instruction_count(10); multiple_bundles; force_serialization;
5899     fixed_latency(100);
5900     D0  : S0(2);
5901     MEM : S3(2);
5902 %}
5903 
5904 // The real do-nothing guy
     // Zero instructions and no resources; bound to MachNop by the define
     // block that follows.
5905 pipe_class empty()
5906 %{
5907     instruction_count(0);
5908 %}
5909 
5910 // Define the class for the Nop node
     // Binds MachNop to the "empty" pipeline class declared just above.
5911 define
5912 %{
5913    MachNop = empty;
5914 %}
5915 
5916 %}
5917 
5918 //----------INSTRUCTIONS-------------------------------------------------------
5919 //
5920 // match      -- States which machine-independent subtree may be replaced
5921 //               by this instruction.
5922 // ins_cost   -- The estimated cost of this instruction is used by instruction
5923 //               selection to identify a minimum cost tree of machine
5924 //               instructions that matches a tree of machine-independent
5925 //               instructions.
5926 // format     -- A string providing the disassembly for this instruction.
5927 //               The value of an instruction's operand may be inserted
5928 //               by referring to it with a '$' prefix.
5929 // opcode     -- Three instruction opcodes may be provided.  These are referred
5930 //               to within an encode class as $primary, $secondary, and $tertiary
5931 //               respectively.  The primary opcode is commonly used to
5932 //               indicate the type of machine instruction, while secondary
5933 //               and tertiary are often used for prefix options or addressing
5934 //               modes.
5935 // ins_encode -- A list of encode classes with parameters. The encode class
5936 //               name must have been defined in an 'enc_class' specification
5937 //               in the encode section of the architecture description.
5938 
5939 
5940 //----------Load/Store/Move Instructions---------------------------------------
5941 //----------Load Instructions--------------------------------------------------
5942 
5943 // Load Byte (8 bit signed)
     // Matches an ideal LoadB and emits movsbl from the memory operand into
     // the int register dst.
5944 instruct loadB(rRegI dst, memory mem)
5945 %{
5946   match(Set dst (LoadB mem));
5947 
5948   ins_cost(125);
5949   format %{ "movsbl  $dst, $mem\t# byte" %}
5950 
5951   ins_encode %{
5952     __ movsbl($dst$$Register, $mem$$Address);
5953   %}
5954 
5955   ins_pipe(ialu_reg_mem);
5956 %}
5957 
5958 // Load Byte (8 bit signed) into Long Register
     // Folds ConvI2L(LoadB) into a single movsbq, so no separate widening
     // instruction is needed.
5959 instruct loadB2L(rRegL dst, memory mem)
5960 %{
5961   match(Set dst (ConvI2L (LoadB mem)));
5962 
5963   ins_cost(125);
5964   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5965 
5966   ins_encode %{
5967     __ movsbq($dst$$Register, $mem$$Address);
5968   %}
5969 
5970   ins_pipe(ialu_reg_mem);
5971 %}
5972 
5973 // Load Unsigned Byte (8 bit UNsigned)
     // Matches an ideal LoadUB and emits movzbl into the int register dst.
5974 instruct loadUB(rRegI dst, memory mem)
5975 %{
5976   match(Set dst (LoadUB mem));
5977 
5978   ins_cost(125);
5979   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5980 
5981   ins_encode %{
5982     __ movzbl($dst$$Register, $mem$$Address);
5983   %}
5984 
5985   ins_pipe(ialu_reg_mem);
5986 %}
5987 
5988 // Load Unsigned Byte (8 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUB) into a single movzbq.
5989 instruct loadUB2L(rRegL dst, memory mem)
5990 %{
5991   match(Set dst (ConvI2L (LoadUB mem)));
5992 
5993   ins_cost(125);
5994   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5995 
5996   ins_encode %{
5997     __ movzbq($dst$$Register, $mem$$Address);
5998   %}
5999 
6000   ins_pipe(ialu_reg_mem);
6001 %}
6002 
6003 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
     // Emits two instructions, movzbq then andl. The loaded value is already
     // zero-extended, so the masked result cannot be negative and leaving the
     // upper half zero matches ConvI2L. No ins_cost is given for this rule.
6004 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
6005   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6006   effect(KILL cr);  // andl modifies the flags
6007 
6008   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
6009             "andl    $dst, $mask" %}
6010   ins_encode %{
6011     Register Rdst = $dst$$Register;
6012     __ movzbq(Rdst, $mem$$Address);
6013     __ andl(Rdst, $mask$$constant);
6014   %}
6015   ins_pipe(ialu_reg_mem);
6016 %}
6017 
6018 // Load Short (16 bit signed)
     // Matches an ideal LoadS and emits movswl into the int register dst.
6019 instruct loadS(rRegI dst, memory mem)
6020 %{
6021   match(Set dst (LoadS mem));
6022 
6023   ins_cost(125);
6024   format %{ "movswl $dst, $mem\t# short" %}
6025 
6026   ins_encode %{
6027     __ movswl($dst$$Register, $mem$$Address);
6028   %}
6029 
6030   ins_pipe(ialu_reg_mem);
6031 %}
6032 
6033 // Load Short (16 bit signed) to Byte (8 bit signed)
     // Recognizes the shift-left-24 / arithmetic-shift-right-24 narrowing
     // idiom on a loaded short and replaces it with one byte load (movsbl).
     // The same constant operand is matched for both shift counts.
6034 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6035   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6036 
6037   ins_cost(125);
6038   format %{ "movsbl $dst, $mem\t# short -> byte" %}
6039   ins_encode %{
6040     __ movsbl($dst$$Register, $mem$$Address);
6041   %}
6042   ins_pipe(ialu_reg_mem);
6043 %}
6044 
6045 // Load Short (16 bit signed) into Long Register
     // Folds ConvI2L(LoadS) into a single movswq.
6046 instruct loadS2L(rRegL dst, memory mem)
6047 %{
6048   match(Set dst (ConvI2L (LoadS mem)));
6049 
6050   ins_cost(125);
6051   format %{ "movswq $dst, $mem\t# short -> long" %}
6052 
6053   ins_encode %{
6054     __ movswq($dst$$Register, $mem$$Address);
6055   %}
6056 
6057   ins_pipe(ialu_reg_mem);
6058 %}
6059 
6060 // Load Unsigned Short/Char (16 bit UNsigned)
     // Matches an ideal LoadUS and emits movzwl into the int register dst.
6061 instruct loadUS(rRegI dst, memory mem)
6062 %{
6063   match(Set dst (LoadUS mem));
6064 
6065   ins_cost(125);
6066   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
6067 
6068   ins_encode %{
6069     __ movzwl($dst$$Register, $mem$$Address);
6070   %}
6071 
6072   ins_pipe(ialu_reg_mem);
6073 %}
6074 
6075 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
     // Same shift-by-24 narrowing idiom as loadS2B, applied to LoadUS; only
     // the low byte survives, so a signed byte load (movsbl) is emitted.
6076 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6077   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6078 
6079   ins_cost(125);
6080   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6081   ins_encode %{
6082     __ movsbl($dst$$Register, $mem$$Address);
6083   %}
6084   ins_pipe(ialu_reg_mem);
6085 %}
6086 
6087 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
     // Folds ConvI2L(LoadUS) into a single movzwq.
6088 instruct loadUS2L(rRegL dst, memory mem)
6089 %{
6090   match(Set dst (ConvI2L (LoadUS mem)));
6091 
6092   ins_cost(125);
6093   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6094 
6095   ins_encode %{
6096     __ movzwq($dst$$Register, $mem$$Address);
6097   %}
6098 
6099   ins_pipe(ialu_reg_mem);
6100 %}
6101 
6102 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
     // Masking with 0xFF keeps only the low byte, so the whole pattern is
     // emitted as one zero-extending byte load (movzbq). No ins_cost is given.
6103 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6104   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6105 
6106   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6107   ins_encode %{
6108     __ movzbq($dst$$Register, $mem$$Address);
6109   %}
6110   ins_pipe(ialu_reg_mem);
6111 %}
6112 
6113 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
     // Emits two instructions, movzwq then andl. The loaded value is already
     // zero-extended, so the masked result cannot be negative and leaving the
     // upper half zero matches ConvI2L. No ins_cost is given for this rule.
6114 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6115   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6116   effect(KILL cr);  // andl modifies the flags
6117 
6118   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6119             "andl    $dst, $mask" %}
6120   ins_encode %{
6121     Register Rdst = $dst$$Register;
6122     __ movzwq(Rdst, $mem$$Address);
6123     __ andl(Rdst, $mask$$constant);
6124   %}
6125   ins_pipe(ialu_reg_mem);
6126 %}
6127 
6128 // Load Integer
     // Matches an ideal LoadI and emits a plain 32-bit movl.
6129 instruct loadI(rRegI dst, memory mem)
6130 %{
6131   match(Set dst (LoadI mem));
6132 
6133   ins_cost(125);
6134   format %{ "movl    $dst, $mem\t# int" %}
6135 
6136   ins_encode %{
6137     __ movl($dst$$Register, $mem$$Address);
6138   %}
6139 
6140   ins_pipe(ialu_reg_mem);
6141 %}
6142 
6143 // Load Integer (32 bit signed) to Byte (8 bit signed)
     // Replaces the shift-by-24 narrowing idiom on a loaded int with a single
     // sign-extending byte load (movsbl) from the same address.
6144 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6145   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6146 
6147   ins_cost(125);
6148   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6149   ins_encode %{
6150     __ movsbl($dst$$Register, $mem$$Address);
6151   %}
6152   ins_pipe(ialu_reg_mem);
6153 %}
6154 
6155 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
     // Replaces AndI(LoadI, 0xFF) with a single zero-extending byte load
     // (movzbl) from the same address.
6156 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6157   match(Set dst (AndI (LoadI mem) mask));
6158 
6159   ins_cost(125);
6160   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6161   ins_encode %{
6162     __ movzbl($dst$$Register, $mem$$Address);
6163   %}
6164   ins_pipe(ialu_reg_mem);
6165 %}
6166 
6167 // Load Integer (32 bit signed) to Short (16 bit signed)
     // Replaces the shift-by-16 narrowing idiom on a loaded int with a single
     // sign-extending 16-bit load (movswl) from the same address.
6168 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6169   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6170 
6171   ins_cost(125);
6172   format %{ "movswl  $dst, $mem\t# int -> short" %}
6173   ins_encode %{
6174     __ movswl($dst$$Register, $mem$$Address);
6175   %}
6176   ins_pipe(ialu_reg_mem);
6177 %}
6178 
6179 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
     // Replaces AndI(LoadI, 0xFFFF) with a single zero-extending 16-bit load
     // (movzwl) from the same address.
6180 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6181   match(Set dst (AndI (LoadI mem) mask));
6182 
6183   ins_cost(125);
6184   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6185   ins_encode %{
6186     __ movzwl($dst$$Register, $mem$$Address);
6187   %}
6188   ins_pipe(ialu_reg_mem);
6189 %}
6190 
6191 // Load Integer into Long Register
     // Folds ConvI2L(LoadI) into a single sign-extending movslq.
6192 instruct loadI2L(rRegL dst, memory mem)
6193 %{
6194   match(Set dst (ConvI2L (LoadI mem)));
6195 
6196   ins_cost(125);
6197   format %{ "movslq  $dst, $mem\t# int -> long" %}
6198 
6199   ins_encode %{
6200     __ movslq($dst$$Register, $mem$$Address);
6201   %}
6202 
6203   ins_pipe(ialu_reg_mem);
6204 %}
6205 
6206 // Load Integer with mask 0xFF into Long Register
     // The 0xFF mask leaves a non-negative value, so the load, mask and
     // widening collapse into one movzbq. No ins_cost is given for this rule.
6207 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6208   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6209 
6210   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6211   ins_encode %{
6212     __ movzbq($dst$$Register, $mem$$Address);
6213   %}
6214   ins_pipe(ialu_reg_mem);
6215 %}
6216 
6217 // Load Integer with mask 0xFFFF into Long Register
     // The 0xFFFF mask leaves a non-negative value, so the load, mask and
     // widening collapse into one movzwq. No ins_cost is given for this rule.
6218 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6219   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6220 
6221   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6222   ins_encode %{
6223     __ movzwq($dst$$Register, $mem$$Address);
6224   %}
6225   ins_pipe(ialu_reg_mem);
6226 %}
6227 
6228 // Load Integer with a 32-bit mask into Long Register
     //
     // Matches ConvI2L(AndI(LoadI mem) mask) for an arbitrary int mask.
     // movl and andl leave the upper 32 bits of dst zero. That equals the
     // sign-extending ConvI2L result only while the masked value is
     // non-negative. A mask with bit 31 set can yield a negative int, so in
     // that case the 32-bit result is explicitly sign-extended with movslq.
6229 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6230   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6231   effect(KILL cr);  // andl modifies the flags
6232 
6233   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6234             "andl    $dst, $mask\n\t"
                 "movslq  $dst, $dst\t# sign-extend, emitted only when mask is negative" %}
6235   ins_encode %{
6236     Register Rdst = $dst$$Register;
6237     __ movl(Rdst, $mem$$Address);
6238     __ andl(Rdst, $mask$$constant);
         if ($mask$$constant < 0) {
           // Bit 31 can survive the mask: widen the 32-bit result with sign.
           __ movslq(Rdst, Rdst);
         }
6239   %}
6240   ins_pipe(ialu_reg_mem);
6241 %}
6242 
6243 // Load Unsigned Integer into Long Register
     // Matches the dedicated LoadUI2L ideal node and emits a plain movl; a
     // 32-bit move zero-fills the upper half of the 64-bit register.
6244 instruct loadUI2L(rRegL dst, memory mem)
6245 %{
6246   match(Set dst (LoadUI2L mem));
6247 
6248   ins_cost(125);
6249   format %{ "movl    $dst, $mem\t# uint -> long" %}
6250 
6251   ins_encode %{
6252     __ movl($dst$$Register, $mem$$Address);
6253   %}
6254 
6255   ins_pipe(ialu_reg_mem);
6256 %}
6257 
6258 // Load Long
     // Matches an ideal LoadL and emits a 64-bit movq. It reuses the integer
     // reg-mem pipeline class; the XXX marker below is from the original
     // author and flags that choice as unreviewed.
6259 instruct loadL(rRegL dst, memory mem)
6260 %{
6261   match(Set dst (LoadL mem));
6262 
6263   ins_cost(125);
6264   format %{ "movq    $dst, $mem\t# long" %}
6265 
6266   ins_encode %{
6267     __ movq($dst$$Register, $mem$$Address);
6268   %}
6269 
6270   ins_pipe(ialu_reg_mem); // XXX
6271 %}
6272 
6273 // Load Range
     // Matches an ideal LoadRange into an int register. It uses the older
     // opcode-plus-encode-class form: primary opcode 0x8B with the non-wide
     // REX prefix helper, i.e. a 32-bit move.
6274 instruct loadRange(rRegI dst, memory mem)
6275 %{
6276   match(Set dst (LoadRange mem));
6277 
6278   ins_cost(125); // XXX
6279   format %{ "movl    $dst, $mem\t# range" %}
6280   opcode(0x8B);
6281   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6282   ins_pipe(ialu_reg_mem);
6283 %}
6284 
6285 // Load Pointer
     // Matches an ideal LoadP. Same opcode as loadRange (0x8B), but with the
     // wide REX prefix helper, i.e. a 64-bit move.
6286 instruct loadP(rRegP dst, memory mem)
6287 %{
6288   match(Set dst (LoadP mem));
6289 
6290   ins_cost(125); // XXX
6291   format %{ "movq    $dst, $mem\t# ptr" %}
6292   opcode(0x8B);
6293   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6294   ins_pipe(ialu_reg_mem); // XXX
6295 %}
6296 
6297 // Load Compressed Pointer
     // Matches an ideal LoadN into a narrow-oop register with a 32-bit movl.
     // The value is loaded as is; no decoding happens in this rule.
6298 instruct loadN(rRegN dst, memory mem)
6299 %{
6300    match(Set dst (LoadN mem));
6301 
6302    ins_cost(125); // XXX
6303    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6304    ins_encode %{
6305      __ movl($dst$$Register, $mem$$Address);
6306    %}
6307    ins_pipe(ialu_reg_mem); // XXX
6308 %}
6309 
6310 
6311 // Load Klass Pointer
     // Matches an ideal LoadKlass; encoded exactly like loadP (opcode 0x8B
     // with the wide REX prefix helper).
6312 instruct loadKlass(rRegP dst, memory mem)
6313 %{
6314   match(Set dst (LoadKlass mem));
6315 
6316   ins_cost(125); // XXX
6317   format %{ "movq    $dst, $mem\t# class" %}
6318   opcode(0x8B);
6319   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6320   ins_pipe(ialu_reg_mem); // XXX
6321 %}
6322 
6323 // Load narrow Klass Pointer
     // Matches an ideal LoadNKlass into a narrow register with a 32-bit movl,
     // mirroring loadN.
6324 instruct loadNKlass(rRegN dst, memory mem)
6325 %{
6326   match(Set dst (LoadNKlass mem));
6327 
6328   ins_cost(125); // XXX
6329   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6330   ins_encode %{
6331     __ movl($dst$$Register, $mem$$Address);
6332   %}
6333   ins_pipe(ialu_reg_mem); // XXX
6334 %}
6335 
6336 // Load Float
     // Matches an ideal LoadF into a float register. The three opcode bytes
     // (0xF3, 0x0F, 0x10) are emitted around the REX prefix in the order
     // listed by ins_encode: primary byte first, then REX, then the other two.
6337 instruct loadF(regF dst, memory mem)
6338 %{
6339   match(Set dst (LoadF mem));
6340 
6341   ins_cost(145); // XXX
6342   format %{ "movss   $dst, $mem\t# float" %}
6343   opcode(0xF3, 0x0F, 0x10);
6344   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6345   ins_pipe(pipe_slow); // XXX
6346 %}
6347 
6348 // Load Double (movlpd variant)
     // Selected only when UseXmmLoadAndClearUpper is false. The loadD rule
     // that follows matches the same LoadD tree under the opposite predicate,
     // so exactly one of the two applies.
6349 instruct loadD_partial(regD dst, memory mem)
6350 %{
6351   predicate(!UseXmmLoadAndClearUpper);
6352   match(Set dst (LoadD mem));
6353 
6354   ins_cost(145); // XXX
6355   format %{ "movlpd  $dst, $mem\t# double" %}
6356   opcode(0x66, 0x0F, 0x12);
6357   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6358   ins_pipe(pipe_slow); // XXX
6359 %}
6360 
6361 instruct loadD(regD dst, memory mem)
6362 %{
     // Load Double (movsd variant). Selected only when
     // UseXmmLoadAndClearUpper is true; loadD_partial covers the other case.
6363   predicate(UseXmmLoadAndClearUpper);
6364   match(Set dst (LoadD mem));
6365 
6366   ins_cost(145); // XXX
6367   format %{ "movsd   $dst, $mem\t# double" %}
6368   opcode(0xF2, 0x0F, 0x10);
6369   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6370   ins_pipe(pipe_slow); // XXX
6371 %}
6372 
6373 // Load Aligned Packed Byte to XMM register
     // Matches Load8B and emits the shared movq_ld encoding; all the packed
     // load rules in this group use that same encoding.
6374 instruct loadA8B(regD dst, memory mem) %{
6375   match(Set dst (Load8B mem));
6376   ins_cost(125);
6377   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6378   ins_encode( movq_ld(dst, mem));
6379   ins_pipe( pipe_slow );
6380 %}
6381 
6382 // Load Aligned Packed Short to XMM register
     // Matches Load4S; same movq_ld encoding as loadA8B.
6383 instruct loadA4S(regD dst, memory mem) %{
6384   match(Set dst (Load4S mem));
6385   ins_cost(125);
6386   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6387   ins_encode( movq_ld(dst, mem));
6388   ins_pipe( pipe_slow );
6389 %}
6390 
6391 // Load Aligned Packed Char to XMM register
     // Matches Load4C; same movq_ld encoding as loadA8B.
6392 instruct loadA4C(regD dst, memory mem) %{
6393   match(Set dst (Load4C mem));
6394   ins_cost(125);
6395   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6396   ins_encode( movq_ld(dst, mem));
6397   ins_pipe( pipe_slow );
6398 %}
6399 
6400 // Load Aligned Packed Integer to XMM register
     // Matches Load2I; same movq_ld encoding as loadA8B. The rule name does
     // not follow the loadA* pattern of its neighbours.
6401 instruct load2IU(regD dst, memory mem) %{
6402   match(Set dst (Load2I mem));
6403   ins_cost(125);
6404   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6405   ins_encode( movq_ld(dst, mem));
6406   ins_pipe( pipe_slow );
6407 %}
6408 
6409 // Load Aligned Packed Single to XMM
     // Matches Load2F; same movq_ld encoding as loadA8B, but with cost 145
     // instead of the 125 used by the integer packed loads.
6410 instruct loadA2F(regD dst, memory mem) %{
6411   match(Set dst (Load2F mem));
6412   ins_cost(145);
6413   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6414   ins_encode( movq_ld(dst, mem));
6415   ins_pipe( pipe_slow );
6416 %}
6417 
6418 // Load Effective Address
     // Materializes the address shape described by the indOffset8 operand
     // into a pointer register with leaq (opcode 0x8D, wide REX prefix).
     // The other lea* rules differ only in the operand class they match.
6419 instruct leaP8(rRegP dst, indOffset8 mem)
6420 %{
6421   match(Set dst mem);
6422 
6423   ins_cost(110); // XXX
6424   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6425   opcode(0x8D);
6426   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6427   ins_pipe(ialu_reg_reg_fat);
6428 %}
6429 
6430 instruct leaP32(rRegP dst, indOffset32 mem)
6431 %{
     // leaq for the indOffset32 address shape; same encoding as leaP8.
6432   match(Set dst mem);
6433 
6434   ins_cost(110);
6435   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6436   opcode(0x8D);
6437   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6438   ins_pipe(ialu_reg_reg_fat);
6439 %}
6440 
6441 // instruct leaPIdx(rRegP dst, indIndex mem)
6442 // %{
6443 //   match(Set dst mem);
6444 
6445 //   ins_cost(110);
6446 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6447 //   opcode(0x8D);
6448 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6449 //   ins_pipe(ialu_reg_reg_fat);
6450 // %}
6451 
6452 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6453 %{
     // leaq for the indIndexOffset address shape; same encoding as leaP8.
6454   match(Set dst mem);
6455 
6456   ins_cost(110);
6457   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6458   opcode(0x8D);
6459   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6460   ins_pipe(ialu_reg_reg_fat);
6461 %}
6462 
6463 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6464 %{
     // leaq for the indIndexScale address shape; same encoding as leaP8.
6465   match(Set dst mem);
6466 
6467   ins_cost(110);
6468   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6469   opcode(0x8D);
6470   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6471   ins_pipe(ialu_reg_reg_fat);
6472 %}
6473 
6474 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6475 %{
     // leaq for the indIndexScaleOffset address shape; same encoding as leaP8.
6476   match(Set dst mem);
6477 
6478   ins_cost(110);
6479   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6480   opcode(0x8D);
6481   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6482   ins_pipe(ialu_reg_reg_fat);
6483 %}
6484 
6485 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6486 %{
     // leaq for the indPosIndexScaleOffset address shape; same encoding as
     // leaP8.
6487   match(Set dst mem);
6488 
6489   ins_cost(110);
6490   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6491   opcode(0x8D);
6492   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6493   ins_pipe(ialu_reg_reg_fat);
6494 %}
6495 
6496 // Load Effective Address which uses Narrow (32-bits) oop
     // Only enabled when compressed oops are in use AND the narrow-oop shift
     // is non-zero. The *Narrow lea rules that follow are guarded by the
     // complementary shift == 0 condition.
6497 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6498 %{
6499   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6500   match(Set dst mem);
6501 
6502   ins_cost(110);
6503   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6504   opcode(0x8D);
6505   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6506   ins_pipe(ialu_reg_reg_fat);
6507 %}
6508 
6509 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6510 %{
     // leaq for the indOffset8Narrow address shape. Enabled only when the
     // narrow-oop shift is zero.
6511   predicate(Universe::narrow_oop_shift() == 0);
6512   match(Set dst mem);
6513 
6514   ins_cost(110); // XXX
6515   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6516   opcode(0x8D);
6517   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6518   ins_pipe(ialu_reg_reg_fat);
6519 %}
6520 
6521 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6522 %{
     // leaq for the indOffset32Narrow address shape. Enabled only when the
     // narrow-oop shift is zero.
6523   predicate(Universe::narrow_oop_shift() == 0);
6524   match(Set dst mem);
6525 
6526   ins_cost(110);
6527   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6528   opcode(0x8D);
6529   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6530   ins_pipe(ialu_reg_reg_fat);
6531 %}
6532 
6533 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6534 %{
     // leaq for the indIndexOffsetNarrow address shape. Enabled only when the
     // narrow-oop shift is zero.
6535   predicate(Universe::narrow_oop_shift() == 0);
6536   match(Set dst mem);
6537 
6538   ins_cost(110);
6539   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6540   opcode(0x8D);
6541   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6542   ins_pipe(ialu_reg_reg_fat);
6543 %}
6544 
6545 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6546 %{
     // leaq for the indIndexScaleNarrow address shape. Enabled only when the
     // narrow-oop shift is zero.
6547   predicate(Universe::narrow_oop_shift() == 0);
6548   match(Set dst mem);
6549 
6550   ins_cost(110);
6551   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6552   opcode(0x8D);
6553   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6554   ins_pipe(ialu_reg_reg_fat);
6555 %}
6556 
6557 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6558 %{
     // leaq for the indIndexScaleOffsetNarrow address shape. Enabled only
     // when the narrow-oop shift is zero.
6559   predicate(Universe::narrow_oop_shift() == 0);
6560   match(Set dst mem);
6561 
6562   ins_cost(110);
6563   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6564   opcode(0x8D);
6565   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6566   ins_pipe(ialu_reg_reg_fat);
6567 %}
6568 
6569 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6570 %{
     // leaq for the indPosIndexScaleOffsetNarrow address shape. Enabled only
     // when the narrow-oop shift is zero.
6571   predicate(Universe::narrow_oop_shift() == 0);
6572   match(Set dst mem);
6573 
6574   ins_cost(110);
6575   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6576   opcode(0x8D);
6577   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6578   ins_pipe(ialu_reg_reg_fat);
6579 %}
6580 
6581 instruct loadConI(rRegI dst, immI src)
6582 %{
     // Load an arbitrary int constant into a register through the load_immI
     // encoding. No ins_cost is given; loadConI0 handles zero at cost 50.
6583   match(Set dst src);
6584 
6585   format %{ "movl    $dst, $src\t# int" %}
6586   ins_encode(load_immI(dst, src));
6587   ins_pipe(ialu_reg_fat); // XXX
6588 %}
6589 
6590 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6591 %{
     // Load int zero by xor-ing the register with itself (opcode 0x33).
     // Costed at 50.
6592   match(Set dst src);
6593   effect(KILL cr);  // xorl modifies the flags
6594 
6595   ins_cost(50);
6596   format %{ "xorl    $dst, $dst\t# int" %}
6597   opcode(0x33); /* + rd */
6598   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6599   ins_pipe(ialu_reg);
6600 %}
6601 
6602 instruct loadConL(rRegL dst, immL src)
6603 %{
     // Load an arbitrary long constant through the load_immL encoding. At
     // cost 150 it is the most expensive long-constant rule; loadConL0,
     // loadConUL32 and loadConL32 cover cheaper special cases.
6604   match(Set dst src);
6605 
6606   ins_cost(150);
6607   format %{ "movq    $dst, $src\t# long" %}
6608   ins_encode(load_immL(dst, src));
6609   ins_pipe(ialu_reg);
6610 %}
6611 
6612 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6613 %{
     // Load long zero. A 32-bit xorl (non-wide REX helper) is enough because
     // a 32-bit result zero-fills the upper half of the 64-bit register.
6614   match(Set dst src);
6615   effect(KILL cr);  // xorl modifies the flags
6616 
6617   ins_cost(50);
6618   format %{ "xorl    $dst, $dst\t# long" %}
6619   opcode(0x33); /* + rd */
6620   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6621   ins_pipe(ialu_reg); // XXX
6622 %}
6623 
6624 instruct loadConUL32(rRegL dst, immUL32 src)
6625 %{
     // Load a long constant matched by the immUL32 operand with a 32-bit
     // movl (load_immUL32 encoding). Cost 60 ranks it below loadConL32.
6626   match(Set dst src);
6627 
6628   ins_cost(60);
6629   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6630   ins_encode(load_immUL32(dst, src));
6631   ins_pipe(ialu_reg);
6632 %}
6633 
6634 instruct loadConL32(rRegL dst, immL32 src)
6635 %{
     // Load a long constant matched by the immL32 operand with movq
     // (load_immL32 encoding). Cost 70 sits between loadConUL32 and loadConL.
6636   match(Set dst src);
6637 
6638   ins_cost(70);
6639   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6640   ins_encode(load_immL32(dst, src));
6641   ins_pipe(ialu_reg);
6642 %}
6643 
6644 instruct loadConP(rRegP dst, immP src)
6645 %{
     // Load an arbitrary pointer constant through the load_immP encoding.
     // No ins_cost is given; loadConP0 and loadConP31 cover cheaper cases.
6646   match(Set dst src);
6647 
6648   format %{ "movq    $dst, $src\t# ptr" %}
6649   ins_encode(load_immP(dst, src));
6650   ins_pipe(ialu_reg_fat); // XXX
6651 %}
6652 
6653 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6654 %{
6655   match(Set dst src); // pointer constant matched by immP0: clear the register with xor
6656   effect(KILL cr); // xor writes the flags register
6657 
6658   ins_cost(50);
6659   format %{ "xorl    $dst, $dst\t# ptr" %}
6660   opcode(0x33); /* + rd */
6661   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX helper: a 32-bit xor, as the format shows
6662   ins_pipe(ialu_reg);
6663 %}
6664 
6665 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6666 %{
6667   match(Set dst src); // pointer constant matched by immP31 (positive 32-bit, per the format text)
6668   effect(KILL cr); // flags are declared clobbered by this instruct
6669 
6670   ins_cost(60);
6671   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6672   ins_encode(load_immP31(dst, src)); // enc_class defined outside this chunk
6673   ins_pipe(ialu_reg);
6674 %}
6675 
6676 instruct loadConF(regF dst, immF src)
6677 %{
6678   match(Set dst src); // load a float constant into a float register
6679   ins_cost(125);
6680 
6681   format %{ "movss   $dst, [$src]" %}
6682   ins_encode(load_conF(dst, src)); // format shows a memory operand; load_conF is defined outside this chunk
6683   ins_pipe(pipe_slow);
6684 %}
6685 
6686 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6687   match(Set dst src); // compressed-pointer constant matched by immN0: clear the register
6688   effect(KILL cr); // xorq writes the flags register
6689   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6690   ins_encode %{
6691     __ xorq($dst$$Register, $dst$$Register); // dst ^ dst; $src is never encoded, so the format prints $dst twice
6692   %}
6693   ins_pipe(ialu_reg);
6694 %}
6695 
6696 instruct loadConN(rRegN dst, immN src) %{
6697   match(Set dst src); // load a compressed-pointer constant into a narrow-oop register
6698 
6699   ins_cost(125);
6700   format %{ "movl    $dst, $src\t# compressed ptr" %}
6701   ins_encode %{
6702     address con = (address)$src$$constant;
6703     if (con == NULL) {
6704       ShouldNotReachHere(); // a NULL constant is not expected here (presumably loadConN0 matches it - verify)
6705     } else {
6706       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); // macro-assembler helper, defined elsewhere
6707     }
6708   %}
6709   ins_pipe(ialu_reg_fat); // XXX
6710 %}
6711 
6712 instruct loadConF0(regF dst, immF0 src)
6713 %{
6714   match(Set dst src); // float constant matched by immF0: clear the register with xorps
6715   ins_cost(100); // cheaper than the general loadConF (125)
6716 
6717   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6718   opcode(0x0F, 0x57); // two opcode bytes, emitted via OpcP and OpcS below
6719   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); // dst is both operands
6720   ins_pipe(pipe_slow);
6721 %}
6722 
6723 // Use the same format since predicate() can not be used here.
6724 instruct loadConD(regD dst, immD src)
6725 %{
6726   match(Set dst src); // load a double constant into a double register
6727   ins_cost(125);
6728 
6729   format %{ "movsd   $dst, [$src]" %}
6730   ins_encode(load_conD(dst, src)); // format shows a memory operand; load_conD is defined outside this chunk
6731   ins_pipe(pipe_slow);
6732 %}
6733 
6734 instruct loadConD0(regD dst, immD0 src)
6735 %{
6736   match(Set dst src); // double constant matched by immD0: clear the register with xorpd
6737   ins_cost(100); // cheaper than the general loadConD (125)
6738 
6739   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6740   opcode(0x66, 0x0F, 0x57); // three bytes: OpcP, OpcS, OpcT
6741   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst)); // first opcode byte is emitted before the REX helper
6742   ins_pipe(pipe_slow);
6743 %}
6744 
6745 instruct loadSSI(rRegI dst, stackSlotI src)
6746 %{
6747   match(Set dst src); // reload an int from a stack slot into a register
6748 
6749   ins_cost(125);
6750   format %{ "movl    $dst, $src\t# int stk" %}
6751   opcode(0x8B); // load direction: register <- memory, per the format
6752   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6753   ins_pipe(ialu_reg_mem);
6754 %}
6755 
6756 instruct loadSSL(rRegL dst, stackSlotL src)
6757 %{
6758   match(Set dst src); // reload a long from a stack slot into a register
6759 
6760   ins_cost(125);
6761   format %{ "movq    $dst, $src\t# long stk" %}
6762   opcode(0x8B); // same opcode as loadSSI; the _wide REX helper is what differs
6763   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6764   ins_pipe(ialu_reg_mem);
6765 %}
6766 
6767 instruct loadSSP(rRegP dst, stackSlotP src)
6768 %{
6769   match(Set dst src); // reload a pointer from a stack slot into a register
6770 
6771   ins_cost(125);
6772   format %{ "movq    $dst, $src\t# ptr stk" %}
6773   opcode(0x8B); // identical encoding recipe to loadSSL
6774   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6775   ins_pipe(ialu_reg_mem);
6776 %}
6777 
6778 instruct loadSSF(regF dst, stackSlotF src)
6779 %{
6780   match(Set dst src); // reload a float from a stack slot into a float register
6781 
6782   ins_cost(125);
6783   format %{ "movss   $dst, $src\t# float stk" %}
6784   opcode(0xF3, 0x0F, 0x10); // three bytes: OpcP, OpcS, OpcT
6785   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // first opcode byte is emitted before the REX helper
6786   ins_pipe(pipe_slow); // XXX
6787 %}
6788 
6789 // Use the same format since predicate() can not be used here.
6790 instruct loadSSD(regD dst, stackSlotD src)
6791 %{
6792   match(Set dst src); // reload a double from a stack slot into a double register
6793 
6794   ins_cost(125);
6795   format %{ "movsd   $dst, $src\t# double stk" %}
6796   ins_encode  %{
6797     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // slot addressed as rsp + displacement; movdbl is a macro-assembler helper
6798   %}
6799   ins_pipe(pipe_slow); // XXX
6800 %}
6801 
6802 // Prefetch instructions.
6803 // Must be safe to execute with invalid address (cannot fault).
6804 
6805 instruct prefetchr( memory mem ) %{
6806   predicate(ReadPrefetchInstr==3); // selected only when the ReadPrefetchInstr flag is 3
6807   match(PrefetchRead mem); // read-prefetch hint; no result is defined
6808   ins_cost(125);
6809 
6810   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6811   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6812   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem)); // 0x00 is the /0 opcode extension
6813   ins_pipe(ialu_mem);
6814 %}
6815 
6816 instruct prefetchrNTA( memory mem ) %{
6817   predicate(ReadPrefetchInstr==0); // selected only when the ReadPrefetchInstr flag is 0
6818   match(PrefetchRead mem); // read-prefetch hint; no result is defined
6819   ins_cost(125);
6820 
6821   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6822   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6823   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem)); // 0x00 is the /0 opcode extension
6824   ins_pipe(ialu_mem);
6825 %}
6826 
6827 instruct prefetchrT0( memory mem ) %{
6828   predicate(ReadPrefetchInstr==1); // selected only when the ReadPrefetchInstr flag is 1
6829   match(PrefetchRead mem); // read-prefetch hint; no result is defined
6830   ins_cost(125);
6831 
6832   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6833   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6834   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem)); // 0x01 is the /1 opcode extension
6835   ins_pipe(ialu_mem);
6836 %}
6837 
6838 instruct prefetchrT2( memory mem ) %{
6839   predicate(ReadPrefetchInstr==2); // selected only when the ReadPrefetchInstr flag is 2
6840   match(PrefetchRead mem); // read-prefetch hint; no result is defined
6841   ins_cost(125);
6842 
6843   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6844   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6845   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem)); // 0x03 is the /3 opcode extension
6846   ins_pipe(ialu_mem);
6847 %}
6848 
6849 instruct prefetchw( memory mem ) %{
6850   predicate(AllocatePrefetchInstr==3); // selected only when the AllocatePrefetchInstr flag is 3
6851   match(PrefetchWrite mem); // write-prefetch hint; no result is defined
6852   ins_cost(125);
6853 
6854   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6855   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6856   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem)); // 0x01 is the /1 opcode extension
6857   ins_pipe(ialu_mem);
6858 %}
6859 
6860 instruct prefetchwNTA( memory mem ) %{
6861   predicate(AllocatePrefetchInstr==0); // selected only when the AllocatePrefetchInstr flag is 0
6862   match(PrefetchWrite mem); // write-prefetch hint; no result is defined
6863   ins_cost(125);
6864 
6865   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6866   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6867   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem)); // same bytes as prefetchrNTA; only the matched node differs
6868   ins_pipe(ialu_mem);
6869 %}
6870 
6871 instruct prefetchwT0( memory mem ) %{
6872   predicate(AllocatePrefetchInstr==1); // selected only when the AllocatePrefetchInstr flag is 1
6873   match(PrefetchWrite mem); // write-prefetch hint; no result is defined
6874   ins_cost(125);
6875 
6876   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6877   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6878   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem)); // same bytes as prefetchrT0; only the matched node differs
6879   ins_pipe(ialu_mem);
6880 %}
6881 
6882 instruct prefetchwT2( memory mem ) %{
6883   predicate(AllocatePrefetchInstr==2); // selected only when the AllocatePrefetchInstr flag is 2
6884   match(PrefetchWrite mem); // write-prefetch hint; no result is defined
6885   ins_cost(125);
6886 
6887   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6888   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6889   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem)); // same bytes as prefetchrT2; only the matched node differs
6890   ins_pipe(ialu_mem);
6891 %}
6892 
6893 //----------Store Instructions-------------------------------------------------
6894 
6895 // Store Byte
6896 instruct storeB(memory mem, rRegI src)
6897 %{
6898   match(Set mem (StoreB mem src)); // byte store of an int register to memory
6899 
6900   ins_cost(125); // XXX
6901   format %{ "movb    $mem, $src\t# byte" %}
6902   opcode(0x88); // byte-sized store opcode, per the movb format
6903   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); // REX_breg_mem: byte-register REX helper, defined elsewhere
6904   ins_pipe(ialu_mem_reg);
6905 %}
6906 
6907 // Store Char/Short
6908 instruct storeC(memory mem, rRegI src)
6909 %{
6910   match(Set mem (StoreC mem src)); // 16-bit (char/short) store of an int register to memory
6911 
6912   ins_cost(125); // XXX
6913   format %{ "movw    $mem, $src\t# char/short" %}
6914   opcode(0x89); // same opcode as storeI; SizePrefix below is what differs
6915   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); // SizePrefix presumably selects the 16-bit operand size - confirm in its enc_class
6916   ins_pipe(ialu_mem_reg);
6917 %}
6918 
6919 // Store Integer
6920 instruct storeI(memory mem, rRegI src)
6921 %{
6922   match(Set mem (StoreI mem src)); // 32-bit store of an int register to memory
6923 
6924   ins_cost(125); // XXX
6925   format %{ "movl    $mem, $src\t# int" %}
6926   opcode(0x89); // store direction: memory <- register, per the format
6927   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6928   ins_pipe(ialu_mem_reg);
6929 %}
6930 
6931 // Store Long
6932 instruct storeL(memory mem, rRegL src)
6933 %{
6934   match(Set mem (StoreL mem src)); // 64-bit store of a long register to memory
6935 
6936   ins_cost(125); // XXX
6937   format %{ "movq    $mem, $src\t# long" %}
6938   opcode(0x89); // same opcode as storeI; the _wide REX helper is what differs
6939   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6940   ins_pipe(ialu_mem_reg); // XXX
6941 %}
6942 
6943 // Store Pointer
6944 instruct storeP(memory mem, any_RegP src)
6945 %{
6946   match(Set mem (StoreP mem src)); // 64-bit store of a pointer register to memory
6947 
6948   ins_cost(125); // XXX
6949   format %{ "movq    $mem, $src\t# ptr" %}
6950   opcode(0x89); // identical encoding recipe to storeL
6951   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6952   ins_pipe(ialu_mem_reg);
6953 %}
6954 
6955 instruct storeImmP0(memory mem, immP0 zero)
6956 %{
6957   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
6958   match(Set mem (StoreP mem zero)); // store of the pointer constant matched by immP0
6959 
6960   ins_cost(125); // XXX
6961   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6962   ins_encode %{
6963     __ movq($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
6964   %}
6965   ins_pipe(ialu_mem_reg);
6966 %}
6967 
6968 // Store NULL Pointer, mark word, or other simple pointer constant.
6969 instruct storeImmP(memory mem, immP31 src)
6970 %{
6971   match(Set mem (StoreP mem src)); // store of a pointer constant matched by immP31 as an immediate
6972 
6973   ins_cost(150); // XXX
6974   format %{ "movq    $mem, $src\t# ptr" %}
6975   opcode(0xC7); /* C7 /0 */
6976   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); // the constant is emitted as a 32-bit immediate
6977   ins_pipe(ialu_mem_imm);
6978 %}
6979 
6980 // Store Compressed Pointer
6981 instruct storeN(memory mem, rRegN src)
6982 %{
6983   match(Set mem (StoreN mem src)); // store of a compressed pointer register to memory
6984 
6985   ins_cost(125); // XXX
6986   format %{ "movl    $mem, $src\t# compressed ptr" %}
6987   ins_encode %{
6988     __ movl($mem$$Address, $src$$Register); // 32-bit store: movl, as in the format
6989   %}
6990   ins_pipe(ialu_mem_reg);
6991 %}
6992 
6993 instruct storeImmN0(memory mem, immN0 zero)
6994 %{
6995   predicate(Universe::narrow_oop_base() == NULL); // only when the narrow-oop base is NULL
6996   match(Set mem (StoreN mem zero)); // store of the compressed-pointer constant matched by immN0
6997 
6998   ins_cost(125); // XXX
6999   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
7000   ins_encode %{
7001     __ movl($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7002   %}
7003   ins_pipe(ialu_mem_reg);
7004 %}
7005 
7006 instruct storeImmN(memory mem, immN src)
7007 %{
7008   match(Set mem (StoreN mem src)); // store of a compressed-pointer constant to memory
7009 
7010   ins_cost(150); // XXX
7011   format %{ "movl    $mem, $src\t# compressed ptr" %}
7012   ins_encode %{
7013     address con = (address)$src$$constant;
7014     if (con == NULL) {
7015       __ movl($mem$$Address, (int32_t)0); // NULL constant: store a literal 32-bit zero
7016     } else {
7017       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant); // non-NULL: macro-assembler helper, defined elsewhere
7018     }
7019   %}
7020   ins_pipe(ialu_mem_imm);
7021 %}
7022 
7023 // Store Integer Immediate
7024 instruct storeImmI0(memory mem, immI0 zero)
7025 %{
7026   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7027   match(Set mem (StoreI mem zero)); // int store of the constant matched by immI0
7028 
7029   ins_cost(125); // XXX
7030   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
7031   ins_encode %{
7032     __ movl($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7033   %}
7034   ins_pipe(ialu_mem_reg);
7035 %}
7036 
7037 instruct storeImmI(memory mem, immI src)
7038 %{
7039   match(Set mem (StoreI mem src)); // int store of an immediate constant
7040 
7041   ins_cost(150); // costlier than the R12-based storeImmI0 (125)
7042   format %{ "movl    $mem, $src\t# int" %}
7043   opcode(0xC7); /* C7 /0 */
7044   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); // the constant is emitted as a 32-bit immediate
7045   ins_pipe(ialu_mem_imm);
7046 %}
7047 
7048 // Store Long Immediate
7049 instruct storeImmL0(memory mem, immL0 zero)
7050 %{
7051   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7052   match(Set mem (StoreL mem zero)); // long store of the constant matched by immL0
7053 
7054   ins_cost(125); // XXX
7055   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
7056   ins_encode %{
7057     __ movq($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7058   %}
7059   ins_pipe(ialu_mem_reg);
7060 %}
7061 
7062 instruct storeImmL(memory mem, immL32 src)
7063 %{
7064   match(Set mem (StoreL mem src)); // long store of a constant matched by immL32
7065 
7066   ins_cost(150); // costlier than the R12-based storeImmL0 (125)
7067   format %{ "movq    $mem, $src\t# long" %}
7068   opcode(0xC7); /* C7 /0 */
7069   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); // the constant is emitted as a 32-bit immediate
7070   ins_pipe(ialu_mem_imm);
7071 %}
7072 
7073 // Store Short/Char Immediate
7074 instruct storeImmC0(memory mem, immI0 zero)
7075 %{
7076   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7077   match(Set mem (StoreC mem zero)); // char/short store of the constant matched by immI0
7078 
7079   ins_cost(125); // XXX
7080   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
7081   ins_encode %{
7082     __ movw($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7083   %}
7084   ins_pipe(ialu_mem_reg);
7085 %}
7086 
7087 instruct storeImmI16(memory mem, immI16 src)
7088 %{
7089   predicate(UseStoreImmI16); // gated by the UseStoreImmI16 flag
7090   match(Set mem (StoreC mem src)); // char/short store of a constant matched by immI16
7091 
7092   ins_cost(150);
7093   format %{ "movw    $mem, $src\t# short/char" %}
7094   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7095   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src)); // SizePrefix first, then a 16-bit immediate via Con16
7096   ins_pipe(ialu_mem_imm);
7097 %}
7098 
7099 // Store Byte Immediate
7100 instruct storeImmB0(memory mem, immI0 zero)
7101 %{
7102   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7103   match(Set mem (StoreB mem zero)); // byte store of the constant matched by immI0
7104 
7105   ins_cost(125); // XXX
7106   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7107   ins_encode %{
7108     __ movb($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7109   %}
7110   ins_pipe(ialu_mem_reg);
7111 %}
7112 
7113 instruct storeImmB(memory mem, immI8 src)
7114 %{
7115   match(Set mem (StoreB mem src)); // byte store of a constant matched by immI8
7116 
7117   ins_cost(150); // XXX
7118   format %{ "movb    $mem, $src\t# byte" %}
7119   opcode(0xC6); /* C6 /0 */
7120   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); // Con8or32 picks the immediate width; defined elsewhere
7121   ins_pipe(ialu_mem_imm);
7122 %}
7123 
7124 // Store Aligned Packed Byte XMM register to memory
7125 instruct storeA8B(memory mem, regD src) %{
7126   match(Set mem (Store8B mem src)); // packed 8-byte vector store; the source is a regD operand
7127   ins_cost(145);
7128   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7129   ins_encode( movq_st(mem, src)); // movq_st enc_class is shared by all packed stores in this group
7130   ins_pipe( pipe_slow );
7131 %}
7132 
7133 // Store Aligned Packed Char/Short XMM register to memory
7134 instruct storeA4C(memory mem, regD src) %{
7135   match(Set mem (Store4C mem src)); // packed 4 x char/short vector store; the source is a regD operand
7136   ins_cost(145);
7137   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7138   ins_encode( movq_st(mem, src)); // same encoding as storeA8B; only the matched node differs
7139   ins_pipe( pipe_slow );
7140 %}
7141 
7142 // Store Aligned Packed Integer XMM register to memory
7143 instruct storeA2I(memory mem, regD src) %{
7144   match(Set mem (Store2I mem src)); // packed 2 x int vector store; the source is a regD operand
7145   ins_cost(145);
7146   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7147   ins_encode( movq_st(mem, src)); // same encoding as storeA8B; only the matched node differs
7148   ins_pipe( pipe_slow );
7149 %}
7150 
7151 // Store CMS card-mark Immediate
7152 instruct storeImmCM0_reg(memory mem, immI0 zero)
7153 %{
7154   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7155   match(Set mem (StoreCM mem zero)); // card-mark store of the constant matched by immI0
7156 
7157   ins_cost(125); // XXX
7158   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7159   ins_encode %{
7160     __ movb($mem$$Address, r12); // r12 is the source: per the format it holds the heap base, which is 0 under the predicate
7161   %}
7162   ins_pipe(ialu_mem_reg);
7163 %}
7164 
7165 instruct storeImmCM0(memory mem, immI0 src)
7166 %{
7167   match(Set mem (StoreCM mem src)); // card-mark store of the constant matched by immI0, as an immediate
7168 
7169   ins_cost(150); // XXX
7170   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7171   opcode(0xC6); /* C6 /0 */
7172   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); // same recipe as storeImmB
7173   ins_pipe(ialu_mem_imm);
7174 %}
7175 
7176 // Store Aligned Packed Single Float XMM register to memory
7177 instruct storeA2F(memory mem, regD src) %{
7178   match(Set mem (Store2F mem src)); // packed 2 x float vector store; the source is a regD operand
7179   ins_cost(145);
7180   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7181   ins_encode( movq_st(mem, src)); // same encoding as the other packed stores (storeA8B/4C/2I)
7182   ins_pipe( pipe_slow );
7183 %}
7184 
7185 // Store Float
7186 instruct storeF(memory mem, regF src)
7187 %{
7188   match(Set mem (StoreF mem src)); // store of a float register to memory
7189 
7190   ins_cost(95); // XXX
7191   format %{ "movss   $mem, $src\t# float" %}
7192   opcode(0xF3, 0x0F, 0x11); // three bytes: OpcP, OpcS, OpcT
7193   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); // first opcode byte is emitted before the REX helper
7194   ins_pipe(pipe_slow); // XXX
7195 %}
7196 
7197 // Store immediate Float value (it is faster than store from XMM register)
7198 instruct storeF0(memory mem, immF0 zero)
7199 %{
7200   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7201   match(Set mem (StoreF mem zero)); // float store of the constant matched by immF0
7202 
7203   ins_cost(25); // XXX
7204   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7205   ins_encode %{
7206     __ movl($mem$$Address, r12); // integer 32-bit store of r12 (0 under the predicate, per the format) in place of an XMM store
7207   %}
7208   ins_pipe(ialu_mem_reg);
7209 %}
7210 
7211 instruct storeF_imm(memory mem, immF src)
7212 %{
7213   match(Set mem (StoreF mem src)); // float store of a constant, done as an integer immediate store
7214 
7215   ins_cost(50); // cheaper than the register form storeF (95)
7216   format %{ "movl    $mem, $src\t# float" %}
7217   opcode(0xC7); /* C7 /0 */
7218   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); // Con32F_as_bits: presumably the float's raw 32-bit pattern - confirm in its enc_class
7219   ins_pipe(ialu_mem_imm);
7220 %}
7221 
7222 // Store Double
7223 instruct storeD(memory mem, regD src)
7224 %{
7225   match(Set mem (StoreD mem src)); // store of a double register to memory
7226 
7227   ins_cost(95); // XXX
7228   format %{ "movsd   $mem, $src\t# double" %}
7229   opcode(0xF2, 0x0F, 0x11); // three bytes: OpcP, OpcS, OpcT
7230   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); // first opcode byte is emitted before the REX helper
7231   ins_pipe(pipe_slow); // XXX
7232 %}
7233 
7234 // Store immediate double 0.0 (it is faster than store from XMM register)
7235 instruct storeD0_imm(memory mem, immD0 src)
7236 %{
7237   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); // exact complement of storeD0's predicate
7238   match(Set mem (StoreD mem src)); // double store of the constant matched by immD0, as an immediate
7239 
7240   ins_cost(50); // cheaper than the register form storeD (95)
7241   format %{ "movq    $mem, $src\t# double 0." %}
7242   opcode(0xC7); /* C7 /0 */
7243   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); // wide REX helper with a 32-bit immediate
7244   ins_pipe(ialu_mem_imm);
7245 %}
7246 
7247 instruct storeD0(memory mem, immD0 zero)
7248 %{
7249   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); // only when the narrow-oop base is NULL
7250   match(Set mem (StoreD mem zero)); // double store of the constant matched by immD0
7251 
7252   ins_cost(25); // XXX
7253   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7254   ins_encode %{
7255     __ movq($mem$$Address, r12); // integer 64-bit store of r12 (0 under the predicate, per the format) in place of an XMM store
7256   %}
7257   ins_pipe(ialu_mem_reg);
7258 %}
7259 
7260 instruct storeSSI(stackSlotI dst, rRegI src)
7261 %{
7262   match(Set dst src); // spill an int register to a stack slot
7263 
7264   ins_cost(100);
7265   format %{ "movl    $dst, $src\t# int stk" %}
7266   opcode(0x89); // store direction: memory <- register, per the format
7267   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand first, stack slot as the memory operand
7268   ins_pipe( ialu_mem_reg );
7269 %}
7270 
7271 instruct storeSSL(stackSlotL dst, rRegL src)
7272 %{
7273   match(Set dst src); // spill a long register to a stack slot
7274 
7275   ins_cost(100);
7276   format %{ "movq    $dst, $src\t# long stk" %}
7277   opcode(0x89); // same opcode as storeSSI; the _wide REX helper is what differs
7278   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7279   ins_pipe(ialu_mem_reg);
7280 %}
7281 
7282 instruct storeSSP(stackSlotP dst, rRegP src)
7283 %{
7284   match(Set dst src); // spill a pointer register to a stack slot
7285 
7286   ins_cost(100);
7287   format %{ "movq    $dst, $src\t# ptr stk" %}
7288   opcode(0x89); // identical encoding recipe to storeSSL
7289   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7290   ins_pipe(ialu_mem_reg);
7291 %}
7292 
7293 instruct storeSSF(stackSlotF dst, regF src)
7294 %{
7295   match(Set dst src); // spill a float register to a stack slot
7296 
7297   ins_cost(95); // XXX
7298   format %{ "movss   $dst, $src\t# float stk" %}
7299   opcode(0xF3, 0x0F, 0x11); // same opcode bytes as storeF
7300   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7301   ins_pipe(pipe_slow); // XXX
7302 %}
7303 
7304 instruct storeSSD(stackSlotD dst, regD src)
7305 %{
7306   match(Set dst src); // spill a double register to a stack slot
7307 
7308   ins_cost(95); // XXX
7309   format %{ "movsd   $dst, $src\t# double stk" %}
7310   opcode(0xF2, 0x0F, 0x11); // same opcode bytes as storeD
7311   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7312   ins_pipe(pipe_slow); // XXX
7313 %}
7314 
7315 //----------BSWAP Instructions-------------------------------------------------
7316 instruct bytes_reverse_int(rRegI dst) %{
7317   match(Set dst (ReverseBytesI dst)); // in-place byte reversal of an int (dst is input and output)
7318 
7319   format %{ "bswapl  $dst" %}
7320   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7321   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) ); // opc2_reg: presumably folds the register number into the second opcode byte - confirm
7322   ins_pipe( ialu_reg );
7323 %}
7324 
7325 instruct bytes_reverse_long(rRegL dst) %{
7326   match(Set dst (ReverseBytesL dst)); // in-place byte reversal of a long (dst is input and output)
7327 
7328   format %{ "bswapq  $dst" %}
7329 
7330   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7331   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) ); // same opcode bytes as bytes_reverse_int; the _wide REX helper differs
7332   ins_pipe( ialu_reg);
7333 %}
7334 
7335 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
7336   match(Set dst (ReverseBytesUS dst)); // in-place byte reversal of an unsigned 16-bit value
7337   effect(KILL cr); // shrl writes the flags register, so it must be declared clobbered
7338   format %{ "bswapl  $dst\n\t" 
7339             "shrl    $dst,16\n\t" %}
7340   ins_encode %{
7341     __ bswapl($dst$$Register); // after the 32-bit swap the reversed halfword sits in bits 31..16
7342     __ shrl($dst$$Register, 16); // logical shift brings it down with zero fill
7343   %}
7344   ins_pipe( ialu_reg );
7345 %}
7346 
7347 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
7348   match(Set dst (ReverseBytesS dst)); // in-place byte reversal of a signed 16-bit value
7349   effect(KILL cr); // sarl writes the flags register, so it must be declared clobbered
7350   format %{ "bswapl  $dst\n\t" 
7351             "sarl    $dst,16\n\t" %}
7352   ins_encode %{
7353     __ bswapl($dst$$Register); // after the 32-bit swap the reversed halfword sits in bits 31..16
7354     __ sarl($dst$$Register, 16); // arithmetic shift brings it down with sign fill
7355   %}
7356   ins_pipe( ialu_reg );
7357 %}
7358 
7359 //---------- Zeros Count Instructions ------------------------------------------
7360 
7361 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7362   predicate(UseCountLeadingZerosInstruction); // lzcnt path; the _bsr variant covers the opposite case
7363   match(Set dst (CountLeadingZerosI src)); // leading-zero count of an int
7364   effect(KILL cr); // flags are declared clobbered
7365 
7366   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7367   ins_encode %{
7368     __ lzcntl($dst$$Register, $src$$Register); // single instruction, no fix-up needed
7369   %}
7370   ins_pipe(ialu_reg);
7371 %}
7372 
7373 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7374   predicate(!UseCountLeadingZerosInstruction); // fallback when lzcnt is not enabled
7375   match(Set dst (CountLeadingZerosI src)); // leading-zero count of an int, built from bsr
7376   effect(KILL cr); // bsr/neg/add all write the flags register
7377 
7378   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7379             "jnz     skip\n\t"
7380             "movl    $dst, -1\n"
7381       "skip:\n\t"
7382             "negl    $dst\n\t"
7383             "addl    $dst, 31" %}
7384   ins_encode %{
7385     Register Rdst = $dst$$Register;
7386     Register Rsrc = $src$$Register;
7387     Label skip;
7388     __ bsrl(Rdst, Rsrc); // bit index of the highest set bit; ZF signals src == 0
7389     __ jccb(Assembler::notZero, skip); // src != 0: keep the index bsr produced
7390     __ movl(Rdst, -1); // src == 0: use -1 so the arithmetic below yields 32
7391     __ bind(skip);
7392     __ negl(Rdst); // result = (BitsPerInt - 1) - index, computed as -index + 31
7393     __ addl(Rdst, BitsPerInt - 1);
7394   %}
7395   ins_pipe(ialu_reg);
7396 %}
7397 
7398 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7399   predicate(UseCountLeadingZerosInstruction); // lzcnt path; the _bsr variant covers the opposite case
7400   match(Set dst (CountLeadingZerosL src)); // leading-zero count of a long; the result is an int register
7401   effect(KILL cr); // flags are declared clobbered
7402 
7403   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7404   ins_encode %{
7405     __ lzcntq($dst$$Register, $src$$Register); // single instruction, no fix-up needed
7406   %}
7407   ins_pipe(ialu_reg);
7408 %}
7409 
7410 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7411   predicate(!UseCountLeadingZerosInstruction); // fallback when lzcnt is not enabled
7412   match(Set dst (CountLeadingZerosL src)); // leading-zero count of a long, built from bsr
7413   effect(KILL cr); // bsr/neg/add all write the flags register
7414 
7415   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7416             "jnz     skip\n\t"
7417             "movl    $dst, -1\n"
7418       "skip:\n\t"
7419             "negl    $dst\n\t"
7420             "addl    $dst, 63" %}
7421   ins_encode %{
7422     Register Rdst = $dst$$Register;
7423     Register Rsrc = $src$$Register;
7424     Label skip;
7425     __ bsrq(Rdst, Rsrc); // bit index of the highest set bit; ZF signals src == 0
7426     __ jccb(Assembler::notZero, skip); // src != 0: keep the index bsr produced
7427     __ movl(Rdst, -1); // src == 0: use -1 so the arithmetic below yields 64
7428     __ bind(skip);
7429     __ negl(Rdst); // result = (BitsPerLong - 1) - index, computed as -index + 63
7430     __ addl(Rdst, BitsPerLong - 1);
7431   %}
7432   ins_pipe(ialu_reg);
7433 %}
7434 
7435 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7436   match(Set dst (CountTrailingZerosI src)); // trailing-zero count of an int, built from bsf
7437   effect(KILL cr); // bsf writes the flags register
7438 
7439   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7440             "jnz     done\n\t"
7441             "movl    $dst, 32\n"
7442       "done:" %}
7443   ins_encode %{
7444     Register Rdst = $dst$$Register;
7445     Label done;
7446     __ bsfl(Rdst, $src$$Register); // bit index of the lowest set bit; ZF signals src == 0
7447     __ jccb(Assembler::notZero, done); // src != 0: the index is the answer
7448     __ movl(Rdst, BitsPerInt); // src == 0: the answer is the full width, 32
7449     __ bind(done);
7450   %}
7451   ins_pipe(ialu_reg);
7452 %}
7453 
7454 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7455   match(Set dst (CountTrailingZerosL src)); // trailing-zero count of a long; the result is an int register
7456   effect(KILL cr); // bsf writes the flags register
7457 
7458   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7459             "jnz     done\n\t"
7460             "movl    $dst, 64\n"
7461       "done:" %}
7462   ins_encode %{
7463     Register Rdst = $dst$$Register;
7464     Label done;
7465     __ bsfq(Rdst, $src$$Register); // bit index of the lowest set bit; ZF signals src == 0
7466     __ jccb(Assembler::notZero, done); // src != 0: the index is the answer
7467     __ movl(Rdst, BitsPerLong); // src == 0: the answer is the full width, 64
7468     __ bind(done);
7469   %}
7470   ins_pipe(ialu_reg);
7471 %}
7472 
7473 
7474 //---------- Population Count Instructions -------------------------------------
7475 
7476 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
7477   predicate(UsePopCountInstruction); // only when the popcnt instruction is enabled
7478   match(Set dst (PopCountI src)); // number of set bits in an int register
7479   effect(KILL cr); // popcnt writes the flags register, so it must be declared clobbered
7480   format %{ "popcnt  $dst, $src" %}
7481   ins_encode %{
7482     __ popcntl($dst$$Register, $src$$Register);
7483   %}
7484   ins_pipe(ialu_reg);
7485 %}
7486 
7487 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7488   predicate(UsePopCountInstruction); // only when the popcnt instruction is enabled
7489   match(Set dst (PopCountI (LoadI mem))); // folds the int load into popcnt's memory operand
7490   effect(KILL cr); // popcnt writes the flags register, so it must be declared clobbered
7491   format %{ "popcnt  $dst, $mem" %}
7492   ins_encode %{
7493     __ popcntl($dst$$Register, $mem$$Address);
7494   %}
7495   ins_pipe(ialu_reg);
7496 %}
7497 
7498 // Note: Long.bitCount(long) returns an int.
7499 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7500   predicate(UsePopCountInstruction); // only when the popcnt instruction is enabled
7501   match(Set dst (PopCountL src)); // number of set bits in a long register; the result is an int register
7502   effect(KILL cr); // popcnt writes the flags register, so it must be declared clobbered
7503   format %{ "popcnt  $dst, $src" %}
7504   ins_encode %{
7505     __ popcntq($dst$$Register, $src$$Register);
7506   %}
7507   ins_pipe(ialu_reg);
7508 %}
7509 
7510 // Note: Long.bitCount(long) returns an int.
7511 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7512   predicate(UsePopCountInstruction); // only when the popcnt instruction is enabled
7513   match(Set dst (PopCountL (LoadL mem))); // folds the long load into popcnt's memory operand
7514   effect(KILL cr); // popcnt writes the flags register, so it must be declared clobbered
7515   format %{ "popcnt  $dst, $mem" %}
7516   ins_encode %{
7517     __ popcntq($dst$$Register, $mem$$Address);
7518   %}
7519   ins_pipe(ialu_reg);
7520 %}
7521 
7522 
7523 //----------MemBar Instructions-----------------------------------------------
7524 // Memory barrier flavors
7525 
7526 instruct membar_acquire()
7527 %{
7528   match(MemBarAcquire); // acquire barrier node
7529   ins_cost(0);
7530 
7531   size(0); // occupies no bytes in the code stream
7532   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7533   ins_encode(); // nothing is emitted for this barrier
7534   ins_pipe(empty);
7535 %}
7536 
7537 instruct membar_acquire_lock()
7538 %{
7539   match(MemBarAcquire); // acquire barrier node
7540   predicate(Matcher::prior_fast_lock(n)); // only when the matcher reports a preceding FastLock
7541   ins_cost(0);
7542 
7543   size(0); // occupies no bytes in the code stream
7544   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7545   ins_encode(); // nothing is emitted; the format text gives the reason
7546   ins_pipe(empty);
7547 %}
7548 
7549 instruct membar_release()
7550 %{
7551   match(MemBarRelease); // release barrier node
7552   ins_cost(0);
7553 
7554   size(0); // occupies no bytes in the code stream
7555   format %{ "MEMBAR-release ! (empty encoding)" %}
7556   ins_encode(); // nothing is emitted for this barrier
7557   ins_pipe(empty);
7558 %}
7559 
7560 instruct membar_release_lock()
7561 %{
7562   match(MemBarRelease); // release barrier node
7563   predicate(Matcher::post_fast_unlock(n)); // only when the matcher reports a following FastUnlock
7564   ins_cost(0);
7565 
7566   size(0); // occupies no bytes in the code stream
7567   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7568   ins_encode(); // nothing is emitted; the format text gives the reason
7569   ins_pipe(empty);
7570 %}
7571 
7572 instruct membar_volatile(rFlagsReg cr) %{
7573   match(MemBarVolatile); // volatile (StoreLoad) barrier node
7574   effect(KILL cr); // the MP form shown in the format is a locked addl, which writes flags
7575   ins_cost(400); // far above unnecessary_membar_volatile (0), which carries a predicate
7576 
7577   format %{ 
7578     $$template
7579     if (os::is_MP()) {
7580       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7581     } else {
7582       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7583     }
7584   %}
7585   ins_encode %{
7586     __ membar(Assembler::StoreLoad); // the MP/UP choice shown in the format is presumably made inside membar() - confirm
7587   %}
7588   ins_pipe(pipe_slow);
7589 %}
7590 
7591 instruct unnecessary_membar_volatile()
7592 %{
7593   match(MemBarVolatile); // volatile barrier node
7594   predicate(Matcher::post_store_load_barrier(n)); // only when the matcher says the barrier is redundant here
7595   ins_cost(0); // beats membar_volatile (400) whenever the predicate holds
7596 
7597   size(0); // occupies no bytes in the code stream
7598   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7599   ins_encode(); // nothing is emitted
7600   ins_pipe(empty);
7601 %}
7602 
7603 //----------Move Instructions--------------------------------------------------
7604 
7605 instruct castX2P(rRegP dst, rRegL src)
7606 %{
7607   match(Set dst (CastX2P src)); // reinterpret a long as a pointer
7608 
7609   format %{ "movq    $dst, $src\t# long->ptr" %}
7610   ins_encode(enc_copy_wide(dst, src)); // plain register copy per the format; enc_copy_wide is defined elsewhere
7611   ins_pipe(ialu_reg_reg); // XXX
7612 %}
7613 
7614 instruct castP2X(rRegL dst, rRegP src)
7615 %{
7616   match(Set dst (CastP2X src)); // reinterpret a pointer as a long
7617 
7618   format %{ "movq    $dst, $src\t# ptr -> long" %}
7619   ins_encode(enc_copy_wide(dst, src)); // plain register copy per the format; enc_copy_wide is defined elsewhere
7620   ins_pipe(ialu_reg_reg); // XXX
7621 %}
7622 
7623 
7624 // Convert oop pointer into compressed form
7625 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7626   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); // value not typed NotNull; the _not_null variant takes the other case
7627   match(Set dst (EncodeP src)); // compress an oop pointer into narrow form
7628   effect(KILL cr); // flags are declared clobbered
7629   format %{ "encode_heap_oop $dst,$src" %}
7630   ins_encode %{
7631     Register s = $src$$Register;
7632     Register d = $dst$$Register;
7633     if (s != d) {
7634       __ movq(d, s); // encode_heap_oop works in place, so copy into dst first when the registers differ
7635     }
7636     __ encode_heap_oop(d);
7637   %}
7638   ins_pipe(ialu_reg_long);
7639 %}
7640 
7641 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7642   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); // value typed NotNull
7643   match(Set dst (EncodeP src)); // compress an oop pointer into narrow form
7644   effect(KILL cr); // flags are declared clobbered
7645   format %{ "encode_heap_oop_not_null $dst,$src" %}
7646   ins_encode %{
7647     __ encode_heap_oop_not_null($dst$$Register, $src$$Register); // two-register helper, so no explicit copy is needed here
7648   %}
7649   ins_pipe(ialu_reg_long);
7650 %}
7651 
7652 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7653   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7654             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant); // neither NotNull nor Constant; the _not_null variant takes those
7655   match(Set dst (DecodeN src)); // expand a narrow oop into a full pointer
7656   effect(KILL cr); // flags are declared clobbered
7657   format %{ "decode_heap_oop $dst,$src" %}
7658   ins_encode %{
7659     Register s = $src$$Register;
7660     Register d = $dst$$Register;
7661     if (s != d) {
7662       __ movq(d, s); // decode_heap_oop works in place, so copy into dst first when the registers differ
7663     }
7664     __ decode_heap_oop(d);
7665   %}
7666   ins_pipe(ialu_reg_long);
7667 %}
7668 
7669 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7670   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7671             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant); // exact complement of decodeHeapOop's predicate
7672   match(Set dst (DecodeN src)); // expand a narrow oop into a full pointer
7673   effect(KILL cr); // flags are declared clobbered
7674   format %{ "decode_heap_oop_not_null $dst,$src" %}
7675   ins_encode %{
7676     Register s = $src$$Register;
7677     Register d = $dst$$Register;
7678     if (s != d) {
7679       __ decode_heap_oop_not_null(d, s); // distinct registers: two-operand form
7680     } else {
7681       __ decode_heap_oop_not_null(d); // same register: in-place form
7682     }
7683   %}
7684   ins_pipe(ialu_reg_long);
7685 %}
7686 
7687 
7688 //----------Conditional Move---------------------------------------------------
7689 // Jump
7690 // dummy instruction for generating temp registers
7691 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7692   match(Jump (LShiftL switch_val shift)); // table jump whose index is switch_val << shift
7693   ins_cost(350);
7694   predicate(false); // constant false: this rule is never selected by the matcher
7695   effect(TEMP dest); // dest is a scratch register for the table base
7696 
7697   format %{ "leaq    $dest, table_base\n\t"
7698             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7699   ins_encode(jump_enc_offset(switch_val, shift, dest)); // enc_class defined outside this chunk
7700   ins_pipe(pipe_jmp);
7701   ins_pc_relative(1);
7702 %}
7703 
// Jump on (switch_val << shift) + offset; $dest is a TEMP register that,
// per the format text, receives the jump table base.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest)
%{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  effect(TEMP dest);

  format %{ "leaq    $dest, table_base\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode(jump_enc_addr(switch_val, shift, offset, dest));
  ins_cost(350);
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
%}
7715 
// Jump on an unscaled switch value; $dest is a TEMP register that, per the
// format text, receives the jump table base.
instruct jumpXtnd(rRegL switch_val, rRegI dest)
%{
  match(Jump switch_val);
  effect(TEMP dest);

  format %{ "leaq    $dest, table_base\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode(jump_enc(switch_val, dest));
  ins_cost(350);
  ins_pipe(pipe_jmp);
  ins_pc_relative(1);
%}
7727 
// Conditional move of an int register into $dst, selected by a cmpOp
// condition on rFlagsReg (the "signed" variant per the format text).
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);
%}
7739 
// Conditional move of an int register into $dst, selected by a cmpOpU
// condition on rFlagsRegU (the "unsigned" variant per the format text).
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);
%}
7749 
// Same match rule as cmovI_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovI_regU.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
7757 
// Conditional move of an int loaded from memory (LoadI folded into the rule)
// into $dst, selected by a cmpOp condition on rFlagsReg.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_cost(250); // XXX
  ins_pipe(pipe_cmov_mem);
%}
7768 
// Conditional move of an int loaded from memory (LoadI folded into the rule)
// into $dst, selected by a cmpOpU condition on rFlagsRegU.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_cost(250); // XXX
  ins_pipe(pipe_cmov_mem);
%}
7780 
// Same match rule as cmovI_memU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);

  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7788 
// Conditional move of a compressed-pointer register into $dst, selected by
// a cmpOp condition on rFlagsReg.  Uses the 32-bit cmovl form.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);
%}
7800 
// Conditional move of a compressed-pointer register into $dst, selected by
// a cmpOpU condition on rFlagsRegU.  Uses the 32-bit cmovl form.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);
%}
7812 
// Same match rule as cmovN_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
7820 
// Conditional move of a pointer register into $dst, selected by a cmpOp
// condition on rFlagsReg.  Encoded with the wide REX prefix (cmovq).
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7832 
// Conditional move of a pointer register into $dst, selected by a cmpOpU
// condition on rFlagsRegU.  Encoded with the wide REX prefix (cmovq).
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg); // XXX
%}
7844 
// Same match rule as cmovP_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7852 
7853 // DISABLED: Requires the ADLC to emit a bottom_type call that
7854 // correctly meets the two pointer arguments; one is an incoming
7855 // register but the other is a memory operand.  ALSO appears to
7856 // be buggy with implicit null checks.
7857 //
7858 //// Conditional move
7859 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7860 //%{
7861 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7862 //  ins_cost(250);
7863 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7864 //  opcode(0x0F,0x40);
7865 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7866 //  ins_pipe( pipe_cmov_mem );
7867 //%}
7868 //
7869 //// Conditional move
7870 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7871 //%{
7872 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7873 //  ins_cost(250);
7874 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7875 //  opcode(0x0F,0x40);
7876 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7877 //  ins_pipe( pipe_cmov_mem );
7878 //%}
7879 
// Conditional move of a long register into $dst, selected by a cmpOp
// condition on rFlagsReg.  Encoded with the wide REX prefix (cmovq).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7890 
// Conditional move of a long loaded from memory (LoadL folded into the rule)
// into $dst, selected by a cmpOp condition on rFlagsReg.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_mem);  // XXX
%}
7901 
// Conditional move of a long register into $dst, selected by a cmpOpU
// condition on rFlagsRegU.  Encoded with the wide REX prefix (cmovq).
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_reg); // XXX
%}
7912 
// Same match rule as cmovL_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7920 
// Conditional move of a long loaded from memory (LoadL folded into the rule)
// into $dst, selected by a cmpOpU condition on rFlagsRegU.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_cmov_mem); // XXX
%}
7931 
// Same match rule as cmovL_memU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);

  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
7939 
// Conditional move of a float register, cmpOp / rFlagsReg variant.  Per the
// format text this is a branch around a movss rather than a cmov instruction.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
            "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_slow);
%}
7951 
7952 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7953 // %{
//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7955 
7956 //   ins_cost(200); // XXX
7957 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7958 //             "movss     $dst, $src\n"
7959 //     "skip:" %}
7960 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7961 //   ins_pipe(pipe_slow);
7962 // %}
7963 
// Conditional move of a float register, cmpOpU / rFlagsRegU variant.  Per
// the format text this is a branch around a movss rather than a cmov.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
            "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_slow);
%}
7975 
// Same match rule as cmovF_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
7983 
// Conditional move of a double register, cmpOp / rFlagsReg variant.  Per the
// format text this is a branch around a movsd rather than a cmov instruction.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
            "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_slow);
%}
7995 
// Conditional move of a double register, cmpOpU / rFlagsRegU variant.  Per
// the format text this is a branch around a movsd rather than a cmov.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
            "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_cost(200); // XXX
  ins_pipe(pipe_slow);
%}
8007 
// Same match rule as cmovD_regU, but for cmpOpUCF / rFlagsRegUCF operands;
// expands directly to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
8015 
8016 //----------Arithmetic Instructions--------------------------------------------
8017 //----------Addition Instructions----------------------------------------------
8018 
// int add, register += register.  Flags are declared killed.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8029 
// int add, register += immediate.  Flags are declared killed.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81, 0x00); /* 81 /0 id */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
8040 
// int add, register += memory (LoadI folded into the rule).  Flags are
// declared killed.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem);
%}
8052 
// int add, memory += register: matches a StoreI of (LoadI dst + src) back to
// the same memory operand.  Flags are declared killed.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x01); /* 01 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_reg);
%}
8064 
// int add, memory += immediate: matches a StoreI of (LoadI dst + src) back
// to the same memory operand.  Flags are declared killed.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81); /* 81 /0 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8076 
// int increment of a register: AddI with an immI1 operand, available only
// when UseIncDec is set.  Flags are declared killed.
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8088 
// int increment in memory: StoreI of (LoadI dst + immI1) back to the same
// memory operand, available only when UseIncDec is set.  Flags are declared
// killed.
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF); /* FF /0 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8101 
// int decrement of a register, available only when UseIncDec is set.
// XXX why does that use AddI
// (the rule matches AddI with an immI_M1 operand -- presumably the
// constant -1; confirm against the immI_M1 operand definition).
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8114 
// int decrement in memory, available only when UseIncDec is set.
// XXX why does that use AddI
// (the rule matches a StoreI of LoadI dst + immI_M1 -- presumably the
// constant -1; confirm against the immI_M1 operand definition).
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF); /* FF /1 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8128 
// int add of register + immediate into a separate destination register,
// encoded as an address-size-prefixed (0x67) lea.  No flags effect is
// declared for this rule.
instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));

  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  opcode(0x8D); /* 8D /r */
  ins_encode(Opcode(0x67),
             REX_reg_reg(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_cost(110);
  ins_pipe(ialu_reg_reg);
%}
8139 
// long add, register += register.  Flags are declared killed.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8150 
// long add, register += 32-bit immediate (immL32).  Flags are declared
// killed.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81, 0x00); /* 81 /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
8161 
// long add, register += memory (LoadL folded into the rule).  Flags are
// declared killed.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem);
%}
8173 
// long add, memory += register: matches a StoreL of (LoadL dst + src) back
// to the same memory operand.  Flags are declared killed.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x01); /* 01 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_cost(150); // XXX
  ins_pipe(ialu_mem_reg);
%}
8185 
// long add, memory += 32-bit immediate: matches a StoreL of (LoadL dst + src)
// back to the same memory operand.  Flags are declared killed.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81); /* 81 /0 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8198 
// long increment of a register: AddL with an immL1 operand, available only
// when UseIncDec is set.  Flags are declared killed.
//
// dst is a long register (rRegL): the rule matches AddL, whose first input
// and result are long values, and the encoding uses the wide REX prefix
// (incq).  This agrees with decL_rReg and the other AddL register rules.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8210 
// long increment in memory: StoreL of (LoadL dst + immL1) back to the same
// memory operand, available only when UseIncDec is set.  Flags are declared
// killed.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF); /* FF /0 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8223 
// long decrement of a register, available only when UseIncDec is set.
// XXX why does that use AddL
// (the rule matches AddL with an immL_M1 operand -- presumably the
// constant -1; confirm against the immL_M1 operand definition).
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8236 
// long decrement in memory, available only when UseIncDec is set.
// XXX why does that use AddL
// (the rule matches a StoreL of LoadL dst + immL_M1 -- presumably the
// constant -1; confirm against the immL_M1 operand definition).
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF); /* FF /1 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8250 
// long add of register + 32-bit immediate into a separate destination
// register, encoded as leaq.  No flags effect is declared for this rule.
instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) %{
  match(Set dst (AddL src0 src1));

  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  opcode(0x8D); /* 8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_cost(110);
  ins_pipe(ialu_reg_reg);
%}
8261 
// Pointer add, pointer register += long register.  Flags are declared
// killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8272 
// Pointer add, pointer register += 32-bit immediate (immL32).  Flags are
// declared killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x81, 0x00); /* 81 /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
8283 
8284 // XXX addP mem ops ????
8285 
// Pointer add of register + 32-bit immediate into a separate destination
// register, encoded as leaq.  No flags effect is declared for this rule.
instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) %{
  match(Set dst (AddP src0 src1));

  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_cost(110);
  ins_pipe(ialu_reg_reg);
%}
8296 
// CheckCastPP on a pointer register: declared size 0 with an empty
// encoding, so no machine code is produced for it.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8306 
// CastPP on a pointer register: declared size 0 with an empty encoding, so
// no machine code is produced for it.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8316 
// CastII on an int register: declared size 0 and cost 0 with an empty
// encoding, so no machine code is produced for it.
instruct castII(rRegI dst) %{
  match(Set dst (CastII dst));
  ins_cost(0);

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8327 
// LoadPLocked: same as a regular LoadP when used with compare-swap; emitted
// as a plain 64-bit movq from memory.
instruct loadPLocked(rRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  format %{ "movq    $dst, $mem\t# ptr locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem),
             OpcP,
             reg_mem(dst, mem));
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX
%}
8339 
// LoadLLocked: same as a regular LoadL when used with compare-swap; emitted
// as a plain 64-bit movq from memory.
instruct loadLLocked(rRegL dst, memory mem) %{
  match(Set dst (LoadLLocked mem));

  format %{ "movq    $dst, $mem\t# long locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem),
             OpcP,
             reg_mem(dst, mem));
  ins_cost(125); // XXX
  ins_pipe(ialu_reg_mem); // XXX
%}
8351 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a lock-prefixed CMPXCHG;
// the expected old value is pinned to rax via the rax_RegP operand.
// NOTE(review): unlike storeIConditional / storeLConditional, this rule
// declares no KILL effect on oldval -- confirm that is intentional.
instruct storePConditional(memory heap_top_ptr,
                           rax_RegP oldval, rRegP newval,
                           rFlagsReg cr) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, heap_top_ptr), OpcP, OpcS,
             reg_mem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}
8371 
// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a
// lock-prefixed CMPXCHG; oldval is pinned to rax and declared killed.
instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem), OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8387 
// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a
// lock-prefixed CMPXCHG (wide REX); oldval is pinned to rax and declared
// killed.
instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem), OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8403 
8404 
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
//
// Pointer compare-and-swap producing an int result in $res: a lock-prefixed
// cmpxchgq followed by sete / movzbl on $res.  oldval is pinned to rax; both
// it and the flags are declared killed.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr), OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res),            // sete
             Opcode(0x0F), Opcode(0x94), reg(res),
             REX_reg_breg(res, res),   // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
8428 
// Long compare-and-swap producing an int result in $res: a lock-prefixed
// cmpxchgq followed by sete / movzbl on $res.  oldval is pinned to rax; both
// it and the flags are declared killed.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr), OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res),            // sete
             Opcode(0x0F), Opcode(0x94), reg(res),
             REX_reg_breg(res, res),   // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
8451 
// Int compare-and-swap producing an int result in $res: a lock-prefixed
// cmpxchgl followed by sete / movzbl on $res.  oldval is pinned to rax; both
// it and the flags are declared killed.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr), OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res),            // sete
             Opcode(0x0F), Opcode(0x94), reg(res),
             REX_reg_breg(res, res),   // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
8474 
8475 
// Compressed-pointer compare-and-swap producing an int result in $res: a
// lock-prefixed cmpxchgl followed by sete / movzbl on $res.  oldval is
// pinned to rax; both it and the flags are declared killed.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr)
%{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr), OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res),            // sete
             Opcode(0x0F), Opcode(0x94), reg(res),
             REX_reg_breg(res, res),   // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
8497 
8498 //----------Subtraction Instructions-------------------------------------------
8499 
8500 // Integer Subtraction Instructions
// int subtract, register -= register.  Flags are declared killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8511 
// int subtract, register -= immediate.  Flags are declared killed.
instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81, 0x05);  /* 81 /5 */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
8522 
// int subtract, register -= memory (LoadI folded into the rule).  Flags are
// declared killed.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);
%}
8534 
// int subtract, memory -= register: matches a StoreI of (LoadI dst - src)
// back to the same memory operand.  Flags are declared killed.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x29); /* 29 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);
%}
8546 
// int subtract, memory -= immediate: matches a StoreI of (LoadI dst - src)
// back to the same memory operand.  Flags are declared killed.
instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81); /* 81 /5 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(0x05, dst),
             Con8or32(src));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8558 
// long subtract, register -= register.  Flags are declared killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8569 
// long subtract, register -= 32-bit immediate (immL32).  Flags are declared
// killed.
//
// dst is a long register (rRegL): the rule matches SubL, whose first input
// and result are long values, and the encoding is the wide (subq) form.
// This agrees with subL_rReg and subL_rReg_mem.
instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
8580 
// long subtract, register -= memory (LoadL folded into the rule).  Flags are
// declared killed.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);
%}
8592 
// long subtract, memory -= register: matches a StoreL of (LoadL dst - src)
// back to the same memory operand.  Flags are declared killed.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x29); /* 29 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);
%}
8604 
// long subtract, memory -= 32-bit immediate: matches a StoreL of
// (LoadL dst - src) back to the same memory operand.  Flags are declared
// killed.
instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81); /* 81 /5 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(0x05, dst),
             Con8or32(src));
  ins_cost(125); // XXX
  ins_pipe(ialu_mem_imm);
%}
8617 
// Subtract from a pointer: matches AddP of dst with (0 - src).
// XXX hmpf???
// NOTE(review): src is an int register (rRegI) while the format and the wide
// REX encoding describe a 64-bit subq -- confirm this rule is intended.
instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8630 
// int negate of a register: matches (0 - dst).  Flags are declared killed.
instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // F7 /3
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8641 
// int negate in memory: matches a StoreI of (0 - LoadI dst) back to the same
// memory operand.  Flags are declared killed.
instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // F7 /3
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(secondary, dst));
  ins_pipe(ialu_reg);
%}
8652 
8653 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8654 %{
8655   match(Set dst (SubL zero dst)); // long negate in place: dst = 0 - dst
8656   effect(KILL cr);
8657 
8658   format %{ "negq    $dst\t# long" %}
8659   opcode(0xF7, 0x03);  // Opcode F7 /3
8660   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8661   ins_pipe(ialu_reg);
8662 %}
8663 
8664 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8665 %{
8666   match(Set dst (StoreL dst (SubL zero (LoadL dst)))); // long negate of a memory operand: mem[dst] = 0 - mem[dst]
8667   effect(KILL cr);
8668 
8669   format %{ "negq    $dst\t# long" %}
8670   opcode(0xF7, 0x03);  // Opcode F7 /3
8671   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8672   ins_pipe(ialu_reg); // NOTE(review): memory operand but a register pipe class; other one-operand memory forms in this file use ialu_mem_imm -- confirm
8673 %}
8674 
8675 
8676 //----------Multiplication/Division Instructions-------------------------------
8677 // Integer Multiplication Instructions
8678 // Multiply Register
8679 
8680 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8681 %{
8682   match(Set dst (MulI dst src)); // two-operand int multiply: dst = dst * src
8683   effect(KILL cr);
8684 
8685   ins_cost(300);
8686   format %{ "imull   $dst, $src\t# int" %}
8687   opcode(0x0F, 0xAF); // two-byte opcode: primary 0x0F, secondary 0xAF
8688   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8689   ins_pipe(ialu_reg_reg_alu0);
8690 %}
8691 
8692 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8693 %{
8694   match(Set dst (MulI src imm)); // three-operand int multiply: dst = src * imm (dst is not an input)
8695   effect(KILL cr);
8696 
8697   ins_cost(300);
8698   format %{ "imull   $dst, $src, $imm\t# int" %}
8699   opcode(0x69); /* 69 /r id */
8700   ins_encode(REX_reg_reg(dst, src),
8701              OpcSE(imm), reg_reg(dst, src), Con8or32(imm)); // the immediate is emitted last, after the register pair
8702   ins_pipe(ialu_reg_reg_alu0);
8703 %}
8704 
8705 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8706 %{
8707   match(Set dst (MulI dst (LoadI src))); // int multiply with the load folded in: dst = dst * mem[src]
8708   effect(KILL cr);
8709 
8710   ins_cost(350); // costed above the register form mulI_rReg (300)
8711   format %{ "imull   $dst, $src\t# int" %}
8712   opcode(0x0F, 0xAF);
8713   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8714   ins_pipe(ialu_reg_mem_alu0);
8715 %}
8716 
8717 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8718 %{
8719   match(Set dst (MulI (LoadI src) imm)); // three-operand int multiply from memory: dst = mem[src] * imm
8720   effect(KILL cr);
8721 
8722   ins_cost(300);
8723   format %{ "imull   $dst, $src, $imm\t# int" %}
8724   opcode(0x69); /* 69 /r id */
8725   ins_encode(REX_reg_mem(dst, src),
8726              OpcSE(imm), reg_mem(dst, src), Con8or32(imm)); // the immediate is emitted last, after the memory operand
8727   ins_pipe(ialu_reg_mem_alu0);
8728 %}
8729 
8730 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8731 %{
8732   match(Set dst (MulL dst src)); // two-operand long multiply: dst = dst * src
8733   effect(KILL cr);
8734 
8735   ins_cost(300);
8736   format %{ "imulq   $dst, $src\t# long" %}
8737   opcode(0x0F, 0xAF); // same opcode bytes as mulI_rReg; the wide REX encoding below distinguishes the long form
8738   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8739   ins_pipe(ialu_reg_reg_alu0);
8740 %}
8741 
8742 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8743 %{
8744   match(Set dst (MulL src imm)); // three-operand long multiply: dst = src * imm, imm an immL32 constant
8745   effect(KILL cr);
8746 
8747   ins_cost(300);
8748   format %{ "imulq   $dst, $src, $imm\t# long" %}
8749   opcode(0x69); /* 69 /r id */
8750   ins_encode(REX_reg_reg_wide(dst, src),
8751              OpcSE(imm), reg_reg(dst, src), Con8or32(imm)); // the immediate is emitted last, after the register pair
8752   ins_pipe(ialu_reg_reg_alu0);
8753 %}
8754 
8755 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8756 %{
8757   match(Set dst (MulL dst (LoadL src))); // long multiply with the load folded in: dst = dst * mem[src]
8758   effect(KILL cr);
8759 
8760   ins_cost(350); // costed above the register form mulL_rReg (300)
8761   format %{ "imulq   $dst, $src\t# long" %}
8762   opcode(0x0F, 0xAF);
8763   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8764   ins_pipe(ialu_reg_mem_alu0);
8765 %}
8766 
8767 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8768 %{
8769   match(Set dst (MulL (LoadL src) imm)); // three-operand long multiply from memory: dst = mem[src] * imm
8770   effect(KILL cr);
8771 
8772   ins_cost(300);
8773   format %{ "imulq   $dst, $src, $imm\t# long" %}
8774   opcode(0x69); /* 69 /r id */
8775   ins_encode(REX_reg_mem_wide(dst, src),
8776              OpcSE(imm), reg_mem(dst, src), Con8or32(imm)); // the immediate is emitted last, after the memory operand
8777   ins_pipe(ialu_reg_mem_alu0);
8778 %}
8779 
8780 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8781 %{
8782   match(Set dst (MulHiL src rax)); // high half of the long product; dst uses the rdx operand class, the other factor the rax class
8783   effect(USE_KILL rax, KILL cr); // rax is consumed as an input and clobbered
8784 
8785   ins_cost(300);
8786   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8787   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8788   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // only src is encoded; the RDX:RAX operands shown in the format are not encoded here
8789   ins_pipe(ialu_reg_reg_alu0);
8790 %}
8791 
8792 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8793                    rFlagsReg cr)
8794 %{
8795   match(Set rax (DivI rax div)); // int quotient: rax = rax / div
8796   effect(KILL rdx, KILL cr); // rdx is clobbered (the format shows xorl/cdql writing it)
8797 
8798   ins_cost(30*100+10*100); // XXX
8799   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8800             "jne,s   normal\n\t"
8801             "xorl    rdx, rdx\n\t"
8802             "cmpl    $div, -1\n\t"
8803             "je,s    done\n"
8804     "normal: cdql\n\t"
8805             "idivl   $div\n"
8806     "done:"        %}
8807   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8808   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // per the format, idivl is bypassed when rax == 0x80000000 and div == -1; the guard is presumably emitted by cdql_enc -- confirm in the encode block
8809   ins_pipe(ialu_reg_reg_alu0);
8810 %}
8811 
8812 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8813                    rFlagsReg cr)
8814 %{
8815   match(Set rax (DivL rax div)); // long quotient: rax = rax / div
8816   effect(KILL rdx, KILL cr); // rdx is clobbered (the format shows it used as scratch for the constant and by cdqq)
8817 
8818   ins_cost(30*100+10*100); // XXX
8819   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8820             "cmpq    rax, rdx\n\t"
8821             "jne,s   normal\n\t"
8822             "xorl    rdx, rdx\n\t"
8823             "cmpq    $div, -1\n\t"
8824             "je,s    done\n"
8825     "normal: cdqq\n\t"
8826             "idivq   $div\n"
8827     "done:"        %}
8828   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8829   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // per the format, idivq is bypassed when rax == 0x8000000000000000 and div == -1; the guard is presumably emitted by cdqq_enc -- confirm in the encode block
8830   ins_pipe(ialu_reg_reg_alu0);
8831 %}
8832 
8833 // Integer DIVMOD with Register, both quotient and mod results
8834 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8835                              rFlagsReg cr)
8836 %{
8837   match(DivModI rax div); // no Set: unlike divI_rReg / modI_rReg, the rule names no single destination
8838   effect(KILL cr); // only the flags are killed here; neither rax nor rdx is declared killed
8839 
8840   ins_cost(30*100+10*100); // XXX
8841   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8842             "jne,s   normal\n\t"
8843             "xorl    rdx, rdx\n\t"
8844             "cmpl    $div, -1\n\t"
8845             "je,s    done\n"
8846     "normal: cdql\n\t"
8847             "idivl   $div\n"
8848     "done:"        %}
8849   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8850   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // same encoding list as divI_rReg
8851   ins_pipe(pipe_slow);
8852 %}
8853 
8854 // Long DIVMOD with Register, both quotient and mod results
8855 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8856                              rFlagsReg cr)
8857 %{
8858   match(DivModL rax div); // no Set: unlike divL_rReg / modL_rReg, the rule names no single destination
8859   effect(KILL cr); // only the flags are killed here; neither rax nor rdx is declared killed
8860 
8861   ins_cost(30*100+10*100); // XXX
8862   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8863             "cmpq    rax, rdx\n\t"
8864             "jne,s   normal\n\t"
8865             "xorl    rdx, rdx\n\t"
8866             "cmpq    $div, -1\n\t"
8867             "je,s    done\n"
8868     "normal: cdqq\n\t"
8869             "idivq   $div\n"
8870     "done:"        %}
8871   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8872   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // same encoding list as divL_rReg
8873   ins_pipe(pipe_slow);
8874 %}
8875 
8876 //----------- DivL-By-Constant-Expansions--------------------------------------
8877 // DivI cases are handled by the compiler
8878 
8879 // Magic constant for division by 10: 0x6666666666666667 == (2^66 + 6) / 10
8880 instruct loadConL_0x6666666666666667(rRegL dst)
8881 %{
8882   effect(DEF dst); // no match rule: only instantiated from an expand block (divL_10 in this file)
8883 
8884   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8885   ins_encode(load_immL(dst, 0x6666666666666667)); // the format string above now prints all 16 hex digits of this constant
8886   ins_pipe(ialu_reg);
8887 %}
8888 
8889 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8890 %{
8891   effect(DEF dst, USE src, USE_KILL rax, KILL cr); // no match rule: only instantiated from an expand block (divL_10 in this file)
8892 
8893   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8894   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8895   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src)); // same opcode and encoding list as mulHiL_rReg
8896   ins_pipe(ialu_reg_reg_alu0);
8897 %}
8898 
8899 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8900 %{
8901   effect(USE_DEF dst, KILL cr); // no match rule: only instantiated from an expand block (divL_10 in this file)
8902 
8903   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8904   opcode(0xC1, 0x7); /* C1 /7 ib */
8905   ins_encode(reg_opc_imm_wide(dst, 0x3F)); // 0x3F == 63, the shift count shown in the format
8906   ins_pipe(ialu_reg);
8907 %}
8908 
8909 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8910 %{
8911   effect(USE_DEF dst, KILL cr); // no match rule: only instantiated from an expand block (divL_10 in this file)
8912 
8913   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8914   opcode(0xC1, 0x7); /* C1 /7 ib */
8915   ins_encode(reg_opc_imm_wide(dst, 0x2)); // hard-coded shift count of 2
8916   ins_pipe(ialu_reg);
8917 %}
8918 
8919 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8920 %{
8921   match(Set dst (DivL src div)); // long division by the constant 10, done by multiplication instead of idivq
8922 
8923   ins_cost((5+8)*100); // lower than the 30*100+10*100 cost of the generic divL_rReg
8924   expand %{
8925     rax_RegL rax;                     // Killed temp
8926     rFlagsReg cr;                     // Killed
8927     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8928     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8929     sarL_rReg_63(src, cr);            // sarq  src, 63
8930     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8931     subL_rReg(dst, src, cr);          // subq  rdx, src
8932   %}
8933 %}
8934 
8935 //-----------------------------------------------------------------------------
8936 
8937 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8938                    rFlagsReg cr)
8939 %{
8940   match(Set rdx (ModI rax div)); // int remainder: rdx = rax % div
8941   effect(KILL rax, KILL cr); // rax is an input and is declared clobbered
8942 
8943   ins_cost(300); // XXX
8944   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8945             "jne,s   normal\n\t"
8946             "xorl    rdx, rdx\n\t"
8947             "cmpl    $div, -1\n\t"
8948             "je,s    done\n"
8949     "normal: cdql\n\t"
8950             "idivl   $div\n"
8951     "done:"        %}
8952   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8953   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div)); // same encoding list as divI_rReg; per the format the special case leaves rdx == 0
8954   ins_pipe(ialu_reg_reg_alu0);
8955 %}
8956 
8957 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8958                    rFlagsReg cr)
8959 %{
8960   match(Set rdx (ModL rax div)); // long remainder: rdx = rax % div
8961   effect(KILL rax, KILL cr); // rax is an input and is declared clobbered
8962 
8963   ins_cost(300); // XXX
8964   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8965             "cmpq    rax, rdx\n\t"
8966             "jne,s   normal\n\t"
8967             "xorl    rdx, rdx\n\t"
8968             "cmpq    $div, -1\n\t"
8969             "je,s    done\n"
8970     "normal: cdqq\n\t"
8971             "idivq   $div\n"
8972     "done:"        %}
8973   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8974   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div)); // same encoding list as divL_rReg; per the format the special case leaves rdx == 0
8975   ins_pipe(ialu_reg_reg_alu0);
8976 %}
8977 
8978 // Integer Shift Instructions
8979 // Shift Left by one: int register form (sall, D1 /4)
8980 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8981 %{
8982   match(Set dst (LShiftI dst shift)); // dst = dst << 1
8983   effect(KILL cr);
8984 
8985   format %{ "sall    $dst, $shift" %}
8986   opcode(0xD1, 0x4); /* D1 /4 */
8987   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not encoded; no immediate is emitted for this form
8988   ins_pipe(ialu_reg);
8989 %}
8990 
8991 // Shift Left by one: int memory read-modify-write form (sall, D1 /4)
8992 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8993 %{
8994   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] << 1
8995   effect(KILL cr);
8996 
8997   format %{ "sall    $dst, $shift\t" %}
8998   opcode(0xD1, 0x4); /* D1 /4 */
8999   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9000   ins_pipe(ialu_mem_imm);
9001 %}
9002 
9003 // Shift Left by 8-bit immediate: int register form (sall, C1 /4 ib)
9004 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9005 %{
9006   match(Set dst (LShiftI dst shift)); // dst = dst << shift
9007   effect(KILL cr);
9008 
9009   format %{ "sall    $dst, $shift" %}
9010   opcode(0xC1, 0x4); /* C1 /4 ib */
9011   ins_encode(reg_opc_imm(dst, shift)); // a single encoding helper takes both the register and the shift constant
9012   ins_pipe(ialu_reg);
9013 %}
9014 
9015 // Shift Left by 8-bit immediate: int memory read-modify-write form (sall, C1 /4 ib)
9016 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9017 %{
9018   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] << shift
9019   effect(KILL cr);
9020 
9021   format %{ "sall    $dst, $shift" %}
9022   opcode(0xC1, 0x4); /* C1 /4 ib */
9023   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9024   ins_pipe(ialu_mem_imm);
9025 %}
9026 
9027 // Shift Left by variable: int register form, count in the rcx operand class (sall, D3 /4)
9028 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9029 %{
9030   match(Set dst (LShiftI dst shift)); // dst = dst << shift
9031   effect(KILL cr);
9032 
9033   format %{ "sall    $dst, $shift" %}
9034   opcode(0xD3, 0x4); /* D3 /4 */
9035   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9036   ins_pipe(ialu_reg_reg);
9037 %}
9038 
9039 // Shift Left by variable: int memory read-modify-write form, count in the rcx operand class (sall, D3 /4)
9040 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9041 %{
9042   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] << shift
9043   effect(KILL cr);
9044 
9045   format %{ "sall    $dst, $shift" %}
9046   opcode(0xD3, 0x4); /* D3 /4 */
9047   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9048   ins_pipe(ialu_mem_reg);
9049 %}
9050 
9051 // Arithmetic shift right by one: int register form (sarl, D1 /7)
9052 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9053 %{
9054   match(Set dst (RShiftI dst shift)); // dst = dst >> 1 (signed)
9055   effect(KILL cr);
9056 
9057   format %{ "sarl    $dst, $shift" %}
9058   opcode(0xD1, 0x7); /* D1 /7 */
9059   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not encoded; no immediate is emitted for this form
9060   ins_pipe(ialu_reg);
9061 %}
9062 
9063 // Arithmetic shift right by one: int memory read-modify-write form (sarl, D1 /7)
9064 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9065 %{
9066   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >> 1 (signed)
9067   effect(KILL cr);
9068 
9069   format %{ "sarl    $dst, $shift" %}
9070   opcode(0xD1, 0x7); /* D1 /7 */
9071   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9072   ins_pipe(ialu_mem_imm);
9073 %}
9074 
9075 // Arithmetic Shift Right by 8-bit immediate: int register form (sarl, C1 /7 ib)
9076 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9077 %{
9078   match(Set dst (RShiftI dst shift)); // dst = dst >> shift (signed)
9079   effect(KILL cr);
9080 
9081   format %{ "sarl    $dst, $shift" %}
9082   opcode(0xC1, 0x7); /* C1 /7 ib */
9083   ins_encode(reg_opc_imm(dst, shift)); // a single encoding helper takes both the register and the shift constant
9084   ins_pipe(ialu_reg); // register-only operation: same pipe class as salI_rReg_imm and shrI_rReg_imm
9085 %}
9086 
9087 // Arithmetic Shift Right by 8-bit immediate: int memory read-modify-write form (sarl, C1 /7 ib)
9088 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9089 %{
9090   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >> shift (signed)
9091   effect(KILL cr);
9092 
9093   format %{ "sarl    $dst, $shift" %}
9094   opcode(0xC1, 0x7); /* C1 /7 ib */
9095   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9096   ins_pipe(ialu_mem_imm);
9097 %}
9098 
9099 // Arithmetic Shift Right by variable: int register form, count in the rcx operand class (sarl, D3 /7)
9100 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9101 %{
9102   match(Set dst (RShiftI dst shift)); // dst = dst >> shift (signed)
9103   effect(KILL cr);
9104 
9105   format %{ "sarl    $dst, $shift" %}
9106   opcode(0xD3, 0x7); /* D3 /7 */
9107   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9108   ins_pipe(ialu_reg_reg);
9109 %}
9110 
9111 // Arithmetic Shift Right by variable: int memory read-modify-write form, count in the rcx operand class (sarl, D3 /7)
9112 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9113 %{
9114   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >> shift (signed)
9115   effect(KILL cr);
9116 
9117   format %{ "sarl    $dst, $shift" %}
9118   opcode(0xD3, 0x7); /* D3 /7 */
9119   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9120   ins_pipe(ialu_mem_reg);
9121 %}
9122 
9123 // Logical shift right by one: int register form (shrl, D1 /5)
9124 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9125 %{
9126   match(Set dst (URShiftI dst shift)); // dst = dst >>> 1 (unsigned)
9127   effect(KILL cr);
9128 
9129   format %{ "shrl    $dst, $shift" %}
9130   opcode(0xD1, 0x5); /* D1 /5 */
9131   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the shift operand is not encoded; no immediate is emitted for this form
9132   ins_pipe(ialu_reg);
9133 %}
9134 
9135 // Logical shift right by one: int memory read-modify-write form (shrl, D1 /5)
9136 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9137 %{
9138   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >>> 1 (unsigned)
9139   effect(KILL cr);
9140 
9141   format %{ "shrl    $dst, $shift" %}
9142   opcode(0xD1, 0x5); /* D1 /5 */
9143   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9144   ins_pipe(ialu_mem_imm);
9145 %}
9146 
9147 // Logical Shift Right by 8-bit immediate: int register form (shrl, C1 /5 ib)
9148 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9149 %{
9150   match(Set dst (URShiftI dst shift)); // dst = dst >>> shift (unsigned)
9151   effect(KILL cr);
9152 
9153   format %{ "shrl    $dst, $shift" %}
9154   opcode(0xC1, 0x5); /* C1 /5 ib */
9155   ins_encode(reg_opc_imm(dst, shift)); // a single encoding helper takes both the register and the shift constant
9156   ins_pipe(ialu_reg);
9157 %}
9158 
9159 // Logical Shift Right by 8-bit immediate: int memory read-modify-write form (shrl, C1 /5 ib)
9160 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9161 %{
9162   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >>> shift (unsigned)
9163   effect(KILL cr);
9164 
9165   format %{ "shrl    $dst, $shift" %}
9166   opcode(0xC1, 0x5); /* C1 /5 ib */
9167   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9168   ins_pipe(ialu_mem_imm);
9169 %}
9170 
9171 // Logical Shift Right by variable: int register form, count in the rcx operand class (shrl, D3 /5)
9172 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9173 %{
9174   match(Set dst (URShiftI dst shift)); // dst = dst >>> shift (unsigned)
9175   effect(KILL cr);
9176 
9177   format %{ "shrl    $dst, $shift" %}
9178   opcode(0xD3, 0x5); /* D3 /5 */
9179   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9180   ins_pipe(ialu_reg_reg);
9181 %}
9182 
9183 // Logical Shift Right by variable: int memory read-modify-write form, count in the rcx operand class (shrl, D3 /5)
9184 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9185 %{
9186   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); // mem[dst] = mem[dst] >>> shift (unsigned)
9187   effect(KILL cr);
9188 
9189   format %{ "shrl    $dst, $shift" %}
9190   opcode(0xD3, 0x5); /* D3 /5 */
9191   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9192   ins_pipe(ialu_mem_reg);
9193 %}
9194 
9195 // Long Shift Instructions
9196 // Shift Left by one: long register form (salq, D1 /4)
9197 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9198 %{
9199   match(Set dst (LShiftL dst shift)); // dst = dst << 1; the shift count is an int constant even for the long form
9200   effect(KILL cr);
9201 
9202   format %{ "salq    $dst, $shift" %}
9203   opcode(0xD1, 0x4); /* D1 /4 */
9204   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as salI_rReg_1; the wide REX encoding distinguishes the long form
9205   ins_pipe(ialu_reg);
9206 %}
9207 
9208 // Shift Left by one: long memory read-modify-write form (salq, D1 /4)
9209 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9210 %{
9211   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] << 1
9212   effect(KILL cr);
9213 
9214   format %{ "salq    $dst, $shift" %}
9215   opcode(0xD1, 0x4); /* D1 /4 */
9216   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9217   ins_pipe(ialu_mem_imm);
9218 %}
9219 
9220 // Shift Left by 8-bit immediate: long register form (salq, C1 /4 ib)
9221 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9222 %{
9223   match(Set dst (LShiftL dst shift)); // dst = dst << shift
9224   effect(KILL cr);
9225 
9226   format %{ "salq    $dst, $shift" %}
9227   opcode(0xC1, 0x4); /* C1 /4 ib */
9228   ins_encode(reg_opc_imm_wide(dst, shift)); // wide counterpart of the reg_opc_imm helper used by salI_rReg_imm
9229   ins_pipe(ialu_reg);
9230 %}
9231 
9232 // Shift Left by 8-bit immediate: long memory read-modify-write form (salq, C1 /4 ib)
9233 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9234 %{
9235   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] << shift
9236   effect(KILL cr);
9237 
9238   format %{ "salq    $dst, $shift" %}
9239   opcode(0xC1, 0x4); /* C1 /4 ib */
9240   ins_encode(REX_mem_wide(dst), OpcP,
9241              RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9242   ins_pipe(ialu_mem_imm);
9243 %}
9244 
9245 // Shift Left by variable: long register form, count in the rcx operand class (salq, D3 /4)
9246 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9247 %{
9248   match(Set dst (LShiftL dst shift)); // dst = dst << shift; the count is an int register
9249   effect(KILL cr);
9250 
9251   format %{ "salq    $dst, $shift" %}
9252   opcode(0xD3, 0x4); /* D3 /4 */
9253   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9254   ins_pipe(ialu_reg_reg);
9255 %}
9256 
9257 // Shift Left by variable: long memory read-modify-write form, count in the rcx operand class (salq, D3 /4)
9258 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9259 %{
9260   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] << shift
9261   effect(KILL cr);
9262 
9263   format %{ "salq    $dst, $shift" %}
9264   opcode(0xD3, 0x4); /* D3 /4 */
9265   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9266   ins_pipe(ialu_mem_reg);
9267 %}
9268 
9269 // Arithmetic shift right by one: long register form (sarq, D1 /7)
9270 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9271 %{
9272   match(Set dst (RShiftL dst shift)); // dst = dst >> 1 (signed)
9273   effect(KILL cr);
9274 
9275   format %{ "sarq    $dst, $shift" %}
9276   opcode(0xD1, 0x7); /* D1 /7 */
9277   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the shift operand is not encoded; no immediate is emitted for this form
9278   ins_pipe(ialu_reg);
9279 %}
9280 
9281 // Arithmetic shift right by one: long memory read-modify-write form (sarq, D1 /7)
9282 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9283 %{
9284   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >> 1 (signed)
9285   effect(KILL cr);
9286 
9287   format %{ "sarq    $dst, $shift" %}
9288   opcode(0xD1, 0x7); /* D1 /7 */
9289   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9290   ins_pipe(ialu_mem_imm);
9291 %}
9292 
9293 // Arithmetic Shift Right by 8-bit immediate: long register form (sarq, C1 /7 ib)
9294 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9295 %{
9296   match(Set dst (RShiftL dst shift)); // dst = dst >> shift (signed)
9297   effect(KILL cr);
9298 
9299   format %{ "sarq    $dst, $shift" %}
9300   opcode(0xC1, 0x7); /* C1 /7 ib */
9301   ins_encode(reg_opc_imm_wide(dst, shift)); // wide counterpart of the reg_opc_imm helper used by the int forms
9302   ins_pipe(ialu_reg); // register-only operation: same pipe class as salL_rReg_imm and shrL_rReg_imm
9303 %}
9304 
9305 // Arithmetic Shift Right by 8-bit immediate: long memory read-modify-write form (sarq, C1 /7 ib)
9306 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9307 %{
9308   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >> shift (signed)
9309   effect(KILL cr);
9310 
9311   format %{ "sarq    $dst, $shift" %}
9312   opcode(0xC1, 0x7); /* C1 /7 ib */
9313   ins_encode(REX_mem_wide(dst), OpcP,
9314              RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9315   ins_pipe(ialu_mem_imm);
9316 %}
9317 
9318 // Arithmetic Shift Right by variable: long register form, count in the rcx operand class (sarq, D3 /7)
9319 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9320 %{
9321   match(Set dst (RShiftL dst shift)); // dst = dst >> shift (signed); the count is an int register
9322   effect(KILL cr);
9323 
9324   format %{ "sarq    $dst, $shift" %}
9325   opcode(0xD3, 0x7); /* D3 /7 */
9326   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9327   ins_pipe(ialu_reg_reg);
9328 %}
9329 
9330 // Arithmetic Shift Right by variable: long memory read-modify-write form, count in the rcx operand class (sarq, D3 /7)
9331 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9332 %{
9333   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >> shift (signed)
9334   effect(KILL cr);
9335 
9336   format %{ "sarq    $dst, $shift" %}
9337   opcode(0xD3, 0x7); /* D3 /7 */
9338   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9339   ins_pipe(ialu_mem_reg);
9340 %}
9341 
9342 // Logical shift right by one: long register form (shrq, D1 /5)
9343 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9344 %{
9345   match(Set dst (URShiftL dst shift)); // dst = dst >>> 1 (unsigned)
9346   effect(KILL cr);
9347 
9348   format %{ "shrq    $dst, $shift" %}
9349   opcode(0xD1, 0x5); /* D1 /5 */
9350   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst )); // the shift operand is not encoded; no immediate is emitted for this form
9351   ins_pipe(ialu_reg);
9352 %}
9353 
9354 // Logical shift right by one: long memory read-modify-write form (shrq, D1 /5)
9355 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9356 %{
9357   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >>> 1 (unsigned)
9358   effect(KILL cr);
9359 
9360   format %{ "shrq    $dst, $shift" %}
9361   opcode(0xD1, 0x5); /* D1 /5 */
9362   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // the shift operand is not encoded; no immediate is emitted for this form
9363   ins_pipe(ialu_mem_imm);
9364 %}
9365 
9366 // Logical Shift Right by 8-bit immediate: long register form (shrq, C1 /5 ib)
9367 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9368 %{
9369   match(Set dst (URShiftL dst shift)); // dst = dst >>> shift (unsigned)
9370   effect(KILL cr);
9371 
9372   format %{ "shrq    $dst, $shift" %}
9373   opcode(0xC1, 0x5); /* C1 /5 ib */
9374   ins_encode(reg_opc_imm_wide(dst, shift)); // wide counterpart of the reg_opc_imm helper used by shrI_rReg_imm
9375   ins_pipe(ialu_reg);
9376 %}
9377 
9378 
9379 // Logical Shift Right by 8-bit immediate: long memory read-modify-write form (shrq, C1 /5 ib)
9380 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9381 %{
9382   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >>> shift (unsigned)
9383   effect(KILL cr);
9384 
9385   format %{ "shrq    $dst, $shift" %}
9386   opcode(0xC1, 0x5); /* C1 /5 ib */
9387   ins_encode(REX_mem_wide(dst), OpcP,
9388              RM_opc_mem(secondary, dst), Con8or32(shift)); // the shift constant is emitted after the memory operand
9389   ins_pipe(ialu_mem_imm);
9390 %}
9391 
9392 // Logical Shift Right by variable: long register form, count in the rcx operand class (shrq, D3 /5)
9393 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9394 %{
9395   match(Set dst (URShiftL dst shift)); // dst = dst >>> shift (unsigned); the count is an int register
9396   effect(KILL cr);
9397 
9398   format %{ "shrq    $dst, $shift" %}
9399   opcode(0xD3, 0x5); /* D3 /5 */
9400   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9401   ins_pipe(ialu_reg_reg);
9402 %}
9403 
9404 // Logical Shift Right by variable: long memory read-modify-write form, count in the rcx operand class (shrq, D3 /5)
9405 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9406 %{
9407   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); // mem[dst] = mem[dst] >>> shift (unsigned)
9408   effect(KILL cr);
9409 
9410   format %{ "shrq    $dst, $shift" %}
9411   opcode(0xD3, 0x5); /* D3 /5 */
9412   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9413   ins_pipe(ialu_mem_reg);
9414 %}
9415 
9416 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9417 // This idiom is used by the compiler for the i2b bytecode.
9418 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9419 %{
9420   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // (src << 24) >> 24, replaced by one movsbl; no flags effect is declared
9421 
9422   format %{ "movsbl  $dst, $src\t# i2b" %}
9423   opcode(0x0F, 0xBE);
9424   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // uses the REX_reg_breg variant, where i2s uses REX_reg_reg
9425   ins_pipe(ialu_reg_reg);
9426 %}
9427 
9428 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9429 // This idiom is used by the compiler for the i2s bytecode.
9430 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9431 %{
9432   match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // (src << 16) >> 16, replaced by one movswl; no flags effect is declared
9433 
9434   format %{ "movswl  $dst, $src\t# i2s" %}
9435   opcode(0x0F, 0xBF);
9436   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9437   ins_pipe(ialu_reg_reg);
9438 %}
9439 
9440 // ROL/ROR instructions
9441 
9442 // ROL expand
9443 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
9444   effect(KILL cr, USE_DEF dst); // no match rule: expand helper for rolI_rReg_i1; int rotate-left by one in place
9445 
9446   format %{ "roll    $dst" %}
9447   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9448   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9449   ins_pipe(ialu_reg);
9450 %}
9451 
9452 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
9453   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for rolI_rReg_i8; int rotate-left by an 8-bit constant
9454 
9455   format %{ "roll    $dst, $shift" %}
9456   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9457   ins_encode( reg_opc_imm(dst, shift) );
9458   ins_pipe(ialu_reg);
9459 %}
9460 
9461 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9462 %{
9463   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for the rolI_rReg_Var_* rules; dst's class excludes rcx, which holds the count
9464 
9465   format %{ "roll    $dst, $shift" %}
9466   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9467   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9468   ins_pipe(ialu_reg_reg);
9469 %}
9470 // end of ROL expand
9471 
9472 // Rotate Left by one: recognizes the int shift-or idiom and expands to roll
9473 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9474 %{
9475   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); // (dst << 1) | (dst >>> -1); the -1 count presumably acts as 31 under int shift-count masking -- confirm
9476 
9477   expand %{
9478     rolI_rReg_imm1(dst, cr); // neither shift constant is forwarded; the helper rotates by one
9479   %}
9480 %}
9481 
9482 // Rotate Left by 8-bit immediate: recognizes the int shift-or idiom and expands to roll
9483 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9484 %{
9485   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two shift constants sum to a multiple of 32
9486   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); // (dst << lshift) | (dst >>> rshift)
9487 
9488   expand %{
9489     rolI_rReg_imm8(dst, lshift, cr); // rotate count is the left-shift constant
9490   %}
9491 %}
9492 
9493 // Rotate Left by variable: complementary count written as (0 - shift)
9494 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9495 %{
9496   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); // (dst << shift) | (dst >>> (0 - shift))
9497 
9498   expand %{
9499     rolI_rReg_CL(dst, shift, cr);
9500   %}
9501 %}
9502 
9503 // Rotate Left by variable: complementary count written as (32 - shift)
9504 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9505 %{
9506   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); // (dst << shift) | (dst >>> (32 - shift))
9507 
9508   expand %{
9509     rolI_rReg_CL(dst, shift, cr); // same expansion as rolI_rReg_Var_C0
9510   %}
9511 %}
9512 
9513 // ROR expand
9514 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9515 %{
9516   effect(USE_DEF dst, KILL cr); // no match rule: expand helper for rorI_rReg_i1; int rotate-right by one in place
9517 
9518   format %{ "rorl    $dst" %}
9519   opcode(0xD1, 0x1); /* D1 /1 */
9520   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9521   ins_pipe(ialu_reg);
9522 %}
9523 
9524 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9525 %{
9526   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for rorI_rReg_i8; int rotate-right by an 8-bit constant
9527 
9528   format %{ "rorl    $dst, $shift" %}
9529   opcode(0xC1, 0x1); /* C1 /1 ib */
9530   ins_encode(reg_opc_imm(dst, shift));
9531   ins_pipe(ialu_reg);
9532 %}
9533 
9534 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9535 %{
9536   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for the rorI_rReg_Var_* rules; dst's class excludes rcx, which holds the count
9537 
9538   format %{ "rorl    $dst, $shift" %}
9539   opcode(0xD3, 0x1); /* D3 /1 */
9540   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9541   ins_pipe(ialu_reg_reg);
9542 %}
9543 // end of ROR expand
9544 
9545 // Rotate Right by one: recognizes the int shift-or idiom and expands to rorl
9546 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9547 %{
9548   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); // (dst >>> 1) | (dst << -1); the -1 count presumably acts as 31 under int shift-count masking -- confirm
9549 
9550   expand %{
9551     rorI_rReg_imm1(dst, cr); // neither shift constant is forwarded; the helper rotates by one
9552   %}
9553 %}
9554 
9555 // Rotate Right by 8-bit immediate: recognizes the int shift-or idiom and expands to rorl
9556 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9557 %{
9558   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // only when the two shift constants sum to a multiple of 32
9559   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); // (dst >>> rshift) | (dst << lshift)
9560 
9561   expand %{
9562     rorI_rReg_imm8(dst, rshift, cr); // rotate count is the right-shift constant
9563   %}
9564 %}
9565 
9566 // Rotate Right by variable: complementary count written as (0 - shift)
9567 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9568 %{
9569   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); // (dst >>> shift) | (dst << (0 - shift))
9570 
9571   expand %{
9572     rorI_rReg_CL(dst, shift, cr);
9573   %}
9574 %}
9575 
9576 // Rotate Right by variable: complementary count written as (32 - shift)
9577 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9578 %{
9579   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); // (dst >>> shift) | (dst << (32 - shift))
9580 
9581   expand %{
9582     rorI_rReg_CL(dst, shift, cr); // same expansion as rorI_rReg_Var_C0
9583   %}
9584 %}
9585 
9586 // for long rotate
9587 // ROL expand
9588 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
9589   effect(USE_DEF dst, KILL cr); // no match rule: expand helper for rolL_rReg_i1; long rotate-left by one in place
9590 
9591   format %{ "rolq    $dst" %}
9592   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9593   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // same opcode as rolI_rReg_imm1; the wide REX encoding distinguishes the long form
9594   ins_pipe(ialu_reg);
9595 %}
9596 
9597 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
9598   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for rolL_rReg_i8; long rotate-left by an 8-bit constant
9599 
9600   format %{ "rolq    $dst, $shift" %}
9601   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9602   ins_encode( reg_opc_imm_wide(dst, shift) );
9603   ins_pipe(ialu_reg);
9604 %}
9605 
9606 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9607 %{
9608   effect(USE_DEF dst, USE shift, KILL cr); // no match rule: expand helper for the rolL_rReg_Var_* rules; dst's class excludes rcx, which holds the count
9609 
9610   format %{ "rolq    $dst, $shift" %}
9611   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9612   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // shift is not encoded; it is fixed by its rcx_RegI operand class
9613   ins_pipe(ialu_reg_reg);
9614 %}
9615 // end of ROL expand
9616 
9617 // Rotate Left by one: recognizes the long shift-or idiom and expands to rolq
9618 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9619 %{
9620   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); // (dst << 1) | (dst >>> -1); the -1 count presumably acts as 63 under long shift-count masking -- confirm
9621 
9622   expand %{
9623     rolL_rReg_imm1(dst, cr); // neither shift constant is forwarded; the helper rotates by one
9624   %}
9625 %}
9626 
9627 // Rotate Left by 8-bit immediate: recognizes the long shift-or idiom and expands to rolq
9628 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9629 %{
9630   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); // only when the two shift constants sum to a multiple of 64
9631   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); // (dst << lshift) | (dst >>> rshift)
9632 
9633   expand %{
9634     rolL_rReg_imm8(dst, lshift, cr); // rotate count is the left-shift constant
9635   %}
9636 %}
9637 
9638 // Rotate Left by variable: matches (dst << shift) | (dst >>> (zero - shift)), zero being an immI0 operand
9639 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9640 %{
9641   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9642   // Match-only rule: expands to rolL_rReg_CL; the zero constant is consumed by the pattern and not forwarded.
9643   expand %{
9644     rolL_rReg_CL(dst, shift, cr);
9645   %}
9646 %}
9647 
9648 // Rotate Left by variable: matches (dst << shift) | (dst >>> (c64 - shift)), c64 being an immI_64 operand
9649 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9650 %{
9651   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9652   // Match-only rule: expands to rolL_rReg_CL; c64 is consumed by the pattern and not forwarded.
9653   expand %{
9654     rolL_rReg_CL(dst, shift, cr);
9655   %}
9656 %}
9657 
9658 // ROR expand
9659 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9660 %{
9661   effect(USE_DEF dst, KILL cr);
9662   // No match rule: this is an expand target (used by rorL_rReg_i1). dst is read and written in place; cr is killed.
9663   format %{ "rorq    $dst" %}
9664   opcode(0xD1, 0x1); /* D1 /1 */
9665   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9666   ins_pipe(ialu_reg);
9667 %}
9668 
9669 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9670 %{
9671   effect(USE_DEF dst, USE shift, KILL cr);
9672   // No match rule: expand target of rorL_rReg_i8. Rotates dst right in place by the 8-bit immediate shift.
9673   format %{ "rorq    $dst, $shift" %}
9674   opcode(0xC1, 0x1); /* C1 /1 ib */
9675   ins_encode(reg_opc_imm_wide(dst, shift));
9676   ins_pipe(ialu_reg);
9677 %}
9678 
9679 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9680 %{
9681   effect(USE_DEF dst, USE shift, KILL cr);
9682   // No match rule: expand target of the rorL_rReg_Var_* rules. shift is only a USE here; it is not passed to any encoding class.
9683   format %{ "rorq    $dst, $shift" %}
9684   opcode(0xD3, 0x1); /* D3 /1 */
9685   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9686   ins_pipe(ialu_reg_reg);
9687 %}
9688 // end of ROR expand
9689 
9690 // Rotate Right by one: matches (dst >>> rshift) | (dst << lshift) with rshift an immI1 and lshift an immI_M1
9691 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9692 %{
9693   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9694   // Match-only rule: neither shift constant is forwarded, since rorL_rReg_imm1 takes only dst and cr.
9695   expand %{
9696     rorL_rReg_imm1(dst, cr);
9697   %}
9698 %}
9699 
9700 // Rotate Right by 8-bit immediate: (dst >>> rshift) | (dst << lshift), accepted only when the two counts sum to 0 modulo 64
9701 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9702 %{
9703   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9704   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9705   // The predicate reads both shift constants from the ideal nodes; only rshift is forwarded as the rotate count.
9706   expand %{
9707     rorL_rReg_imm8(dst, rshift, cr);
9708   %}
9709 %}
9710 
9711 // Rotate Right by variable: matches (dst >>> shift) | (dst << (zero - shift)), zero being an immI0 operand
9712 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9713 %{
9714   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9715   // Match-only rule: expands to rorL_rReg_CL; the zero constant is consumed by the pattern and not forwarded.
9716   expand %{
9717     rorL_rReg_CL(dst, shift, cr);
9718   %}
9719 %}
9720 
9721 // Rotate Right by variable: matches (dst >>> shift) | (dst << (c64 - shift)), c64 being an immI_64 operand
9722 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9723 %{
9724   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9725   // Match-only rule: expands to rorL_rReg_CL; c64 is consumed by the pattern and not forwarded.
9726   expand %{
9727     rorL_rReg_CL(dst, shift, cr);
9728   %}
9729 %}
9730 
9731 // Logical Instructions
9732 
9733 // Integer Logical Instructions
9734 
9735 // And Instructions
9736 // And Register with Register (int): dst = dst & src
9737 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9738 %{
9739   match(Set dst (AndI dst src));
9740   effect(KILL cr);
9741   // Two-operand form: dst is both the left input and the result; the flags operand cr is declared killed.
9742   format %{ "andl    $dst, $src\t# int" %}
9743   opcode(0x23); /* Opcode 23 /r */
9744   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9745   ins_pipe(ialu_reg_reg);
9746 %}
9747 
9748 // And Register with Immediate 255: dst = dst & 0xFF, emitted as a zero-extending byte move
9749 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9750 %{
9751   match(Set dst (AndI dst src));
9752   // Uses movzbl dst,dst instead of andl; this rule has no flags operand and declares no KILL effect.
9753   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9754   opcode(0x0F, 0xB6);
9755   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9756   ins_pipe(ialu_reg);
9757 %}
9758 
9759 // And Register with Immediate 255 and promote to long: dst = (long)(src & 0xFF)
9760 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9761 %{
9762   match(Set dst (ConvI2L (AndI src mask)));
9763   // The mask and the ConvI2L are folded into one movzbl from src into dst; mask itself is not encoded.
9764   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9765   opcode(0x0F, 0xB6);
9766   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9767   ins_pipe(ialu_reg);
9768 %}
9769 
9770 // And Register with Immediate 65535: dst = dst & 0xFFFF, emitted as a zero-extending word move
9771 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9772 %{
9773   match(Set dst (AndI dst src));
9774   // Uses movzwl dst,dst instead of andl; this rule has no flags operand and declares no KILL effect.
9775   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9776   opcode(0x0F, 0xB7);
9777   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9778   ins_pipe(ialu_reg);
9779 %}
9780 
9781 // And Register with Immediate 65535 and promote to long: dst = (long)(src & 0xFFFF)
9782 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9783 %{
9784   match(Set dst (ConvI2L (AndI src mask)));
9785   // The mask and the ConvI2L are folded into one movzwl from src into dst; mask itself is not encoded.
9786   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9787   opcode(0x0F, 0xB7);
9788   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9789   ins_pipe(ialu_reg);
9790 %}
9791 
9792 // And Register with Immediate (int): dst = dst & src for a general immI constant
9793 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9794 %{
9795   match(Set dst (AndI dst src));
9796   effect(KILL cr);
9797   // NOTE(review): OpcSErm/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
9798   format %{ "andl    $dst, $src\t# int" %}
9799   opcode(0x81, 0x04); /* Opcode 81 /4 */
9800   ins_encode(OpcSErm(dst, src), Con8or32(src));
9801   ins_pipe(ialu_reg);
9802 %}
9803 
9804 // And Register with Memory (int): dst = dst & LoadI(src), with the load folded into the instruction
9805 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9806 %{
9807   match(Set dst (AndI dst (LoadI src)));
9808   effect(KILL cr);
9809   // Costed at 125 via ins_cost below; the register-register form andI_rReg sets no explicit cost.
9810   ins_cost(125);
9811   format %{ "andl    $dst, $src\t# int" %}
9812   opcode(0x23); /* Opcode 23 /r */
9813   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9814   ins_pipe(ialu_reg_mem);
9815 %}
9816 
9817 // And Memory with Register (int): read-modify-write, stores (LoadI dst) & src back to the same memory operand
9818 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9819 %{
9820   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9821   effect(KILL cr);
9822   // The encoding classes take (src, dst): the register goes first and the memory operand second.
9823   ins_cost(150);
9824   format %{ "andl    $dst, $src\t# int" %}
9825   opcode(0x21); /* Opcode 21 /r */
9826   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9827   ins_pipe(ialu_mem_reg);
9828 %}
9829 
9830 // And Memory with Immediate (int): read-modify-write, stores (LoadI dst) & src back to the same memory operand
9831 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9832 %{
9833   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9834   effect(KILL cr);
9835   // The secondary opcode (0x4) is passed to RM_opc_mem together with the memory operand.
9836   ins_cost(125);
9837   format %{ "andl    $dst, $src\t# int" %}
9838   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9839   ins_encode(REX_mem(dst), OpcSE(src),
9840              RM_opc_mem(secondary, dst), Con8or32(src));
9841   ins_pipe(ialu_mem_imm);
9842 %}
9843 
9844 // Or Instructions
9845 // Or Register with Register (int): dst = dst | src
9846 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9847 %{
9848   match(Set dst (OrI dst src));
9849   effect(KILL cr);
9850   // Two-operand form: dst is both the left input and the result; the flags operand cr is declared killed.
9851   format %{ "orl     $dst, $src\t# int" %}
9852   opcode(0x0B); /* Opcode 0B /r */
9853   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9854   ins_pipe(ialu_reg_reg);
9855 %}
9856 
9857 // Or Register with Immediate (int): dst = dst | src for a general immI constant
9858 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9859 %{
9860   match(Set dst (OrI dst src));
9861   effect(KILL cr);
9862   // NOTE(review): OpcSErm/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
9863   format %{ "orl     $dst, $src\t# int" %}
9864   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9865   ins_encode(OpcSErm(dst, src), Con8or32(src));
9866   ins_pipe(ialu_reg);
9867 %}
9868 
9869 // Or Register with Memory (int): dst = dst | LoadI(src), with the load folded into the instruction
9870 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9871 %{
9872   match(Set dst (OrI dst (LoadI src)));
9873   effect(KILL cr);
9874   // Costed at 125 via ins_cost below; the register-register form orI_rReg sets no explicit cost.
9875   ins_cost(125);
9876   format %{ "orl     $dst, $src\t# int" %}
9877   opcode(0x0B); /* Opcode 0B /r */
9878   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9879   ins_pipe(ialu_reg_mem);
9880 %}
9881 
9882 // Or Memory with Register (int): read-modify-write, stores (LoadI dst) | src back to the same memory operand
9883 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9884 %{
9885   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9886   effect(KILL cr);
9887   // The encoding classes take (src, dst): the register goes first and the memory operand second.
9888   ins_cost(150);
9889   format %{ "orl     $dst, $src\t# int" %}
9890   opcode(0x09); /* Opcode 09 /r */
9891   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9892   ins_pipe(ialu_mem_reg);
9893 %}
9894 
9895 // Or Memory with Immediate (int): read-modify-write, stores (LoadI dst) | src back to the same memory operand
9896 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9897 %{
9898   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9899   effect(KILL cr);
9900   // The secondary opcode (0x1) is passed to RM_opc_mem together with the memory operand.
9901   ins_cost(125);
9902   format %{ "orl     $dst, $src\t# int" %}
9903   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9904   ins_encode(REX_mem(dst), OpcSE(src),
9905              RM_opc_mem(secondary, dst), Con8or32(src));
9906   ins_pipe(ialu_mem_imm);
9907 %}
9908 
9909 // Xor Instructions
9910 // Xor Register with Register (int): dst = dst ^ src
9911 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9912 %{
9913   match(Set dst (XorI dst src));
9914   effect(KILL cr);
9915   // Two-operand form: dst is both the left input and the result; the flags operand cr is declared killed.
9916   format %{ "xorl    $dst, $src\t# int" %}
9917   opcode(0x33); /* Opcode 33 /r */
9918   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9919   ins_pipe(ialu_reg_reg);
9920 %}
9921 
9922 // Xor Register with Immediate -1 (int): dst = dst ^ -1, i.e. bitwise complement, emitted as notl
9923 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9924   match(Set dst (XorI dst imm));
9925   // No flags operand and no KILL effect are declared. The listing mnemonic names the emitted 32-bit notl.
9926   format %{ "notl   $dst" %}
9927   ins_encode %{
9928      __ notl($dst$$Register);
9929   %}
9930   ins_pipe(ialu_reg);
9931 %}
9932 
9933 // Xor Register with Immediate (int): dst = dst ^ src for a general immI constant
9934 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9935 %{
9936   match(Set dst (XorI dst src));
9937   effect(KILL cr);
9938   // NOTE(review): OpcSErm/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
9939   format %{ "xorl    $dst, $src\t# int" %}
9940   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9941   ins_encode(OpcSErm(dst, src), Con8or32(src));
9942   ins_pipe(ialu_reg);
9943 %}
9944 
9945 // Xor Register with Memory (int): dst = dst ^ LoadI(src), with the load folded into the instruction
9946 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9947 %{
9948   match(Set dst (XorI dst (LoadI src)));
9949   effect(KILL cr);
9950   // Costed at 125 via ins_cost below; the register-register form xorI_rReg sets no explicit cost.
9951   ins_cost(125);
9952   format %{ "xorl    $dst, $src\t# int" %}
9953   opcode(0x33); /* Opcode 33 /r */
9954   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9955   ins_pipe(ialu_reg_mem);
9956 %}
9957 
9958 // Xor Memory with Register (int): read-modify-write, stores (LoadI dst) ^ src back to the same memory operand
9959 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9960 %{
9961   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9962   effect(KILL cr);
9963   // The encoding classes take (src, dst): the register goes first and the memory operand second.
9964   ins_cost(150);
9965   format %{ "xorl    $dst, $src\t# int" %}
9966   opcode(0x31); /* Opcode 31 /r */
9967   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9968   ins_pipe(ialu_mem_reg);
9969 %}
9970 
9971 // Xor Memory with Immediate (int): read-modify-write, stores (LoadI dst) ^ src back to the same memory operand
9972 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9973 %{
9974   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9975   effect(KILL cr);
9976   // The secondary opcode (0x6) is passed to RM_opc_mem together with the memory operand.
9977   ins_cost(125);
9978   format %{ "xorl    $dst, $src\t# int" %}
9979   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9980   ins_encode(REX_mem(dst), OpcSE(src),
9981              RM_opc_mem(secondary, dst), Con8or32(src));
9982   ins_pipe(ialu_mem_imm);
9983 %}
9984 
9985 
9986 // Long Logical Instructions
9987 
9988 // And Instructions
9989 // And Register with Register (long): dst = dst & src
9990 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9991 %{
9992   match(Set dst (AndL dst src));
9993   effect(KILL cr);
9994   // Same opcode as andI_rReg; the long form differs by using the REX_reg_reg_wide encoding class.
9995   format %{ "andq    $dst, $src\t# long" %}
9996   opcode(0x23); /* Opcode 23 /r */
9997   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9998   ins_pipe(ialu_reg_reg);
9999 %}
10000 
10001 // And Register with Immediate 255 (long): dst = dst & 0xFF, emitted as a zero-extending byte move
10002 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
10003 %{
10004   match(Set dst (AndL dst src));
10005   // Uses movzbq dst,dst instead of andq; this rule has no flags operand and declares no KILL effect.
10006   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
10007   opcode(0x0F, 0xB6);
10008   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10009   ins_pipe(ialu_reg);
10010 %}
10011 
10012 // And Register with Immediate 65535 (long): dst = dst & 0xFFFF, emitted as a zero-extending word move
10013 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
10014 %{
10015   match(Set dst (AndL dst src));
10016   // Uses movzwq dst,dst instead of andq; this rule has no flags operand and declares no KILL effect.
10017   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
10018   opcode(0x0F, 0xB7);
10019   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
10020   ins_pipe(ialu_reg);
10021 %}
10022 
10023 // And Register with Immediate (long): dst = dst & src, src restricted to the immL32 operand class
10024 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10025 %{
10026   match(Set dst (AndL dst src));
10027   effect(KILL cr);
10028   // NOTE(review): OpcSErm_wide/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
10029   format %{ "andq    $dst, $src\t# long" %}
10030   opcode(0x81, 0x04); /* Opcode 81 /4 */
10031   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10032   ins_pipe(ialu_reg);
10033 %}
10034 
10035 // And Register with Memory (long): dst = dst & LoadL(src), with the load folded into the instruction
10036 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10037 %{
10038   match(Set dst (AndL dst (LoadL src)));
10039   effect(KILL cr);
10040   // Costed at 125 via ins_cost below; the register-register form andL_rReg sets no explicit cost.
10041   ins_cost(125);
10042   format %{ "andq    $dst, $src\t# long" %}
10043   opcode(0x23); /* Opcode 23 /r */
10044   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10045   ins_pipe(ialu_reg_mem);
10046 %}
10047 
10048 // And Memory with Register (long): read-modify-write, stores (LoadL dst) & src back to the same memory operand
10049 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10050 %{
10051   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10052   effect(KILL cr);
10053   // The encoding classes take (src, dst): the register goes first and the memory operand second.
10054   ins_cost(150);
10055   format %{ "andq    $dst, $src\t# long" %}
10056   opcode(0x21); /* Opcode 21 /r */
10057   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10058   ins_pipe(ialu_mem_reg);
10059 %}
10060 
10061 // And Memory with Immediate (long): read-modify-write, stores (LoadL dst) & src back to the same memory operand
10062 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10063 %{
10064   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10065   effect(KILL cr);
10066   // The secondary opcode (0x4) is passed to RM_opc_mem together with the memory operand.
10067   ins_cost(125);
10068   format %{ "andq    $dst, $src\t# long" %}
10069   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10070   ins_encode(REX_mem_wide(dst), OpcSE(src),
10071              RM_opc_mem(secondary, dst), Con8or32(src));
10072   ins_pipe(ialu_mem_imm);
10073 %}
10074 
10075 // Or Instructions
10076 // Or Register with Register (long): dst = dst | src
10077 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10078 %{
10079   match(Set dst (OrL dst src));
10080   effect(KILL cr);
10081   // Same opcode as orI_rReg; the long form differs by using the REX_reg_reg_wide encoding class.
10082   format %{ "orq     $dst, $src\t# long" %}
10083   opcode(0x0B); /* Opcode 0B /r */
10084   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10085   ins_pipe(ialu_reg_reg);
10086 %}
10087 
10088 // Or a long with a pointer reinterpreted as an integer (CastP2X). Use any_RegP to match R15 (TLS register) without spilling.
10089 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10090   match(Set dst (OrL dst (CastP2X src)));
10091   effect(KILL cr);
10092   // The CastP2X emits no code of its own here: the encoding is the same orq as orL_rReg, applied to the pointer register.
10093   format %{ "orq     $dst, $src\t# long" %}
10094   opcode(0x0B);
10095   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10096   ins_pipe(ialu_reg_reg);
10097 %}
10098 
10099 
10100 // Or Register with Immediate (long): dst = dst | src, src restricted to the immL32 operand class
10101 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10102 %{
10103   match(Set dst (OrL dst src));
10104   effect(KILL cr);
10105   // NOTE(review): OpcSErm_wide/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
10106   format %{ "orq     $dst, $src\t# long" %}
10107   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10108   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10109   ins_pipe(ialu_reg);
10110 %}
10111 
10112 // Or Register with Memory (long): dst = dst | LoadL(src), with the load folded into the instruction
10113 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10114 %{
10115   match(Set dst (OrL dst (LoadL src)));
10116   effect(KILL cr);
10117   // Costed at 125 via ins_cost below; the register-register form orL_rReg sets no explicit cost.
10118   ins_cost(125);
10119   format %{ "orq     $dst, $src\t# long" %}
10120   opcode(0x0B); /* Opcode 0B /r */
10121   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10122   ins_pipe(ialu_reg_mem);
10123 %}
10124 
10125 // Or Memory with Register (long): read-modify-write, stores (LoadL dst) | src back to the same memory operand
10126 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10127 %{
10128   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10129   effect(KILL cr);
10130   // The encoding classes take (src, dst): the register goes first and the memory operand second.
10131   ins_cost(150);
10132   format %{ "orq     $dst, $src\t# long" %}
10133   opcode(0x09); /* Opcode 09 /r */
10134   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10135   ins_pipe(ialu_mem_reg);
10136 %}
10137 
10138 // Or Memory with Immediate (long): read-modify-write, stores (LoadL dst) | src back to the same memory operand
10139 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10140 %{
10141   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10142   effect(KILL cr);
10143   // The secondary opcode (0x1) is passed to RM_opc_mem together with the memory operand.
10144   ins_cost(125);
10145   format %{ "orq     $dst, $src\t# long" %}
10146   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10147   ins_encode(REX_mem_wide(dst), OpcSE(src),
10148              RM_opc_mem(secondary, dst), Con8or32(src));
10149   ins_pipe(ialu_mem_imm);
10150 %}
10151 
10152 // Xor Instructions
10153 // Xor Register with Register (long): dst = dst ^ src
10154 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10155 %{
10156   match(Set dst (XorL dst src));
10157   effect(KILL cr);
10158   // Same opcode as xorI_rReg; the long form differs by using the REX_reg_reg_wide encoding class.
10159   format %{ "xorq    $dst, $src\t# long" %}
10160   opcode(0x33); /* Opcode 33 /r */
10161   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10162   ins_pipe(ialu_reg_reg);
10163 %}
10164 
10165 // Xor Register with Immediate -1 (long): dst = dst ^ -1, i.e. bitwise complement, emitted as notq
10166 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10167   match(Set dst (XorL dst imm));  
10168   // No flags operand and no KILL effect are declared; the instruction is emitted through the macro assembler (notq).
10169   format %{ "notq   $dst" %}  
10170   ins_encode %{
10171      __ notq($dst$$Register);
10172   %}
10173   ins_pipe(ialu_reg);
10174 %}
10175 
10176 // Xor Register with Immediate (long): dst = dst ^ src, src restricted to the immL32 operand class
10177 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10178 %{
10179   match(Set dst (XorL dst src));
10180   effect(KILL cr);
10181   // NOTE(review): OpcSErm_wide/Con8or32 are encoding classes defined elsewhere; presumably they choose between an 8-bit and a 32-bit immediate -- confirm there.
10182   format %{ "xorq    $dst, $src\t# long" %}
10183   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10184   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10185   ins_pipe(ialu_reg);
10186 %}
10187 
10188 // Xor Register with Memory (long): dst = dst ^ LoadL(src), with the load folded into the instruction
10189 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10190 %{
10191   match(Set dst (XorL dst (LoadL src)));
10192   effect(KILL cr);
10193   // Costed at 125 via ins_cost below; the register-register form xorL_rReg sets no explicit cost.
10194   ins_cost(125);
10195   format %{ "xorq    $dst, $src\t# long" %}
10196   opcode(0x33); /* Opcode 33 /r */
10197   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10198   ins_pipe(ialu_reg_mem);
10199 %}
10200 
10201 // Xor Memory with Register (long): read-modify-write, stores (LoadL dst) ^ src back to the same memory operand
10202 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10203 %{
10204   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10205   effect(KILL cr);
10206   // The encoding classes take (src, dst): the register goes first and the memory operand second.
10207   ins_cost(150);
10208   format %{ "xorq    $dst, $src\t# long" %}
10209   opcode(0x31); /* Opcode 31 /r */
10210   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10211   ins_pipe(ialu_mem_reg);
10212 %}
10213 
10214 // Xor Memory with Immediate (long): read-modify-write, stores (LoadL dst) ^ src back to the same memory operand
10215 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10216 %{
10217   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10218   effect(KILL cr);
10219   // The secondary opcode (0x6) is passed to RM_opc_mem together with the memory operand.
10220   ins_cost(125);
10221   format %{ "xorq    $dst, $src\t# long" %}
10222   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10223   ins_encode(REX_mem_wide(dst), OpcSE(src),
10224              RM_opc_mem(secondary, dst), Con8or32(src));
10225   ins_pipe(ialu_mem_imm);
10226 %}
10227 
10228 // Convert Int to Boolean: matches Conv2B on an int register and produces 1 when src is non-zero, else 0
10229 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10230 %{
10231   match(Set dst (Conv2B src));
10232   effect(KILL cr);
10233   // Three instructions are emitted back to back: testl src,src; setnz dst; movzbl dst,dst (see format).
10234   format %{ "testl   $src, $src\t# ci2b\n\t"
10235             "setnz   $dst\n\t"
10236             "movzbl  $dst, $dst" %}
10237   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10238              setNZ_reg(dst),
10239              REX_reg_breg(dst, dst), // movzbl
10240              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10241   ins_pipe(pipe_slow); // XXX
10242 %}
10243 
10244 // Convert Pointer to Boolean: matches Conv2B on a pointer register and produces 1 when src is non-zero, else 0
10245 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10246 %{
10247   match(Set dst (Conv2B src));
10248   effect(KILL cr);
10249   // Same sequence as convI2B except that the test is the wide form (testq, REX_reg_reg_wide).
10250   format %{ "testq   $src, $src\t# cp2b\n\t"
10251             "setnz   $dst\n\t"
10252             "movzbl  $dst, $dst" %}
10253   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10254              setNZ_reg(dst),
10255              REX_reg_breg(dst, dst), // movzbl
10256              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10257   ins_pipe(pipe_slow); // XXX
10258 %}
10259 
10260 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10261 %{
10262   match(Set dst (CmpLTMask p q));
10263   effect(KILL cr);
10264   // Per the format text: cmpl p,q; setlt dst; movzbl dst,dst; negl dst -- so dst ends up -1 when p < q and 0 otherwise.
10265   ins_cost(400); // XXX
10266   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10267             "setlt   $dst\n\t"
10268             "movzbl  $dst, $dst\n\t"
10269             "negl    $dst" %}
10270   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10271              setLT_reg(dst),
10272              REX_reg_breg(dst, dst), // movzbl
10273              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10274              neg_reg(dst));
10275   ins_pipe(pipe_slow);
10276 %}
10277 
10278 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10279 %{
10280   match(Set dst (CmpLTMask dst zero));
10281   effect(KILL cr);
10282   // Special case of cmpLTMask against an immI0 operand: a single in-place sarl by 31 (0x1F) replaces the four-instruction sequence.
10283   ins_cost(100); // XXX
10284   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10285   opcode(0xC1, 0x7);  /* C1 /7 ib */
10286   ins_encode(reg_opc_imm(dst, 0x1F));
10287   ins_pipe(ialu_reg);
10288 %}
10289 
10290 
10291 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10292                          rRegI tmp,
10293                          rFlagsReg cr)
10294 %{
10295   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10296   effect(TEMP tmp, KILL cr);
10297   // Fused form of p = (CmpLTMask(p, q) & y) + (p - q). Per the format text, tmp holds the sbbl result that is masked with y and added back.
10298   ins_cost(400); // XXX
10299   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10300             "sbbl    $tmp, $tmp\n\t"
10301             "andl    $tmp, $y\n\t"
10302             "addl    $p, $tmp" %}
10303   ins_encode(enc_cmpLTP(p, q, y, tmp));
10304   ins_pipe(pipe_cmplt);
10305 %}
10306 
10307 /* If I enable this, I encourage spilling in the inner loop of compress.
10308 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10309 %{
10310   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10311   effect( TEMP tmp, KILL cr );
10312   ins_cost(400);
10313 
10314   format %{ "SUB    $p,$q\n\t"
10315             "SBB    RCX,RCX\n\t"
10316             "AND    RCX,$y\n\t"
10317             "ADD    $p,RCX" %}
10318   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10319 %}
10320 */
10321 
10322 //---------- FP Instructions------------------------------------------------
10323 
10324 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10325 %{
10326   match(Set cr (CmpF src1 src2));
10327   // Float compare into rFlagsRegU. cmpfp_fixup is appended after ucomiss; per the format text it rewrites the saved flags when parity signals a NaN.
10328   ins_cost(145);
10329   format %{ "ucomiss $src1, $src2\n\t"
10330             "jnp,s   exit\n\t"
10331             "pushfq\t# saw NaN, set CF\n\t"
10332             "andq    [rsp], #0xffffff2b\n\t"
10333             "popfq\n"
10334     "exit:   nop\t# avoid branch to branch" %}
10335   opcode(0x0F, 0x2E);
10336   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10337              cmpfp_fixup);
10338   ins_pipe(pipe_slow);
10339 %}
10340 
10341 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10342   match(Set cr (CmpF src1 src2));
10343   // Float compare into rFlagsRegUCF: a bare ucomiss with no NaN fix-up, so it is costed at 100 like the other *CF compare rules.
10344   ins_cost(100);
10345   format %{ "ucomiss $src1, $src2" %}
10346   ins_encode %{
10347     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10348   %}
10349   ins_pipe(pipe_slow);
10350 %}
10351 
10352 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10353 %{
10354   match(Set cr (CmpF src1 (LoadF src2)));
10355   // Float compare against a folded LoadF, into rFlagsRegU; cmpfp_fixup follows the ucomiss as in cmpF_cc_reg.
10356   ins_cost(145);
10357   format %{ "ucomiss $src1, $src2\n\t"
10358             "jnp,s   exit\n\t"
10359             "pushfq\t# saw NaN, set CF\n\t"
10360             "andq    [rsp], #0xffffff2b\n\t"
10361             "popfq\n"
10362     "exit:   nop\t# avoid branch to branch" %}
10363   opcode(0x0F, 0x2E);
10364   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10365              cmpfp_fixup);
10366   ins_pipe(pipe_slow);
10367 %}
10368 
10369 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10370   match(Set cr (CmpF src1 (LoadF src2)));
10371   // Same compare as cmpF_cc_mem but producing rFlagsRegUCF: only the ucomiss is encoded, without cmpfp_fixup.
10372   ins_cost(100);
10373   format %{ "ucomiss $src1, $src2" %}
10374   opcode(0x0F, 0x2E);
10375   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10376   ins_pipe(pipe_slow);
10377 %}
10378 
10379 instruct cmpF_cc_imm(rFlagsRegU cr, regF src1, immF src2)
10380 %{
10381   match(Set cr (CmpF src1 src2));
10382   // Float compare against a constant, into rFlagsRegU. The constant operand is encoded through load_immF; cmpfp_fixup follows.
10383   ins_cost(145);
10384   format %{ "ucomiss $src1, $src2\n\t"
10385             "jnp,s   exit\n\t"
10386             "pushfq\t# saw NaN, set CF\n\t"
10387             "andq    [rsp], #0xffffff2b\n\t"
10388             "popfq\n"
10389     "exit:   nop\t# avoid branch to branch" %}
10390   opcode(0x0F, 0x2E);
10391   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10392              cmpfp_fixup);
10393   ins_pipe(pipe_slow);
10394 %}
10395 
10396 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src1, immF src2) %{
10397   match(Set cr (CmpF src1 src2));
10398   // Same compare as cmpF_cc_imm but producing rFlagsRegUCF: only the ucomiss is encoded, without cmpfp_fixup.
10399   ins_cost(100);
10400   format %{ "ucomiss $src1, $src2" %}
10401   opcode(0x0F, 0x2E);
10402   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2));
10403   ins_pipe(pipe_slow);
10404 %}
10405 
10406 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10407 %{
10408   match(Set cr (CmpD src1 src2));
10409   // Double compare into rFlagsRegU. The 0x66 primary opcode byte is emitted before the REX prefix; cmpfp_fixup follows the ucomisd.
10410   ins_cost(145);
10411   format %{ "ucomisd $src1, $src2\n\t"
10412             "jnp,s   exit\n\t"
10413             "pushfq\t# saw NaN, set CF\n\t"
10414             "andq    [rsp], #0xffffff2b\n\t"
10415             "popfq\n"
10416     "exit:   nop\t# avoid branch to branch" %}
10417   opcode(0x66, 0x0F, 0x2E);
10418   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10419              cmpfp_fixup);
10420   ins_pipe(pipe_slow);
10421 %}
10422 
10423 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10424   match(Set cr (CmpD src1 src2));
10425   // Double compare into rFlagsRegUCF: a bare ucomisd emitted through the macro assembler, with no NaN fix-up sequence.
10426   ins_cost(100);
10427   format %{ "ucomisd $src1, $src2" %}
10428   ins_encode %{
10429     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10430   %}
10431   ins_pipe(pipe_slow);
10432 %}
10433 
10434 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10435 %{
10436   match(Set cr (CmpD src1 (LoadD src2)));
10437   // Double compare against a folded LoadD, into rFlagsRegU; cmpfp_fixup follows the ucomisd as in cmpD_cc_reg.
10438   ins_cost(145);
10439   format %{ "ucomisd $src1, $src2\n\t"
10440             "jnp,s   exit\n\t"
10441             "pushfq\t# saw NaN, set CF\n\t"
10442             "andq    [rsp], #0xffffff2b\n\t"
10443             "popfq\n"
10444     "exit:   nop\t# avoid branch to branch" %}
10445   opcode(0x66, 0x0F, 0x2E);
10446   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10447              cmpfp_fixup);
10448   ins_pipe(pipe_slow);
10449 %}
10450 
10451 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10452   match(Set cr (CmpD src1 (LoadD src2)));
10453   // Same compare as cmpD_cc_mem but producing rFlagsRegUCF: only the ucomisd is encoded, without cmpfp_fixup.
10454   ins_cost(100);
10455   format %{ "ucomisd $src1, $src2" %}
10456   opcode(0x66, 0x0F, 0x2E);
10457   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10458   ins_pipe(pipe_slow);
10459 %}
10460 
10461 instruct cmpD_cc_imm(rFlagsRegU cr, regD src1, immD src2)
10462 %{
10463   match(Set cr (CmpD src1 src2));
10464   // Double compare against a constant, into rFlagsRegU. The constant operand is encoded through load_immD; cmpfp_fixup follows.
10465   ins_cost(145);
10466   format %{ "ucomisd $src1, [$src2]\n\t"
10467             "jnp,s   exit\n\t"
10468             "pushfq\t# saw NaN, set CF\n\t"
10469             "andq    [rsp], #0xffffff2b\n\t"
10470             "popfq\n"
10471     "exit:   nop\t# avoid branch to branch" %}
10472   opcode(0x66, 0x0F, 0x2E);
10473   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10474              cmpfp_fixup);
10475   ins_pipe(pipe_slow);
10476 %}
10477 
10478 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src1, immD src2) %{
10479   match(Set cr (CmpD src1 src2));
10480   // Same compare as cmpD_cc_imm but producing rFlagsRegUCF: only the ucomisd is encoded, without cmpfp_fixup.
10481   ins_cost(100);
10482   format %{ "ucomisd $src1, [$src2]" %}
10483   opcode(0x66, 0x0F, 0x2E);
10484   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2));
10485   ins_pipe(pipe_slow);
10486 %}
10487 
10488 // Compare into -1,0,1: three-way float compare (CmpF3) of two registers into an int register
10489 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10490 %{
10491   match(Set dst (CmpF3 src1 src2));
10492   effect(KILL cr);
10493   // Per the format text: dst is preset to -1 and kept if the parity or below condition holds after ucomiss; otherwise setne/movzbl yield 0 or 1.
10494   ins_cost(275);
10495   format %{ "ucomiss $src1, $src2\n\t"
10496             "movl    $dst, #-1\n\t"
10497             "jp,s    done\n\t"
10498             "jb,s    done\n\t"
10499             "setne   $dst\n\t"
10500             "movzbl  $dst, $dst\n"
10501     "done:" %}
10502 
10503   opcode(0x0F, 0x2E);
10504   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10505              cmpfp3(dst));
10506   ins_pipe(pipe_slow);
10507 %}
10508 
10509 // Compare into -1,0,1: three-way float compare (CmpF3) of a register against a folded LoadF
10510 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10511 %{
10512   match(Set dst (CmpF3 src1 (LoadF src2)));
10513   effect(KILL cr);
10514   // Same result sequence as cmpF_reg (cmpfp3 on dst); only the ucomiss operand encoding differs (reg_mem).
10515   ins_cost(275);
10516   format %{ "ucomiss $src1, $src2\n\t"
10517             "movl    $dst, #-1\n\t"
10518             "jp,s    done\n\t"
10519             "jb,s    done\n\t"
10520             "setne   $dst\n\t"
10521             "movzbl  $dst, $dst\n"
10522     "done:" %}
10523 
10524   opcode(0x0F, 0x2E);
10525   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10526              cmpfp3(dst));
10527   ins_pipe(pipe_slow);
10528 %}
10529 
10530 // Compare into -1,0,1: three-way float compare (CmpF3) of a register against a float constant
10531 instruct cmpF_imm(rRegI dst, regF src1, immF src2, rFlagsReg cr)
10532 %{
10533   match(Set dst (CmpF3 src1 src2));
10534   effect(KILL cr);
10535   // Same result sequence as cmpF_reg (cmpfp3 on dst); the constant operand is encoded through load_immF.
10536   ins_cost(275);
10537   format %{ "ucomiss $src1, [$src2]\n\t"
10538             "movl    $dst, #-1\n\t"
10539             "jp,s    done\n\t"
10540             "jb,s    done\n\t"
10541             "setne   $dst\n\t"
10542             "movzbl  $dst, $dst\n"
10543     "done:" %}
10544 
10545   opcode(0x0F, 0x2E);
10546   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, load_immF(src1, src2),
10547              cmpfp3(dst));
10548   ins_pipe(pipe_slow);
10549 %}
10550 
10551 // Compare into -1,0,1: three-way double compare (CmpD3) of two registers into an int register
10552 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10553 %{
10554   match(Set dst (CmpD3 src1 src2));
10555   effect(KILL cr);
10556   // Per the format text: dst is preset to -1 and kept if the parity or below condition holds after ucomisd; otherwise setne/movzbl yield 0 or 1.
10557   ins_cost(275);
10558   format %{ "ucomisd $src1, $src2\n\t"
10559             "movl    $dst, #-1\n\t"
10560             "jp,s    done\n\t"
10561             "jb,s    done\n\t"
10562             "setne   $dst\n\t"
10563             "movzbl  $dst, $dst\n"
10564     "done:" %}
10565 
10566   opcode(0x66, 0x0F, 0x2E);
10567   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10568              cmpfp3(dst));
10569   ins_pipe(pipe_slow);
10570 %}
10571 
10572 // Compare into -1,0,1: three-way double compare (CmpD3) of a register against a folded LoadD
10573 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10574 %{
10575   match(Set dst (CmpD3 src1 (LoadD src2)));
10576   effect(KILL cr);
10577   // Same result sequence as cmpD_reg (cmpfp3 on dst); only the ucomisd operand encoding differs (reg_mem).
10578   ins_cost(275);
10579   format %{ "ucomisd $src1, $src2\n\t"
10580             "movl    $dst, #-1\n\t"
10581             "jp,s    done\n\t"
10582             "jb,s    done\n\t"
10583             "setne   $dst\n\t"
10584             "movzbl  $dst, $dst\n"
10585     "done:" %}
10586 
10587   opcode(0x66, 0x0F, 0x2E);
10588   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10589              cmpfp3(dst));
10590   ins_pipe(pipe_slow);
10591 %}
10592 
10593 // Compare into -1,0,1: three-way double compare (CmpD3) of a register against a double constant
10594 instruct cmpD_imm(rRegI dst, regD src1, immD src2, rFlagsReg cr)
10595 %{
10596   match(Set dst (CmpD3 src1 src2));
10597   effect(KILL cr);
10598   // Same result sequence as cmpD_reg (cmpfp3 on dst); the constant operand is encoded through load_immD.
10599   ins_cost(275);
10600   format %{ "ucomisd $src1, [$src2]\n\t"
10601             "movl    $dst, #-1\n\t"
10602             "jp,s    done\n\t"
10603             "jb,s    done\n\t"
10604             "setne   $dst\n\t"
10605             "movzbl  $dst, $dst\n"
10606     "done:" %}
10607 
10608   opcode(0x66, 0x0F, 0x2E);
10609   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, load_immD(src1, src2),
10610              cmpfp3(dst));
10611   ins_pipe(pipe_slow);
10612 %}
10613 
10614 instruct addF_reg(regF dst, regF src)
10615 %{
10616   match(Set dst (AddF dst src));
10617   // Float add, register with register: dst = dst + src via addss. No flags operand or effect is declared.
10618   format %{ "addss   $dst, $src" %}
10619   ins_cost(150); // XXX
10620   opcode(0xF3, 0x0F, 0x58);
10621   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10622   ins_pipe(pipe_slow);
10623 %}
10624 
10625 instruct addF_mem(regF dst, memory src)
10626 %{
10627   match(Set dst (AddF dst (LoadF src)));
10628   // Float add, register with memory: dst = dst + LoadF(src), with the load folded into the addss.
10629   format %{ "addss   $dst, $src" %}
10630   ins_cost(150); // XXX
10631   opcode(0xF3, 0x0F, 0x58);
10632   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10633   ins_pipe(pipe_slow);
10634 %}
10635 
10636 instruct addF_imm(regF dst, immF src)
10637 %{ // Float add of an immF constant: dst = dst + src.
10638   match(Set dst (AddF dst src));
10639 
10640   format %{ "addss   $dst, [$src]" %}
10641   ins_cost(150); // XXX
10642   opcode(0xF3, 0x0F, 0x58);
10643   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src)); // constant shown as [$src] in the format
10644   ins_pipe(pipe_slow);
10645 %}
10646 
10647 instruct addD_reg(regD dst, regD src)
10648 %{ // Double add, register source: dst = dst + src.
10649   match(Set dst (AddD dst src)); // dst is both the first input and the result
10650 
10651   format %{ "addsd   $dst, $src" %}
10652   ins_cost(150); // XXX
10653   opcode(0xF2, 0x0F, 0x58);
10654   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10655   ins_pipe(pipe_slow);
10656 %}
10657 
10658 instruct addD_mem(regD dst, memory src)
10659 %{ // Double add with the second operand read from memory: dst = dst + [src].
10660   match(Set dst (AddD dst (LoadD src))); // the LoadD is folded into the add's memory operand
10661 
10662   format %{ "addsd   $dst, $src" %}
10663   ins_cost(150); // XXX
10664   opcode(0xF2, 0x0F, 0x58);
10665   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10666   ins_pipe(pipe_slow);
10667 %}
10668 
10669 instruct addD_imm(regD dst, immD src)
10670 %{ // Double add of an immD constant: dst = dst + src.
10671   match(Set dst (AddD dst src));
10672 
10673   format %{ "addsd   $dst, [$src]" %}
10674   ins_cost(150); // XXX
10675   opcode(0xF2, 0x0F, 0x58);
10676   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src)); // constant shown as [$src] in the format
10677   ins_pipe(pipe_slow);
10678 %}
10679 
10680 instruct subF_reg(regF dst, regF src)
10681 %{ // Float subtract, register source: dst = dst - src.
10682   match(Set dst (SubF dst src)); // dst is both the first input and the result
10683 
10684   format %{ "subss   $dst, $src" %}
10685   ins_cost(150); // XXX
10686   opcode(0xF3, 0x0F, 0x5C);
10687   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10688   ins_pipe(pipe_slow);
10689 %}
10690 
10691 instruct subF_mem(regF dst, memory src)
10692 %{ // Float subtract with the second operand read from memory: dst = dst - [src].
10693   match(Set dst (SubF dst (LoadF src))); // the LoadF is folded into the subtract's memory operand
10694 
10695   format %{ "subss   $dst, $src" %}
10696   ins_cost(150); // XXX
10697   opcode(0xF3, 0x0F, 0x5C);
10698   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10699   ins_pipe(pipe_slow);
10700 %}
10701 
10702 instruct subF_imm(regF dst, immF src)
10703 %{ // Float subtract of an immF constant: dst = dst - src.
10704   match(Set dst (SubF dst src));
10705 
10706   format %{ "subss   $dst, [$src]" %}
10707   ins_cost(150); // XXX
10708   opcode(0xF3, 0x0F, 0x5C);
10709   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src)); // constant shown as [$src] in the format
10710   ins_pipe(pipe_slow);
10711 %}
10712 
10713 instruct subD_reg(regD dst, regD src)
10714 %{ // Double subtract, register source: dst = dst - src.
10715   match(Set dst (SubD dst src)); // dst is both the first input and the result
10716 
10717   format %{ "subsd   $dst, $src" %}
10718   ins_cost(150); // XXX
10719   opcode(0xF2, 0x0F, 0x5C);
10720   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10721   ins_pipe(pipe_slow);
10722 %}
10723 
10724 instruct subD_mem(regD dst, memory src)
10725 %{ // Double subtract with the second operand read from memory: dst = dst - [src].
10726   match(Set dst (SubD dst (LoadD src))); // the LoadD is folded into the subtract's memory operand
10727 
10728   format %{ "subsd   $dst, $src" %}
10729   ins_cost(150); // XXX
10730   opcode(0xF2, 0x0F, 0x5C);
10731   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10732   ins_pipe(pipe_slow);
10733 %}
10734 
10735 instruct subD_imm(regD dst, immD src)
10736 %{ // Double subtract of an immD constant: dst = dst - src.
10737   match(Set dst (SubD dst src));
10738 
10739   format %{ "subsd   $dst, [$src]" %}
10740   ins_cost(150); // XXX
10741   opcode(0xF2, 0x0F, 0x5C);
10742   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src)); // constant shown as [$src] in the format
10743   ins_pipe(pipe_slow);
10744 %}
10745 
10746 instruct mulF_reg(regF dst, regF src)
10747 %{ // Float multiply, register source: dst = dst * src.
10748   match(Set dst (MulF dst src)); // dst is both the first input and the result
10749 
10750   format %{ "mulss   $dst, $src" %}
10751   ins_cost(150); // XXX
10752   opcode(0xF3, 0x0F, 0x59);
10753   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10754   ins_pipe(pipe_slow);
10755 %}
10756 
10757 instruct mulF_mem(regF dst, memory src)
10758 %{ // Float multiply with the second operand read from memory: dst = dst * [src].
10759   match(Set dst (MulF dst (LoadF src))); // the LoadF is folded into the multiply's memory operand
10760 
10761   format %{ "mulss   $dst, $src" %}
10762   ins_cost(150); // XXX
10763   opcode(0xF3, 0x0F, 0x59);
10764   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10765   ins_pipe(pipe_slow);
10766 %}
10767 
10768 instruct mulF_imm(regF dst, immF src)
10769 %{ // Float multiply by an immF constant: dst = dst * src.
10770   match(Set dst (MulF dst src));
10771 
10772   format %{ "mulss   $dst, [$src]" %}
10773   ins_cost(150); // XXX
10774   opcode(0xF3, 0x0F, 0x59);
10775   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src)); // constant shown as [$src] in the format
10776   ins_pipe(pipe_slow);
10777 %}
10778 
10779 instruct mulD_reg(regD dst, regD src)
10780 %{ // Double multiply, register source: dst = dst * src.
10781   match(Set dst (MulD dst src)); // dst is both the first input and the result
10782 
10783   format %{ "mulsd   $dst, $src" %}
10784   ins_cost(150); // XXX
10785   opcode(0xF2, 0x0F, 0x59);
10786   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10787   ins_pipe(pipe_slow);
10788 %}
10789 
10790 instruct mulD_mem(regD dst, memory src)
10791 %{ // Double multiply with the second operand read from memory: dst = dst * [src].
10792   match(Set dst (MulD dst (LoadD src))); // the LoadD is folded into the multiply's memory operand
10793 
10794   format %{ "mulsd   $dst, $src" %}
10795   ins_cost(150); // XXX
10796   opcode(0xF2, 0x0F, 0x59);
10797   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10798   ins_pipe(pipe_slow);
10799 %}
10800 
10801 instruct mulD_imm(regD dst, immD src)
10802 %{ // Double multiply by an immD constant: dst = dst * src.
10803   match(Set dst (MulD dst src));
10804 
10805   format %{ "mulsd   $dst, [$src]" %}
10806   ins_cost(150); // XXX
10807   opcode(0xF2, 0x0F, 0x59);
10808   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src)); // constant shown as [$src] in the format
10809   ins_pipe(pipe_slow);
10810 %}
10811 
10812 instruct divF_reg(regF dst, regF src)
10813 %{ // Float divide, register source: dst = dst / src.
10814   match(Set dst (DivF dst src)); // dst is both the dividend and the result
10815 
10816   format %{ "divss   $dst, $src" %}
10817   ins_cost(150); // XXX
10818   opcode(0xF3, 0x0F, 0x5E);
10819   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10820   ins_pipe(pipe_slow);
10821 %}
10822 
10823 instruct divF_mem(regF dst, memory src)
10824 %{ // Float divide with the divisor read from memory: dst = dst / [src].
10825   match(Set dst (DivF dst (LoadF src))); // the LoadF is folded into the divide's memory operand
10826 
10827   format %{ "divss   $dst, $src" %}
10828   ins_cost(150); // XXX
10829   opcode(0xF3, 0x0F, 0x5E);
10830   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10831   ins_pipe(pipe_slow);
10832 %}
10833 
10834 instruct divF_imm(regF dst, immF src)
10835 %{ // Float divide by an immF constant: dst = dst / src.
10836   match(Set dst (DivF dst src));
10837 
10838   format %{ "divss   $dst, [$src]" %}
10839   ins_cost(150); // XXX
10840   opcode(0xF3, 0x0F, 0x5E);
10841   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src)); // constant shown as [$src] in the format
10842   ins_pipe(pipe_slow);
10843 %}
10844 
10845 instruct divD_reg(regD dst, regD src)
10846 %{ // Double divide, register source: dst = dst / src.
10847   match(Set dst (DivD dst src)); // dst is both the dividend and the result
10848 
10849   format %{ "divsd   $dst, $src" %}
10850   ins_cost(150); // XXX
10851   opcode(0xF2, 0x0F, 0x5E);
10852   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10853   ins_pipe(pipe_slow);
10854 %}
10855 
10856 instruct divD_mem(regD dst, memory src)
10857 %{ // Double divide with the divisor read from memory: dst = dst / [src].
10858   match(Set dst (DivD dst (LoadD src))); // the LoadD is folded into the divide's memory operand
10859 
10860   format %{ "divsd   $dst, $src" %}
10861   ins_cost(150); // XXX
10862   opcode(0xF2, 0x0F, 0x5E);
10863   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10864   ins_pipe(pipe_slow);
10865 %}
10866 
10867 instruct divD_imm(regD dst, immD src)
10868 %{ // Double divide by an immD constant: dst = dst / src.
10869   match(Set dst (DivD dst src));
10870 
10871   format %{ "divsd   $dst, [$src]" %}
10872   ins_cost(150); // XXX
10873   opcode(0xF2, 0x0F, 0x5E);
10874   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src)); // constant shown as [$src] in the format
10875   ins_pipe(pipe_slow);
10876 %}
10877 
10878 instruct sqrtF_reg(regF dst, regF src)
10879 %{ // Float square root of a register: the whole widen/sqrt/narrow chain becomes one sqrtss.
10880   match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); // float->double, SqrtD, double->float matched as a unit
10881 
10882   format %{ "sqrtss  $dst, $src" %}
10883   ins_cost(150); // XXX
10884   opcode(0xF3, 0x0F, 0x51);
10885   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10886   ins_pipe(pipe_slow);
10887 %}
10888 
10889 instruct sqrtF_mem(regF dst, memory src)
10890 %{ // Float square root of a value loaded from memory, emitted as one sqrtss.
10891   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); // load, widen, SqrtD and narrow matched as a unit
10892 
10893   format %{ "sqrtss  $dst, $src" %}
10894   ins_cost(150); // XXX
10895   opcode(0xF3, 0x0F, 0x51);
10896   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10897   ins_pipe(pipe_slow);
10898 %}
10899 
10900 instruct sqrtF_imm(regF dst, immF src)
10901 %{ // Float square root of an immF constant, emitted as one sqrtss.
10902   match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); // widen, SqrtD and narrow matched as a unit
10903 
10904   format %{ "sqrtss  $dst, [$src]" %}
10905   ins_cost(150); // XXX
10906   opcode(0xF3, 0x0F, 0x51);
10907   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immF(dst, src)); // constant shown as [$src] in the format
10908   ins_pipe(pipe_slow);
10909 %}
10910 
10911 instruct sqrtD_reg(regD dst, regD src)
10912 %{ // Double square root of a register: dst = sqrt(src).
10913   match(Set dst (SqrtD src));
10914 
10915   format %{ "sqrtsd  $dst, $src" %}
10916   ins_cost(150); // XXX
10917   opcode(0xF2, 0x0F, 0x51);
10918   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10919   ins_pipe(pipe_slow);
10920 %}
10921 
10922 instruct sqrtD_mem(regD dst, memory src)
10923 %{ // Double square root of a value loaded from memory: dst = sqrt([src]).
10924   match(Set dst (SqrtD (LoadD src))); // the LoadD is folded into the sqrt's memory operand
10925 
10926   format %{ "sqrtsd  $dst, $src" %}
10927   ins_cost(150); // XXX
10928   opcode(0xF2, 0x0F, 0x51);
10929   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10930   ins_pipe(pipe_slow);
10931 %}
10932 
10933 instruct sqrtD_imm(regD dst, immD src)
10934 %{ // Double square root of an immD constant: dst = sqrt(src).
10935   match(Set dst (SqrtD src));
10936 
10937   format %{ "sqrtsd  $dst, [$src]" %}
10938   ins_cost(150); // XXX
10939   opcode(0xF2, 0x0F, 0x51);
10940   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, load_immD(dst, src)); // constant shown as [$src] in the format
10941   ins_pipe(pipe_slow);
10942 %}
10943 
10944 instruct absF_reg(regF dst)
10945 %{ // Float absolute value, computed in place on dst.
10946   match(Set dst (AbsF dst));
10947 
10948   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10949   ins_encode(absF_encoding(dst)); // enc_class defined outside this chunk
10950   ins_pipe(pipe_slow);
10951 %}
10952 
10953 instruct absD_reg(regD dst)
10954 %{ // Double absolute value, computed in place on dst.
10955   match(Set dst (AbsD dst));
10956 
10957   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10958             "# abs double by sign masking" %}
10959   ins_encode(absD_encoding(dst)); // enc_class defined outside this chunk
10960   ins_pipe(pipe_slow);
10961 %}
10962 
10963 instruct negF_reg(regF dst)
10964 %{ // Float negation, computed in place on dst.
10965   match(Set dst (NegF dst));
10966 
10967   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10968   ins_encode(negF_encoding(dst)); // enc_class defined outside this chunk
10969   ins_pipe(pipe_slow);
10970 %}
10971 
10972 instruct negD_reg(regD dst)
10973 %{ // Double negation, computed in place on dst.
10974   match(Set dst (NegD dst));
10975 
10976   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10977             "# neg double by sign flipping" %}
10978   ins_encode(negD_encoding(dst)); // enc_class defined outside this chunk
10979   ins_pipe(pipe_slow);
10980 %}
10981 
10982 // -----------Trig and Transcendental Instructions-----------------------------
10983 instruct cosD_reg(regD dst) %{ // Double cosine, computed in place on dst.
10984   match(Set dst (CosD dst));
10985   // NOTE(review): Push_SrcXD/Push_ResultXD are defined elsewhere; presumably they move dst through the x87 stack - confirm.
10986   format %{ "dcos   $dst\n\t" %}
10987   opcode(0xD9, 0xFF); // x87 fcos
10988   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10989   ins_pipe( pipe_slow );
10990 %}
10991 
10992 instruct sinD_reg(regD dst) %{ // Double sine, computed in place on dst.
10993   match(Set dst (SinD dst));
10994   // NOTE(review): Push_SrcXD/Push_ResultXD are defined elsewhere; presumably they move dst through the x87 stack - confirm.
10995   format %{ "dsin   $dst\n\t" %}
10996   opcode(0xD9, 0xFE); // x87 fsin
10997   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10998   ins_pipe( pipe_slow );
10999 %}
11000 
11001 instruct tanD_reg(regD dst) %{ // Double tangent, computed in place on dst.
11002   match(Set dst (TanD dst));
11003   // No opcode() here: the x87 opcode bytes are emitted directly with Opcode(...) in the encoding.
11004   format %{ "dtan   $dst\n\t" %}
11005   ins_encode( Push_SrcXD(dst),
11006               Opcode(0xD9), Opcode(0xF2),   //fptan
11007               Opcode(0xDD), Opcode(0xD8),   //fstp st (fptan also pushes 1.0; this pops it)
11008               Push_ResultXD(dst) );
11009   ins_pipe( pipe_slow );
11010 %}
11011 
11012 instruct log10D_reg(regD dst) %{ // Double base-10 logarithm, computed in place on dst.
11013   // The source and result Double operands in XMM registers
11014   match(Set dst (Log10D dst));
11015   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
11016   // fyl2x        ; compute log_10(2) * log_2(x)
11017   format %{ "fldlg2\t\t\t#Log10\n\t"
11018             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
11019          %}
11020    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
11021               Push_SrcXD(dst),              // source is encoded after the constant load
11022               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11023               Push_ResultXD(dst));
11024 
11025   ins_pipe( pipe_slow );
11026 %}
11027 
11028 instruct logD_reg(regD dst) %{ // Double natural logarithm, computed in place on dst.
11029   // The source and result Double operands in XMM registers
11030   match(Set dst (LogD dst));
11031   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
11032   // fyl2x        ; compute log_e(2) * log_2(x)
11033   format %{ "fldln2\t\t\t#Log_e\n\t"
11034             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
11035          %}
11036   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
11037               Push_SrcXD(dst),              // source is encoded after the constant load
11038               Opcode(0xD9), Opcode(0xF1),   // fyl2x
11039               Push_ResultXD(dst));
11040   ins_pipe( pipe_slow );
11041 %}
11042 
11043 
11044 
11045 //----------Arithmetic Conversion Instructions---------------------------------
11046 
11047 instruct roundFloat_nop(regF dst)
11048 %{ // RoundFloat on a regF is a no-op: zero cost and nothing emitted.
11049   match(Set dst (RoundFloat dst));
11050 
11051   ins_cost(0);
11052   ins_encode(); // intentionally empty
11053   ins_pipe(empty);
11054 %}
11055 
11056 instruct roundDouble_nop(regD dst)
11057 %{ // RoundDouble on a regD is a no-op: zero cost and nothing emitted.
11058   match(Set dst (RoundDouble dst));
11059 
11060   ins_cost(0);
11061   ins_encode(); // intentionally empty
11062   ins_pipe(empty);
11063 %}
11064 
11065 instruct convF2D_reg_reg(regD dst, regF src)
11066 %{ // Float to double conversion, register source.
11067   match(Set dst (ConvF2D src));
11068 
11069   format %{ "cvtss2sd $dst, $src" %}
11070   opcode(0xF3, 0x0F, 0x5A);
11071   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11072   ins_pipe(pipe_slow); // XXX
11073 %}
11074 
11075 instruct convF2D_reg_mem(regD dst, memory src)
11076 %{ // Float to double conversion of a value loaded from memory.
11077   match(Set dst (ConvF2D (LoadF src))); // the LoadF is folded into the convert's memory operand
11078 
11079   format %{ "cvtss2sd $dst, $src" %}
11080   opcode(0xF3, 0x0F, 0x5A);
11081   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11082   ins_pipe(pipe_slow); // XXX
11083 %}
11084 
11085 instruct convD2F_reg_reg(regF dst, regD src)
11086 %{ // Double to float conversion, register source.
11087   match(Set dst (ConvD2F src));
11088 
11089   format %{ "cvtsd2ss $dst, $src" %}
11090   opcode(0xF2, 0x0F, 0x5A);
11091   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11092   ins_pipe(pipe_slow); // XXX
11093 %}
11094 
11095 instruct convD2F_reg_mem(regF dst, memory src)
11096 %{ // Double to float conversion of a value loaded from memory.
11097   match(Set dst (ConvD2F (LoadD src))); // the LoadD is folded into the convert's memory operand
11098 
11099   format %{ "cvtsd2ss $dst, $src" %}
11100   opcode(0xF2, 0x0F, 0x5A);
11101   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11102   ins_pipe(pipe_slow); // XXX
11103 %}
11104 
11105 // XXX do mem variants
11106 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11107 %{ // Float to int conversion (cvttss2si) with a fix-up path for the 0x80000000 result.
11108   match(Set dst (ConvF2I src));
11109   effect(KILL cr); // the flags register is clobbered (the format shows a cmpl)
11110   // Per the format: if the result equals 0x80000000, src is stored on the stack, f2i_fixup is called, and dst is popped.
11111   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11112             "cmpl    $dst, #0x80000000\n\t"
11113             "jne,s   done\n\t"
11114             "subq    rsp, #8\n\t"
11115             "movss   [rsp], $src\n\t"
11116             "call    f2i_fixup\n\t"
11117             "popq    $dst\n"
11118     "done:   "%}
11119   opcode(0xF3, 0x0F, 0x2C);
11120   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11121              f2i_fixup(dst, src)); // enc_class defined outside this chunk
11122   ins_pipe(pipe_slow);
11123 %}
11124 
11125 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11126 %{ // Float to long conversion (cvttss2siq) with a fix-up path for the 0x8000000000000000 result.
11127   match(Set dst (ConvF2L src));
11128   effect(KILL cr); // the flags register is clobbered (the format shows a cmpq)
11129   // Per the format: if the result equals 0x8000000000000000, src is stored on the stack, f2l_fixup is called, and dst is popped.
11130   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11131             "cmpq    $dst, [0x8000000000000000]\n\t"
11132             "jne,s   done\n\t"
11133             "subq    rsp, #8\n\t"
11134             "movss   [rsp], $src\n\t"
11135             "call    f2l_fixup\n\t"
11136             "popq    $dst\n"
11137     "done:   "%}
11138   opcode(0xF3, 0x0F, 0x2C);
11139   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), // _wide REX variant for the 64-bit result
11140              f2l_fixup(dst, src)); // enc_class defined outside this chunk
11141   ins_pipe(pipe_slow);
11142 %}
11143 
11144 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11145 %{ // Double to int conversion (cvttsd2si) with a fix-up path for the 0x80000000 result.
11146   match(Set dst (ConvD2I src));
11147   effect(KILL cr); // the flags register is clobbered (the format shows a cmpl)
11148   // Per the format: if the result equals 0x80000000, src is stored on the stack, d2i_fixup is called, and dst is popped.
11149   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11150             "cmpl    $dst, #0x80000000\n\t"
11151             "jne,s   done\n\t"
11152             "subq    rsp, #8\n\t"
11153             "movsd   [rsp], $src\n\t"
11154             "call    d2i_fixup\n\t"
11155             "popq    $dst\n"
11156     "done:   "%}
11157   opcode(0xF2, 0x0F, 0x2C);
11158   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11159              d2i_fixup(dst, src)); // enc_class defined outside this chunk
11160   ins_pipe(pipe_slow);
11161 %}
11162 
11163 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11164 %{ // Double to long conversion (cvttsd2siq) with a fix-up path for the 0x8000000000000000 result.
11165   match(Set dst (ConvD2L src));
11166   effect(KILL cr); // the flags register is clobbered (the format shows a cmpq)
11167   // Per the format: if the result equals 0x8000000000000000, src is stored on the stack, d2l_fixup is called, and dst is popped.
11168   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11169             "cmpq    $dst, [0x8000000000000000]\n\t"
11170             "jne,s   done\n\t"
11171             "subq    rsp, #8\n\t"
11172             "movsd   [rsp], $src\n\t"
11173             "call    d2l_fixup\n\t"
11174             "popq    $dst\n"
11175     "done:   "%}
11176   opcode(0xF2, 0x0F, 0x2C);
11177   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), // _wide REX variant for the 64-bit result
11178              d2l_fixup(dst, src)); // enc_class defined outside this chunk
11179   ins_pipe(pipe_slow);
11180 %}
11181 
11182 instruct convI2F_reg_reg(regF dst, rRegI src)
11183 %{ // Int to float conversion from a general register using cvtsi2ss.
11184   predicate(!UseXmmI2F); // only when UseXmmI2F is off; convXI2F_reg matches the same node when it is on
11185   match(Set dst (ConvI2F src));
11186 
11187   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11188   opcode(0xF3, 0x0F, 0x2A);
11189   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11190   ins_pipe(pipe_slow); // XXX
11191 %}
11192 
11193 instruct convI2F_reg_mem(regF dst, memory src)
11194 %{ // Int to float conversion of an int loaded from memory; no UseXmmI2F predicate on this form.
11195   match(Set dst (ConvI2F (LoadI src))); // the LoadI is folded into the convert's memory operand
11196 
11197   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11198   opcode(0xF3, 0x0F, 0x2A);
11199   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11200   ins_pipe(pipe_slow); // XXX
11201 %}
11202 
11203 instruct convI2D_reg_reg(regD dst, rRegI src)
11204 %{ // Int to double conversion from a general register using cvtsi2sd.
11205   predicate(!UseXmmI2D); // only when UseXmmI2D is off; convXI2D_reg matches the same node when it is on
11206   match(Set dst (ConvI2D src));
11207 
11208   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11209   opcode(0xF2, 0x0F, 0x2A);
11210   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11211   ins_pipe(pipe_slow); // XXX
11212 %}
11213 
11214 instruct convI2D_reg_mem(regD dst, memory src)
11215 %{ // Int to double conversion of an int loaded from memory; no UseXmmI2D predicate on this form.
11216   match(Set dst (ConvI2D (LoadI src))); // the LoadI is folded into the convert's memory operand
11217 
11218   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11219   opcode(0xF2, 0x0F, 0x2A);
11220   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11221   ins_pipe(pipe_slow); // XXX
11222 %}
11223 
11224 instruct convXI2F_reg(regF dst, rRegI src)
11225 %{ // Int to float conversion done inside the XMM register: movdl then cvtdq2ps.
11226   predicate(UseXmmI2F); // only when UseXmmI2F is on; convI2F_reg_reg covers the flag-off case
11227   match(Set dst (ConvI2F src));
11228 
11229   format %{ "movdl $dst, $src\n\t"
11230             "cvtdq2psl $dst, $dst\t# i2f" %}
11231   ins_encode %{
11232     __ movdl($dst$$XMMRegister, $src$$Register);       // copy the int into dst
11233     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place
11234   %}
11235   ins_pipe(pipe_slow); // XXX
11236 %}
11237 
11238 instruct convXI2D_reg(regD dst, rRegI src)
11239 %{ // Int to double conversion done inside the XMM register: movdl then cvtdq2pd.
11240   predicate(UseXmmI2D); // only when UseXmmI2D is on; convI2D_reg_reg covers the flag-off case
11241   match(Set dst (ConvI2D src));
11242 
11243   format %{ "movdl $dst, $src\n\t"
11244             "cvtdq2pdl $dst, $dst\t# i2d" %}
11245   ins_encode %{
11246     __ movdl($dst$$XMMRegister, $src$$Register);       // copy the int into dst
11247     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place
11248   %}
11249   ins_pipe(pipe_slow); // XXX
11250 %}
11251 
11252 instruct convL2F_reg_reg(regF dst, rRegL src)
11253 %{ // Long to float conversion from a general register.
11254   match(Set dst (ConvL2F src));
11255 
11256   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11257   opcode(0xF3, 0x0F, 0x2A);
11258   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); // _wide REX variant for the 64-bit source
11259   ins_pipe(pipe_slow); // XXX
11260 %}
11261 
11262 instruct convL2F_reg_mem(regF dst, memory src)
11263 %{ // Long to float conversion of a long loaded from memory.
11264   match(Set dst (ConvL2F (LoadL src))); // the LoadL is folded into the convert's memory operand
11265 
11266   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11267   opcode(0xF3, 0x0F, 0x2A);
11268   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); // _wide REX variant for the 64-bit source
11269   ins_pipe(pipe_slow); // XXX
11270 %}
11271 
11272 instruct convL2D_reg_reg(regD dst, rRegL src)
11273 %{ // Long to double conversion from a general register.
11274   match(Set dst (ConvL2D src));
11275 
11276   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11277   opcode(0xF2, 0x0F, 0x2A);
11278   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); // _wide REX variant for the 64-bit source
11279   ins_pipe(pipe_slow); // XXX
11280 %}
11281 
11282 instruct convL2D_reg_mem(regD dst, memory src)
11283 %{ // Long to double conversion of a long loaded from memory.
11284   match(Set dst (ConvL2D (LoadL src))); // the LoadL is folded into the convert's memory operand
11285 
11286   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11287   opcode(0xF2, 0x0F, 0x2A);
11288   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); // _wide REX variant for the 64-bit source
11289   ins_pipe(pipe_slow); // XXX
11290 %}
11291 
11292 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11293 %{ // Int to long conversion by sign extension (movslq).
11294   match(Set dst (ConvI2L src));
11295 
11296   ins_cost(125);
11297   format %{ "movslq  $dst, $src\t# i2l" %}
11298   ins_encode %{
11299     __ movslq($dst$$Register, $src$$Register); // emitted through the assembler rather than an opcode()/enc_class pair
11300   %}
11301   ins_pipe(ialu_reg_reg);
11302 %}
11303 
11304 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11305 // %{
11306 //   match(Set dst (ConvI2L src));
11307 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11308 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11309 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11310 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11311 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11312 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11313 
11314 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11315 //   ins_encode(enc_copy(dst, src));
11316 // //   opcode(0x63); // needs REX.W
11317 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11318 //   ins_pipe(ialu_reg_reg);
11319 // %}
11320 
11321 // Zero-extend convert int to long: (ConvI2L src) ANDed with an immL_32bits mask becomes a single movl.
11322 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11323 %{
11324   match(Set dst (AndL (ConvI2L src) mask)); // mask is matched but not referenced by the encoding
11325   // NOTE(review): immL_32bits is defined elsewhere; presumably the constant 0xFFFFFFFF - confirm.
11326   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11327   ins_encode(enc_copy(dst, src)); // enc_class defined outside this chunk
11328   ins_pipe(ialu_reg_reg);
11329 %}
11330 
11331 // Zero-extend convert int to long, memory source: load, ConvI2L and the immL_32bits mask become one movl load.
11332 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11333 %{
11334   match(Set dst (AndL (ConvI2L (LoadI src)) mask)); // mask is matched but not referenced by the encoding
11335   // NOTE(review): immL_32bits is defined elsewhere; presumably the constant 0xFFFFFFFF - confirm.
11336   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11337   opcode(0x8B);
11338   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11339   ins_pipe(ialu_reg_mem);
11340 %}
11341 
11342 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11343 %{ // AndL of a long with an immL_32bits mask, emitted as a 32-bit movl (see format).
11344   match(Set dst (AndL src mask)); // mask is matched but not referenced by the encoding
11345   // NOTE(review): uses enc_copy_always where convI2L_reg_reg_zex uses enc_copy; both are defined elsewhere - confirm the difference.
11346   format %{ "movl    $dst, $src\t# zero-extend long" %}
11347   ins_encode(enc_copy_always(dst, src));
11348   ins_pipe(ialu_reg_reg);
11349 %}
11350 
11351 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11352 %{ // Long to int conversion, shown in the format as a 32-bit movl of src into dst.
11353   match(Set dst (ConvL2I src));
11354 
11355   format %{ "movl    $dst, $src\t# l2i" %}
11356   ins_encode(enc_copy_always(dst, src)); // enc_class defined outside this chunk
11357   ins_pipe(ialu_reg_reg);
11358 %}
11359 
11360 
11361 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ // MoveF2I: float stack slot -> int register.
11362   match(Set dst (MoveF2I src));
11363   effect(DEF dst, USE src);
11364   // Emitted as a plain 32-bit movl load from the stack slot (opcode 8B).
11365   ins_cost(125);
11366   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11367   opcode(0x8B);
11368   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11369   ins_pipe(ialu_reg_mem);
11370 %}
11371 
11372 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ // MoveI2F: int stack slot -> float register.
11373   match(Set dst (MoveI2F src));
11374   effect(DEF dst, USE src);
11375   // Emitted as a movss load from the stack slot (F3 0F 10).
11376   ins_cost(125);
11377   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11378   opcode(0xF3, 0x0F, 0x10);
11379   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11380   ins_pipe(pipe_slow);
11381 %}
11382 
11383 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{ // MoveD2L: double stack slot -> long register.
11384   match(Set dst (MoveD2L src));
11385   effect(DEF dst, USE src);
11386   // Emitted as a 64-bit movq load from the stack slot (opcode 8B with the _wide REX variant).
11387   ins_cost(125);
11388   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11389   opcode(0x8B);
11390   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11391   ins_pipe(ialu_reg_mem);
11392 %}
11393 
11394 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{ // MoveL2D: long stack slot -> double register, via movlpd.
11395   predicate(!UseXmmLoadAndClearUpper); // only when UseXmmLoadAndClearUpper is off; MoveL2D_stack_reg covers the flag-on case
11396   match(Set dst (MoveL2D src));
11397   effect(DEF dst, USE src);
11398   // Note: the format tag below reads MoveL2D_stack_reg, the same tag the movsd variant uses.
11399   ins_cost(125);
11400   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11401   opcode(0x66, 0x0F, 0x12);
11402   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11403   ins_pipe(pipe_slow);
11404 %}
11405 
11406 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ // MoveL2D: long stack slot -> double register, via movsd.
11407   predicate(UseXmmLoadAndClearUpper); // only when UseXmmLoadAndClearUpper is on; the _partial variant covers the flag-off case
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(125);
11412   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11413   opcode(0xF2, 0x0F, 0x10);
11414   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11415   ins_pipe(pipe_slow);
11416 %}
11417 
11418 
11419 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ // MoveF2I: float register -> int stack slot.
11420   match(Set dst (MoveF2I src));
11421   effect(DEF dst, USE src);
11422   // Emitted as a movss store (F3 0F 11); the encoding passes (src, dst) because the register is the reg operand.
11423   ins_cost(95); // XXX
11424   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11425   opcode(0xF3, 0x0F, 0x11);
11426   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11427   ins_pipe(pipe_slow);
11428 %}
11429 
11430 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ // MoveI2F: int register -> float stack slot.
11431   match(Set dst (MoveI2F src));
11432   effect(DEF dst, USE src);
11433   // Emitted as a 32-bit movl store (opcode 89); the encoding passes (src, dst) because the register is the reg operand.
11434   ins_cost(100);
11435   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11436   opcode(0x89);
11437   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11438   ins_pipe( ialu_mem_reg );
11439 %}
11440 
11441 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ // MoveD2L: double register -> long stack slot.
11442   match(Set dst (MoveD2L src));
11443   effect(DEF dst, USE src);
11444   // Emitted as a movsd store (F2 0F 11); the encoding passes (src, dst) because the register is the reg operand.
11445   ins_cost(95); // XXX
11446   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11447   opcode(0xF2, 0x0F, 0x11);
11448   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11449   ins_pipe(pipe_slow);
11450 %}
11451 
11452 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{ // MoveL2D: long register -> double stack slot.
11453   match(Set dst (MoveL2D src));
11454   effect(DEF dst, USE src);
11455   // Emitted as a 64-bit movq store (opcode 89 with the _wide REX variant); the encoding passes (src, dst).
11456   ins_cost(100);
11457   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11458   opcode(0x89);
11459   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11460   ins_pipe(ialu_mem_reg);
11461 %}
11462 
11463 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{ // MoveF2I: float register -> int register, no stack round trip.
11464   match(Set dst (MoveF2I src));
11465   effect(DEF dst, USE src);
11466   ins_cost(85); // lower than the stack-based MoveF2I forms in this file (95 and 125)
11467   format %{ "movd    $dst,$src\t# MoveF2I" %}
11468   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{ // MoveD2L: double register -> long register, no stack round trip.
11473   match(Set dst (MoveD2L src));
11474   effect(DEF dst, USE src);
11475   ins_cost(85); // lower than the stack-based MoveD2L forms in this file (95 and 125)
11476   format %{ "movd    $dst,$src\t# MoveD2L" %}
11477   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %} // format prints movd; the assembler call is movdq
11478   ins_pipe( pipe_slow );
11479 %}
11480 
11481 // The next instructions have long latency and use Int unit. Set high cost.
11482 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{ // MoveI2F: int register -> float register, no stack round trip.
11483   match(Set dst (MoveI2F src));
11484   effect(DEF dst, USE src);
11485   ins_cost(300); // deliberately higher than the stack-based MoveI2F forms in this file (100 and 125)
11486   format %{ "movd    $dst,$src\t# MoveI2F" %}
11487   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11488   ins_pipe( pipe_slow );
11489 %}
11490 
11491 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ // MoveL2D: long register -> double register, no stack round trip.
11492   match(Set dst (MoveL2D src));
11493   effect(DEF dst, USE src);
11494   ins_cost(300); // deliberately higher than the stack-based MoveL2D forms in this file (100 and 125)
11495   format %{ "movd    $dst,$src\t# MoveL2D" %}
11496   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %} // format prints movd; the assembler call is movdq
11497   ins_pipe( pipe_slow );
11498 %}
11499 
11500 // Replicate scalar to packed byte (1 byte) values in xmm; the scalar is already in a regD
11501 instruct Repl8B_reg(regD dst, regD src) %{
11502   match(Set dst (Replicate8B src));
11503   format %{ "MOVDQA  $dst,$src\n\t"
11504             "PUNPCKLBW $dst,$dst\n\t"
11505             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11506   ins_encode( pshufd_8x8(dst, src)); // enc_class defined outside this chunk; presumably emits the three instructions in the format
11507   ins_pipe( pipe_slow );
11508 %}
11509 
11510 // Replicate scalar to packed byte (1 byte) values in xmm; the scalar comes from an int register
11511 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11512   match(Set dst (Replicate8B src));
11513   format %{ "MOVD    $dst,$src\n\t"
11514             "PUNPCKLBW $dst,$dst\n\t"
11515             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11516   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); // mov_i2x first (MOVD in the format), then the shuffle runs on dst itself
11517   ins_pipe( pipe_slow );
11518 %}
11519 
11520 // Replicate scalar zero to packed byte (1 byte) values in xmm; done by XORing dst with itself
11521 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11522   match(Set dst (Replicate8B zero)); // zero is matched but not referenced by the encoding
11523   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11524   ins_encode( pxor(dst, dst));
11525   ins_pipe( fpu_reg_reg );
11526 %}
11527 
11528 // Replicate scalar to packed short (2 byte) values in xmm; the scalar is already in a regD
11529 instruct Repl4S_reg(regD dst, regD src) %{
11530   match(Set dst (Replicate4S src));
11531   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11532   ins_encode( pshufd_4x16(dst, src)); // enc_class defined outside this chunk
11533   ins_pipe( fpu_reg_reg );
11534 %}
11535 
11536 // Replicate scalar to packed short (2 byte) values in xmm; the scalar comes from an int register
11537 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11538   match(Set dst (Replicate4S src));
11539   format %{ "MOVD    $dst,$src\n\t"
11540             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11541   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // mov_i2x first (MOVD in the format), then the shuffle runs on dst itself
11542   ins_pipe( fpu_reg_reg );
11543 %}
11544 
11545 // Replicate scalar zero to packed short (2 byte) values in xmm; done by XORing dst with itself
11546 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11547   match(Set dst (Replicate4S zero)); // zero is matched but not referenced by the encoding
11548   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11549   ins_encode( pxor(dst, dst));
11550   ins_pipe( fpu_reg_reg );
11551 %}
11552 
11553 // Replicate scalar to packed char (2 byte) values in xmm; the scalar is already in a regD
11554 instruct Repl4C_reg(regD dst, regD src) %{
11555   match(Set dst (Replicate4C src));
11556   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11557   ins_encode( pshufd_4x16(dst, src)); // same enc_class as the Replicate4S rules
11558   ins_pipe( fpu_reg_reg );
11559 %}
11560 
11561 // Replicate scalar to packed char (2 byte) values in xmm; the scalar comes from an int register
11562 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11563   match(Set dst (Replicate4C src));
11564   format %{ "MOVD    $dst,$src\n\t"
11565             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11566   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // mov_i2x first (MOVD in the format), then the shuffle runs on dst itself
11567   ins_pipe( fpu_reg_reg );
11568 %}
11569 
11570 // Replicate scalar zero to packed char (2 byte) values in xmm; done by XORing dst with itself
11571 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11572   match(Set dst (Replicate4C zero)); // zero is matched but not referenced by the encoding
11573   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11574   ins_encode( pxor(dst, dst));
11575   ins_pipe( fpu_reg_reg );
11576 %}
11577 
11578 // Replicate scalar to packed integer (4 byte) values in xmm; the scalar is already in a regD
11579 instruct Repl2I_reg(regD dst, regD src) %{
11580   match(Set dst (Replicate2I src));
11581   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11582   ins_encode( pshufd(dst, src, 0x00)); // shuffle immediate 0x00, as shown in the format
11583   ins_pipe( fpu_reg_reg );
11584 %}
11585 
11586 // Replicate scalar to packed integer (4 byte) values in xmm; the scalar comes from an int register
11587 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11588   match(Set dst (Replicate2I src));
11589   format %{ "MOVD   $dst,$src\n\t"
11590             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11591   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); // mov_i2x first (MOVD in the format), then the shuffle runs on dst itself
11592   ins_pipe( fpu_reg_reg );
11593 %}
11594 
11595 // Replicate scalar zero to packed integer (4 byte) values in xmm (zero-constant form of Replicate2I)
11596 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11597   match(Set dst (Replicate2I zero));
11598   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11599   ins_encode( pxor(dst, dst));  // dst is both operands of the pxor encoding; the zero operand is never passed to it
11600   ins_pipe( fpu_reg_reg );
11601 %}
11602 
11603 // Replicate scalar to packed single precision floating point values in xmm (source is a regD operand)
11604 instruct Repl2F_reg(regD dst, regD src) %{
11605   match(Set dst (Replicate2F src));
11606   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11607   ins_encode( pshufd(dst, src, 0xe0));  // shuffle immediate is 0xe0 here, unlike the 0x00 used by the Replicate2I rules
11608   ins_pipe( fpu_reg_reg );
11609 %}
11610 
11611 // Replicate scalar to packed single precision floating point values in xmm (source is a regF operand)
11612 instruct Repl2F_regF(regD dst, regF src) %{
11613   match(Set dst (Replicate2F src));
11614   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11615   ins_encode( pshufd(dst, src, 0xe0));  // same encoding and immediate as Repl2F_reg; only the source operand class differs
11616   ins_pipe( fpu_reg_reg );
11617 %}
11618 
11619 // Replicate scalar zero to packed single precision floating point values in xmm (zero-constant form of Replicate2F)
11620 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11621   match(Set dst (Replicate2F zero));
11622   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11623   ins_encode( pxor(dst, dst));  // dst is both operands of the pxor encoding; the zero operand is never passed to it
11624   ins_pipe( fpu_reg_reg );
11625 %}
11626 
11627 
11628 // =======================================================================
11629 // Fast clearing of an array: matches ClearArray and encodes an xor of rax followed by a rep-prefixed stosq
11630 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11631                   rFlagsReg cr)
11632 %{
11633   match(Set dummy (ClearArray cnt base));
11634   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);  // cnt and base are read and clobbered; zero and the flags are clobbered only
11635 
11636   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11637             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11638   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11639              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11640   ins_pipe(pipe_slow);
11641 %}
11642 
11643 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11644                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11645 %{  // String compare: matches StrComp over two (pointer, count) pairs and delegates to the string_compare assembler routine
11646   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11647   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11648 
11649   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11650   ins_encode %{
11651     __ string_compare($str1$$Register, $str2$$Register,
11652                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11653                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11654   %}
11655   ins_pipe( pipe_slow );  // all four inputs are USE_KILL, so none of them survives this instruction
11656 %}
11657 
11658 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11659                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11660 %{  // String indexOf: matches StrIndexOf and delegates to the string_indexof assembler routine
11661   predicate(UseSSE42Intrinsics);  // rule is only selectable when the UseSSE42Intrinsics flag is set
11662   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11663   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11664 
11665   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11666   ins_encode %{
11667     __ string_indexof($str1$$Register, $str2$$Register,
11668                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11669                       $tmp1$$XMMRegister, $tmp2$$Register);
11670   %}
11671   ins_pipe( pipe_slow );  // tmp1 is an xmm TEMP while tmp2 is a general register that is only KILLed
11672 %}
11673 
11674 // Fast string equals: matches StrEquals and calls char_arrays_equals with its first argument false
11675 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11676                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11677 %{
11678   match(Set result (StrEquals (Binary str1 str2) cnt));
11679   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11680 
11681   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11682   ins_encode %{
11683     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11684                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11685                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11686   %}
11687   ins_pipe( pipe_slow );  // the explicit cnt operand is passed in the slot where array_equals passes a scratch register
11688 %}
11689 
11690 // Fast array equals: matches AryEq and calls char_arrays_equals with its first argument true
11691 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11692                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11693 %{
11694   match(Set result (AryEq ary1 ary2));  // no count operand in the match rule, unlike string_equals
11695   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11696   //ins_cost(300);
11697 
11698   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11699   ins_encode %{
11700     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11701                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11702                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11703   %}
11704   ins_pipe( pipe_slow );  // tmp3 and tmp4 are killed scratch registers handed to the routine
11705 %}
11706 
11707 //----------Control Flow Instructions------------------------------------------
11708 // Signed compare Instructions
11709 
11710 // XXX more variants!!
11711 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11712 %{  // Sets cr from CmpI of two rRegI operands (printed as cmpl)
11713   match(Set cr (CmpI op1 op2));
11714   effect(DEF cr, USE op1, USE op2);  // explicit effect list; the other compare rules in this section give only a match rule
11715 
11716   format %{ "cmpl    $op1, $op2" %}
11717   opcode(0x3B);  /* Opcode 3B /r */
11718   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11719   ins_pipe(ialu_cr_reg_reg);
11720 %}
11721 
11722 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11723 %{  // Sets cr from CmpI of an rRegI operand with an immI constant
11724   match(Set cr (CmpI op1 op2));
11725 
11726   format %{ "cmpl    $op1, $op2" %}
11727   opcode(0x81, 0x07); /* Opcode 81 /7 */
11728   ins_encode(OpcSErm(op1, op2), Con8or32(op2));  // presumably picks an 8- or 32-bit immediate form, per the encoding name -- confirm in the encode section
11729   ins_pipe(ialu_cr_reg_imm);
11730 %}
11731 
11732 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11733 %{  // Sets cr from CmpI of an rRegI operand with a value loaded from memory
11734   match(Set cr (CmpI op1 (LoadI op2)));  // the LoadI is part of the matched tree, so the load is folded into the compare
11735 
11736   ins_cost(500); // XXX
11737   format %{ "cmpl    $op1, $op2" %}
11738   opcode(0x3B); /* Opcode 3B /r */
11739   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11740   ins_pipe(ialu_cr_reg_mem);
11741 %}
11742 
11743 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11744 %{  // CmpI of a register against the immI0 operand, encoded as a test of src with itself
11745   match(Set cr (CmpI src zero));
11746 
11747   format %{ "testl   $src, $src" %}
11748   opcode(0x85);
11749   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));  // src fills both register slots; the zero operand is not encoded
11750   ins_pipe(ialu_cr_reg_imm);
11751 %}
11752 
11753 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11754 %{  // CmpI of (AndI src con) against zero, encoded as a single test of src with the constant
11755   match(Set cr (CmpI (AndI src con) zero));  // only cr is Set, so the AndI value gets no destination register
11756 
11757   format %{ "testl   $src, $con" %}
11758   opcode(0xF7, 0x00);
11759   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11760   ins_pipe(ialu_cr_reg_imm);
11761 %}
11762 
11763 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11764 %{  // CmpI of (AndI src (LoadI mem)) against zero, encoded as a test of src with the memory operand
11765   match(Set cr (CmpI (AndI src (LoadI mem)) zero));  // both the AndI and the LoadI are folded into the one instruction
11766 
11767   format %{ "testl   $src, $mem" %}
11768   opcode(0x85);
11769   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11770   ins_pipe(ialu_cr_reg_mem);
11771 %}
11772 
11773 // Unsigned compare Instructions; really, same as signed except they
11774 // produce an rFlagsRegU instead of rFlagsReg.
11775 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11776 %{  // Sets the unsigned flags operand from CmpU of two rRegI operands
11777   match(Set cr (CmpU op1 op2));
11778 
11779   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11780   opcode(0x3B); /* Opcode 3B /r */
11781   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));  // same opcode and encoding classes as compI_rReg
11782   ins_pipe(ialu_cr_reg_reg);
11783 %}
11784 
11785 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11786 %{  // Sets the unsigned flags operand from CmpU of an rRegI operand with an immI constant
11787   match(Set cr (CmpU op1 op2));
11788 
11789   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11790   opcode(0x81,0x07); /* Opcode 81 /7 */
11791   ins_encode(OpcSErm(op1, op2), Con8or32(op2));  // same opcode and encoding classes as compI_rReg_imm
11792   ins_pipe(ialu_cr_reg_imm);
11793 %}
11794 
11795 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11796 %{  // Sets the unsigned flags operand from CmpU of an rRegI operand with a value loaded from memory
11797   match(Set cr (CmpU op1 (LoadI op2)));  // the LoadI is part of the matched tree, so the load is folded into the compare
11798 
11799   ins_cost(500); // XXX
11800   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11801   opcode(0x3B); /* Opcode 3B /r */
11802   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11803   ins_pipe(ialu_cr_reg_mem);
11804 %}
11805 
11806 // // // Cisc-spilled version of cmpU_rReg
11807 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11808 // //%{
11809 // //  match(Set cr (CmpU (LoadI op1) op2));
11810 // //
11811 // //  format %{ "CMPu   $op1,$op2" %}
11812 // //  ins_cost(500);
11813 // //  opcode(0x39);  /* Opcode 39 /r */
11814 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11815 // //%}
11816 
11817 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11818 %{  // CmpU of a register against the immI0 operand, encoded as a test of src with itself
11819   match(Set cr (CmpU src zero));
11820 
11821   format %{ "testl  $src, $src\t# unsigned" %}
11822   opcode(0x85);
11823   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));  // src fills both register slots; the zero operand is not encoded
11824   ins_pipe(ialu_cr_reg_imm);
11825 %}
11826 
11827 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11828 %{  // Sets the unsigned flags operand from CmpP of two pointer registers (printed as cmpq)
11829   match(Set cr (CmpP op1 op2));
11830 
11831   format %{ "cmpq    $op1, $op2\t# ptr" %}
11832   opcode(0x3B); /* Opcode 3B /r */
11833   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));  // uses the _wide REX encoding class, unlike the int compares
11834   ins_pipe(ialu_cr_reg_reg);
11835 %}
11836 
11837 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11838 %{  // Sets the unsigned flags operand from CmpP of a pointer register with a pointer loaded from memory
11839   match(Set cr (CmpP op1 (LoadP op2)));  // same match tree as compP_mem_rReg, which adds a predicate and has no ins_cost
11840 
11841   ins_cost(500); // XXX
11842   format %{ "cmpq    $op1, $op2\t# ptr" %}
11843   opcode(0x3B); /* Opcode 3B /r */
11844   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11845   ins_pipe(ialu_cr_reg_mem);
11846 %}
11847 
11848 // // // Cisc-spilled version of cmpP_rReg
11849 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11850 // //%{
11851 // //  match(Set cr (CmpP (LoadP op1) op2));
11852 // //
11853 // //  format %{ "CMPu   $op1,$op2" %}
11854 // //  ins_cost(500);
11855 // //  opcode(0x39);  /* Opcode 39 /r */
11856 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11857 // //%}
11858 
11859 // XXX this is generalized by compP_rReg_mem???
11860 // Compare raw pointer (used in out-of-heap check).
11861 // Only works because non-oop pointers must be raw pointers
11862 // and raw pointers have no anti-dependencies.
11863 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11864 %{  // Despite the name, the operand order is register then memory, exactly as in compP_rReg_mem
11865   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());  // selectable only when n->in(2)->in(2) does not have an oop-pointer bottom type
11866   match(Set cr (CmpP op1 (LoadP op2)));
11867 
11868   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11869   opcode(0x3B); /* Opcode 3B /r */
11870   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));  // identical encoding to compP_rReg_mem; no ins_cost is given here
11871   ins_pipe(ialu_cr_reg_mem);
11872 %}
11873 
11874 // This will generate a signed flags result. This should be OK since
11875 // any compare to a zero should be eq/neq.
11876 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11877 %{  // CmpP of a pointer register against the immP0 operand, encoded as a test of src with itself
11878   match(Set cr (CmpP src zero));
11879 
11880   format %{ "testq   $src, $src\t# ptr" %}
11881   opcode(0x85);
11882   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));  // src fills both register slots; the zero operand is not encoded
11883   ins_pipe(ialu_cr_reg_imm);
11884 %}
11885 
11886 // This will generate a signed flags result. This should be OK since
11887 // any compare to a zero should be eq/neq.
11888 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11889 %{  // CmpP of a pointer loaded from memory against the immP0 operand, encoded as a test of memory with an immediate
11890   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));  // logical complement of the testP_mem_reg0 predicate
11891   match(Set cr (CmpP (LoadP op) zero));
11892 
11893   ins_cost(500); // XXX
11894   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11895   opcode(0xF7); /* Opcode F7 /0 */
11896   ins_encode(REX_mem_wide(op),
11897              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));  // a 32-bit constant is emitted although the format prints a 64-bit mask
11898   ins_pipe(ialu_cr_reg_imm);
11899 %}
11900 
11901 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11902 %{  // CmpP of a pointer loaded from memory against the immP0 operand, encoded as a compare of r12 with memory
11903   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));  // logical complement of the testP_mem predicate
11904   match(Set cr (CmpP (LoadP mem) zero));
11905 
11906   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11907   ins_encode %{
11908     __ cmpq(r12, $mem$$Address);
11909   %}
11910   ins_pipe(ialu_cr_reg_mem);  // per the format string, r12 holds the heap base, which is zero when the predicate holds
11911 %}
11912 
11913 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11914 %{  // Sets the unsigned flags operand from CmpN of two rRegN operands via the assembler cmpl
11915   match(Set cr (CmpN op1 op2));
11916 
11917   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11918   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11919   ins_pipe(ialu_cr_reg_reg);  // encoded through an assembler call rather than opcode and encoding classes
11920 %}
11921 
11922 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11923 %{  // Sets the unsigned flags operand from CmpN of an rRegN operand with a narrow value loaded from memory
11924   match(Set cr (CmpN src (LoadN mem)));  // the LoadN is part of the matched tree, so the load is folded into the compare
11925 
11926   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11927   ins_encode %{
11928     __ cmpl($src$$Register, $mem$$Address);
11929   %}
11930   ins_pipe(ialu_cr_reg_mem);
11931 %}
11932 
11933 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{  // CmpN of an rRegN operand with an immN constant
11934   match(Set cr (CmpN op1 op2));
11935 
11936   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11937   ins_encode %{
11938     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11939   %}
11940   ins_pipe(ialu_cr_reg_imm);  // the constant is cast to jobject and handed to cmp_narrow_oop
11941 %}
11942 
11943 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11944 %{  // CmpN of an immN constant with a narrow value loaded from memory
11945   match(Set cr (CmpN src (LoadN mem)));  // the constant is the first CmpN input, yet the encoding passes the memory operand first
11946 
11947   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11948   ins_encode %{
11949     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11950   %}
11951   ins_pipe(ialu_cr_reg_mem);  // NOTE(review): operand order is swapped relative to the match tree -- presumably fine for eq/ne users only; confirm
11952 %}
11953 
11954 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{  // CmpN of an rRegN operand against the immN0 operand
11955   match(Set cr (CmpN src zero));
11956 
11957   format %{ "testl   $src, $src\t# compressed ptr" %}
11958   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11959   ins_pipe(ialu_cr_reg_imm);  // src is tested against itself; the zero operand is not encoded
11960 %}
11961 
11962 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11963 %{  // CmpN of a narrow value loaded from memory against the immN0 operand
11964   predicate(Universe::narrow_oop_base() != NULL);  // complement of the testN_mem_reg0 predicate
11965   match(Set cr (CmpN (LoadN mem) zero));
11966 
11967   ins_cost(500); // XXX
11968   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
11969   ins_encode %{
11970     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
11971   %}
11972   ins_pipe(ialu_cr_reg_mem);  // NOTE(review): the format prints testl but the encoding calls cmpl against 0xFFFFFFFF -- confirm which is intended
11973 %}
11974 
11975 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11976 %{  // CmpN of a narrow value loaded from memory against the immN0 operand, encoded as a compare of r12 with memory
11977   predicate(Universe::narrow_oop_base() == NULL);  // complement of the testN_mem predicate
11978   match(Set cr (CmpN (LoadN mem) zero));
11979 
11980   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11981   ins_encode %{
11982     __ cmpl(r12, $mem$$Address);
11983   %}
11984   ins_pipe(ialu_cr_reg_mem);  // per the format string, r12 holds the heap base, which is zero when the predicate holds
11985 %}
11986 
11987 // Yanked all unsigned pointer compare operations.
11988 // Pointer compares are done with CmpP which is already unsigned.
11989 
11990 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11991 %{  // Sets cr from CmpL of two rRegL operands (printed as cmpq)
11992   match(Set cr (CmpL op1 op2));
11993 
11994   format %{ "cmpq    $op1, $op2" %}
11995   opcode(0x3B);  /* Opcode 3B /r */
11996   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));  // same opcode as compI_rReg, with the _wide REX encoding class
11997   ins_pipe(ialu_cr_reg_reg);
11998 %}
11999 
12000 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12001 %{  // Sets cr from CmpL of an rRegL operand with an immL32 constant
12002   match(Set cr (CmpL op1 op2));
12003 
12004   format %{ "cmpq    $op1, $op2" %}
12005   opcode(0x81, 0x07); /* Opcode 81 /7 */
12006   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));  // same opcode as compI_rReg_imm, with the _wide form of OpcSErm
12007   ins_pipe(ialu_cr_reg_imm);
12008 %}
12009 
12010 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12011 %{  // Sets cr from CmpL of an rRegL operand with a long loaded from memory
12012   match(Set cr (CmpL op1 (LoadL op2)));  // no ins_cost is given here, unlike the int and pointer register-memory compares
12013 
12014   format %{ "cmpq    $op1, $op2" %}
12015   opcode(0x3B); /* Opcode 3B /r */
12016   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12017   ins_pipe(ialu_cr_reg_mem);
12018 %}
12019 
12020 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12021 %{  // CmpL of a register against the immL0 operand, encoded as a test of src with itself
12022   match(Set cr (CmpL src zero));
12023 
12024   format %{ "testq   $src, $src" %}
12025   opcode(0x85);
12026   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));  // src fills both register slots; the zero operand is not encoded
12027   ins_pipe(ialu_cr_reg_imm);
12028 %}
12029 
12030 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12031 %{  // CmpL of (AndL src con) against zero, encoded as a single test of src with the constant
12032   match(Set cr (CmpL (AndL src con) zero));  // only cr is Set, so the AndL value gets no destination register
12033 
12034   format %{ "testq   $src, $con\t# long" %}
12035   opcode(0xF7, 0x00);
12036   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));  // the immL32 constant is emitted through Con32
12037   ins_pipe(ialu_cr_reg_imm);
12038 %}
12039 
12040 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12041 %{  // CmpL of (AndL src (LoadL mem)) against zero, encoded as a test of src with the memory operand
12042   match(Set cr (CmpL (AndL src (LoadL mem)) zero));  // both the AndL and the LoadL are folded into the one instruction
12043 
12044   format %{ "testq   $src, $mem" %}
12045   opcode(0x85);
12046   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12047   ins_pipe(ialu_cr_reg_mem);
12048 %}
12049 
12050 // Manifest a CmpL result in an integer register.  Very painful.
12051 // This is the test to avoid.
12052 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12053 %{  // Produces the CmpL3 value in dst; per the format, dst is -1 on less-than, otherwise the zero-extended setne result
12054   match(Set dst (CmpL3 src1 src2));
12055   effect(KILL flags);  // the flags are clobbered by the sequence and are not an output
12056 
12057   ins_cost(275); // XXX
12058   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12059             "movl    $dst, -1\n\t"
12060             "jl,s    done\n\t"
12061             "setne   $dst\n\t"
12062             "movzbl  $dst, $dst\n\t"
12063     "done:" %}
12064   ins_encode(cmpl3_flag(src1, src2, dst));  // the whole sequence comes from the single cmpl3_flag encoding class
12065   ins_pipe(pipe_slow);
12066 %}
12067 
12068 //----------Max and Min--------------------------------------------------------
12069 // Min Instructions
12070 
12071 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12072 %{  // Conditional move with no match rule of its own; it is instantiated from the minI_rReg expand block
12073   effect(USE_DEF dst, USE src, USE cr);  // dst is read and written; src and the flags are read only
12074 
12075   format %{ "cmovlgt $dst, $src\t# min" %}
12076   opcode(0x0F, 0x4F);
12077   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // emits both opcode bytes (OpcP then OpcS) before the register byte
12078   ins_pipe(pipe_cmov_reg);
12079 %}
12080 
12081 
12082 instruct minI_rReg(rRegI dst, rRegI src)
12083 %{  // MinI with dst as both input and result, expanded into a compare followed by a conditional move
12084   match(Set dst (MinI dst src));
12085 
12086   ins_cost(200);  // the expansion below is compI_rReg on (dst, src), then cmovI_reg_g moving src into dst on greater-than
12087   expand %{
12088     rFlagsReg cr;
12089     compI_rReg(cr, dst, src);
12090     cmovI_reg_g(dst, src, cr);
12091   %}
12092 %}
12093 
12094 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12095 %{  // Conditional move with no match rule of its own; it is instantiated from the maxI_rReg expand block
12096   effect(USE_DEF dst, USE src, USE cr);  // dst is read and written; src and the flags are read only
12097 
12098   format %{ "cmovllt $dst, $src\t# max" %}
12099   opcode(0x0F, 0x4C);
12100   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));  // emits both opcode bytes (OpcP then OpcS) before the register byte
12101   ins_pipe(pipe_cmov_reg);
12102 %}
12103 
12104 
12105 instruct maxI_rReg(rRegI dst, rRegI src)
12106 %{  // MaxI with dst as both input and result, expanded into a compare followed by a conditional move
12107   match(Set dst (MaxI dst src));
12108 
12109   ins_cost(200);  // the expansion below is compI_rReg on (dst, src), then cmovI_reg_l moving src into dst on less-than
12110   expand %{
12111     rFlagsReg cr;
12112     compI_rReg(cr, dst, src);
12113     cmovI_reg_l(dst, src, cr);
12114   %}
12115 %}
12116 
12117 // ============================================================================
12118 // Branch Instructions
12119 
12120 // Jump Direct - Label defines a relative address from JMP+1 (long form; jmpDir_short is the short variant)
12121 instruct jmpDir(label labl)
12122 %{
12123   match(Goto);
12124   effect(USE labl);
12125 
12126   ins_cost(300);
12127   format %{ "jmp     $labl" %}
12128   size(5);  // declared size in bytes; presumably one opcode byte plus a 32-bit displacement from Lbl
12129   opcode(0xE9);
12130   ins_encode(OpcP, Lbl(labl));
12131   ins_pipe(pipe_jmp);
12132   ins_pc_relative(1);
12133 %}
12134 
12135 // Jump Direct Conditional - Label defines a relative address from Jcc+1 (long form for If on signed flags)
12136 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12137 %{
12138   match(If cop cr);
12139   effect(USE labl);
12140 
12141   ins_cost(300);
12142   format %{ "j$cop     $labl" %}
12143   size(6);  // declared size in bytes, versus 2 for jmpCon_short
12144   opcode(0x0F, 0x80);
12145   ins_encode(Jcc(cop, labl));  // the condition operand and the label are both handed to the Jcc encoding class
12146   ins_pipe(pipe_jcc);
12147   ins_pc_relative(1);
12148 %}
12149 
12150 // Jump Direct Conditional for CountedLoopEnd on signed flags - Label defines a relative address from Jcc+1
12151 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12152 %{
12153   match(CountedLoopEnd cop cr);  // same encoding as jmpCon; only the matched ideal node differs
12154   effect(USE labl);
12155 
12156   ins_cost(300);
12157   format %{ "j$cop     $labl\t# loop end" %}
12158   size(6);  // declared size in bytes, versus 2 for jmpLoopEnd_short
12159   opcode(0x0F, 0x80);
12160   ins_encode(Jcc(cop, labl));
12161   ins_pipe(pipe_jcc);
12162   ins_pc_relative(1);
12163 %}
12164 
12165 // Jump Direct Conditional for CountedLoopEnd on unsigned flags - Label defines a relative address from Jcc+1
12166 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12167   match(CountedLoopEnd cop cmp);  // operand classes are cmpOpU and rFlagsRegU; the encoding is the same Jcc as the signed rule
12168   effect(USE labl);
12169 
12170   ins_cost(300);
12171   format %{ "j$cop,u   $labl\t# loop end" %}
12172   size(6);  // declared size in bytes, versus 2 for jmpLoopEndU_short
12173   opcode(0x0F, 0x80);
12174   ins_encode(Jcc(cop, labl));
12175   ins_pipe(pipe_jcc);
12176   ins_pc_relative(1);
12177 %}
12178 
12179 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand classes
12180   match(CountedLoopEnd cop cmp);
12181   effect(USE labl);
12182 
12183   ins_cost(200);  // cost is 200 here, while jmpLoopEndU and jmpLoopEndUCF_short both use 300
12184   format %{ "j$cop,u   $labl\t# loop end" %}
12185   size(6);  // declared size in bytes
12186   opcode(0x0F, 0x80);
12187   ins_encode(Jcc(cop, labl));
12188   ins_pipe(pipe_jcc);
12189   ins_pc_relative(1);
12190 %}
12191 
12192 // Jump Direct Conditional - using unsigned comparison (long form for If on rFlagsRegU)
12193 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12194   match(If cop cmp);
12195   effect(USE labl);
12196 
12197   ins_cost(300);
12198   format %{ "j$cop,u  $labl" %}
12199   size(6);  // declared size in bytes, versus 2 for jmpConU_short
12200   opcode(0x0F, 0x80);
12201   ins_encode(Jcc(cop, labl));  // same Jcc encoding class as the signed jmpCon
12202   ins_pipe(pipe_jcc);
12203   ins_pc_relative(1);
12204 %}
12205 
12206 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // If branch for the cmpOpUCF / rFlagsRegUCF operand classes
12207   match(If cop cmp);
12208   effect(USE labl);
12209 
12210   ins_cost(200);  // cost is 200 here, while jmpConU and jmpConUCF_short both use 300
12211   format %{ "j$cop,u  $labl" %}
12212   size(6);  // declared size in bytes
12213   opcode(0x0F, 0x80);
12214   ins_encode(Jcc(cop, labl));
12215   ins_pipe(pipe_jcc);
12216   ins_pc_relative(1);
12217 %}
12218 
12219 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{  // If branch for cmpOpUCF2: a parity jump followed by the conditional jump
12220   match(If cop cmp);
12221   effect(USE labl);
12222 
12223   ins_cost(200);
12224   format %{ $$template
12225     if ($cop$$cmpcode == Assembler::notEqual) {
12226       $$emit$$"jp,u   $labl\n\t"
12227       $$emit$$"j$cop,u   $labl"
12228     } else {
12229       $$emit$$"jp,u   done\n\t"
12230       $$emit$$"j$cop,u   $labl\n\t"
12231       $$emit$$"done:"
12232     }
12233   %}
12234   size(12);  // two jumps of 6 bytes each: two opcode bytes plus a 32-bit displacement, as emitted below
12235   opcode(0x0F, 0x80);
12236   ins_encode %{
12237     Label* l = $labl$$label;
12238     $$$emit8$primary;
12239     emit_cc(cbuf, $secondary, Assembler::parity);
12240     int parity_disp = -1;  // placeholder; both handled branches below overwrite it
12241     if ($cop$$cmpcode == Assembler::notEqual) {
12242        // the two jumps are 6 bytes apart, so their displacements to the same label differ by 6 as well
12243        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12244     } else if ($cop$$cmpcode == Assembler::equal) {
12245        parity_disp = 6;  // jumps over the 6-byte conditional jump that follows (the done label in the format)
12246     } else {
12247        ShouldNotReachHere();  // only the notEqual and equal condition codes are handled
12248     }
12249     emit_d32(cbuf, parity_disp);
12250     $$$emit8$primary;
12251     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12252     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;  // measured from the end of the 4-byte displacement; 0 when no label is supplied
12253     emit_d32(cbuf, disp);
12254   %}
12255   ins_pipe(pipe_jcc);
12256   ins_pc_relative(1);
12257 %}
12258 
12259 // ============================================================================
12260 // The 2nd slow-half of a subtype check.  Scan the subklass's secondary
12261 // superklass array for an instance of the superklass.  Set a hidden
12262 // internal cache on a hit (cache is checked with exposed code in
12263 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12264 // encoding ALSO sets flags.
12265 
12266 instruct partialSubtypeCheck(rdi_RegP result,
12267                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12268                              rFlagsReg cr)
12269 %{  // Variant that produces the result in a register; partialSubtypeCheck_vs_Zero produces flags only
12270   match(Set result (PartialSubtypeCheck sub super));
12271   effect(KILL rcx, KILL cr);  // rcx is the scan counter in the format below; the flags are clobbered as well
12272 
12273   ins_cost(1100);  // slightly larger than the next version
12274   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12275             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12276             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12277             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12278             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12279             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12280             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12281     "miss:\t" %}
12282 
12283   opcode(0x1); // Force a XOR of RDI
12284   ins_encode(enc_PartialSubtypeCheck());  // shared with partialSubtypeCheck_vs_Zero, which sets opcode(0x0) instead
12285   ins_pipe(pipe_slow);
12286 %}
12287 
12288 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12289                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12290                                      immP0 zero,
12291                                      rdi_RegP result)
12292 %{  // Flags-only variant: matches the PartialSubtypeCheck result compared against zero and defines cr directly
12293   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12294   effect(KILL rcx, KILL result);  // result (rdi) is only scratch here, so it is killed rather than defined
12295 
12296   ins_cost(1000);
12297   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12298             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12299             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12300             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12301             "jne,s   miss\t\t# Missed: flags nz\n\t"
12302             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12303     "miss:\t" %}
12304 
12305   opcode(0x0); // No need to XOR RDI
12306   ins_encode(enc_PartialSubtypeCheck());  // shared with partialSubtypeCheck, which sets opcode(0x1) to force the XOR
12307   ins_pipe(pipe_slow);
12308 %}
12309 
12310 // ============================================================================
12311 // Branch Instructions -- short offset versions
12312 //
12313 // These instructions are used to replace jumps of a long offset (the default
12314 // match) with jumps of a shorter offset.  These instructions are all tagged
12315 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12316 // match rules in general matching.  Instead, the ADLC generates a conversion
12317 // method in the MachNode which can be used to do in-place replacement of the
12318 // long variant with the shorter variant.  The compiler will determine if a
12319 // branch can be taken by the is_short_branch_offset() predicate in the machine
12320 // specific code section of the file.
12321 
12322 // Jump Direct - Label defines a relative address from JMP+1 (short-offset replacement for jmpDir)
12323 instruct jmpDir_short(label labl) %{
12324   match(Goto);
12325   effect(USE labl);
12326 
12327   ins_cost(300);
12328   format %{ "jmp,s   $labl" %}
12329   size(2);  // declared size in bytes, versus 5 for jmpDir
12330   opcode(0xEB);
12331   ins_encode(OpcP, LblShort(labl));
12332   ins_pipe(pipe_jmp);
12333   ins_pc_relative(1);
12334   ins_short_branch(1);  // tags the rule as a short-branch variant, as described in the section comment above
12335 %}
12336 
12337 // Jump Direct Conditional - Label defines a relative address from Jcc+1 (short-offset replacement for jmpCon)
12338 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12339   match(If cop cr);
12340   effect(USE labl);
12341 
12342   ins_cost(300);
12343   format %{ "j$cop,s   $labl" %}
12344   size(2);  // declared size in bytes, versus 6 for jmpCon
12345   opcode(0x70);
12346   ins_encode(JccShort(cop, labl));
12347   ins_pipe(pipe_jcc);
12348   ins_pc_relative(1);
12349   ins_short_branch(1);  // tags the rule as a short-branch variant
12350 %}
12351 
12352 // Jump Direct Conditional for CountedLoopEnd - Label defines a relative address from Jcc+1 (short-offset replacement for jmpLoopEnd)
12353 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12354   match(CountedLoopEnd cop cr);
12355   effect(USE labl);
12356 
12357   ins_cost(300);
12358   format %{ "j$cop,s   $labl\t# loop end" %}
12359   size(2);  // declared size in bytes, versus 6 for jmpLoopEnd
12360   opcode(0x70);
12361   ins_encode(JccShort(cop, labl));
12362   ins_pipe(pipe_jcc);
12363   ins_pc_relative(1);
12364   ins_short_branch(1);  // tags the rule as a short-branch variant
12365 %}
12366 
12367 // Jump Direct Conditional for CountedLoopEnd on unsigned flags (short-offset replacement for jmpLoopEndU)
12368 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12369   match(CountedLoopEnd cop cmp);
12370   effect(USE labl);
12371 
12372   ins_cost(300);
12373   format %{ "j$cop,us  $labl\t# loop end" %}
12374   size(2);  // declared size in bytes, versus 6 for jmpLoopEndU
12375   opcode(0x70);
12376   ins_encode(JccShort(cop, labl));
12377   ins_pipe(pipe_jcc);
12378   ins_pc_relative(1);
12379   ins_short_branch(1);  // tags the rule as a short-branch variant
12380 %}
12381 
12382 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // short-offset replacement for jmpLoopEndUCF
12383   match(CountedLoopEnd cop cmp);
12384   effect(USE labl);
12385 
12386   ins_cost(300);  // the long jmpLoopEndUCF uses 200
12387   format %{ "j$cop,us  $labl\t# loop end" %}
12388   size(2);  // declared size in bytes, versus 6 for jmpLoopEndUCF
12389   opcode(0x70);
12390   ins_encode(JccShort(cop, labl));
12391   ins_pipe(pipe_jcc);
12392   ins_pc_relative(1);
12393   ins_short_branch(1);  // tags the rule as a short-branch variant
12394 %}
12395 
12396 // Jump Direct Conditional - using unsigned comparison (short-offset replacement for jmpConU)
12397 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12398   match(If cop cmp);
12399   effect(USE labl);
12400 
12401   ins_cost(300);
12402   format %{ "j$cop,us  $labl" %}
12403   size(2);  // declared size in bytes, versus 6 for jmpConU
12404   opcode(0x70);
12405   ins_encode(JccShort(cop, labl));
12406   ins_pipe(pipe_jcc);
12407   ins_pc_relative(1);
12408   ins_short_branch(1);  // tags the rule as a short-branch variant
12409 %}
12410 
12411 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{  // short-offset replacement for jmpConUCF
12412   match(If cop cmp);
12413   effect(USE labl);
12414 
12415   ins_cost(300);  // the long jmpConUCF uses 200
12416   format %{ "j$cop,us  $labl" %}
12417   size(2);  // declared size in bytes, versus 6 for jmpConUCF
12418   opcode(0x70);
12419   ins_encode(JccShort(cop, labl));
12420   ins_pipe(pipe_jcc);
12421   ins_pc_relative(1);
12422   ins_short_branch(1);  // tags the rule as a short-branch variant
12423 %}
12424 
12425 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{  // short-offset replacement for jmpConUCF2: two 2-byte jumps
12426   match(If cop cmp);
12427   effect(USE labl);
12428 
12429   ins_cost(300);
12430   format %{ $$template
12431     if ($cop$$cmpcode == Assembler::notEqual) {
12432       $$emit$$"jp,u,s   $labl\n\t"
12433       $$emit$$"j$cop,u,s   $labl"
12434     } else {
12435       $$emit$$"jp,u,s   done\n\t"
12436       $$emit$$"j$cop,u,s  $labl\n\t"
12437       $$emit$$"done:"
12438     }
12439   %}
12440   size(4);  // two jumps of 2 bytes each: one opcode byte plus an 8-bit displacement, as emitted below
12441   opcode(0x70);
12442   ins_encode %{
12443     Label* l = $labl$$label;
12444     emit_cc(cbuf, $primary, Assembler::parity);
12445     int parity_disp = -1;  // placeholder; both handled branches below overwrite it
12446     if ($cop$$cmpcode == Assembler::notEqual) {
12447       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;  // measured from the end of the 1-byte displacement; 0 when no label is supplied
12448     } else if ($cop$$cmpcode == Assembler::equal) {
12449       parity_disp = 2;  // jumps over the 2-byte conditional jump that follows (the done label in the format)
12450     } else {
12451       ShouldNotReachHere();  // only the notEqual and equal condition codes are handled
12452     }
12453     emit_d8(cbuf, parity_disp);
12454     emit_cc(cbuf, $primary, $cop$$cmpcode);
12455     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12456     emit_d8(cbuf, disp);
12457     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");  // range checks run after both bytes have already been emitted
12458     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12459   %}
12460   ins_pipe(pipe_jcc);
12461   ins_pc_relative(1);
12462   ins_short_branch(1);  // tags the rule as a short-branch variant
12463 %}
12464 
12465 // ============================================================================
12466 // inlined locking and unlocking
12467 
12468 instruct cmpFastLock(rFlagsReg cr,
12469                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12470 %{  // Inlined locking: matches FastLock on (object, box) and leaves its outcome in the flags operand
12471   match(Set cr (FastLock object box));
12472   effect(TEMP tmp, TEMP scr);  // tmp and scr are scratch registers reserved for the encoding
12473 
12474   ins_cost(300);
12475   format %{ "fastlock $object,$box,$tmp,$scr" %}
12476   ins_encode(Fast_Lock(object, box, tmp, scr));  // the whole sequence comes from the Fast_Lock encoding class
12477   ins_pipe(pipe_slow);
12478   ins_pc_relative(1);
12479 %}
12480 
12481 instruct cmpFastUnlock(rFlagsReg cr,
12482                        rRegP object, rax_RegP box, rRegP tmp)
12483 %{  // Inlined unlocking: matches FastUnlock on (object, box) and leaves its outcome in the flags operand
12484   match(Set cr (FastUnlock object box));
12485   effect(TEMP tmp);  // tmp is a scratch register; here box, not tmp, uses the rax operand class
12486 
12487   ins_cost(300);
12488   format %{ "fastunlock $object, $box, $tmp" %}
12489   ins_encode(Fast_Unlock(object, box, tmp));  // the whole sequence comes from the Fast_Unlock encoding class
12490   ins_pipe(pipe_slow);
12491   ins_pc_relative(1);
12492 %}
12493 
12494 
12495 // ============================================================================
12496 // Safepoint Instructions
12497 instruct safePoint_poll(rFlagsReg cr)
12498 %{
        // Matches the ideal SafePoint node. Per the format string below, the
        // poll is shown as a RIP-relative testl against the polling page.
12499   match(SafePoint);
        // The flags operand is declared killed by this instruction.
12500   effect(KILL cr);
12501
12502   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12503             "# Safepoint: poll for GC" %}
12504   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12505   ins_cost(125);
        // enc_safepoint_poll is defined outside this section; the fixed size(6)
        // above has to stay in step with whatever it emits.
12506   ins_encode(enc_safepoint_poll);
12507   ins_pipe(ialu_reg_mem);
12508 %}
12509 
12510 // ============================================================================
12511 // Procedure Call/Return Instructions
12512 // Call Java Static Instruction
12513 // Note: If this code changes, the corresponding ret_addr_offset() and
12514 //       compute_padding() functions will have to be adjusted.
12515 instruct CallStaticJavaDirect(method meth) %{
12516   match(CallStaticJava);
        // Selected only for CallStaticJava nodes that are NOT method-handle
        // invokes; CallStaticJavaHandle carries the complementary predicate.
12517   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12518   effect(USE meth);
12519
12520   ins_cost(300);
12521   format %{ "call,static " %}
12522   opcode(0xE8); /* E8 cd */
        // Encoding is the Java_Static_Call class followed by call_epilog; both
        // are defined outside this section.
12523   ins_encode(Java_Static_Call(meth), call_epilog);
12524   ins_pipe(pipe_slow);
12525   ins_pc_relative(1);
12526   ins_alignment(4);
12527 %}
12528 
12529 // Call Java Static Instruction (method handle version)
12530 // Note: If this code changes, the corresponding ret_addr_offset() and
12531 //       compute_padding() functions will have to be adjusted.
12532 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12533   match(CallStaticJava);
        // Selected only for CallStaticJava nodes that ARE method-handle invokes;
        // CallStaticJavaDirect carries the complementary predicate.
12534   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12535   effect(USE meth);
12536   // RBP is saved by all callees (for interpreter stack correction).
12537   // We use it here for a similar purpose, in {preserve,restore}_SP.
        // NOTE(review): rbp_mh_SP_save is not named in the match, effect or
        // encoding lists; presumably it only reserves RBP for the
        // preserve_SP/restore_SP encodings -- confirm.
12538
12539   ins_cost(300);
12540   format %{ "call,static/MethodHandle " %}
12541   opcode(0xE8); /* E8 cd */
        // Same call encoding as CallStaticJavaDirect, bracketed by preserve_SP
        // before the call and restore_SP after it.
12542   ins_encode(preserve_SP,
12543              Java_Static_Call(meth),
12544              restore_SP,
12545              call_epilog);
12546   ins_pipe(pipe_slow);
12547   ins_pc_relative(1);
12548   ins_alignment(4);
12549 %}
12550 
12551 // Call Java Dynamic Instruction
12552 // Note: If this code changes, the corresponding ret_addr_offset() and
12553 //       compute_padding() functions will have to be adjusted.
12554 instruct CallDynamicJavaDirect(method meth)
12555 %{
        // Matches the ideal CallDynamicJava node.
12556   match(CallDynamicJava);
12557   effect(USE meth);
12558
12559   ins_cost(300);
        // The format shows two instructions: a movq of
        // Universe::non_oop_word() into rax, then the call itself.
12560   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12561             "call,dynamic " %}
12562   opcode(0xE8); /* E8 cd */
        // Encoding is the Java_Dynamic_Call class followed by call_epilog; both
        // are defined outside this section.
12563   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12564   ins_pipe(pipe_slow);
12565   ins_pc_relative(1);
12566   ins_alignment(4);
12567 %}
12568 
12569 // Call Runtime Instruction
12570 instruct CallRuntimeDirect(method meth)
12571 %{
        // Matches the ideal CallRuntime node.
12572   match(CallRuntime);
12573   effect(USE meth);
12574
12575   ins_cost(300);
12576   format %{ "call,runtime " %}
12577   opcode(0xE8); /* E8 cd */
        // Uses the Java_To_Runtime encoding class (defined outside this
        // section). Unlike the Java call rules above, no call_epilog and no
        // ins_alignment are specified here.
12578   ins_encode(Java_To_Runtime(meth));
12579   ins_pipe(pipe_slow);
12580   ins_pc_relative(1);
12581 %}
12582 
12583 // Call runtime without safepoint
12584 instruct CallLeafDirect(method meth)
12585 %{
        // Matches the ideal CallLeaf node.
12586   match(CallLeaf);
12587   effect(USE meth);
12588
12589   ins_cost(300);
12590   format %{ "call_leaf,runtime " %}
12591   opcode(0xE8); /* E8 cd */
        // Same Java_To_Runtime encoding as CallRuntimeDirect; only the matched
        // node and the format text differ.
12592   ins_encode(Java_To_Runtime(meth));
12593   ins_pipe(pipe_slow);
12594   ins_pc_relative(1);
12595 %}
12596 
12597 // Call runtime without safepoint (CallLeafNoFP variant)
12598 instruct CallLeafNoFPDirect(method meth)
12599 %{
        // Matches the ideal CallLeafNoFP node.
12600   match(CallLeafNoFP);
12601   effect(USE meth);
12602
12603   ins_cost(300);
12604   format %{ "call_leaf_nofp,runtime " %}
12605   opcode(0xE8); /* E8 cd */
        // Same Java_To_Runtime encoding as CallLeafDirect; only the matched
        // node and the format text differ.
12606   ins_encode(Java_To_Runtime(meth));
12607   ins_pipe(pipe_slow);
12608   ins_pc_relative(1);
12609 %}
12610 
12611 // Return Instruction
12612 // Remove the return address & jump to it.
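12613 // Notice: We always emit a nop after a ret to make sure there is room
12614 // for safepoint patching.
      // NOTE(review): the encoding of this rule lists only OpcP with opcode
      // 0xC3, so no nop is visibly emitted here -- confirm this notice is
      // still current.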
12615 instruct Ret()
12616 %{
        // Matches the ideal Return node; takes no operands.
12617   match(Return);
12618
12619   format %{ "ret" %}
12620   opcode(0xC3);
        // The encoding list contains only OpcP (defined outside this section;
        // presumably it emits the primary opcode declared above -- confirm).
12621   ins_encode(OpcP);
12622   ins_pipe(pipe_jmp);
12623 %}
12624 
12625 // Tail Call; Jump from runtime stub to Java code.
12626 // Also known as an 'interprocedural jump'.
12627 // Target of jump will eventually return to caller.
12628 // TailJump below removes the return address.
12629 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12630 %{
        // Matches the ideal TailCall node. jump_target is the register jumped
        // through; method_oop uses the rbx_RegP operand class (the format
        // string notes that rbx holds the method oop).
12631   match(TailCall jump_target method_oop);
12632
12633   ins_cost(300);
12634   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12635   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Encoding order: REX_reg for jump_target, then OpcP, then reg_opc for
        // jump_target. These encoding classes are defined outside this section.
12636   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12637   ins_pipe(pipe_jmp);
12638 %}
12639 
12640 // Tail Jump; remove the return address; jump to target.
12641 // TailCall above leaves the return address around.
12642 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12643 %{
        // Matches the ideal TailJump node. ex_oop uses the rax_RegP operand
        // class; it is not referenced by the encoding.
12644   match(TailJump jump_target ex_oop);
12645
12646   ins_cost(300);
12647   format %{ "popq    rdx\t# pop return address\n\t"
12648             "jmp     $jump_target" %}
12649   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Two instructions are encoded: a raw 0x5a byte (popq rdx, discarding
        // the return address), then the same REX/FF /4 jmp sequence that
        // TailCalljmpInd uses.
        // NOTE(review): rdx is written here without a matching effect() entry;
        // presumably acceptable because control leaves via the jmp -- confirm
        // that jump_target can never be allocated to rdx.
12650   ins_encode(Opcode(0x5a), // popq rdx
12651              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12652   ins_pipe(pipe_jmp);
12653 %}
12654 
12655 // Create exception oop: created by stack-crawling runtime code.
12656 // Created exception is now available to this handler, and is setup
12657 // just prior to jumping to this handler.  No code emitted.
12658 instruct CreateException(rax_RegP ex_oop)
12659 %{
        // Matches the ideal CreateEx node and defines ex_oop, which uses the
        // rax_RegP operand class.
12660   match(Set ex_oop (CreateEx));
12661
        // Zero bytes: consistent with the empty ins_encode() and the "empty"
        // pipeline class below.
12662   size(0);
12663   // use the following format syntax
12664   format %{ "# exception oop is in rax; no code emitted" %}
12665   ins_encode();
12666   ins_pipe(empty);
12667 %}
12668 
12669 // Rethrow exception:
12670 // The exception oop will come in the first argument position.
12671 // Then JUMP (not call) to the rethrow stub code.
12672 instruct RethrowException()
12673 %{
        // Matches the ideal Rethrow node; takes no operands.
12674   match(Rethrow);
12675
12676   // use the following format syntax
12677   format %{ "jmp     rethrow_stub" %}
        // enc_rethrow is defined outside this section; per the format string it
        // is a jmp to the rethrow stub.
12678   ins_encode(enc_rethrow);
12679   ins_pipe(pipe_jmp);
12680 %}
12681 
12682 
12683 //----------PEEPHOLE RULES-----------------------------------------------------
12684 // These must follow all instruction definitions as they use the names
12685 // defined in the instruction definitions.
12686 //
12687 // peepmatch ( root_instr_name [preceding_instruction]* );
12688 //
12689 // peepconstraint %{
12690 // (instruction_number.operand_name relational_op instruction_number.operand_name
12691 //  [, ...] );
12692 // // instruction numbers are zero-based using left to right order in peepmatch
12693 //
12694 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12695 // // provide an instruction_number.operand_name for each operand that appears
12696 // // in the replacement instruction's match rule
12697 //
12698 // ---------VM FLAGS---------------------------------------------------------
12699 //
12700 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12701 //
12702 // Each peephole rule is given an identifying number starting with zero and
12703 // increasing by one in the order seen by the parser.  An individual peephole
12704 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12705 // on the command-line.
12706 //
12707 // ---------CURRENT LIMITATIONS----------------------------------------------
12708 //
12709 // Only match adjacent instructions in same basic block
12710 // Only equality constraints
12711 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12712 // Only one replacement instruction
12713 //
12714 // ---------EXAMPLE----------------------------------------------------------
12715 //
12716 // // pertinent parts of existing instructions in architecture description
12717 // instruct movI(rRegI dst, rRegI src)
12718 // %{
12719 //   match(Set dst (CopyI src));
12720 // %}
12721 //
12722 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12723 // %{
12724 //   match(Set dst (AddI dst src));
12725 //   effect(KILL cr);
12726 // %}
12727 //
12728 // // Change (inc mov) to lea
12729 // peephole %{
12730 //   // increment preceded by register-register move
12731 //   peepmatch ( incI_rReg movI );
12732 //   // require that the destination register of the increment
12733 //   // match the destination register of the move
12734 //   peepconstraint ( 0.dst == 1.dst );
12735 //   // construct a replacement instruction that sets
12736 //   // the destination to ( move's source register + one )
12737 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12738 // %}
12739 //
12740 
12741 // Implementation no longer uses movX instructions since
12742 // machine-independent system no longer uses CopyX nodes.
12743 //
12744 // peephole
12745 // %{
12746 //   peepmatch (incI_rReg movI);
12747 //   peepconstraint (0.dst == 1.dst);
12748 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12749 // %}
12750 
12751 // peephole
12752 // %{
12753 //   peepmatch (decI_rReg movI);
12754 //   peepconstraint (0.dst == 1.dst);
12755 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12756 // %}
12757 
12758 // peephole
12759 // %{
12760 //   peepmatch (addI_rReg_imm movI);
12761 //   peepconstraint (0.dst == 1.dst);
12762 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12763 // %}
12764 
12765 // peephole
12766 // %{
12767 //   peepmatch (incL_rReg movL);
12768 //   peepconstraint (0.dst == 1.dst);
12769 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12770 // %}
12771 
12772 // peephole
12773 // %{
12774 //   peepmatch (decL_rReg movL);
12775 //   peepconstraint (0.dst == 1.dst);
12776 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12777 // %}
12778 
12779 // peephole
12780 // %{
12781 //   peepmatch (addL_rReg_imm movL);
12782 //   peepconstraint (0.dst == 1.dst);
12783 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12784 // %}
12785 
12786 // peephole
12787 // %{
12788 //   peepmatch (addP_rReg_imm movP);
12789 //   peepconstraint (0.dst == 1.dst);
12790 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12791 // %}
12792 
12793 // // Change load of spilled value to only a spill
12794 // instruct storeI(memory mem, rRegI src)
12795 // %{
12796 //   match(Set mem (StoreI mem src));
12797 // %}
12798 //
12799 // instruct loadI(rRegI dst, memory mem)
12800 // %{
12801 //   match(Set dst (LoadI mem));
12802 // %}
12803 //
12804 
12805 peephole
12806 %{
        // Instruction 0 is the root loadI; instruction 1 is the storeI that
        // immediately precedes it.
12807   peepmatch (loadI storeI);
        // Fire only when the load's destination is the register the store just
        // wrote from, and both use the same memory operand.
12808   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeI built from the store's operands,
        // i.e. the reload is dropped.
12809   peepreplace (storeI(1.mem 1.mem 1.src));
12810 %}
12811 
12812 peephole
12813 %{
        // Long variant of the loadI/storeI rule: instruction 0 is the root
        // loadL; instruction 1 is the storeL that immediately precedes it.
12814   peepmatch (loadL storeL);
        // Fire only when the load's destination is the register the store just
        // wrote from, and both use the same memory operand.
12815   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeL built from the store's operands,
        // i.e. the reload is dropped.
12816   peepreplace (storeL(1.mem 1.mem 1.src));
12817 %}
12818 
12819 //----------SMARTSPILL RULES---------------------------------------------------
12820 // These must follow all instruction definitions as they use the names
12821 // defined in the instructions definitions.