1 //
   2 // Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP (R12 and R15 are also left out)
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX and RSP
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
     // Class for all pointer registers except RBP and RSP
     // (R12 and R15 are not listed either)
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX and RSP
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP; R12 and R15 are also left out)
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP; R12 and R15 are also left out)
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
     // Short names for the Assembler relocation-format tags.  RELOC_DISP32 is
     // what the emit_d32_reloc()/encode_RegMem() helpers below pass as the
     // `format` argument when a 32-bit displacement is relocated.
 549 #define   RELOC_IMM64    Assembler::imm_operand
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
     // Shorthand: `__ insn(...)` expands to `_masm.insn(...)`, so a local
     // MacroAssembler named _masm must be in scope wherever `__` is used.
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
 614 #ifndef PRODUCT
 615 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 616 {
 617   st->print("INT3");
 618 }
 619 #endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 623   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 624   cbuf.insts()->emit_int8(c);
 625 }
 626 
 627 // EMIT_CC()
 628 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 629   unsigned char c = (unsigned char) (f1 | f2);
 630   cbuf.insts()->emit_int8(c);
 631 }
 632 
 633 // EMIT_OPCODE()
 634 void emit_opcode(CodeBuffer &cbuf, int code) {
 635   cbuf.insts()->emit_int8((unsigned char) code);
 636 }
 637 
 638 // EMIT_OPCODE() w/ relocation information
 639 void emit_opcode(CodeBuffer &cbuf,
 640                  int code, relocInfo::relocType reloc, int offset, int format)
 641 {
 642   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 643   emit_opcode(cbuf, code);
 644 }
 645 
 646 // EMIT_D8()
 647 void emit_d8(CodeBuffer &cbuf, int d8) {
 648   cbuf.insts()->emit_int8((unsigned char) d8);
 649 }
 650 
 651 // EMIT_D16()
 652 void emit_d16(CodeBuffer &cbuf, int d16) {
 653   cbuf.insts()->emit_int16(d16);
 654 }
 655 
 656 // EMIT_D32()
 657 void emit_d32(CodeBuffer &cbuf, int d32) {
 658   cbuf.insts()->emit_int32(d32);
 659 }
 660 
 661 // EMIT_D64()
 662 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 663   cbuf.insts()->emit_int64(d64);
 664 }
 665 
 666 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 667 void emit_d32_reloc(CodeBuffer& cbuf,
 668                     int d32,
 669                     relocInfo::relocType reloc,
 670                     int format)
 671 {
 672   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 673   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 674   cbuf.insts()->emit_int32(d32);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 679 #ifdef ASSERT
 680   if (rspec.reloc()->type() == relocInfo::oop_type &&
 681       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 682     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 683   }
 684 #endif
 685   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 686   cbuf.insts()->emit_int32(d32);
 687 }
 688 
 689 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 690   address next_ip = cbuf.insts_end() + 4;
 691   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 692                  external_word_Relocation::spec(addr),
 693                  RELOC_DISP32);
 694 }
 695 
 696 
 697 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 698 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 699   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 700   cbuf.insts()->emit_int64(d64);
 701 }
 702 
 703 // emit 64 bit value and construct relocation entry from RelocationHolder
 704 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 705 #ifdef ASSERT
 706   if (rspec.reloc()->type() == relocInfo::oop_type &&
 707       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 708     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 709            "cannot embed scavengable oops in code");
 710   }
 711 #endif
 712   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 713   cbuf.insts()->emit_int64(d64);
 714 }
 715 
 716 // Access stack slot for load or store
 717 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 718 {
 719   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 720   if (-0x80 <= disp && disp < 0x80) {
 721     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 722     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 723     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 724   } else {
 725     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 726     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 727     emit_d32(cbuf, disp);     // Displacement // R/M byte
 728   }
 729 }
 730 
 731    // rRegI ereg, memory mem) %{    // emit_reg_mem
 732 void encode_RegMem(CodeBuffer &cbuf,
 733                    int reg,
 734                    int base, int index, int scale, int disp, bool disp_is_oop)
 735 {
 736   assert(!disp_is_oop, "cannot have disp");
 737   int regenc = reg & 7;
 738   int baseenc = base & 7;
 739   int indexenc = index & 7;
 740 
 741   // There is no index & no scale, use form without SIB byte
 742   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 743     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 744     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 745       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 746     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 747       // If 8-bit displacement, mode 0x1
 748       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 749       emit_d8(cbuf, disp);
 750     } else {
 751       // If 32-bit displacement
 752       if (base == -1) { // Special flag for absolute address
 753         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 754         if (disp_is_oop) {
 755           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 756         } else {
 757           emit_d32(cbuf, disp);
 758         }
 759       } else {
 760         // Normal base + offset
 761         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 762         if (disp_is_oop) {
 763           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 764         } else {
 765           emit_d32(cbuf, disp);
 766         }
 767       }
 768     }
 769   } else {
 770     // Else, encode with the SIB byte
 771     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 772     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 773       // If no displacement
 774       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 775       emit_rm(cbuf, scale, indexenc, baseenc);
 776     } else {
 777       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 778         // If 8-bit displacement, mode 0x1
 779         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 780         emit_rm(cbuf, scale, indexenc, baseenc);
 781         emit_d8(cbuf, disp);
 782       } else {
 783         // If 32-bit displacement
 784         if (base == 0x04 ) {
 785           emit_rm(cbuf, 0x2, regenc, 0x4);
 786           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 787         } else {
 788           emit_rm(cbuf, 0x2, regenc, 0x4);
 789           emit_rm(cbuf, scale, indexenc, baseenc); // *
 790         }
 791         if (disp_is_oop) {
 792           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 793         } else {
 794           emit_d32(cbuf, disp);
 795         }
 796       }
 797     }
 798   }
 799 }
 800 
 801 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 802 {
 803   if (dstenc != srcenc) {
 804     if (dstenc < 8) {
 805       if (srcenc >= 8) {
 806         emit_opcode(cbuf, Assembler::REX_B);
 807         srcenc -= 8;
 808       }
 809     } else {
 810       if (srcenc < 8) {
 811         emit_opcode(cbuf, Assembler::REX_R);
 812       } else {
 813         emit_opcode(cbuf, Assembler::REX_RB);
 814         srcenc -= 8;
 815       }
 816       dstenc -= 8;
 817     }
 818 
 819     emit_opcode(cbuf, 0x8B);
 820     emit_rm(cbuf, 0x3, dstenc, srcenc);
 821   }
 822 }
 823 
 824 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 825   if( dst_encoding == src_encoding ) {
 826     // reg-reg copy, use an empty encoding
 827   } else {
 828     MacroAssembler _masm(&cbuf);
 829 
 830     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 831   }
 832 }
 833 
 834 
 835 //=============================================================================
// Matcher flag, set to true for this platform.
// NOTE(review): presumably this tells the matcher that constant-table entries
// are reached through absolute addresses rather than through a base
// register -- confirm against the Matcher sources.
const bool Matcher::constant_table_absolute_addressing = true;
// The constant base node's output register mask is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 838 
// Emits nothing: the constant base node has an empty encoding on this
// platform, so cbuf is left untouched and ra_ is unused.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
 842 
// Size in bytes of the node's encoding.  Always 0, in agreement with the
// empty encoding produced by MachConstantBaseNode::emit.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
 846 
#ifndef PRODUCT
// Non-product builds only: print a one-line placeholder to st noting that
// this node has an empty encoding.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 852 
 853 
 854 //=============================================================================
 855 #ifndef PRODUCT
 856 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 857 {
 858   Compile* C = ra_->C;
 859 
 860   int framesize = C->frame_slots() << LogBytesPerInt;
 861   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 862   // Remove wordSize for return adr already pushed
 863   // and another for the RBP we are going to save
 864   framesize -= 2*wordSize;
 865   bool need_nop = true;
 866 
 867   // Calls to C2R adapters often do not accept exceptional returns.
 868   // We require that their callers must bang for them.  But be
 869   // careful, because some VM calls (such as call site linkage) can
 870   // use several kilobytes of stack.  But the stack safety zone should
 871   // account for that.  See bugs 4446381, 4468289, 4497237.
 872   if (C->need_stack_bang(framesize)) {
 873     st->print_cr("# stack bang"); st->print("\t");
 874     need_nop = false;
 875   }
 876   st->print_cr("pushq   rbp"); st->print("\t");
 877 
 878   if (VerifyStackAtCalls) {
 879     // Majik cookie to verify stack depth
 880     st->print_cr("pushq   0xffffffffbadb100d"
 881                   "\t# Majik cookie for stack depth check");
 882     st->print("\t");
 883     framesize -= wordSize; // Remove 2 for cookie
 884     need_nop = false;
 885   }
 886 
 887   if (framesize) {
 888     st->print("subq    rsp, #%d\t# Create frame", framesize);
 889     if (framesize < 0x80 && need_nop) {
 890       st->print("\n\tnop\t# nop for patch_verified_entry");
 891     }
 892   }
 893 }
 894 #endif
 895 
 896 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 897 {
 898   Compile* C = ra_->C;
 899 
 900   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 901   // NativeJump::patch_verified_entry will be able to patch out the entry
 902   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 903   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 904   // 6 bytes. So if we don't do the fldcw or the push then we must
 905   // use the 6 byte frame allocation even if we have no frame. :-(
 906   // If method sets FPU control word do it now
 907 
 908   int framesize = C->frame_slots() << LogBytesPerInt;
 909   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 910   // Remove wordSize for return adr already pushed
 911   // and another for the RBP we are going to save
 912   framesize -= 2*wordSize;
 913   bool need_nop = true;
 914 
 915   // Calls to C2R adapters often do not accept exceptional returns.
 916   // We require that their callers must bang for them.  But be
 917   // careful, because some VM calls (such as call site linkage) can
 918   // use several kilobytes of stack.  But the stack safety zone should
 919   // account for that.  See bugs 4446381, 4468289, 4497237.
 920   if (C->need_stack_bang(framesize)) {
 921     MacroAssembler masm(&cbuf);
 922     masm.generate_stack_overflow_check(framesize);
 923     need_nop = false;
 924   }
 925 
 926   // We always push rbp so that on return to interpreter rbp will be
 927   // restored correctly and we can correct the stack.
 928   emit_opcode(cbuf, 0x50 | RBP_enc);
 929 
 930   if (VerifyStackAtCalls) {
 931     // Majik cookie to verify stack depth
 932     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 933     emit_d32(cbuf, 0xbadb100d);
 934     framesize -= wordSize; // Remove 2 for cookie
 935     need_nop = false;
 936   }
 937 
 938   if (framesize) {
 939     emit_opcode(cbuf, Assembler::REX_W);
 940     if (framesize < 0x80) {
 941       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 942       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 943       emit_d8(cbuf, framesize);
 944       if (need_nop) {
 945         emit_opcode(cbuf, 0x90); // nop
 946       }
 947     } else {
 948       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 949       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 950       emit_d32(cbuf, framesize);
 951     }
 952   }
 953 
 954   C->set_frame_complete(cbuf.insts_size());
 955 
 956 #ifdef ASSERT
 957   if (VerifyStackAtCalls) {
 958     Label L;
 959     MacroAssembler masm(&cbuf);
 960     masm.push(rax);
 961     masm.mov(rax, rsp);
 962     masm.andptr(rax, StackAlignmentInBytes-1);
 963     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 964     masm.pop(rax);
 965     masm.jcc(Assembler::equal, L);
 966     masm.stop("Stack is not properly aligned!");
 967     masm.bind(L);
 968   }
 969 #endif
 970 }
 971 
// Size of the prolog encoding.  The prolog's length depends on the frame
// size, the stack bang and VerifyStackAtCalls, so instead of duplicating
// that logic here the computation is delegated to MachNode::size.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 977 
// Returns 0 for the prolog.
// NOTE(review): presumably this value is an upper bound on the relocation
// entries the node needs; the inherited remark below reads oddly next to a
// value of 0 -- confirm that 0 is intended.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 982 
 983 //=============================================================================
 984 #ifndef PRODUCT
 985 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 986 {
 987   Compile* C = ra_->C;
 988   int framesize = C->frame_slots() << LogBytesPerInt;
 989   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 990   // Remove word for return adr already pushed
 991   // and RBP
 992   framesize -= 2*wordSize;
 993 
 994   if (framesize) {
 995     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
 996     st->print("\t");
 997   }
 998 
 999   st->print_cr("popq\trbp");
1000   if (do_polling() && C->is_method_compilation()) {
1001     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1002                   "# Safepoint: poll for GC");
1003     st->print("\t");
1004   }
1005 }
1006 #endif
1007 
1008 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1009 {
1010   Compile* C = ra_->C;
1011   int framesize = C->frame_slots() << LogBytesPerInt;
1012   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1013   // Remove word for return adr already pushed
1014   // and RBP
1015   framesize -= 2*wordSize;
1016 
1017   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1018 
1019   if (framesize) {
1020     emit_opcode(cbuf, Assembler::REX_W);
1021     if (framesize < 0x80) {
1022       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1023       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1024       emit_d8(cbuf, framesize);
1025     } else {
1026       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1027       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1028       emit_d32(cbuf, framesize);
1029     }
1030   }
1031 
1032   // popq rbp
1033   emit_opcode(cbuf, 0x58 | RBP_enc);
1034 
1035   if (do_polling() && C->is_method_compilation()) {
1036     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1037     // XXX reg_mem doesn't support RIP-relative addressing yet
1038     cbuf.set_insts_mark();
1039     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_return_type, 0); // XXX
1040     emit_opcode(cbuf, 0x85); // testl
1041     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1042     // cbuf.insts_mark() is beginning of instruction
1043     emit_d32_reloc(cbuf, os::get_polling_page());
1044 //                    relocInfo::poll_return_type,
1045   }
1046 }
1047 
1048 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1049 {
1050   Compile* C = ra_->C;
1051   int framesize = C->frame_slots() << LogBytesPerInt;
1052   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1053   // Remove word for return adr already pushed
1054   // and RBP
1055   framesize -= 2*wordSize;
1056 
1057   uint size = 0;
1058 
1059   if (do_polling() && C->is_method_compilation()) {
1060     size += 6;
1061   }
1062 
1063   // count popq rbp
1064   size++;
1065 
1066   if (framesize) {
1067     if (framesize < 0x80) {
1068       size += 4;
1069     } else if (framesize) {
1070       size += 7;
1071     }
1072   }
1073 
1074   return size;
1075 }
1076 
// Returns 2 for the epilog.
// NOTE(review): presumably an upper bound on relocation entries; when the
// return poll is emitted, MachEpilogNode::emit makes one cbuf.relocate call
// and one emit_d32_reloc call, which this value would cover -- confirm.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1081 
// The epilog has no pipeline description of its own; it returns whatever
// MachNode::pipeline_class() provides.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1086 
// Always returns 0.
// NOTE(review): presumably the offset of the safepoint poll relative to some
// position in the epilog -- the reference point is not visible here; confirm
// against the callers.
int MachEpilogNode::safepoint_offset() const
{
  return 0;
}
1091 
1092 //=============================================================================
1093 
// Register classes used to pick a spill-copy encoding; see rc_class for how
// an OptoReg name is mapped onto one of these.
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1100 
1101 static enum RC rc_class(OptoReg::Name reg)
1102 {
1103   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1104 
1105   if (OptoReg::is_stack(reg)) return rc_stack;
1106 
1107   VMReg r = OptoReg::as_VMReg(reg);
1108 
1109   if (r->is_Register()) return rc_int;
1110 
1111   assert(r->is_XMMRegister(), "must be");
1112   return rc_float;
1113 }
1114 
1115 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1116                                        PhaseRegAlloc* ra_,
1117                                        bool do_size,
1118                                        outputStream* st) const
1119 {
1120 
1121   // Get registers to move
1122   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1123   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1124   OptoReg::Name dst_second = ra_->get_reg_second(this);
1125   OptoReg::Name dst_first = ra_->get_reg_first(this);
1126 
1127   enum RC src_second_rc = rc_class(src_second);
1128   enum RC src_first_rc = rc_class(src_first);
1129   enum RC dst_second_rc = rc_class(dst_second);
1130   enum RC dst_first_rc = rc_class(dst_first);
1131 
1132   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1133          "must move at least 1 register" );
1134 
1135   if (src_first == dst_first && src_second == dst_second) {
1136     // Self copy, no move
1137     return 0;
1138   } else if (src_first_rc == rc_stack) {
1139     // mem ->
1140     if (dst_first_rc == rc_stack) {
1141       // mem -> mem
1142       assert(src_second != dst_first, "overlap");
1143       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1144           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1145         // 64-bit
1146         int src_offset = ra_->reg2offset(src_first);
1147         int dst_offset = ra_->reg2offset(dst_first);
1148         if (cbuf) {
1149           emit_opcode(*cbuf, 0xFF);
1150           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1151 
1152           emit_opcode(*cbuf, 0x8F);
1153           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1154 
1155 #ifndef PRODUCT
1156         } else if (!do_size) {
1157           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1158                      "popq    [rsp + #%d]",
1159                      src_offset,
1160                      dst_offset);
1161 #endif
1162         }
1163         return
1164           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1165           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1166       } else {
1167         // 32-bit
1168         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1169         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1170         // No pushl/popl, so:
1171         int src_offset = ra_->reg2offset(src_first);
1172         int dst_offset = ra_->reg2offset(dst_first);
1173         if (cbuf) {
1174           emit_opcode(*cbuf, Assembler::REX_W);
1175           emit_opcode(*cbuf, 0x89);
1176           emit_opcode(*cbuf, 0x44);
1177           emit_opcode(*cbuf, 0x24);
1178           emit_opcode(*cbuf, 0xF8);
1179 
1180           emit_opcode(*cbuf, 0x8B);
1181           encode_RegMem(*cbuf,
1182                         RAX_enc,
1183                         RSP_enc, 0x4, 0, src_offset,
1184                         false);
1185 
1186           emit_opcode(*cbuf, 0x89);
1187           encode_RegMem(*cbuf,
1188                         RAX_enc,
1189                         RSP_enc, 0x4, 0, dst_offset,
1190                         false);
1191 
1192           emit_opcode(*cbuf, Assembler::REX_W);
1193           emit_opcode(*cbuf, 0x8B);
1194           emit_opcode(*cbuf, 0x44);
1195           emit_opcode(*cbuf, 0x24);
1196           emit_opcode(*cbuf, 0xF8);
1197 
1198 #ifndef PRODUCT
1199         } else if (!do_size) {
1200           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1201                      "movl    rax, [rsp + #%d]\n\t"
1202                      "movl    [rsp + #%d], rax\n\t"
1203                      "movq    rax, [rsp - #8]",
1204                      src_offset,
1205                      dst_offset);
1206 #endif
1207         }
1208         return
1209           5 + // movq
1210           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1211           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1212           5; // movq
1213       }
1214     } else if (dst_first_rc == rc_int) {
1215       // mem -> gpr
1216       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1217           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1218         // 64-bit
1219         int offset = ra_->reg2offset(src_first);
1220         if (cbuf) {
1221           if (Matcher::_regEncode[dst_first] < 8) {
1222             emit_opcode(*cbuf, Assembler::REX_W);
1223           } else {
1224             emit_opcode(*cbuf, Assembler::REX_WR);
1225           }
1226           emit_opcode(*cbuf, 0x8B);
1227           encode_RegMem(*cbuf,
1228                         Matcher::_regEncode[dst_first],
1229                         RSP_enc, 0x4, 0, offset,
1230                         false);
1231 #ifndef PRODUCT
1232         } else if (!do_size) {
1233           st->print("movq    %s, [rsp + #%d]\t# spill",
1234                      Matcher::regName[dst_first],
1235                      offset);
1236 #endif
1237         }
1238         return
1239           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1240       } else {
1241         // 32-bit
1242         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1243         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1244         int offset = ra_->reg2offset(src_first);
1245         if (cbuf) {
1246           if (Matcher::_regEncode[dst_first] >= 8) {
1247             emit_opcode(*cbuf, Assembler::REX_R);
1248           }
1249           emit_opcode(*cbuf, 0x8B);
1250           encode_RegMem(*cbuf,
1251                         Matcher::_regEncode[dst_first],
1252                         RSP_enc, 0x4, 0, offset,
1253                         false);
1254 #ifndef PRODUCT
1255         } else if (!do_size) {
1256           st->print("movl    %s, [rsp + #%d]\t# spill",
1257                      Matcher::regName[dst_first],
1258                      offset);
1259 #endif
1260         }
1261         return
1262           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1263           ((Matcher::_regEncode[dst_first] < 8)
1264            ? 3
1265            : 4); // REX
1266       }
1267     } else if (dst_first_rc == rc_float) {
1268       // mem-> xmm
1269       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1270           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1271         // 64-bit
1272         int offset = ra_->reg2offset(src_first);
1273         if (cbuf) {
1274           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1275           if (Matcher::_regEncode[dst_first] >= 8) {
1276             emit_opcode(*cbuf, Assembler::REX_R);
1277           }
1278           emit_opcode(*cbuf, 0x0F);
1279           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1280           encode_RegMem(*cbuf,
1281                         Matcher::_regEncode[dst_first],
1282                         RSP_enc, 0x4, 0, offset,
1283                         false);
1284 #ifndef PRODUCT
1285         } else if (!do_size) {
1286           st->print("%s  %s, [rsp + #%d]\t# spill",
1287                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1288                      Matcher::regName[dst_first],
1289                      offset);
1290 #endif
1291         }
1292         return
1293           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1294           ((Matcher::_regEncode[dst_first] < 8)
1295            ? 5
1296            : 6); // REX
1297       } else {
1298         // 32-bit
1299         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1300         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1301         int offset = ra_->reg2offset(src_first);
1302         if (cbuf) {
1303           emit_opcode(*cbuf, 0xF3);
1304           if (Matcher::_regEncode[dst_first] >= 8) {
1305             emit_opcode(*cbuf, Assembler::REX_R);
1306           }
1307           emit_opcode(*cbuf, 0x0F);
1308           emit_opcode(*cbuf, 0x10);
1309           encode_RegMem(*cbuf,
1310                         Matcher::_regEncode[dst_first],
1311                         RSP_enc, 0x4, 0, offset,
1312                         false);
1313 #ifndef PRODUCT
1314         } else if (!do_size) {
1315           st->print("movss   %s, [rsp + #%d]\t# spill",
1316                      Matcher::regName[dst_first],
1317                      offset);
1318 #endif
1319         }
1320         return
1321           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1322           ((Matcher::_regEncode[dst_first] < 8)
1323            ? 5
1324            : 6); // REX
1325       }
1326     }
1327   } else if (src_first_rc == rc_int) {
1328     // gpr ->
1329     if (dst_first_rc == rc_stack) {
1330       // gpr -> mem
1331       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1332           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1333         // 64-bit
1334         int offset = ra_->reg2offset(dst_first);
1335         if (cbuf) {
1336           if (Matcher::_regEncode[src_first] < 8) {
1337             emit_opcode(*cbuf, Assembler::REX_W);
1338           } else {
1339             emit_opcode(*cbuf, Assembler::REX_WR);
1340           }
1341           emit_opcode(*cbuf, 0x89);
1342           encode_RegMem(*cbuf,
1343                         Matcher::_regEncode[src_first],
1344                         RSP_enc, 0x4, 0, offset,
1345                         false);
1346 #ifndef PRODUCT
1347         } else if (!do_size) {
1348           st->print("movq    [rsp + #%d], %s\t# spill",
1349                      offset,
1350                      Matcher::regName[src_first]);
1351 #endif
1352         }
1353         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1354       } else {
1355         // 32-bit
1356         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1357         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1358         int offset = ra_->reg2offset(dst_first);
1359         if (cbuf) {
1360           if (Matcher::_regEncode[src_first] >= 8) {
1361             emit_opcode(*cbuf, Assembler::REX_R);
1362           }
1363           emit_opcode(*cbuf, 0x89);
1364           encode_RegMem(*cbuf,
1365                         Matcher::_regEncode[src_first],
1366                         RSP_enc, 0x4, 0, offset,
1367                         false);
1368 #ifndef PRODUCT
1369         } else if (!do_size) {
1370           st->print("movl    [rsp + #%d], %s\t# spill",
1371                      offset,
1372                      Matcher::regName[src_first]);
1373 #endif
1374         }
1375         return
1376           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1377           ((Matcher::_regEncode[src_first] < 8)
1378            ? 3
1379            : 4); // REX
1380       }
1381     } else if (dst_first_rc == rc_int) {
1382       // gpr -> gpr
1383       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1384           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1385         // 64-bit
1386         if (cbuf) {
1387           if (Matcher::_regEncode[dst_first] < 8) {
1388             if (Matcher::_regEncode[src_first] < 8) {
1389               emit_opcode(*cbuf, Assembler::REX_W);
1390             } else {
1391               emit_opcode(*cbuf, Assembler::REX_WB);
1392             }
1393           } else {
1394             if (Matcher::_regEncode[src_first] < 8) {
1395               emit_opcode(*cbuf, Assembler::REX_WR);
1396             } else {
1397               emit_opcode(*cbuf, Assembler::REX_WRB);
1398             }
1399           }
1400           emit_opcode(*cbuf, 0x8B);
1401           emit_rm(*cbuf, 0x3,
1402                   Matcher::_regEncode[dst_first] & 7,
1403                   Matcher::_regEncode[src_first] & 7);
1404 #ifndef PRODUCT
1405         } else if (!do_size) {
1406           st->print("movq    %s, %s\t# spill",
1407                      Matcher::regName[dst_first],
1408                      Matcher::regName[src_first]);
1409 #endif
1410         }
1411         return 3; // REX
1412       } else {
1413         // 32-bit
1414         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1415         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1416         if (cbuf) {
1417           if (Matcher::_regEncode[dst_first] < 8) {
1418             if (Matcher::_regEncode[src_first] >= 8) {
1419               emit_opcode(*cbuf, Assembler::REX_B);
1420             }
1421           } else {
1422             if (Matcher::_regEncode[src_first] < 8) {
1423               emit_opcode(*cbuf, Assembler::REX_R);
1424             } else {
1425               emit_opcode(*cbuf, Assembler::REX_RB);
1426             }
1427           }
1428           emit_opcode(*cbuf, 0x8B);
1429           emit_rm(*cbuf, 0x3,
1430                   Matcher::_regEncode[dst_first] & 7,
1431                   Matcher::_regEncode[src_first] & 7);
1432 #ifndef PRODUCT
1433         } else if (!do_size) {
1434           st->print("movl    %s, %s\t# spill",
1435                      Matcher::regName[dst_first],
1436                      Matcher::regName[src_first]);
1437 #endif
1438         }
1439         return
1440           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1441           ? 2
1442           : 3; // REX
1443       }
1444     } else if (dst_first_rc == rc_float) {
1445       // gpr -> xmm
1446       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1447           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1448         // 64-bit
1449         if (cbuf) {
1450           emit_opcode(*cbuf, 0x66);
1451           if (Matcher::_regEncode[dst_first] < 8) {
1452             if (Matcher::_regEncode[src_first] < 8) {
1453               emit_opcode(*cbuf, Assembler::REX_W);
1454             } else {
1455               emit_opcode(*cbuf, Assembler::REX_WB);
1456             }
1457           } else {
1458             if (Matcher::_regEncode[src_first] < 8) {
1459               emit_opcode(*cbuf, Assembler::REX_WR);
1460             } else {
1461               emit_opcode(*cbuf, Assembler::REX_WRB);
1462             }
1463           }
1464           emit_opcode(*cbuf, 0x0F);
1465           emit_opcode(*cbuf, 0x6E);
1466           emit_rm(*cbuf, 0x3,
1467                   Matcher::_regEncode[dst_first] & 7,
1468                   Matcher::_regEncode[src_first] & 7);
1469 #ifndef PRODUCT
1470         } else if (!do_size) {
1471           st->print("movdq   %s, %s\t# spill",
1472                      Matcher::regName[dst_first],
1473                      Matcher::regName[src_first]);
1474 #endif
1475         }
1476         return 5; // REX
1477       } else {
1478         // 32-bit
1479         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1480         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1481         if (cbuf) {
1482           emit_opcode(*cbuf, 0x66);
1483           if (Matcher::_regEncode[dst_first] < 8) {
1484             if (Matcher::_regEncode[src_first] >= 8) {
1485               emit_opcode(*cbuf, Assembler::REX_B);
1486             }
1487           } else {
1488             if (Matcher::_regEncode[src_first] < 8) {
1489               emit_opcode(*cbuf, Assembler::REX_R);
1490             } else {
1491               emit_opcode(*cbuf, Assembler::REX_RB);
1492             }
1493           }
1494           emit_opcode(*cbuf, 0x0F);
1495           emit_opcode(*cbuf, 0x6E);
1496           emit_rm(*cbuf, 0x3,
1497                   Matcher::_regEncode[dst_first] & 7,
1498                   Matcher::_regEncode[src_first] & 7);
1499 #ifndef PRODUCT
1500         } else if (!do_size) {
1501           st->print("movdl   %s, %s\t# spill",
1502                      Matcher::regName[dst_first],
1503                      Matcher::regName[src_first]);
1504 #endif
1505         }
1506         return
1507           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1508           ? 4
1509           : 5; // REX
1510       }
1511     }
1512   } else if (src_first_rc == rc_float) {
1513     // xmm ->
1514     if (dst_first_rc == rc_stack) {
1515       // xmm -> mem
1516       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1517           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1518         // 64-bit
1519         int offset = ra_->reg2offset(dst_first);
1520         if (cbuf) {
1521           emit_opcode(*cbuf, 0xF2);
1522           if (Matcher::_regEncode[src_first] >= 8) {
1523               emit_opcode(*cbuf, Assembler::REX_R);
1524           }
1525           emit_opcode(*cbuf, 0x0F);
1526           emit_opcode(*cbuf, 0x11);
1527           encode_RegMem(*cbuf,
1528                         Matcher::_regEncode[src_first],
1529                         RSP_enc, 0x4, 0, offset,
1530                         false);
1531 #ifndef PRODUCT
1532         } else if (!do_size) {
1533           st->print("movsd   [rsp + #%d], %s\t# spill",
1534                      offset,
1535                      Matcher::regName[src_first]);
1536 #endif
1537         }
1538         return
1539           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1540           ((Matcher::_regEncode[src_first] < 8)
1541            ? 5
1542            : 6); // REX
1543       } else {
1544         // 32-bit
1545         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1546         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1547         int offset = ra_->reg2offset(dst_first);
1548         if (cbuf) {
1549           emit_opcode(*cbuf, 0xF3);
1550           if (Matcher::_regEncode[src_first] >= 8) {
1551               emit_opcode(*cbuf, Assembler::REX_R);
1552           }
1553           emit_opcode(*cbuf, 0x0F);
1554           emit_opcode(*cbuf, 0x11);
1555           encode_RegMem(*cbuf,
1556                         Matcher::_regEncode[src_first],
1557                         RSP_enc, 0x4, 0, offset,
1558                         false);
1559 #ifndef PRODUCT
1560         } else if (!do_size) {
1561           st->print("movss   [rsp + #%d], %s\t# spill",
1562                      offset,
1563                      Matcher::regName[src_first]);
1564 #endif
1565         }
1566         return
1567           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1568           ((Matcher::_regEncode[src_first] < 8)
1569            ? 5
1570            : 6); // REX
1571       }
1572     } else if (dst_first_rc == rc_int) {
1573       // xmm -> gpr
1574       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1575           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1576         // 64-bit
1577         if (cbuf) {
1578           emit_opcode(*cbuf, 0x66);
1579           if (Matcher::_regEncode[dst_first] < 8) {
1580             if (Matcher::_regEncode[src_first] < 8) {
1581               emit_opcode(*cbuf, Assembler::REX_W);
1582             } else {
1583               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1584             }
1585           } else {
1586             if (Matcher::_regEncode[src_first] < 8) {
1587               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1588             } else {
1589               emit_opcode(*cbuf, Assembler::REX_WRB);
1590             }
1591           }
1592           emit_opcode(*cbuf, 0x0F);
1593           emit_opcode(*cbuf, 0x7E);
1594           emit_rm(*cbuf, 0x3,
1595                   Matcher::_regEncode[src_first] & 7,
1596                   Matcher::_regEncode[dst_first] & 7);
1597 #ifndef PRODUCT
1598         } else if (!do_size) {
1599           st->print("movdq   %s, %s\t# spill",
1600                      Matcher::regName[dst_first],
1601                      Matcher::regName[src_first]);
1602 #endif
1603         }
1604         return 5; // REX
1605       } else {
1606         // 32-bit
1607         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1608         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1609         if (cbuf) {
1610           emit_opcode(*cbuf, 0x66);
1611           if (Matcher::_regEncode[dst_first] < 8) {
1612             if (Matcher::_regEncode[src_first] >= 8) {
1613               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1614             }
1615           } else {
1616             if (Matcher::_regEncode[src_first] < 8) {
1617               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1618             } else {
1619               emit_opcode(*cbuf, Assembler::REX_RB);
1620             }
1621           }
1622           emit_opcode(*cbuf, 0x0F);
1623           emit_opcode(*cbuf, 0x7E);
1624           emit_rm(*cbuf, 0x3,
1625                   Matcher::_regEncode[src_first] & 7,
1626                   Matcher::_regEncode[dst_first] & 7);
1627 #ifndef PRODUCT
1628         } else if (!do_size) {
1629           st->print("movdl   %s, %s\t# spill",
1630                      Matcher::regName[dst_first],
1631                      Matcher::regName[src_first]);
1632 #endif
1633         }
1634         return
1635           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1636           ? 4
1637           : 5; // REX
1638       }
1639     } else if (dst_first_rc == rc_float) {
1640       // xmm -> xmm
1641       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1642           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1643         // 64-bit
1644         if (cbuf) {
1645           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1646           if (Matcher::_regEncode[dst_first] < 8) {
1647             if (Matcher::_regEncode[src_first] >= 8) {
1648               emit_opcode(*cbuf, Assembler::REX_B);
1649             }
1650           } else {
1651             if (Matcher::_regEncode[src_first] < 8) {
1652               emit_opcode(*cbuf, Assembler::REX_R);
1653             } else {
1654               emit_opcode(*cbuf, Assembler::REX_RB);
1655             }
1656           }
1657           emit_opcode(*cbuf, 0x0F);
1658           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1659           emit_rm(*cbuf, 0x3,
1660                   Matcher::_regEncode[dst_first] & 7,
1661                   Matcher::_regEncode[src_first] & 7);
1662 #ifndef PRODUCT
1663         } else if (!do_size) {
1664           st->print("%s  %s, %s\t# spill",
1665                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1666                      Matcher::regName[dst_first],
1667                      Matcher::regName[src_first]);
1668 #endif
1669         }
1670         return
1671           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1672           ? 4
1673           : 5; // REX
1674       } else {
1675         // 32-bit
1676         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1677         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1678         if (cbuf) {
1679           if (!UseXmmRegToRegMoveAll)
1680             emit_opcode(*cbuf, 0xF3);
1681           if (Matcher::_regEncode[dst_first] < 8) {
1682             if (Matcher::_regEncode[src_first] >= 8) {
1683               emit_opcode(*cbuf, Assembler::REX_B);
1684             }
1685           } else {
1686             if (Matcher::_regEncode[src_first] < 8) {
1687               emit_opcode(*cbuf, Assembler::REX_R);
1688             } else {
1689               emit_opcode(*cbuf, Assembler::REX_RB);
1690             }
1691           }
1692           emit_opcode(*cbuf, 0x0F);
1693           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1694           emit_rm(*cbuf, 0x3,
1695                   Matcher::_regEncode[dst_first] & 7,
1696                   Matcher::_regEncode[src_first] & 7);
1697 #ifndef PRODUCT
1698         } else if (!do_size) {
1699           st->print("%s  %s, %s\t# spill",
1700                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1701                      Matcher::regName[dst_first],
1702                      Matcher::regName[src_first]);
1703 #endif
1704         }
1705         return
1706           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1707           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1708           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1709       }
1710     }
1711   }
1712 
1713   assert(0," foo ");
1714   Unimplemented();
1715 
1716   return 0;
1717 }
1718 
1719 #ifndef PRODUCT
1720 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1721 {
1722   implementation(NULL, ra_, false, st);
1723 }
1724 #endif
1725 
1726 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1727 {
1728   implementation(&cbuf, ra_, false, NULL);
1729 }
1730 
1731 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1732 {
1733   return implementation(NULL, ra_, true, NULL);
1734 }
1735 
1736 //=============================================================================
1737 #ifndef PRODUCT
1738 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1739 {
1740   st->print("nop \t# %d bytes pad for loops and calls", _count);
1741 }
1742 #endif
1743 
1744 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1745 {
1746   MacroAssembler _masm(&cbuf);
1747   __ nop(_count);
1748 }
1749 
1750 uint MachNopNode::size(PhaseRegAlloc*) const
1751 {
1752   return _count;
1753 }
1754 
1755 
1756 //=============================================================================
1757 #ifndef PRODUCT
1758 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1759 {
1760   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1761   int reg = ra_->get_reg_first(this);
1762   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1763             Matcher::regName[reg], offset);
1764 }
1765 #endif
1766 
1767 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1768 {
1769   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1770   int reg = ra_->get_encode(this);
1771   if (offset >= 0x80) {
1772     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1773     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1774     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1775     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1776     emit_d32(cbuf, offset);
1777   } else {
1778     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1779     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1780     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1781     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1782     emit_d8(cbuf, offset);
1783   }
1784 }
1785 
1786 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1787 {
1788   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1789   return (offset < 0x80) ? 5 : 8; // REX
1790 }
1791 
1792 //=============================================================================
1793 
1794 // emit call stub, compiled java to interpreter
1795 void emit_java_to_interp(CodeBuffer& cbuf)
1796 {
1797   // Stub is fixed up when the corresponding call is converted from
1798   // calling compiled code to calling interpreted code.
1799   // movq rbx, 0
1800   // jmp -5 # to self
1801 
1802   address mark = cbuf.insts_mark();  // get mark within main instrs section
1803 
1804   // Note that the code buffer's insts_mark is always relative to insts.
1805   // That's why we must use the macroassembler to generate a stub.
1806   MacroAssembler _masm(&cbuf);
1807 
1808   address base =
1809   __ start_a_stub(Compile::MAX_stubs_size);
1810   if (base == NULL)  return;  // CodeBuffer::expand failed
1811   // static stub relocation stores the instruction address of the call
1812   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1813   // static stub relocation also tags the methodOop in the code-stream.
1814   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1815   // This is recognized as unresolved by relocs/nativeinst/ic code
1816   __ jump(RuntimeAddress(__ pc()));
1817 
1818   // Update current stubs pointer and restore insts_end.
1819   __ end_a_stub();
1820 }
1821 
// size of call stub, compiled java to interpreter
1823 uint size_java_to_interp()
1824 {
1825   return 15;  // movq (1+1+8); jmp (1+4)
1826 }
1827 
// relocation entries for call stub, compiled java to interpreter
1829 uint reloc_java_to_interp()
1830 {
1831   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1832 }
1833 
1834 //=============================================================================
1835 #ifndef PRODUCT
1836 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1837 {
1838   if (UseCompressedOops) {
1839     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1840     if (Universe::narrow_oop_shift() != 0) {
1841       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1842     }
1843     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1844   } else {
1845     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1846                  "# Inline cache check");
1847   }
1848   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1849   st->print_cr("\tnop\t# nops to align entry point");
1850 }
1851 #endif
1852 
1853 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1854 {
1855   MacroAssembler masm(&cbuf);
1856   uint insts_size = cbuf.insts_size();
1857   if (UseCompressedOops) {
1858     masm.load_klass(rscratch1, j_rarg0);
1859     masm.cmpptr(rax, rscratch1);
1860   } else {
1861     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1862   }
1863 
1864   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1865 
1866   /* WARNING these NOPs are critical so that verified entry point is properly
1867      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1868   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1869   if (OptoBreakpoint) {
1870     // Leave space for int3
1871     nops_cnt -= 1;
1872   }
1873   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1874   if (nops_cnt > 0)
1875     masm.nop(nops_cnt);
1876 }
1877 
1878 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1879 {
1880   return MachNode::size(ra_); // too many variables; just compute it
1881                               // the hard way
1882 }
1883 
1884 
1885 //=============================================================================
1886 uint size_exception_handler()
1887 {
1888   // NativeCall instruction size is the same as NativeJump.
1889   // Note that this value is also credited (in output.cpp) to
1890   // the size of the code section.
1891   return NativeJump::instruction_size;
1892 }
1893 
1894 // Emit exception handler code.
1895 int emit_exception_handler(CodeBuffer& cbuf)
1896 {
1897 
1898   // Note that the code buffer's insts_mark is always relative to insts.
1899   // That's why we must use the macroassembler to generate a handler.
1900   MacroAssembler _masm(&cbuf);
1901   address base =
1902   __ start_a_stub(size_exception_handler());
1903   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1904   int offset = __ offset();
1905   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1906   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1907   __ end_a_stub();
1908   return offset;
1909 }
1910 
1911 uint size_deopt_handler()
1912 {
1913   // three 5 byte instructions
1914   return 15;
1915 }
1916 
1917 // Emit deopt handler code.
1918 int emit_deopt_handler(CodeBuffer& cbuf)
1919 {
1920 
1921   // Note that the code buffer's insts_mark is always relative to insts.
1922   // That's why we must use the macroassembler to generate a handler.
1923   MacroAssembler _masm(&cbuf);
1924   address base =
1925   __ start_a_stub(size_deopt_handler());
1926   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1927   int offset = __ offset();
1928   address the_pc = (address) __ pc();
1929   Label next;
1930   // push a "the_pc" on the stack without destroying any registers
1931   // as they all may be live.
1932 
1933   // push address of "next"
1934   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1935   __ bind(next);
1936   // adjust it so it matches "the_pc"
1937   __ subptr(Address(rsp, 0), __ offset() - offset);
1938   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1939   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1940   __ end_a_stub();
1941   return offset;
1942 }
1943 
1944 
1945 const bool Matcher::match_rule_supported(int opcode) {
1946   if (!has_match_rule(opcode))
1947     return false;
1948 
1949   return true;  // Per default match rules are supported.
1950 }
1951 
1952 int Matcher::regnum_to_fpu_offset(int regnum)
1953 {
1954   return regnum - 32; // The FP registers are in the second chunk
1955 }
1956 
1957 // This is UltraSparc specific, true just means we have fast l2f conversion
1958 const bool Matcher::convL2FSupported(void) {
1959   return true;
1960 }
1961 
1962 // Vector width in bytes
1963 const uint Matcher::vector_width_in_bytes(void) {
1964   return 8;
1965 }
1966 
1967 // Vector ideal reg
1968 const uint Matcher::vector_ideal_reg(void) {
1969   return Op_RegD;
1970 }
1971 
1972 // Is this branch offset short enough that a short branch can be used?
1973 //
1974 // NOTE: If the platform does not provide any short branch variants, then
1975 //       this method should return false for offset 0.
1976 bool Matcher::is_short_branch_offset(int rule, int offset) {
1977   // the short version of jmpConUCF2 contains multiple branches,
1978   // making the reach slightly less
1979   if (rule == jmpConUCF2_rule)
1980     return (-126 <= offset && offset <= 125);
1981   return (-128 <= offset && offset <= 127);
1982 }
1983 
1984 const bool Matcher::isSimpleConstant64(jlong value) {
1985   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1986   //return value == (int) value;  // Cf. storeImmL and immL32.
1987 
1988   // Probably always true, even if a temp register is required.
1989   return true;
1990 }
1991 
1992 // The ecx parameter to rep stosq for the ClearArray node is in words.
1993 const bool Matcher::init_array_count_is_in_bytes = false;
1994 
1995 // Threshold size for cleararray.
1996 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1997 
1998 // Should the Matcher clone shifts on addressing modes, expecting them
1999 // to be subsumed into complex addressing expressions or compute them
2000 // into registers?  True for Intel but false for most RISCs
2001 const bool Matcher::clone_shift_expressions = true;
2002 
2003 bool Matcher::narrow_oop_use_complex_address() {
2004   assert(UseCompressedOops, "only for compressed oops code");
2005   return (LogMinObjAlignmentInBytes <= 3);
2006 }
2007 
2008 // Is it better to copy float constants, or load them directly from
2009 // memory?  Intel can load a float constant from a direct address,
2010 // requiring no extra registers.  Most RISCs will have to materialize
2011 // an address into a register first, so they would do better to copy
2012 // the constant from stack.
2013 const bool Matcher::rematerialize_float_constants = true; // XXX
2014 
2015 // If CPU can load and store mis-aligned doubles directly then no
2016 // fixup is needed.  Else we split the double into 2 integer pieces
2017 // and move it piece-by-piece.  Only happens when passing doubles into
2018 // C code as the Java calling convention forces doubles to be aligned.
2019 const bool Matcher::misaligned_doubles_ok = true;
2020 
2021 // No-op on amd64
2022 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2023 
2024 // Advertise here if the CPU requires explicit rounding operations to
2025 // implement the UseStrictFP mode.
2026 const bool Matcher::strict_fp_requires_explicit_rounding = true;
2027 
// Are floats converted to double when stored to stack during deoptimization?
// On x64 they are stored without conversion, so normal access can be used.
2030 bool Matcher::float_in_double() { return false; }
2031 
2032 // Do ints take an entire long register or just half?
2033 const bool Matcher::int_in_long = true;
2034 
2035 // Return whether or not this register is ever used as an argument.
2036 // This function is used on startup to build the trampoline stubs in
2037 // generateOptoStub.  Registers not mentioned will be killed by the VM
2038 // call in the trampoline, and arguments in those registers not be
2039 // available to the callee.
2040 bool Matcher::can_be_java_arg(int reg)
2041 {
2042   return
2043     reg ==  RDI_num || reg ==  RDI_H_num ||
2044     reg ==  RSI_num || reg ==  RSI_H_num ||
2045     reg ==  RDX_num || reg ==  RDX_H_num ||
2046     reg ==  RCX_num || reg ==  RCX_H_num ||
2047     reg ==   R8_num || reg ==   R8_H_num ||
2048     reg ==   R9_num || reg ==   R9_H_num ||
2049     reg ==  R12_num || reg ==  R12_H_num ||
2050     reg == XMM0_num || reg == XMM0_H_num ||
2051     reg == XMM1_num || reg == XMM1_H_num ||
2052     reg == XMM2_num || reg == XMM2_H_num ||
2053     reg == XMM3_num || reg == XMM3_H_num ||
2054     reg == XMM4_num || reg == XMM4_H_num ||
2055     reg == XMM5_num || reg == XMM5_H_num ||
2056     reg == XMM6_num || reg == XMM6_H_num ||
2057     reg == XMM7_num || reg == XMM7_H_num;
2058 }
2059 
2060 bool Matcher::is_spillable_arg(int reg)
2061 {
2062   return can_be_java_arg(reg);
2063 }
2064 
2065 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2066   // In 64 bit mode a code which use multiply when
2067   // devisor is constant is faster than hardware
2068   // DIV instruction (it uses MulHiL).
2069   return false;
2070 }
2071 
2072 // Register for DIVI projection of divmodI
2073 RegMask Matcher::divI_proj_mask() {
2074   return INT_RAX_REG_mask;
2075 }
2076 
2077 // Register for MODI projection of divmodI
2078 RegMask Matcher::modI_proj_mask() {
2079   return INT_RDX_REG_mask;
2080 }
2081 
2082 // Register for DIVL projection of divmodL
2083 RegMask Matcher::divL_proj_mask() {
2084   return LONG_RAX_REG_mask;
2085 }
2086 
2087 // Register for MODL projection of divmodL
2088 RegMask Matcher::modL_proj_mask() {
2089   return LONG_RDX_REG_mask;
2090 }
2091 
2092 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2093   return PTR_RBP_REG_mask;
2094 }
2095 
2096 static Address build_address(int b, int i, int s, int d) {
2097   Register index = as_Register(i);
2098   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2099   if (index == rsp) {
2100     index = noreg;
2101     scale = Address::no_scale;
2102   }
2103   Address addr(as_Register(b), index, scale, d);
2104   return addr;
2105 }
2106 
2107 %}
2108 
2109 //----------ENCODING BLOCK-----------------------------------------------------
2110 // This block specifies the encoding classes used by the compiler to
2111 // output byte streams.  Encoding classes are parameterized macros
2112 // used by Machine Instruction Nodes in order to generate the bit
2113 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
2116 // COND_INTER.  REG_INTER causes an operand to generate a function
2117 // which returns its register number when queried.  CONST_INTER causes
2118 // an operand to generate a function which returns the value of the
2119 // constant when queried.  MEMORY_INTER causes an operand to generate
2120 // four functions which return the Base Register, the Index Register,
2121 // the Scale Value, and the Offset Value of the operand when queried.
2122 // COND_INTER causes an operand to generate six functions which return
2123 // the encoding code (ie - encoding bits for the instruction)
2124 // associated with each basic boolean condition for a conditional
2125 // instruction.
2126 //
2127 // Instructions specify two basic values for encoding.  Again, a
2128 // function is available to check if the constant displacement is an
2129 // oop. They use the ins_encode keyword to specify their encoding
2130 // classes (which must be a sequence of enc_class names, and their
2131 // parameters, specified in the encoding block), and they use the
2132 // opcode keyword to specify, in order, their primary, secondary, and
2133 // tertiary opcode.  Only the opcode sections which a particular
2134 // instruction needs for encoding need to be specified.
2135 encode %{
2136   // Build emit functions for each basic byte or larger field in the
2137   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2138   // from C++ code in the enc_class source block.  Emit functions will
2139   // live in the main source block for now.  In future, we can
2140   // generalize this by adding a syntax that specifies the sizes of
2141   // fields in an order, so that the adlc can build the emit functions
2142   // automagically
2143 
2144   // Emit primary opcode
2145   enc_class OpcP
2146   %{
2147     emit_opcode(cbuf, $primary);
2148   %}
2149 
2150   // Emit secondary opcode
2151   enc_class OpcS
2152   %{
2153     emit_opcode(cbuf, $secondary);
2154   %}
2155 
2156   // Emit tertiary opcode
2157   enc_class OpcT
2158   %{
2159     emit_opcode(cbuf, $tertiary);
2160   %}
2161 
2162   // Emit opcode directly
2163   enc_class Opcode(immI d8)
2164   %{
2165     emit_opcode(cbuf, $d8$$constant);
2166   %}
2167 
2168   // Emit size prefix
2169   enc_class SizePrefix
2170   %{
2171     emit_opcode(cbuf, 0x66);
2172   %}
2173 
2174   enc_class reg(rRegI reg)
2175   %{
2176     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2177   %}
2178 
2179   enc_class reg_reg(rRegI dst, rRegI src)
2180   %{
2181     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2182   %}
2183 
2184   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2185   %{
2186     emit_opcode(cbuf, $opcode$$constant);
2187     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2188   %}
2189 
2190   enc_class cmpfp_fixup()
2191   %{
2192     // jnp,s exit
2193     emit_opcode(cbuf, 0x7B);
2194     emit_d8(cbuf, 0x0A);
2195 
2196     // pushfq
2197     emit_opcode(cbuf, 0x9C);
2198 
2199     // andq $0xffffff2b, (%rsp)
2200     emit_opcode(cbuf, Assembler::REX_W);
2201     emit_opcode(cbuf, 0x81);
2202     emit_opcode(cbuf, 0x24);
2203     emit_opcode(cbuf, 0x24);
2204     emit_d32(cbuf, 0xffffff2b);
2205 
2206     // popfq
2207     emit_opcode(cbuf, 0x9D);
2208 
2209     // nop (target for branch to avoid branch to branch)
2210     emit_opcode(cbuf, 0x90);
2211   %}
2212 
2213   enc_class cmpfp3(rRegI dst)
2214   %{
2215     int dstenc = $dst$$reg;
2216 
2217     // movl $dst, -1
2218     if (dstenc >= 8) {
2219       emit_opcode(cbuf, Assembler::REX_B);
2220     }
2221     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2222     emit_d32(cbuf, -1);
2223 
2224     // jp,s done
2225     emit_opcode(cbuf, 0x7A);
2226     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2227 
2228     // jb,s done
2229     emit_opcode(cbuf, 0x72);
2230     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2231 
2232     // setne $dst
2233     if (dstenc >= 4) {
2234       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2235     }
2236     emit_opcode(cbuf, 0x0F);
2237     emit_opcode(cbuf, 0x95);
2238     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2239 
2240     // movzbl $dst, $dst
2241     if (dstenc >= 4) {
2242       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2243     }
2244     emit_opcode(cbuf, 0x0F);
2245     emit_opcode(cbuf, 0xB6);
2246     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2247   %}
2248 
2249   enc_class cdql_enc(no_rax_rdx_RegI div)
2250   %{
2251     // Full implementation of Java idiv and irem; checks for
2252     // special case as described in JVM spec., p.243 & p.271.
2253     //
2254     //         normal case                           special case
2255     //
2256     // input : rax: dividend                         min_int
2257     //         reg: divisor                          -1
2258     //
2259     // output: rax: quotient  (= rax idiv reg)       min_int
2260     //         rdx: remainder (= rax irem reg)       0
2261     //
2262     //  Code sequnce:
2263     //
2264     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2265     //    5:   75 07/08                jne    e <normal>
2266     //    7:   33 d2                   xor    %edx,%edx
2267     //  [div >= 8 -> offset + 1]
2268     //  [REX_B]
2269     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2270     //    c:   74 03/04                je     11 <done>
2271     // 000000000000000e <normal>:
2272     //    e:   99                      cltd
2273     //  [div >= 8 -> offset + 1]
2274     //  [REX_B]
2275     //    f:   f7 f9                   idiv   $div
2276     // 0000000000000011 <done>:
2277 
2278     // cmp    $0x80000000,%eax
2279     emit_opcode(cbuf, 0x3d);
2280     emit_d8(cbuf, 0x00);
2281     emit_d8(cbuf, 0x00);
2282     emit_d8(cbuf, 0x00);
2283     emit_d8(cbuf, 0x80);
2284 
2285     // jne    e <normal>
2286     emit_opcode(cbuf, 0x75);
2287     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2288 
2289     // xor    %edx,%edx
2290     emit_opcode(cbuf, 0x33);
2291     emit_d8(cbuf, 0xD2);
2292 
2293     // cmp    $0xffffffffffffffff,%ecx
2294     if ($div$$reg >= 8) {
2295       emit_opcode(cbuf, Assembler::REX_B);
2296     }
2297     emit_opcode(cbuf, 0x83);
2298     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2299     emit_d8(cbuf, 0xFF);
2300 
2301     // je     11 <done>
2302     emit_opcode(cbuf, 0x74);
2303     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2304 
2305     // <normal>
2306     // cltd
2307     emit_opcode(cbuf, 0x99);
2308 
2309     // idivl (note: must be emitted by the user of this rule)
2310     // <done>
2311   %}
2312 
2313   enc_class cdqq_enc(no_rax_rdx_RegL div)
2314   %{
2315     // Full implementation of Java ldiv and lrem; checks for
2316     // special case as described in JVM spec., p.243 & p.271.
2317     //
2318     //         normal case                           special case
2319     //
2320     // input : rax: dividend                         min_long
2321     //         reg: divisor                          -1
2322     //
2323     // output: rax: quotient  (= rax idiv reg)       min_long
2324     //         rdx: remainder (= rax irem reg)       0
2325     //
2326     //  Code sequnce:
2327     //
2328     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2329     //    7:   00 00 80
2330     //    a:   48 39 d0                cmp    %rdx,%rax
2331     //    d:   75 08                   jne    17 <normal>
2332     //    f:   33 d2                   xor    %edx,%edx
2333     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2334     //   15:   74 05                   je     1c <done>
2335     // 0000000000000017 <normal>:
2336     //   17:   48 99                   cqto
2337     //   19:   48 f7 f9                idiv   $div
2338     // 000000000000001c <done>:
2339 
2340     // mov    $0x8000000000000000,%rdx
2341     emit_opcode(cbuf, Assembler::REX_W);
2342     emit_opcode(cbuf, 0xBA);
2343     emit_d8(cbuf, 0x00);
2344     emit_d8(cbuf, 0x00);
2345     emit_d8(cbuf, 0x00);
2346     emit_d8(cbuf, 0x00);
2347     emit_d8(cbuf, 0x00);
2348     emit_d8(cbuf, 0x00);
2349     emit_d8(cbuf, 0x00);
2350     emit_d8(cbuf, 0x80);
2351 
2352     // cmp    %rdx,%rax
2353     emit_opcode(cbuf, Assembler::REX_W);
2354     emit_opcode(cbuf, 0x39);
2355     emit_d8(cbuf, 0xD0);
2356 
2357     // jne    17 <normal>
2358     emit_opcode(cbuf, 0x75);
2359     emit_d8(cbuf, 0x08);
2360 
2361     // xor    %edx,%edx
2362     emit_opcode(cbuf, 0x33);
2363     emit_d8(cbuf, 0xD2);
2364 
2365     // cmp    $0xffffffffffffffff,$div
2366     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2367     emit_opcode(cbuf, 0x83);
2368     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2369     emit_d8(cbuf, 0xFF);
2370 
2371     // je     1e <done>
2372     emit_opcode(cbuf, 0x74);
2373     emit_d8(cbuf, 0x05);
2374 
2375     // <normal>
2376     // cqto
2377     emit_opcode(cbuf, Assembler::REX_W);
2378     emit_opcode(cbuf, 0x99);
2379 
2380     // idivq (note: must be emitted by the user of this rule)
2381     // <done>
2382   %}
2383 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2385   enc_class OpcSE(immI imm)
2386   %{
2387     // Emit primary opcode and set sign-extend bit
2388     // Check for 8-bit immediate, and set sign extend bit in opcode
2389     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2390       emit_opcode(cbuf, $primary | 0x02);
2391     } else {
2392       // 32-bit immediate
2393       emit_opcode(cbuf, $primary);
2394     }
2395   %}
2396 
2397   enc_class OpcSErm(rRegI dst, immI imm)
2398   %{
2399     // OpcSEr/m
2400     int dstenc = $dst$$reg;
2401     if (dstenc >= 8) {
2402       emit_opcode(cbuf, Assembler::REX_B);
2403       dstenc -= 8;
2404     }
2405     // Emit primary opcode and set sign-extend bit
2406     // Check for 8-bit immediate, and set sign extend bit in opcode
2407     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2408       emit_opcode(cbuf, $primary | 0x02);
2409     } else {
2410       // 32-bit immediate
2411       emit_opcode(cbuf, $primary);
2412     }
2413     // Emit r/m byte with secondary opcode, after primary opcode.
2414     emit_rm(cbuf, 0x3, $secondary, dstenc);
2415   %}
2416 
2417   enc_class OpcSErm_wide(rRegL dst, immI imm)
2418   %{
2419     // OpcSEr/m
2420     int dstenc = $dst$$reg;
2421     if (dstenc < 8) {
2422       emit_opcode(cbuf, Assembler::REX_W);
2423     } else {
2424       emit_opcode(cbuf, Assembler::REX_WB);
2425       dstenc -= 8;
2426     }
2427     // Emit primary opcode and set sign-extend bit
2428     // Check for 8-bit immediate, and set sign extend bit in opcode
2429     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2430       emit_opcode(cbuf, $primary | 0x02);
2431     } else {
2432       // 32-bit immediate
2433       emit_opcode(cbuf, $primary);
2434     }
2435     // Emit r/m byte with secondary opcode, after primary opcode.
2436     emit_rm(cbuf, 0x3, $secondary, dstenc);
2437   %}
2438 
2439   enc_class Con8or32(immI imm)
2440   %{
2441     // Check for 8-bit immediate, and set sign extend bit in opcode
2442     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2443       $$$emit8$imm$$constant;
2444     } else {
2445       // 32-bit immediate
2446       $$$emit32$imm$$constant;
2447     }
2448   %}
2449 
2450   enc_class Lbl(label labl)
2451   %{
2452     // JMP, CALL
2453     Label* l = $labl$$label;
2454     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2455   %}
2456 
2457   enc_class LblShort(label labl)
2458   %{
2459     // JMP, CALL
2460     Label* l = $labl$$label;
2461     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2462     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2463     emit_d8(cbuf, disp);
2464   %}
2465 
2466   enc_class opc2_reg(rRegI dst)
2467   %{
2468     // BSWAP
2469     emit_cc(cbuf, $secondary, $dst$$reg);
2470   %}
2471 
2472   enc_class opc3_reg(rRegI dst)
2473   %{
2474     // BSWAP
2475     emit_cc(cbuf, $tertiary, $dst$$reg);
2476   %}
2477 
2478   enc_class reg_opc(rRegI div)
2479   %{
2480     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2481     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2482   %}
2483 
2484   enc_class Jcc(cmpOp cop, label labl)
2485   %{
2486     // JCC
2487     Label* l = $labl$$label;
2488     $$$emit8$primary;
2489     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2490     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2491   %}
2492 
2493   enc_class JccShort (cmpOp cop, label labl)
2494   %{
2495   // JCC
2496     Label *l = $labl$$label;
2497     emit_cc(cbuf, $primary, $cop$$cmpcode);
2498     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2499     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2500     emit_d8(cbuf, disp);
2501   %}
2502 
2503   enc_class enc_cmov(cmpOp cop)
2504   %{
2505     // CMOV
2506     $$$emit8$primary;
2507     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2508   %}
2509 
2510   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2511   %{
         // Conditional float move built from a short branch over an
         // unconditional register-to-register move.
2512     // Invert sense of branch from sense of cmov
2513     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
         // The branch distance equals the byte length of the move emitted below:
         // 3 bytes (no 0xF3 prefix) or 4 bytes (with it), plus one more byte
         // when either register encoding is >= 8 and a REX prefix is emitted.
2514     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2515                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2516                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2517     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2518     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
         // REX prefix selection: none when both encodings are < 8.
2519     if ($dst$$reg < 8) {
2520       if ($src$$reg >= 8) {
2521         emit_opcode(cbuf, Assembler::REX_B);
2522       }
2523     } else {
2524       if ($src$$reg < 8) {
2525         emit_opcode(cbuf, Assembler::REX_R);
2526       } else {
2527         emit_opcode(cbuf, Assembler::REX_RB);
2528       }
2529     }
2530     emit_opcode(cbuf, 0x0F);
2531     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2532     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2533   %}
2534 
2535   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2536   %{
         // Conditional double move built from a short branch over an
         // unconditional register-to-register move.
2537     // Invert sense of branch from sense of cmov
2538     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
         // The move below is always prefix + 0x0F + opcode + R/M = 4 bytes, or
         // 5 bytes when a REX prefix is emitted for an encoding >= 8.
2539     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2540 
2541     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2542     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
         // REX prefix selection: none when both encodings are < 8.
2543     if ($dst$$reg < 8) {
2544       if ($src$$reg >= 8) {
2545         emit_opcode(cbuf, Assembler::REX_B);
2546       }
2547     } else {
2548       if ($src$$reg < 8) {
2549         emit_opcode(cbuf, Assembler::REX_R);
2550       } else {
2551         emit_opcode(cbuf, Assembler::REX_RB);
2552       }
2553     }
2554     emit_opcode(cbuf, 0x0F);
2555     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2556     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2557   %}
2558 
2559   enc_class enc_PartialSubtypeCheck()
2560   %{
         // Emits the slow-path subtype check using fixed registers: RSI holds
         // the sub class, RAX the super class, RCX is a temp and RDI receives
         // the result.  The success label is NULL and the failure label is
         // 'miss', bound at the end of this encoding.  When the instruction's
         // primary opcode is non-zero, RDI is cleared before 'miss' is bound.
2561     Register Rrdi = as_Register(RDI_enc); // result register
2562     Register Rrax = as_Register(RAX_enc); // super class
2563     Register Rrcx = as_Register(RCX_enc); // killed
2564     Register Rrsi = as_Register(RSI_enc); // sub class
2565     Label miss;
2566     const bool set_cond_codes = true;
2567 
2568     MacroAssembler _masm(&cbuf);
         // Pass the named constant rather than a second hard-coded literal so
         // the declaration above is actually used.
2569     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2570                                      NULL, &miss,
2571                                      /*set_cond_codes:*/ set_cond_codes);
2572     if ($primary) {
2573       __ xorptr(Rrdi, Rrdi);
2574     }
2575     __ bind(miss);
2576   %}
2577 
2578   enc_class Java_To_Interpreter(method meth)
2579   %{
         // Emits the primary opcode followed by a 32-bit displacement to meth,
         // recorded with a runtime-call relocation.
2580     // CALL Java_To_Interpreter
2581     // This is the instruction starting address for relocation info.
2582     cbuf.set_insts_mark();
2583     $$$emit8$primary;
2584     // CALL directly to the runtime
         // Displacement = target - (current end of instructions + 4), i.e. it
         // is relative to the address just past the 4-byte displacement field.
2585     emit_d32_reloc(cbuf,
2586                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2587                    runtime_call_Relocation::spec(),
2588                    RELOC_DISP32);
2589   %}
2590 
2591   enc_class preserve_SP %{
         // Copies RSP into rbp_mh_SP_save.  The off0/off1 bookkeeping checks
         // that the number of bytes emitted matches preserve_SP_size().
         // NOTE(review): off0/off1 are declared under debug_only; presumably
         // the assert is compiled out in the same builds -- confirm.
2592     debug_only(int off0 = cbuf.insts_size());
2593     MacroAssembler _masm(&cbuf);
2594     // RBP is preserved across all calls, even compiled calls.
2595     // Use it to preserve RSP in places where the callee might change the SP.
2596     __ movptr(rbp_mh_SP_save, rsp);
2597     debug_only(int off1 = cbuf.insts_size());
2598     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2599   %}
2600 
2601   enc_class restore_SP %{
         // Counterpart of preserve_SP: copies rbp_mh_SP_save back into RSP.
2602     MacroAssembler _masm(&cbuf);
2603     __ movptr(rsp, rbp_mh_SP_save);
2604   %}
2605 
2606   enc_class Java_Static_Call(method meth)
2607   %{
         // Emits the primary opcode and a 32-bit displacement to meth.  The
         // relocation type depends on the node: runtime call when _method is
         // not set, optimized-virtual call when _optimized_virtual is set,
         // static call otherwise.  When _method is set, a java-to-interpreter
         // stub is emitted afterwards.
2608     // JAVA STATIC CALL
2609     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2610     // determine who we intended to call.
2611     cbuf.set_insts_mark();
2612     $$$emit8$primary;
2613 
         // All three branches compute the same displacement (relative to the
         // address just past the 4-byte field); only the relocation differs.
2614     if (!_method) {
2615       emit_d32_reloc(cbuf,
2616                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2617                      runtime_call_Relocation::spec(),
2618                      RELOC_DISP32);
2619     } else if (_optimized_virtual) {
2620       emit_d32_reloc(cbuf,
2621                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2622                      opt_virtual_call_Relocation::spec(),
2623                      RELOC_DISP32);
2624     } else {
2625       emit_d32_reloc(cbuf,
2626                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2627                      static_call_Relocation::spec(),
2628                      RELOC_DISP32);
2629     }
2630     if (_method) {
2631       // Emit stub for static call
2632       emit_java_to_interp(cbuf);
2633     }
2634   %}
2635 
2636   enc_class Java_Dynamic_Call(method meth)
2637   %{
         // Emits two instructions: a 64-bit immediate load into RAX of
         // Universe::non_oop_word() (with an oop relocation), then the call
         // itself with a virtual-call relocation that refers back to the
         // address of the immediate load.
2638     // JAVA DYNAMIC CALL
2639     // !!!!!
2640     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2641     // emit_call_dynamic_prologue( cbuf );
2642     cbuf.set_insts_mark();
2643 
2644     // movq rax, -1
2645     emit_opcode(cbuf, Assembler::REX_W);
2646     emit_opcode(cbuf, 0xB8 | RAX_enc);
2647     emit_d64_reloc(cbuf,
2648                    (int64_t) Universe::non_oop_word(),
2649                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
         // Capture the mark set above (start of the immediate load) before it
         // is overwritten by the second set_insts_mark() below.
2650     address virtual_call_oop_addr = cbuf.insts_mark();
2651     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2652     // who we intended to call.
2653     cbuf.set_insts_mark();
2654     $$$emit8$primary;
2655     emit_d32_reloc(cbuf,
2656                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2657                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2658                    RELOC_DISP32);
2659   %}
2660 
2661   enc_class Java_Compiled_Call(method meth)
2662   %{
         // Emits an indirect call through RAX plus the from_compiled offset of
         // methodOopDesc: primary opcode, an R/M byte carrying the secondary
         // opcode with RAX as base, then an 8-bit or 32-bit displacement.
2663     // JAVA COMPILED CALL
2664     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2665 
2666     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2667     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2668 
2669     // callq *disp(%rax)
2670     cbuf.set_insts_mark();
2671     $$$emit8$primary;
         // Use the one-byte displacement form only when disp fits in a signed
         // byte on both sides; otherwise emit_d8 would truncate the value.
2672     if (-0x80 <= disp && disp < 0x80) {
2673       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2674       emit_d8(cbuf, disp); // Displacement
2675     } else {
2676       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2677       emit_d32(cbuf, disp); // Displacement
2678     }
2679   %}
2680 
2681   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2682   %{
         // Emits: optional REX_B (when dst's encoding is >= 8), the primary
         // opcode, an R/M byte (mode 0x3, secondary opcode, low dst bits) and
         // the shift constant as one byte.
2683     // SAL, SAR, SHR
2684     int dstenc = $dst$$reg;
2685     if (dstenc >= 8) {
2686       emit_opcode(cbuf, Assembler::REX_B);
2687       dstenc -= 8;
2688     }
2689     $$$emit8$primary;
2690     emit_rm(cbuf, 0x3, $secondary, dstenc);
2691     $$$emit8$shift$$constant;
2692   %}
2693 
2694   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2695   %{
         // Wide variant of reg_opc_imm: a REX prefix is always emitted (REX_W,
         // or REX_WB when dst's encoding is >= 8), followed by the primary
         // opcode, the R/M byte and the one-byte shift constant.
2696     // SAL, SAR, SHR
2697     int dstenc = $dst$$reg;
2698     if (dstenc < 8) {
2699       emit_opcode(cbuf, Assembler::REX_W);
2700     } else {
2701       emit_opcode(cbuf, Assembler::REX_WB);
2702       dstenc -= 8;
2703     }
2704     $$$emit8$primary;
2705     emit_rm(cbuf, 0x3, $secondary, dstenc);
2706     $$$emit8$shift$$constant;
2707   %}
2708 
2709   enc_class load_immI(rRegI dst, immI src)
2710   %{
         // Loads a 32-bit immediate into dst: optional REX_B (encoding >= 8),
         // opcode 0xB8 with the low register bits OR-ed in, then the constant
         // as four bytes.
2711     int dstenc = $dst$$reg;
2712     if (dstenc >= 8) {
2713       emit_opcode(cbuf, Assembler::REX_B);
2714       dstenc -= 8;
2715     }
2716     emit_opcode(cbuf, 0xB8 | dstenc);
2717     $$$emit32$src$$constant;
2718   %}
2719 
2720   enc_class load_immL(rRegL dst, immL src)
2721   %{
         // Loads a 64-bit immediate into dst: REX_W (or REX_WB for encodings
         // >= 8), opcode 0xB8 with the low register bits OR-ed in, then the
         // constant as eight bytes.
2722     int dstenc = $dst$$reg;
2723     if (dstenc < 8) {
2724       emit_opcode(cbuf, Assembler::REX_W);
2725     } else {
2726       emit_opcode(cbuf, Assembler::REX_WB);
2727       dstenc -= 8;
2728     }
2729     emit_opcode(cbuf, 0xB8 | dstenc);
2730     emit_d64(cbuf, $src$$constant);
2731   %}
2732 
2733   enc_class load_immUL32(rRegL dst, immUL32 src)
2734   %{
         // Emits the same byte sequence as load_immI (no REX_W, 32-bit
         // immediate) for a long destination register.
2735     // same as load_immI, but this time we care about zeroes in the high word
2736     int dstenc = $dst$$reg;
2737     if (dstenc >= 8) {
2738       emit_opcode(cbuf, Assembler::REX_B);
2739       dstenc -= 8;
2740     }
2741     emit_opcode(cbuf, 0xB8 | dstenc);
2742     $$$emit32$src$$constant;
2743   %}
2744 
2745   enc_class load_immL32(rRegL dst, immL32 src)
2746   %{
         // Loads a 32-bit immediate into a long register using the wide
         // 0xC7 form: REX_W/REX_WB, opcode 0xC7, an R/M byte (mode 0x3,
         // opcode extension 0, low dst bits), then the constant as four bytes.
2747     int dstenc = $dst$$reg;
2748     if (dstenc < 8) {
2749       emit_opcode(cbuf, Assembler::REX_W);
2750     } else {
2751       emit_opcode(cbuf, Assembler::REX_WB);
2752       dstenc -= 8;
2753     }
2754     emit_opcode(cbuf, 0xC7);
2755     emit_rm(cbuf, 0x03, 0x00, dstenc);
2756     $$$emit32$src$$constant;
2757   %}
2758 
2759   enc_class load_immP31(rRegP dst, immP32 src)
2760   %{
         // Emits the same byte sequence as load_immI (no REX_W, 32-bit
         // immediate) for a pointer destination register.
2761     // same as load_immI, but this time we care about zeroes in the high word
2762     int dstenc = $dst$$reg;
2763     if (dstenc >= 8) {
2764       emit_opcode(cbuf, Assembler::REX_B);
2765       dstenc -= 8;
2766     }
2767     emit_opcode(cbuf, 0xB8 | dstenc);
2768     $$$emit32$src$$constant;
2769   %}
2770 
2771   enc_class load_immP(rRegP dst, immP src)
2772   %{
         // Loads a 64-bit pointer immediate into dst: REX_W/REX_WB, opcode
         // 0xB8 with the low register bits, then the eight constant bytes.
         // When the constant is an oop, the immediate is emitted with an oop
         // relocation; otherwise it is emitted as plain data.
2773     int dstenc = $dst$$reg;
2774     if (dstenc < 8) {
2775       emit_opcode(cbuf, Assembler::REX_W);
2776     } else {
2777       emit_opcode(cbuf, Assembler::REX_WB);
2778       dstenc -= 8;
2779     }
2780     emit_opcode(cbuf, 0xB8 | dstenc);
2781     // This next line should be generated from ADLC
2782     if ($src->constant_is_oop()) {
2783       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2784     } else {
2785       emit_d64(cbuf, $src$$constant);
2786     }
2787   %}
2788 
2789   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2790   enc_class enc_copy(rRegI dst, rRegI src)
2791   %{
         // Delegates to encode_copy (defined outside this view) with the raw
         // encodings of dst and src.
2792     encode_copy(cbuf, $dst$$reg, $src$$reg);
2793   %}
2794 
2795   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2796   enc_class enc_CopyXD( RegD dst, RegD src ) %{
         // Delegates to encode_CopyXD (defined outside this view) with the raw
         // encodings of dst and src.
2797     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2798   %}
2799 
2800   enc_class enc_copy_always(rRegI dst, rRegI src)
2801   %{
         // Register-to-register copy that is emitted unconditionally, even when
         // dst and src have the same encoding.  Emits an optional REX prefix,
         // opcode 0x8B and an R/M byte with dst as the middle field and src as
         // the last field.
2802     int srcenc = $src$$reg;
2803     int dstenc = $dst$$reg;
2804 
         // Pick the REX prefix from which encodings are >= 8, and reduce those
         // encodings to their low three bits for the R/M byte.
2805     if (dstenc < 8) {
2806       if (srcenc >= 8) {
2807         emit_opcode(cbuf, Assembler::REX_B);
2808         srcenc -= 8;
2809       }
2810     } else {
2811       if (srcenc < 8) {
2812         emit_opcode(cbuf, Assembler::REX_R);
2813       } else {
2814         emit_opcode(cbuf, Assembler::REX_RB);
2815         srcenc -= 8;
2816       }
2817       dstenc -= 8;
2818     }
2819 
2820     emit_opcode(cbuf, 0x8B);
2821     emit_rm(cbuf, 0x3, dstenc, srcenc);
2822   %}
2823 
2824   enc_class enc_copy_wide(rRegL dst, rRegL src)
2825   %{
         // Wide register-to-register copy.  Nothing is emitted when dst and src
         // have the same encoding; otherwise a REX_W-family prefix, opcode 0x8B
         // and an R/M byte (dst as middle field, src as last field) are emitted.
2826     int srcenc = $src$$reg;
2827     int dstenc = $dst$$reg;
2828 
2829     if (dstenc != srcenc) {
           // Pick the prefix from which encodings are >= 8 and reduce those
           // encodings to their low three bits.
2830       if (dstenc < 8) {
2831         if (srcenc < 8) {
2832           emit_opcode(cbuf, Assembler::REX_W);
2833         } else {
2834           emit_opcode(cbuf, Assembler::REX_WB);
2835           srcenc -= 8;
2836         }
2837       } else {
2838         if (srcenc < 8) {
2839           emit_opcode(cbuf, Assembler::REX_WR);
2840         } else {
2841           emit_opcode(cbuf, Assembler::REX_WRB);
2842           srcenc -= 8;
2843         }
2844         dstenc -= 8;
2845       }
2846       emit_opcode(cbuf, 0x8B);
2847       emit_rm(cbuf, 0x3, dstenc, srcenc);
2848     }
2849   %}
2850 
2851   enc_class Con32(immI src)
2852   %{
         // Appends the constant of src to the code buffer using the ADLC
         // emit32 form.
2853     // Output immediate
2854     $$$emit32$src$$constant;
2855   %}
2856 
2857   enc_class Con64(immL src)
2858   %{
         // Appends the 64-bit constant of src to the code buffer.  emit_d64
         // takes the code buffer as its first argument, as in load_immL.
2859     // Output immediate
2860     emit_d64(cbuf, $src$$constant);
2861   %}
2862 
2863   enc_class Con32F_as_bits(immF src)
2864   %{
         // Converts the float constant with jint_cast and emits the resulting
         // jint as four bytes.
2865     // Output Float immediate bits
2866     jfloat jf = $src$$constant;
2867     jint jf_as_bits = jint_cast(jf);
2868     emit_d32(cbuf, jf_as_bits);
2869   %}
2870 
2871   enc_class Con16(immI src)
2872   %{
         // Appends the constant of src to the code buffer using the ADLC
         // emit16 form.
2873     // Output immediate
2874     $$$emit16$src$$constant;
2875   %}
2876 
2877   // How is this different from Con32??? XXX
2878   enc_class Con_d32(immI src)
2879   %{
         // Appends the constant of src via a direct emit_d32 call, whereas
         // Con32 uses the ADLC emit32 form.
2880     emit_d32(cbuf,$src$$constant);
2881   %}
2882 
2883   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
         // Emits an R/M byte with mode 0x00, t1's encoding as the middle field
         // and 0x05 as the last field, followed by a zero 32-bit displacement.
2884     // Output immediate memory reference
2885     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2886     emit_d32(cbuf, 0x00);
2887   %}
2888 
2889   enc_class lock_prefix()
2890   %{
         // Emits the 0xF0 lock prefix byte only when os::is_MP() is true;
         // emits nothing otherwise.
2891     if (os::is_MP()) {
2892       emit_opcode(cbuf, 0xF0); // lock
2893     }
2894   %}
2895 
2896   enc_class REX_mem(memory mem)
2897   %{
         // Emits the REX prefix a memory operand needs: REX_B when only the
         // base encoding is >= 8, REX_X when only the index encoding is >= 8,
         // REX_XB when both are.  Nothing is emitted when both are < 8.
2898     if ($mem$$base >= 8) {
2899       if ($mem$$index < 8) {
2900         emit_opcode(cbuf, Assembler::REX_B);
2901       } else {
2902         emit_opcode(cbuf, Assembler::REX_XB);
2903       }
2904     } else {
2905       if ($mem$$index >= 8) {
2906         emit_opcode(cbuf, Assembler::REX_X);
2907       }
2908     }
2909   %}
2910 
2911   enc_class REX_mem_wide(memory mem)
2912   %{
         // Wide variant of REX_mem: a prefix is always emitted.  REX_W is the
         // baseline; the B and/or X variants are chosen when the base and/or
         // index encoding is >= 8.
2913     if ($mem$$base >= 8) {
2914       if ($mem$$index < 8) {
2915         emit_opcode(cbuf, Assembler::REX_WB);
2916       } else {
2917         emit_opcode(cbuf, Assembler::REX_WXB);
2918       }
2919     } else {
2920       if ($mem$$index < 8) {
2921         emit_opcode(cbuf, Assembler::REX_W);
2922       } else {
2923         emit_opcode(cbuf, Assembler::REX_WX);
2924       }
2925     }
2926   %}
2927 
2928   // for byte regs
2929   enc_class REX_breg(rRegI reg)
2930   %{
         // Prefix for a byte-register operand: nothing for encodings 0-3, a
         // plain REX for encodings 4-7, and REX_B for encodings >= 8.
         // (On x86-64 a REX prefix is what makes encodings 4-7 address the low
         // bytes of RSP/RBP/RSI/RDI instead of AH/CH/DH/BH.)
2931     if ($reg$$reg >= 4) {
2932       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2933     }
2934   %}
2935 
2936   // for byte regs
2937   enc_class REX_reg_breg(rRegI dst, rRegI src)
2938   %{
         // Prefix for a register/byte-register pair.  With dst < 8: nothing for
         // src 0-3, plain REX for src 4-7, REX_B for src >= 8.  With dst >= 8:
         // REX_R when src < 8, REX_RB otherwise.  Only src receives the
         // byte-register (>= 4) treatment.
2939     if ($dst$$reg < 8) {
2940       if ($src$$reg >= 4) {
2941         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2942       }
2943     } else {
2944       if ($src$$reg < 8) {
2945         emit_opcode(cbuf, Assembler::REX_R);
2946       } else {
2947         emit_opcode(cbuf, Assembler::REX_RB);
2948       }
2949     }
2950   %}
2951 
2952   // for byte regs
2953   enc_class REX_breg_mem(rRegI reg, memory mem)
2954   %{
         // Prefix for a byte register combined with a memory operand.  The R,
         // X and B variants are selected by reg, index and base encodings >= 8.
         // When none of those apply, a plain REX is still emitted if reg's
         // encoding is 4-7; otherwise nothing is emitted.
2955     if ($reg$$reg < 8) {
2956       if ($mem$$base < 8) {
2957         if ($mem$$index >= 8) {
2958           emit_opcode(cbuf, Assembler::REX_X);
2959         } else if ($reg$$reg >= 4) {
2960           emit_opcode(cbuf, Assembler::REX);
2961         }
2962       } else {
2963         if ($mem$$index < 8) {
2964           emit_opcode(cbuf, Assembler::REX_B);
2965         } else {
2966           emit_opcode(cbuf, Assembler::REX_XB);
2967         }
2968       }
2969     } else {
2970       if ($mem$$base < 8) {
2971         if ($mem$$index < 8) {
2972           emit_opcode(cbuf, Assembler::REX_R);
2973         } else {
2974           emit_opcode(cbuf, Assembler::REX_RX);
2975         }
2976       } else {
2977         if ($mem$$index < 8) {
2978           emit_opcode(cbuf, Assembler::REX_RB);
2979         } else {
2980           emit_opcode(cbuf, Assembler::REX_RXB);
2981         }
2982       }
2983     }
2984   %}
2985 
2986   enc_class REX_reg(rRegI reg)
2987   %{
         // Emits REX_B when reg's encoding is >= 8; emits nothing otherwise.
2988     if ($reg$$reg >= 8) {
2989       emit_opcode(cbuf, Assembler::REX_B);
2990     }
2991   %}
2992 
2993   enc_class REX_reg_wide(rRegI reg)
2994   %{
         // Always emits a prefix: REX_W when reg's encoding is < 8, REX_WB
         // otherwise.
2995     if ($reg$$reg < 8) {
2996       emit_opcode(cbuf, Assembler::REX_W);
2997     } else {
2998       emit_opcode(cbuf, Assembler::REX_WB);
2999     }
3000   %}
3001 
3002   enc_class REX_reg_reg(rRegI dst, rRegI src)
3003   %{
         // Prefix for a register pair: REX_R when only dst's encoding is >= 8,
         // REX_B when only src's is, REX_RB when both are, nothing when
         // neither is.
3004     if ($dst$$reg < 8) {
3005       if ($src$$reg >= 8) {
3006         emit_opcode(cbuf, Assembler::REX_B);
3007       }
3008     } else {
3009       if ($src$$reg < 8) {
3010         emit_opcode(cbuf, Assembler::REX_R);
3011       } else {
3012         emit_opcode(cbuf, Assembler::REX_RB);
3013       }
3014     }
3015   %}
3016 
3017   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3018   %{
         // Wide variant of REX_reg_reg: a prefix is always emitted.  REX_W is
         // the baseline; R is added for dst >= 8 and B for src >= 8.
3019     if ($dst$$reg < 8) {
3020       if ($src$$reg < 8) {
3021         emit_opcode(cbuf, Assembler::REX_W);
3022       } else {
3023         emit_opcode(cbuf, Assembler::REX_WB);
3024       }
3025     } else {
3026       if ($src$$reg < 8) {
3027         emit_opcode(cbuf, Assembler::REX_WR);
3028       } else {
3029         emit_opcode(cbuf, Assembler::REX_WRB);
3030       }
3031     }
3032   %}
3033 
3034   enc_class REX_reg_mem(rRegI reg, memory mem)
3035   %{
         // Prefix for a register combined with a memory operand.  R, X and B
         // are selected by reg, index and base encodings >= 8 respectively;
         // nothing is emitted when all three encodings are < 8.
3036     if ($reg$$reg < 8) {
3037       if ($mem$$base < 8) {
3038         if ($mem$$index >= 8) {
3039           emit_opcode(cbuf, Assembler::REX_X);
3040         }
3041       } else {
3042         if ($mem$$index < 8) {
3043           emit_opcode(cbuf, Assembler::REX_B);
3044         } else {
3045           emit_opcode(cbuf, Assembler::REX_XB);
3046         }
3047       }
3048     } else {
3049       if ($mem$$base < 8) {
3050         if ($mem$$index < 8) {
3051           emit_opcode(cbuf, Assembler::REX_R);
3052         } else {
3053           emit_opcode(cbuf, Assembler::REX_RX);
3054         }
3055       } else {
3056         if ($mem$$index < 8) {
3057           emit_opcode(cbuf, Assembler::REX_RB);
3058         } else {
3059           emit_opcode(cbuf, Assembler::REX_RXB);
3060         }
3061       }
3062     }
3063   %}
3064 
3065   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3066   %{
         // Wide variant of REX_reg_mem: a prefix is always emitted.  REX_W is
         // the baseline; R, X and B are added for reg, index and base
         // encodings >= 8 respectively.
3067     if ($reg$$reg < 8) {
3068       if ($mem$$base < 8) {
3069         if ($mem$$index < 8) {
3070           emit_opcode(cbuf, Assembler::REX_W);
3071         } else {
3072           emit_opcode(cbuf, Assembler::REX_WX);
3073         }
3074       } else {
3075         if ($mem$$index < 8) {
3076           emit_opcode(cbuf, Assembler::REX_WB);
3077         } else {
3078           emit_opcode(cbuf, Assembler::REX_WXB);
3079         }
3080       }
3081     } else {
3082       if ($mem$$base < 8) {
3083         if ($mem$$index < 8) {
3084           emit_opcode(cbuf, Assembler::REX_WR);
3085         } else {
3086           emit_opcode(cbuf, Assembler::REX_WRX);
3087         }
3088       } else {
3089         if ($mem$$index < 8) {
3090           emit_opcode(cbuf, Assembler::REX_WRB);
3091         } else {
3092           emit_opcode(cbuf, Assembler::REX_WRXB);
3093         }
3094       }
3095     }
3096   %}
3097 
3098   enc_class reg_mem(rRegI ereg, memory mem)
3099   %{
         // Unpacks the register encoding and the memory operand's base, index,
         // scale, displacement and disp-is-oop flag, then hands them to
         // encode_RegMem (defined outside this view).
3100     // High registers are handled in encode_RegMem
3101     int reg = $ereg$$reg;
3102     int base = $mem$$base;
3103     int index = $mem$$index;
3104     int scale = $mem$$scale;
3105     int disp = $mem$$disp;
3106     bool disp_is_oop = $mem->disp_is_oop();
3107 
3108     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3109   %}
3110 
3111   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3112   %{
         // Like reg_mem, but the value passed in the register position of
         // encode_RegMem is the constant rm_opcode rather than a register
         // encoding.
3113     int rm_byte_opcode = $rm_opcode$$constant;
3114 
3115     // High registers are handled in encode_RegMem
3116     int base = $mem$$base;
3117     int index = $mem$$index;
3118     int scale = $mem$$scale;
3119     int displace = $mem$$disp;
3120 
3121     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3122                                             // working with static
3123                                             // globals
3124     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3125                   disp_is_oop);
3126   %}
3127 
3128   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3129   %{
         // Builds a base + displacement operand (src0 as base, src1 as
         // displacement, no index, no scale) and passes it with dst to
         // encode_RegMem.  The displacement is never treated as an oop.
3130     int reg_encoding = $dst$$reg;
3131     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3132     int index        = 0x04;            // 0x04 indicates no index
3133     int scale        = 0x00;            // 0x00 indicates no scale
3134     int displace     = $src1$$constant; // 0x00 indicates no displacement
3135     bool disp_is_oop = false;
3136     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3137                   disp_is_oop);
3138   %}
3139 
3140   enc_class neg_reg(rRegI dst)
3141   %{
         // Emits: optional REX_B (dst encoding >= 8), opcode 0xF7 and an R/M
         // byte with mode 0x3, opcode extension 0x03 and the low dst bits.
3142     int dstenc = $dst$$reg;
3143     if (dstenc >= 8) {
3144       emit_opcode(cbuf, Assembler::REX_B);
3145       dstenc -= 8;
3146     }
3147     // NEG $dst
3148     emit_opcode(cbuf, 0xF7);
3149     emit_rm(cbuf, 0x3, 0x03, dstenc);
3150   %}
3151 
3152   enc_class neg_reg_wide(rRegI dst)
3153   %{
         // Wide variant of neg_reg: REX_W (or REX_WB for encodings >= 8) is
         // always emitted before opcode 0xF7 and the R/M byte.
3154     int dstenc = $dst$$reg;
3155     if (dstenc < 8) {
3156       emit_opcode(cbuf, Assembler::REX_W);
3157     } else {
3158       emit_opcode(cbuf, Assembler::REX_WB);
3159       dstenc -= 8;
3160     }
3161     // NEG $dst
3162     emit_opcode(cbuf, 0xF7);
3163     emit_rm(cbuf, 0x3, 0x03, dstenc);
3164   %}
3165 
3166   enc_class setLT_reg(rRegI dst)
3167   %{
         // Emits a byte-register prefix (REX_B for encodings >= 8, plain REX
         // for encodings 4-7, none for 0-3), then 0x0F 0x9C and an R/M byte
         // with mode 0x3 and the low dst bits.
3168     int dstenc = $dst$$reg;
3169     if (dstenc >= 8) {
3170       emit_opcode(cbuf, Assembler::REX_B);
3171       dstenc -= 8;
3172     } else if (dstenc >= 4) {
3173       emit_opcode(cbuf, Assembler::REX);
3174     }
3175     // SETLT $dst
3176     emit_opcode(cbuf, 0x0F);
3177     emit_opcode(cbuf, 0x9C);
3178     emit_rm(cbuf, 0x3, 0x0, dstenc);
3179   %}
3180 
3181   enc_class setNZ_reg(rRegI dst)
3182   %{
         // Same structure as setLT_reg with opcode bytes 0x0F 0x95: prefix
         // chosen by dst's encoding (>= 8, 4-7, or 0-3), then the R/M byte.
3183     int dstenc = $dst$$reg;
3184     if (dstenc >= 8) {
3185       emit_opcode(cbuf, Assembler::REX_B);
3186       dstenc -= 8;
3187     } else if (dstenc >= 4) {
3188       emit_opcode(cbuf, Assembler::REX);
3189     }
3190     // SETNZ $dst
3191     emit_opcode(cbuf, 0x0F);
3192     emit_opcode(cbuf, 0x95);
3193     emit_rm(cbuf, 0x3, 0x0, dstenc);
3194   %}
3195 
3196   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3197                        rcx_RegI tmp)
3198   %{
         // Emits a four-instruction sequence: sub p,q; sbb tmp,tmp; and tmp,y;
         // add p,tmp.
         // NOTE(review): tmp's encoding is written into R/M bytes unmasked and
         // never contributes a REX bit; this presumably relies on tmp being
         // restricted to the rcx register class -- confirm.
3199     // cadd_cmpLT
3200 
3201     int tmpReg = $tmp$$reg;
3202 
3203     int penc = $p$$reg;
3204     int qenc = $q$$reg;
3205     int yenc = $y$$reg;
3206 
3207     // subl $p,$q
3208     if (penc < 8) {
3209       if (qenc >= 8) {
3210         emit_opcode(cbuf, Assembler::REX_B);
3211       }
3212     } else {
3213       if (qenc < 8) {
3214         emit_opcode(cbuf, Assembler::REX_R);
3215       } else {
3216         emit_opcode(cbuf, Assembler::REX_RB);
3217       }
3218     }
3219     emit_opcode(cbuf, 0x2B);
3220     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3221 
3222     // sbbl $tmp, $tmp
3223     emit_opcode(cbuf, 0x1B);
3224     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3225 
3226     // andl $tmp, $y
         // y is the last R/M field here, so only REX_B can be needed.
3227     if (yenc >= 8) {
3228       emit_opcode(cbuf, Assembler::REX_B);
3229     }
3230     emit_opcode(cbuf, 0x23);
3231     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3232 
3233     // addl $p,$tmp
         // p is the middle R/M field here, so only REX_R can be needed.
3234     if (penc >= 8) {
3235         emit_opcode(cbuf, Assembler::REX_R);
3236     }
3237     emit_opcode(cbuf, 0x03);
3238     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3239   %}
3240 
3241   // Compare the longs and set -1, 0, or 1 into dst
3242   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3243   %{
         // Three-way long compare.  Emits: cmpq src1,src2; movl dst,-1; a short
         // jl over the remainder; setne dst; movzbl dst,dst.  When the jl is
         // taken, dst keeps the -1 loaded before it; otherwise the setne and
         // movzbl pair overwrites dst.
3244     int src1enc = $src1$$reg;
3245     int src2enc = $src2$$reg;
3246     int dstenc = $dst$$reg;
3247 
3248     // cmpq $src1, $src2
3249     if (src1enc < 8) {
3250       if (src2enc < 8) {
3251         emit_opcode(cbuf, Assembler::REX_W);
3252       } else {
3253         emit_opcode(cbuf, Assembler::REX_WB);
3254       }
3255     } else {
3256       if (src2enc < 8) {
3257         emit_opcode(cbuf, Assembler::REX_WR);
3258       } else {
3259         emit_opcode(cbuf, Assembler::REX_WRB);
3260       }
3261     }
3262     emit_opcode(cbuf, 0x3B);
3263     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3264 
3265     // movl $dst, -1
3266     if (dstenc >= 8) {
3267       emit_opcode(cbuf, Assembler::REX_B);
3268     }
3269     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3270     emit_d32(cbuf, -1);
3271 
3272     // jl,s done
         // The skip distance is the size of the two instructions below: three
         // bytes each, plus one prefix byte each when dst's encoding is >= 4.
3273     emit_opcode(cbuf, 0x7C);
3274     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3275 
3276     // setne $dst
3277     if (dstenc >= 4) {
3278       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3279     }
3280     emit_opcode(cbuf, 0x0F);
3281     emit_opcode(cbuf, 0x95);
3282     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3283 
3284     // movzbl $dst, $dst
         // dst appears in both R/M fields, hence REX_RB for encodings >= 8.
3285     if (dstenc >= 4) {
3286       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3287     }
3288     emit_opcode(cbuf, 0x0F);
3289     emit_opcode(cbuf, 0xB6);
3290     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3291   %}
3292 
3293   enc_class Push_ResultXD(regD dst) %{
         // Moves an x87 result into the XMM register dst through the stack:
         // stores it at [RSP], loads dst from [RSP], then adds 8 to RSP.
3294     int dstenc = $dst$$reg;
3295 
3296     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3297 
3298     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3299     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
         // The REX byte goes between the 0xF2/0x66 prefix and the 0x0F opcode.
3300     if (dstenc >= 8) {
3301       emit_opcode(cbuf, Assembler::REX_R);
3302     }
3303     emit_opcode  (cbuf, 0x0F );
3304     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3305     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3306 
3307     // add rsp,8
3308     emit_opcode(cbuf, Assembler::REX_W);
3309     emit_opcode(cbuf,0x83);
3310     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3311     emit_d8(cbuf,0x08);
3312   %}
3313 
3314   enc_class Push_SrcXD(regD src) %{
         // Moves the XMM register src onto the x87 stack through memory:
         // subtracts 8 from RSP, stores src at [RSP], then loads from [RSP].
         // RSP is left decremented by this encoding.
3315     int srcenc = $src$$reg;
3316 
3317     // subq rsp,#8
3318     emit_opcode(cbuf, Assembler::REX_W);
3319     emit_opcode(cbuf, 0x83);
3320     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3321     emit_d8(cbuf, 0x8);
3322 
3323     // movsd [rsp],src
3324     emit_opcode(cbuf, 0xF2);
3325     if (srcenc >= 8) {
3326       emit_opcode(cbuf, Assembler::REX_R);
3327     }
3328     emit_opcode(cbuf, 0x0F);
3329     emit_opcode(cbuf, 0x11);
3330     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3331 
3332     // fldd [rsp]
         // NOTE(review): a 0x66 byte is emitted ahead of the 0xDD opcode; it is
         // unclear from here whether it is required -- confirm before changing,
         // since the byte count of this encoding may be relied upon elsewhere.
3333     emit_opcode(cbuf, 0x66);
3334     emit_opcode(cbuf, 0xDD);
3335     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3336   %}
3337 
3338 
3339   enc_class movq_ld(regD dst, memory mem) %{
         // Emits MacroAssembler::movq from the memory operand into dst.
3340     MacroAssembler _masm(&cbuf);
3341     __ movq($dst$$XMMRegister, $mem$$Address);
3342   %}
3343 
3344   enc_class movq_st(memory mem, regD src) %{
         // Emits MacroAssembler::movq from src into the memory operand.
3345     MacroAssembler _masm(&cbuf);
3346     __ movq($mem$$Address, $src$$XMMRegister);
3347   %}
3348 
3349   enc_class pshufd_8x8(regF dst, regF src) %{
         // Copies src into dst with encode_CopyXD, then applies punpcklbw
         // (dst with itself) and pshuflw with selector 0x00, both in place.
3350     MacroAssembler _masm(&cbuf);
3351 
3352     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3353     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3354     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3355   %}
3356 
3357   enc_class pshufd_4x16(regF dst, regF src) %{
         // Emits a single pshuflw from src into dst with selector 0x00.
3358     MacroAssembler _masm(&cbuf);
3359 
3360     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3361   %}
3362 
3363   enc_class pshufd(regD dst, regD src, int mode) %{
         // Emits pshufd from src into dst, passing mode through as the
         // selector argument.
3364     MacroAssembler _masm(&cbuf);
3365 
3366     __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3367   %}
3368 
3369   enc_class pxor(regD dst, regD src) %{
         // Emits pxor with dst as the first operand and src as the second.
3370     MacroAssembler _masm(&cbuf);
3371 
3372     __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3373   %}
3374 
3375   enc_class mov_i2x(regD dst, rRegI src) %{
         // Emits movdl from the general register src into the XMM register dst.
3376     MacroAssembler _masm(&cbuf);
3377 
3378     __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3379   %}
3380 
3381   // obj: object to lock
3382   // box: box address (header location) -- killed
3383   // tmp: rax -- killed
3384   // scr: rbx -- killed
3385   //
3386   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3387   // from i486.ad.  See that file for comments.
3388   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3389   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3390 
3391 
3392   enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
3393   %{
         // Emits the inline monitor-enter sequence.  Three code shapes are
         // selected at code-generation time by the EmitSync flag:
         //   EmitSync & 1 : store unused_mark into the box and compare RSP
         //                  against NULL_WORD; no locking attempt is emitted.
         //   EmitSync & 2 : stack-lock attempt via cmpxchg, with the recursive
         //                  case handled inline; no inflated-monitor path.
         //   otherwise    : full path -- tests the 0x02 bit of the word at
         //                  [obj+0] to branch to the inflated case, else tries
         //                  the stack lock and falls back to the recursive
         //                  check.
         // Each path ends with the flags set by its final compare/cmpxchg/and.
         // NOTE(review): callers presumably branch on ZF afterwards -- confirm
         // at the instruct that uses this encoding.
3394     Register objReg = as_Register((int)$obj$$reg);
3395     Register boxReg = as_Register((int)$box$$reg);
3396     Register tmpReg = as_Register($tmp$$reg);
3397     Register scrReg = as_Register($scr$$reg);
3398     MacroAssembler masm(&cbuf);
3399 
3400     // Verify uniqueness of register assignments -- necessary but not sufficient
3401     assert (objReg != boxReg && objReg != tmpReg &&
3402             objReg != scrReg && tmpReg != scrReg, "invariant") ;
3403 
         // Optional statistics: bump the total-entry counter when counters are
         // attached to this node.
3404     if (_counters != NULL) {
3405       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3406     }
3407     if (EmitSync & 1) {
3408         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3409         masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3410         masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
3411     } else
3412     if (EmitSync & 2) {
3413         Label DONE_LABEL;
3414         if (UseBiasedLocking) {
3415            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3416           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3417         }
3418         // QQQ was movl...
             // tmp = word at [obj+0] with bit 0 set; store it into the box and
             // try to install the box address into [obj+0].
3419         masm.movptr(tmpReg, 0x1);
3420         masm.orptr(tmpReg, Address(objReg, 0));
3421         masm.movptr(Address(boxReg, 0), tmpReg);
3422         if (os::is_MP()) {
3423           masm.lock();
3424         }
3425         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3426         masm.jcc(Assembler::equal, DONE_LABEL);
3427 
3428         // Recursive locking
             // tmp = (tmp - rsp) & (7 - page_size); the result is stored into
             // the box and the flags from the AND are left for the caller.
3429         masm.subptr(tmpReg, rsp);
3430         masm.andptr(tmpReg, 7 - os::vm_page_size());
3431         masm.movptr(Address(boxReg, 0), tmpReg);
3432 
3433         masm.bind(DONE_LABEL);
3434         masm.nop(); // avoid branch to branch
3435     } else {
3436         Label DONE_LABEL, IsInflated, Egress;
3437 
3438         masm.movptr(tmpReg, Address(objReg, 0)) ; 
3439         masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
3440         masm.jcc   (Assembler::notZero, IsInflated) ; 
3441          
3442         // it's stack-locked, biased or neutral
3443         // TODO: optimize markword triage order to reduce the number of
3444         // conditional branches in the most common cases.
3445         // Beware -- there's a subtle invariant that fetch of the markword
3446         // at [FETCH], below, will never observe a biased encoding (*101b).
3447         // If this invariant is not held we'll suffer exclusion (safety) failure.
3448 
3449         if (UseBiasedLocking && !UseOptoBiasInlining) {
3450           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
3451           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
3452         }
3453 
3454         // was q will it destroy high?
             // Set bit 0 of the fetched word, save it in the box, then try to
             // install the box address into [obj+0].
3455         masm.orl   (tmpReg, 1) ; 
3456         masm.movptr(Address(boxReg, 0), tmpReg) ;  
3457         if (os::is_MP()) { masm.lock(); } 
3458         masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3459         if (_counters != NULL) {
3460            masm.cond_inc32(Assembler::equal,
3461                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3462         }
3463         masm.jcc   (Assembler::equal, DONE_LABEL);
3464 
3465         // Recursive locking
3466         masm.subptr(tmpReg, rsp);
3467         masm.andptr(tmpReg, 7 - os::vm_page_size());
3468         masm.movptr(Address(boxReg, 0), tmpReg);
3469         if (_counters != NULL) {
3470            masm.cond_inc32(Assembler::equal,
3471                            ExternalAddress((address) _counters->fast_path_entry_count_addr()));
3472         }
3473         masm.jmp   (DONE_LABEL) ;
3474 
3475         masm.bind  (IsInflated) ;
3476         // It's inflated
3477 
3478         // TODO: someday avoid the ST-before-CAS penalty by
3479         // relocating (deferring) the following ST.
3480         // We should also think about trying a CAS without having
3481         // fetched _owner.  If the CAS is successful we may
3482         // avoid an RTO->RTS upgrade on the $line.
3483         // Without cast to int32_t a movptr will destroy r10 which is typically obj
3484         masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
3485 
             // box now holds the word fetched from [obj+0]; tmp is reloaded
             // with the owner field.  NOTE(review): the "-2" on the owner offset
             // presumably cancels the 0x02 tag bit still set in that word --
             // confirm.
3486         masm.mov    (boxReg, tmpReg) ; 
3487         masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3488         masm.testptr(tmpReg, tmpReg) ;   
3489         masm.jcc    (Assembler::notZero, DONE_LABEL) ; 
3490 
3491         // It's inflated and appears unlocked
             // Try to install r15_thread as the owner.
3492         if (os::is_MP()) { masm.lock(); } 
3493         masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3494         // Intentional fall-through into DONE_LABEL ...
3495 
3496         masm.bind  (DONE_LABEL) ;
3497         masm.nop   () ;                 // avoid jmp to jmp
3498     }
3499   %}
3500 
3501   // obj: object to unlock
3502   // box: box address (displaced header location), killed
3503   // tmp: killed scratch register.  NOTE(review): this line used to read
       //      "RBX: killed tmp; cannot be obj nor box", but the signature below
       //      only constrains tmp to rRegP -- confirm against the using instruct.
       //
       // Emits the inline monitor-exit fast path.  EmitSync selects one of three
       // shapes: (EmitSync & 4) emits only a compare of rsp with 0; (EmitSync & 8)
       // emits a sequence that handles only the recursive and stack-locked
       // cases; otherwise the full sequence is emitted, which also handles an
       // inflated monitor.  Per the ZF comments near LSuccess/LGoSlowPath, ZF=1
       // signals success and ZF=0 signals failure at DONE_LABEL.
3504   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3505   %{
3506 
3507     Register objReg = as_Register($obj$$reg);
3508     Register boxReg = as_Register($box$$reg);
3509     Register tmpReg = as_Register($tmp$$reg);
3510     MacroAssembler masm(&cbuf);
3511 
3512     if (EmitSync & 4) { 
            // Diagnostic shape: the only instruction emitted is this compare.
            // Presumably it leaves ZF=0 so the slow path is always taken.
3513        masm.cmpptr(rsp, 0) ; 
3514     } else
3515     if (EmitSync & 8) {
3516        Label DONE_LABEL;
3517        if (UseBiasedLocking) {
3518          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3519        }
3520 
3521        // Check whether the displaced header is 0
3522        //(=> recursive unlock)
3523        masm.movptr(tmpReg, Address(boxReg, 0));
3524        masm.testptr(tmpReg, tmpReg);
3525        masm.jcc(Assembler::zero, DONE_LABEL);
3526 
3527        // If not recursive lock, reset the header to displaced header
3528        if (os::is_MP()) {
3529          masm.lock();
3530        }
3531        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3532        masm.bind(DONE_LABEL);
3533        masm.nop(); // avoid branch to branch
3534     } else {
3535        Label DONE_LABEL, Stacked, CheckSucc ;
3536 
3537        if (UseBiasedLocking && !UseOptoBiasInlining) {
3538          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3539        }
3540         
            // tmp = header word at [obj+0].  A zero displaced header in the box
            // means a recursive unlock (nothing to do); otherwise bit 0x02 of
            // the header word distinguishes the inflated case from the
            // stack-locked case handled at Stacked.
3541        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3542        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3543        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3544        masm.testl (tmpReg, 0x02) ; 
3545        masm.jcc   (Assembler::zero, Stacked) ; 
3546         
3547        // It's inflated
            // tmp still has the 0x02 bit set, hence the "-2" on every
            // ObjectMonitor field offset below (presumably tmp is the tagged
            // monitor pointer).  box is reused as a scratch register here.
            // Exit with ZF=0 unless owner == current thread and recursions == 0.
3548        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3549        masm.xorptr(boxReg, r15_thread) ; 
3550        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3551        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // If either cxq or EntryList is non-zero go to CheckSucc; otherwise
            // simply clear the owner field and finish.
3552        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3553        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3554        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3555        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3556        masm.jmp   (DONE_LABEL) ; 
3557         
            // With EmitSync & 65536 set this whole section is omitted and
            // CheckSucc is bound directly in front of DONE_LABEL further down.
3558        if ((EmitSync & 65536) == 0) { 
3559          Label LSuccess, LGoSlowPath ;
3560          masm.bind  (CheckSucc) ;
              // A zero succ field sends us to the slow path.
3561          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3562          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3563 
3564          // I'd much rather use lock:andl m->_owner, 0 as it's faster than
3565          // the explicit ST;MEMBAR combination, but masm doesn't currently support
3566          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3567          // are all faster when the write buffer is populated.
3568          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3569          if (os::is_MP()) {
3570             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3571          }
              // Re-check succ after clearing owner; if it is still non-zero we
              // are done successfully.
3572          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3573          masm.jcc   (Assembler::notZero, LSuccess) ;
3574 
              // Otherwise try to CAS owner from NULL (in box/RAX) back to the
              // current thread.  A failed CAS branches to LSuccess; a successful
              // one falls into the slow path.
3575          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3576          if (os::is_MP()) { masm.lock(); }
3577          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3578          masm.jcc   (Assembler::notEqual, LSuccess) ;
3579          // Intentional fall-through into slow-path
3580 
3581          masm.bind  (LGoSlowPath) ;
3582          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3583          masm.jmp   (DONE_LABEL) ;
3584 
3585          masm.bind  (LSuccess) ;
3586          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3587          masm.jmp   (DONE_LABEL) ;
3588        }
3589 
            // Stack-locked case: CAS the displaced header from the box back
            // into the object's header word.
3590        masm.bind  (Stacked) ; 
3591        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3592        if (os::is_MP()) { masm.lock(); } 
3593        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3594 
3595        if (EmitSync & 65536) {
3596           masm.bind (CheckSucc) ;
3597        }
3598        masm.bind(DONE_LABEL);
3599        if (EmitSync & 32768) {
3600           masm.nop();                      // avoid branch to branch
3601        }
3602     }
3603   %}
3604 
3605 
3606   enc_class enc_rethrow()
3607   %{
         // Emits a jump (opcode 0xE9 followed by a 32-bit displacement) to
         // OptoRuntime::rethrow_stub().  The displacement is computed relative
         // to the end of the 4-byte field and is recorded with a runtime-call
         // relocation in RELOC_DISP32 format.
3608     cbuf.set_insts_mark();
3609     emit_opcode(cbuf, 0xE9); // jmp entry
3610     emit_d32_reloc(cbuf,
3611                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3612                    runtime_call_Relocation::spec(),
3613                    RELOC_DISP32);
3614   %}
3615 
3616   enc_class absF_encoding(regF dst)
3617   %{
         // Float absolute value: emits opcode bytes 0F 54 (ANDPS in the x86
         // ISA) with dst as the register operand and the constant at
         // StubRoutines::x86::float_sign_mask() as the memory operand.
3618     int dstenc = $dst$$reg;
3619     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3620 
3621     cbuf.set_insts_mark();
         // Register encodings 8 and above need a REX.R prefix; only the low
         // three bits then go into the ModRM reg field.
3622     if (dstenc >= 8) {
3623       emit_opcode(cbuf, Assembler::REX_R);
3624       dstenc -= 8;
3625     }
3626     // XXX reg_mem doesn't support RIP-relative addressing yet
         // so the ModRM byte (mod=00, rm=101) and the relocated 32-bit
         // displacement are emitted by hand.
3627     emit_opcode(cbuf, 0x0F);
3628     emit_opcode(cbuf, 0x54);
3629     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3630     emit_d32_reloc(cbuf, signmask_address);
3631   %}
3632 
3633   enc_class absD_encoding(regD dst)
3634   %{
         // Double absolute value: emits prefix 66 plus opcode bytes 0F 54
         // (ANDPD in the x86 ISA) with dst as the register operand and the
         // constant at StubRoutines::x86::double_sign_mask() as the memory
         // operand.
3635     int dstenc = $dst$$reg;
3636     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3637 
3638     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted before the optional REX.R prefix.
3639     emit_opcode(cbuf, 0x66);
3640     if (dstenc >= 8) {
3641       emit_opcode(cbuf, Assembler::REX_R);
3642       dstenc -= 8;
3643     }
3644     // XXX reg_mem doesn't support RIP-relative addressing yet
         // so the ModRM byte (mod=00, rm=101) and the relocated 32-bit
         // displacement are emitted by hand.
3645     emit_opcode(cbuf, 0x0F);
3646     emit_opcode(cbuf, 0x54);
3647     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3648     emit_d32_reloc(cbuf, signmask_address);
3649   %}
3650 
3651   enc_class negF_encoding(regF dst)
3652   %{
         // Float negation: emits opcode bytes 0F 57 (XORPS in the x86 ISA)
         // with dst as the register operand and the constant at
         // StubRoutines::x86::float_sign_flip() as the memory operand.
3653     int dstenc = $dst$$reg;
3654     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3655 
3656     cbuf.set_insts_mark();
         // Register encodings 8 and above need a REX.R prefix; only the low
         // three bits then go into the ModRM reg field.
3657     if (dstenc >= 8) {
3658       emit_opcode(cbuf, Assembler::REX_R);
3659       dstenc -= 8;
3660     }
3661     // XXX reg_mem doesn't support RIP-relative addressing yet
         // so the ModRM byte (mod=00, rm=101) and the relocated 32-bit
         // displacement are emitted by hand.
3662     emit_opcode(cbuf, 0x0F);
3663     emit_opcode(cbuf, 0x57);
3664     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3665     emit_d32_reloc(cbuf, signflip_address);
3666   %}
3667 
3668   enc_class negD_encoding(regD dst)
3669   %{
         // Double negation: emits prefix 66 plus opcode bytes 0F 57 (XORPD in
         // the x86 ISA) with dst as the register operand and the constant at
         // StubRoutines::x86::double_sign_flip() as the memory operand.
3670     int dstenc = $dst$$reg;
3671     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3672 
3673     cbuf.set_insts_mark();
         // The 0x66 prefix is emitted before the optional REX.R prefix.
3674     emit_opcode(cbuf, 0x66);
3675     if (dstenc >= 8) {
3676       emit_opcode(cbuf, Assembler::REX_R);
3677       dstenc -= 8;
3678     }
3679     // XXX reg_mem doesn't support RIP-relative addressing yet
         // so the ModRM byte (mod=00, rm=101) and the relocated 32-bit
         // displacement are emitted by hand.
3680     emit_opcode(cbuf, 0x0F);
3681     emit_opcode(cbuf, 0x57);
3682     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
3683     emit_d32_reloc(cbuf, signflip_address);
3684   %}
3685 
3686   enc_class f2i_fixup(rRegI dst, regF src)
3687   %{
         // Fixup sequence for a float -> int conversion.  Emits: compare dst
         // with 0x80000000; if not equal, skip the rest; otherwise reserve 8
         // bytes of stack, store src there, call the f2i_fixup stub and pop the
         // slot into dst.
         // NOTE(review): presumably 0x80000000 is what the preceding conversion
         // instruction leaves in dst for NaN / out-of-range inputs -- confirm
         // against the instruct that uses this encoding.
3688     int dstenc = $dst$$reg;
3689     int srcenc = $src$$reg;
3690 
3691     // cmpl $dst, #0x80000000
3692     if (dstenc >= 8) {
3693       emit_opcode(cbuf, Assembler::REX_B);
3694     }
3695     emit_opcode(cbuf, 0x81);
3696     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3697     emit_d32(cbuf, 0x80000000);
3698 
3699     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, or 6 with a REX prefix for src) +
         // call (5) + popq (1, or 2 with a REX prefix for dst), which gives
         // 0xF, 0x10 or 0x11.  Keep it in sync with the code that follows.
3700     emit_opcode(cbuf, 0x75);
3701     if (srcenc < 8 && dstenc < 8) {
3702       emit_d8(cbuf, 0xF);
3703     } else if (srcenc >= 8 && dstenc >= 8) {
3704       emit_d8(cbuf, 0x11);
3705     } else {
3706       emit_d8(cbuf, 0x10);
3707     }
3708 
3709     // subq rsp, #8
3710     emit_opcode(cbuf, Assembler::REX_W);
3711     emit_opcode(cbuf, 0x83);
3712     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3713     emit_d8(cbuf, 8);
3714 
3715     // movss [rsp], $src
3716     emit_opcode(cbuf, 0xF3);
3717     if (srcenc >= 8) {
3718       emit_opcode(cbuf, Assembler::REX_R);
3719     }
3720     emit_opcode(cbuf, 0x0F);
3721     emit_opcode(cbuf, 0x11);
3722     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3723 
3724     // call f2i_fixup
         // (0xE8 plus a relocated 32-bit displacement relative to the end of
         // the displacement field)
3725     cbuf.set_insts_mark();
3726     emit_opcode(cbuf, 0xE8);
3727     emit_d32_reloc(cbuf,
3728                    (int)
3729                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3730                    runtime_call_Relocation::spec(),
3731                    RELOC_DISP32);
3732 
3733     // popq $dst
         // (also releases the 8 bytes reserved by the subq above)
3734     if (dstenc >= 8) {
3735       emit_opcode(cbuf, Assembler::REX_B);
3736     }
3737     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3738 
3739     // done:
3740   %}
3741 
3742   enc_class f2l_fixup(rRegL dst, regF src)
3743   %{
         // Fixup sequence for a float -> long conversion.  Emits: compare dst
         // with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if not equal, skip the rest;
         // otherwise reserve 8 bytes of stack, store src there, call the
         // f2l_fixup stub and pop the slot into dst.
         // NOTE(review): presumably that constant (0x8000000000000000 per the
         // comment below) is what the preceding conversion leaves in dst for
         // NaN / out-of-range inputs -- confirm against the using instruct.
3744     int dstenc = $dst$$reg;
3745     int srcenc = $src$$reg;
3746     address const_address = (address) StubRoutines::x86::double_sign_flip();
3747 
3748     // cmpq $dst, [0x8000000000000000]
3749     cbuf.set_insts_mark();
3750     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3751     emit_opcode(cbuf, 0x39);
3752     // XXX reg_mem doesn't support RIP-relative addressing yet
3753     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3754     emit_d32_reloc(cbuf, const_address);
3755 
3756 
3757     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movss (5, or 6 with a REX prefix for src) +
         // call (5) + popq (1, or 2 with a REX prefix for dst), which gives
         // 0xF, 0x10 or 0x11.  Keep it in sync with the code that follows.
3758     emit_opcode(cbuf, 0x75);
3759     if (srcenc < 8 && dstenc < 8) {
3760       emit_d8(cbuf, 0xF);
3761     } else if (srcenc >= 8 && dstenc >= 8) {
3762       emit_d8(cbuf, 0x11);
3763     } else {
3764       emit_d8(cbuf, 0x10);
3765     }
3766 
3767     // subq rsp, #8
3768     emit_opcode(cbuf, Assembler::REX_W);
3769     emit_opcode(cbuf, 0x83);
3770     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3771     emit_d8(cbuf, 8);
3772 
3773     // movss [rsp], $src
3774     emit_opcode(cbuf, 0xF3);
3775     if (srcenc >= 8) {
3776       emit_opcode(cbuf, Assembler::REX_R);
3777     }
3778     emit_opcode(cbuf, 0x0F);
3779     emit_opcode(cbuf, 0x11);
3780     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3781 
3782     // call f2l_fixup
         // (0xE8 plus a relocated 32-bit displacement relative to the end of
         // the displacement field)
3783     cbuf.set_insts_mark();
3784     emit_opcode(cbuf, 0xE8);
3785     emit_d32_reloc(cbuf,
3786                    (int)
3787                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3788                    runtime_call_Relocation::spec(),
3789                    RELOC_DISP32);
3790 
3791     // popq $dst
         // (also releases the 8 bytes reserved by the subq above)
3792     if (dstenc >= 8) {
3793       emit_opcode(cbuf, Assembler::REX_B);
3794     }
3795     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3796 
3797     // done:
3798   %}
3799 
3800   enc_class d2i_fixup(rRegI dst, regD src)
3801   %{
         // Fixup sequence for a double -> int conversion.  Emits: compare dst
         // with 0x80000000; if not equal, skip the rest; otherwise reserve 8
         // bytes of stack, store src there, call the d2i_fixup stub and pop the
         // slot into dst.
         // NOTE(review): presumably 0x80000000 is what the preceding conversion
         // instruction leaves in dst for NaN / out-of-range inputs -- confirm
         // against the instruct that uses this encoding.
3802     int dstenc = $dst$$reg;
3803     int srcenc = $src$$reg;
3804 
3805     // cmpl $dst, #0x80000000
3806     if (dstenc >= 8) {
3807       emit_opcode(cbuf, Assembler::REX_B);
3808     }
3809     emit_opcode(cbuf, 0x81);
3810     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3811     emit_d32(cbuf, 0x80000000);
3812 
3813     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5, or 6 with a REX prefix for src) +
         // call (5) + popq (1, or 2 with a REX prefix for dst), which gives
         // 0xF, 0x10 or 0x11.  Keep it in sync with the code that follows.
3814     emit_opcode(cbuf, 0x75);
3815     if (srcenc < 8 && dstenc < 8) {
3816       emit_d8(cbuf, 0xF);
3817     } else if (srcenc >= 8 && dstenc >= 8) {
3818       emit_d8(cbuf, 0x11);
3819     } else {
3820       emit_d8(cbuf, 0x10);
3821     }
3822 
3823     // subq rsp, #8
3824     emit_opcode(cbuf, Assembler::REX_W);
3825     emit_opcode(cbuf, 0x83);
3826     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3827     emit_d8(cbuf, 8);
3828 
3829     // movsd [rsp], $src
3830     emit_opcode(cbuf, 0xF2);
3831     if (srcenc >= 8) {
3832       emit_opcode(cbuf, Assembler::REX_R);
3833     }
3834     emit_opcode(cbuf, 0x0F);
3835     emit_opcode(cbuf, 0x11);
3836     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3837 
3838     // call d2i_fixup
         // (0xE8 plus a relocated 32-bit displacement relative to the end of
         // the displacement field)
3839     cbuf.set_insts_mark();
3840     emit_opcode(cbuf, 0xE8);
3841     emit_d32_reloc(cbuf,
3842                    (int)
3843                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3844                    runtime_call_Relocation::spec(),
3845                    RELOC_DISP32);
3846 
3847     // popq $dst
         // (also releases the 8 bytes reserved by the subq above)
3848     if (dstenc >= 8) {
3849       emit_opcode(cbuf, Assembler::REX_B);
3850     }
3851     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3852 
3853     // done:
3854   %}
3855 
3856   enc_class d2l_fixup(rRegL dst, regD src)
3857   %{
         // Fixup sequence for a double -> long conversion.  Emits: compare dst
         // with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if not equal, skip the rest;
         // otherwise reserve 8 bytes of stack, store src there, call the
         // d2l_fixup stub and pop the slot into dst.
         // NOTE(review): presumably that constant (0x8000000000000000 per the
         // comment below) is what the preceding conversion leaves in dst for
         // NaN / out-of-range inputs -- confirm against the using instruct.
3858     int dstenc = $dst$$reg;
3859     int srcenc = $src$$reg;
3860     address const_address = (address) StubRoutines::x86::double_sign_flip();
3861 
3862     // cmpq $dst, [0x8000000000000000]
3863     cbuf.set_insts_mark();
3864     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3865     emit_opcode(cbuf, 0x39);
3866     // XXX reg_mem doesn't support RIP-relative addressing yet
3867     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3868     emit_d32_reloc(cbuf, const_address);
3869 
3870 
3871     // jne,s done
         // The 8-bit displacement is the byte length of everything emitted
         // below: subq (4) + movsd (5, or 6 with a REX prefix for src) +
         // call (5) + popq (1, or 2 with a REX prefix for dst), which gives
         // 0xF, 0x10 or 0x11.  Keep it in sync with the code that follows.
3872     emit_opcode(cbuf, 0x75);
3873     if (srcenc < 8 && dstenc < 8) {
3874       emit_d8(cbuf, 0xF);
3875     } else if (srcenc >= 8 && dstenc >= 8) {
3876       emit_d8(cbuf, 0x11);
3877     } else {
3878       emit_d8(cbuf, 0x10);
3879     }
3880 
3881     // subq rsp, #8
3882     emit_opcode(cbuf, Assembler::REX_W);
3883     emit_opcode(cbuf, 0x83);
3884     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3885     emit_d8(cbuf, 8);
3886 
3887     // movsd [rsp], $src
3888     emit_opcode(cbuf, 0xF2);
3889     if (srcenc >= 8) {
3890       emit_opcode(cbuf, Assembler::REX_R);
3891     }
3892     emit_opcode(cbuf, 0x0F);
3893     emit_opcode(cbuf, 0x11);
3894     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3895 
3896     // call d2l_fixup
         // (0xE8 plus a relocated 32-bit displacement relative to the end of
         // the displacement field)
3897     cbuf.set_insts_mark();
3898     emit_opcode(cbuf, 0xE8);
3899     emit_d32_reloc(cbuf,
3900                    (int)
3901                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3902                    runtime_call_Relocation::spec(),
3903                    RELOC_DISP32);
3904 
3905     // popq $dst
         // (also releases the 8 bytes reserved by the subq above)
3906     if (dstenc >= 8) {
3907       emit_opcode(cbuf, Assembler::REX_B);
3908     }
3909     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3910 
3911     // done:
3912   %}
3913 
3914   // Safepoint Poll.  This polls the safepoint page, and causes an
3915   // exception if it is not readable. Unfortunately, it kills
3916   // RFLAGS in the process.  The instruction is encoded by hand as
       // opcode 0x85 + ModRM (mod=00, reg=RAX, rm=101) + a relocated 32-bit
       // displacement to os::get_polling_page(), and its start is tagged with
       // a relocInfo::poll_type relocation.
3917   enc_class enc_safepoint_poll
3918   %{
3919     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
3920     // XXX reg_mem doesn't support RIP-relative addressing yet
3921     cbuf.set_insts_mark();
3922     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); // XXX
3923     emit_opcode(cbuf, 0x85); // testl
3924     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
3925     // cbuf.insts_mark() is beginning of instruction
3926     emit_d32_reloc(cbuf, os::get_polling_page());
3927 //                    relocInfo::poll_type,
3928   %}
3929 %}
3930 
3931 
3932 
3933 //----------FRAME--------------------------------------------------------------
3934 // Definition of frame structure and management information.
3935 //
3936 //  S T A C K   L A Y O U T    Allocators stack-slot number
3937 //                             |   (to get allocators register number
3938 //  G  Owned by    |        |  v    add OptoReg::stack0())
3939 //  r   CALLER     |        |
3940 //  o     |        +--------+      pad to even-align allocators stack-slot
3941 //  w     V        |  pad0  |        numbers; owned by CALLER
3942 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3943 //  h     ^        |   in   |  5
3944 //        |        |  args  |  4   Holes in incoming args owned by SELF
3945 //  |     |        |        |  3
3946 //  |     |        +--------+
3947 //  V     |        | old out|      Empty on Intel, window on Sparc
3948 //        |    old |preserve|      Must be even aligned.
3949 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3950 //        |        |   in   |  3   area for Intel ret address
3951 //     Owned by    |preserve|      Empty on Sparc.
3952 //       SELF      +--------+
3953 //        |        |  pad2  |  2   pad to align old SP
3954 //        |        +--------+  1
3955 //        |        | locks  |  0
3956 //        |        +--------+----> OptoReg::stack0(), even aligned
3957 //        |        |  pad1  | 11   pad to align new SP
3958 //        |        +--------+
3959 //        |        |        | 10
3960 //        |        | spills |  9   spills
3961 //        V        |        |  8   (pad0 slot for callee)
3962 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3963 //        ^        |  out   |  7
3964 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3965 //     Owned by    +--------+
3966 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3967 //        |    new |preserve|      Must be even-aligned.
3968 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3969 //        |        |        |
3970 //
3971 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3972 //         known from SELF's arguments and the Java calling convention.
3973 //         Region 6-7 is determined per call site.
3974 // Note 2: If the calling convention leaves holes in the incoming argument
3975 //         area, those holes are owned by SELF.  Holes in the outgoing area
3976 //         are owned by the CALLEE.  Holes should not be necessary in the
3977 //         incoming area, as the Java calling convention is completely under
3978 //         the control of the AD file.  Doubles can be sorted and packed to
3979 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3980 //         varargs C calling conventions.
3981 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3982 //         even aligned with pad0 as needed.
3983 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3984 //         region 6-11 is even aligned; it may be padded out more so that
3985 //         the region from SP to FP meets the minimum stack alignment.
3986 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3987 //         alignment.  Region 11, pad1, may be dynamically extended so that
3988 //         SP meets the minimum alignment.
3989 
3990 frame
3991 %{
       // Frame description: stack direction, the registers shared with the
       // interpreter, stack-slot accounting, the return-address location, and
       // the calling-convention / return-value hooks.
3992   // What direction does stack grow in (assumed to be same for C & Java)
3993   stack_direction(TOWARDS_LOW);
3994 
3995   // These three registers define part of the calling convention
3996   // between compiled code and the interpreter.
       // (Only two of them, the inline cache and method oop registers, are
       // declared immediately below.)
3997   inline_cache_reg(RAX);                // Inline Cache Register
3998   interpreter_method_oop_reg(RBX);      // Method Oop Register when
3999                                         // calling interpreter
4000 
4001   // Optional: name the operand used by cisc-spilling to access
4002   // [stack_pointer + offset]
4003   cisc_spilling_operand_name(indOffset32);
4004 
4005   // Number of stack slots consumed by locking an object
4006   sync_stack_slots(2);
4007 
4008   // Compiled code's Frame Pointer
4009   frame_pointer(RSP);
4010 
4011   // Interpreter stores its frame pointer in a register which is
4012   // stored to the stack by I2CAdaptors.
4013   // I2CAdaptors convert from interpreted java to compiled java.
4014   interpreter_frame_pointer(RBP);
4015 
4016   // Stack alignment requirement
4017   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4018 
4019   // Number of stack slots between incoming argument block and the start of
4020   // a new frame.  The PROLOG must add this many slots to the stack.  The
4021   // EPILOG must remove this many slots.  amd64 needs two slots for
4022   // return address.
       // NOTE(review): the base value is 4, not 2; presumably the other two
       // slots hold the saved frame pointer -- confirm.  Two more slots are
       // added when VerifyStackAtCalls is set.
4023   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4024 
4025   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4026   // for calls to C.  Supports the var-args backing area for register parms.
4027   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4028 
4029   // The after-PROLOG location of the return address.  Location of
4030   // return address specifies a type (REG or STACK) and a number
4031   // representing the register number (i.e. - use a register name) or
4032   // stack slot.
4033   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4034   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 + verification slots + fixed slots) is rounded up
       // to a multiple of WordsPerLong * 2 before the "- 2" is applied.
4035   return_addr(STACK - 2 +
4036               round_to(2 + 2 * VerifyStackAtCalls +
4037                        Compile::current()->fixed_slots(),
4038                        WordsPerLong * 2));
4039 
4040   // Body of function which returns an integer array locating
4041   // arguments either in registers or in stack slots.  Passed an array
4042   // of ideal registers called "sig" and a "length" count.  Stack-slot
4043   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4044   // arguments for a CALLEE.  Incoming stack arguments are
4045   // automatically biased by the preserve_stack_slots field above.
4046 
4047   calling_convention
4048   %{
4049     // No difference between ingoing/outgoing just pass false
4050     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4051   %}
4052 
4053   c_calling_convention
4054   %{
4055     // This is obviously always outgoing
         // (the return value of c_calling_convention is deliberately discarded)
4056     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4057   %}
4058 
4059   // Location of compiled Java return values.  Same as C for now.
       // The lo/hi tables are indexed by ideal register opcode; their first
       // two entries are placeholders.  Values come back in RAX or XMM0, with
       // a high half only for the Op_RegP, Op_RegD and Op_RegL entries.
4060   return_value
4061   %{
4062     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4063            "only return normal values");
4064 
4065     static const int lo[Op_RegL + 1] = {
4066       0,
4067       0,
4068       RAX_num,  // Op_RegN
4069       RAX_num,  // Op_RegI
4070       RAX_num,  // Op_RegP
4071       XMM0_num, // Op_RegF
4072       XMM0_num, // Op_RegD
4073       RAX_num   // Op_RegL
4074     };
4075     static const int hi[Op_RegL + 1] = {
4076       0,
4077       0,
4078       OptoReg::Bad, // Op_RegN
4079       OptoReg::Bad, // Op_RegI
4080       RAX_H_num,    // Op_RegP
4081       OptoReg::Bad, // Op_RegF
4082       XMM0_H_num,   // Op_RegD
4083       RAX_H_num     // Op_RegL
4084     };
         // Guards against a new machine leaf type being added without a
         // matching table entry (only the size of hi is checked).
4085     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4086     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4087   %}
4088 %}
4089 
4090 //----------ATTRIBUTES---------------------------------------------------------
4091 //----------Operand Attributes-------------------------------------------------
4092 op_attrib op_cost(0);        // Required cost attribute; individual operands below override it with op_cost(...)
4093 
4094 //----------Instruction Attributes---------------------------------------------
     // NOTE(review): the parenthesized values here are presumably the defaults
     // used when a definition does not set the attribute itself -- confirm
     // against the ADLC documentation.
4095 ins_attrib ins_cost(100);       // Required cost attribute
4096 ins_attrib ins_size(8);         // Required size attribute (in bits)
4097 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4098 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4099                                 // a non-matching short branch variant
4100                                 // of some long branch?
4101 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4102                                 // be a power of 2) specifies the
4103                                 // alignment that some part of the
4104                                 // instruction (not necessarily the
4105                                 // start) requires.  If > 1, a
4106                                 // compute_padding() function must be
4107                                 // provided for the instruction
4108 
4109 //----------OPERANDS-----------------------------------------------------------
4110 // Operand definitions must precede instruction definitions for correct parsing
4111 // in the ADLC because operands constitute user defined types which are used in
4112 // instruction definitions.
4113 
4114 //----------Simple Operands----------------------------------------------------
4115 // Immediate Operands
4116 // Integer Immediate: matches any integer constant node (ConI); no predicate.
4117 operand immI()
4118 %{
4119   match(ConI);
4120 
4121   op_cost(10);
4122   format %{ %}
4123   interface(CONST_INTER);
4124 %}
4125 
4126 // Constant for test vs zero: a ConI whose value is exactly 0.
4127 operand immI0()
4128 %{
4129   predicate(n->get_int() == 0);
4130   match(ConI);
4131 
4132   op_cost(0);
4133   format %{ %}
4134   interface(CONST_INTER);
4135 %}
4136 
4137 // Constant for increment: a ConI whose value is exactly 1.
4138 operand immI1()
4139 %{
4140   predicate(n->get_int() == 1);
4141   match(ConI);
4142 
4143   op_cost(0);
4144   format %{ %}
4145   interface(CONST_INTER);
4146 %}
4147 
4148 // Constant for decrement: a ConI whose value is exactly -1.
4149 operand immI_M1()
4150 %{
4151   predicate(n->get_int() == -1);
4152   match(ConI);
4153 
4154   op_cost(0);
4155   format %{ %}
4156   interface(CONST_INTER);
4157 %}
4158 
4159 // Valid scale values for addressing modes: a ConI in the range 0..3 inclusive.
4160 operand immI2()
4161 %{
4162   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4163   match(ConI);
4164 
       // No op_cost is given here, unlike most immediates above.
4165   format %{ %}
4166   interface(CONST_INTER);
4167 %}
4168 
4169 operand immI8()
4170 %{
       // Integer immediate that fits in a signed byte: -0x80 <= value < 0x80.
4171   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4172   match(ConI);
4173 
4174   op_cost(5);
4175   format %{ %}
4176   interface(CONST_INTER);
4177 %}
4178 
4179 operand immI16()
4180 %{
       // Integer immediate that fits in a signed 16-bit value:
       // -32768 <= value <= 32767.
4181   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4182   match(ConI);
4183 
4184   op_cost(10);
4185   format %{ %}
4186   interface(CONST_INTER);
4187 %}
4188 
4189 // Constant for long shifts: a ConI whose value is exactly 32.
4190 operand immI_32()
4191 %{
4192   predicate( n->get_int() == 32 );
4193   match(ConI);
4194 
4195   op_cost(0);
4196   format %{ %}
4197   interface(CONST_INTER);
4198 %}
4199 
4200 // Constant for long shifts: a ConI whose value is exactly 64.
4201 operand immI_64()
4202 %{
4203   predicate( n->get_int() == 64 );
4204   match(ConI);
4205 
4206   op_cost(0);
4207   format %{ %}
4208   interface(CONST_INTER);
4209 %}
4210 
4211 // Pointer Immediate: matches any pointer constant node (ConP); no predicate.
4212 operand immP()
4213 %{
4214   match(ConP);
4215 
4216   op_cost(10);
4217   format %{ %}
4218   interface(CONST_INTER);
4219 %}
4220 
4221 // NULL Pointer Immediate: a ConP whose pointer value is 0.
4222 operand immP0()
4223 %{
4224   predicate(n->get_ptr() == 0);
4225   match(ConP);
4226 
4227   op_cost(5);
4228   format %{ %}
4229   interface(CONST_INTER);
4230 %}
4231 
4232 // Narrow Pointer Immediate: matches any ConN node; no predicate.
4233 operand immN() %{
4234   match(ConN);
4235 
4236   op_cost(10);
4237   format %{ %}
4238   interface(CONST_INTER);
4239 %}
4240 
4241 // NULL Narrow Pointer Immediate: a ConN whose get_narrowcon() value is 0.
4242 operand immN0() %{
4243   predicate(n->get_narrowcon() == 0);
4244   match(ConN);
4245 
4246   op_cost(5);
4247   format %{ %}
4248   interface(CONST_INTER);
4249 %}
4250 
4251 operand immP31()
4252 %{
       // Pointer immediate that is not an oop pointer (isa_oopptr() is null)
       // and whose value shifted right by 31 is zero, i.e. no bit at or above
       // bit 31 is set.
4253   predicate(!n->as_Type()->type()->isa_oopptr()
4254             && (n->get_ptr() >> 31) == 0);
4255   match(ConP);
4256 
4257   op_cost(5);
4258   format %{ %}
4259   interface(CONST_INTER);
4260 %}
4261 
4262 
4263 // Long Immediate: matches any long constant node (ConL); no predicate.
4264 operand immL()
4265 %{
4266   match(ConL);
4267 
4268   op_cost(20);
4269   format %{ %}
4270   interface(CONST_INTER);
4271 %}
4272 
4273 // Long Immediate 8-bit: a ConL with -0x80 <= value < 0x80 (signed byte range).
4274 operand immL8()
4275 %{
4276   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4277   match(ConL);
4278 
4279   op_cost(5);
4280   format %{ %}
4281   interface(CONST_INTER);
4282 %}
4283 
4284 // Long Immediate 32-bit unsigned: a ConL that survives a round trip through
     // (unsigned int) unchanged.
4285 operand immUL32()
4286 %{
4287   predicate(n->get_long() == (unsigned int) (n->get_long()));
4288   match(ConL);
4289 
4290   op_cost(10);
4291   format %{ %}
4292   interface(CONST_INTER);
4293 %}
4294 
4295 // Long Immediate 32-bit signed: a ConL that survives a round trip through
     // (int) unchanged.
4296 operand immL32()
4297 %{
4298   predicate(n->get_long() == (int) (n->get_long()));
4299   match(ConL);
4300 
4301   op_cost(15);
4302   format %{ %}
4303   interface(CONST_INTER);
4304 %}
4305 
4306 // Long Immediate zero: a ConL whose value is exactly 0.
4307 operand immL0()
4308 %{
4309   predicate(n->get_long() == 0L);
4310   match(ConL);
4311 
4312   op_cost(10);
4313   format %{ %}
4314   interface(CONST_INTER);
4315 %}
4316 
4317 // Constant for increment: a ConL whose value is exactly 1 (no op_cost given).
4318 operand immL1()
4319 %{
4320   predicate(n->get_long() == 1);
4321   match(ConL);
4322 
4323   format %{ %}
4324   interface(CONST_INTER);
4325 %}
4326 
4327 // Constant for decrement: a ConL whose value is exactly -1 (no op_cost given).
4328 operand immL_M1()
4329 %{
4330   predicate(n->get_long() == -1);
4331   match(ConL);
4332 
4333   format %{ %}
4334   interface(CONST_INTER);
4335 %}
4336 
4337 // Long Immediate: the value 10 (a ConL equal to 10; no op_cost given).
4338 operand immL10()
4339 %{
4340   predicate(n->get_long() == 10);
4341   match(ConL);
4342 
4343   format %{ %}
4344   interface(CONST_INTER);
4345 %}
4346 
4347 // Long immediate from 0 to 127 inclusive (0 <= value < 0x80).
4348 // Used for a shorter form of long mul by 10.
4349 operand immL_127()
4350 %{
4351   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4352   match(ConL);
4353 
4354   op_cost(10);
4355   format %{ %}
4356   interface(CONST_INTER);
4357 %}
4358 
4359 // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
4360 operand immL_32bits()
4361 %{
4362   predicate(n->get_long() == 0xFFFFFFFFL);
4363   match(ConL);
4364   op_cost(20);
4365 
4366   format %{ %}
4367   interface(CONST_INTER);
4368 %}
4369 
4370 // Float Immediate zero: a ConF for which jint_cast of the value is 0.
     // NOTE(review): presumably jint_cast reinterprets the float's bits, so only
     // +0.0f matches and -0.0f does not -- confirm.
4371 operand immF0()
4372 %{
4373   predicate(jint_cast(n->getf()) == 0);
4374   match(ConF);
4375 
4376   op_cost(5);
4377   format %{ %}
4378   interface(CONST_INTER);
4379 %}
4380 
4381 // Float Immediate: matches any float constant node (ConF); no predicate.
4382 operand immF()
4383 %{
4384   match(ConF);
4385 
4386   op_cost(15);
4387   format %{ %}
4388   interface(CONST_INTER);
4389 %}
4390 
4391 // Double Immediate zero: a ConD for which jlong_cast of the value is 0.
     // NOTE(review): presumably jlong_cast reinterprets the double's bits, so
     // only +0.0 matches and -0.0 does not -- confirm.
4392 operand immD0()
4393 %{
4394   predicate(jlong_cast(n->getd()) == 0);
4395   match(ConD);
4396 
4397   op_cost(5);
4398   format %{ %}
4399   interface(CONST_INTER);
4400 %}
4401 
4402 // Double Immediate: matches any double constant node (ConD); no predicate.
4403 operand immD()
4404 %{
4405   match(ConD);
4406 
4407   op_cost(15);
4408   format %{ %}
4409   interface(CONST_INTER);
4410 %}
4411 
4412 // Immediates for special shifts (sign extend)
4413 
4414 // Integer immediate: the value 16 (one of the special-shift constants).
4415 operand immI_16()
4416 %{
4417   predicate(n->get_int() == 16);
4418   match(ConI);
4419 
4420   format %{ %}
4421   interface(CONST_INTER);
4422 %}
4423 
4424 operand immI_24()
4425 %{
       // Integer immediate: the value 24 (one of the special-shift constants).
4426   predicate(n->get_int() == 24);
4427   match(ConI);
4428 
4429   format %{ %}
4430   interface(CONST_INTER);
4431 %}
4432 
4433 // Constant for byte-wide masking: a ConI whose value is exactly 255.
4434 operand immI_255()
4435 %{
4436   predicate(n->get_int() == 255);
4437   match(ConI);
4438 
4439   format %{ %}
4440   interface(CONST_INTER);
4441 %}
4442 
4443 // Constant for short-wide masking: a ConI whose value is exactly 65535.
4444 operand immI_65535()
4445 %{
4446   predicate(n->get_int() == 65535);
4447   match(ConI);
4448 
4449   format %{ %}
4450   interface(CONST_INTER);
4451 %}
4452 
4453 // Constant for byte-wide masking: a ConL whose value is exactly 255.
4454 operand immL_255()
4455 %{
4456   predicate(n->get_long() == 255);
4457   match(ConL);
4458 
4459   format %{ %}
4460   interface(CONST_INTER);
4461 %}
4462 
4463 // Constant for short-wide masking: a ConL whose value is exactly 65535.
4464 operand immL_65535()
4465 %{
4466   predicate(n->get_long() == 65535);
4467   match(ConL);
4468 
4469   format %{ %}
4470   interface(CONST_INTER);
4471 %}
4472 
4473 // Register Operands
4474 // Integer Register: allocated from register class int_reg.
4475 operand rRegI()
4476 %{
4477   constraint(ALLOC_IN_RC(int_reg));
4478   match(RegI);
4479 
       // The fixed-register integer operands listed here also match.
4480   match(rax_RegI);
4481   match(rbx_RegI);
4482   match(rcx_RegI);
4483   match(rdx_RegI);
4484   match(rdi_RegI);
4485 
4486   format %{ %}
4487   interface(REG_INTER);
4488 %}
4489 
4490 // Special Registers: int operand confined to the int_rax_reg class
4491 operand rax_RegI()
4492 %{
4493   constraint(ALLOC_IN_RC(int_rax_reg));
4494   match(RegI);
4495   match(rRegI);  // a general int operand may be supplied where this one is expected
4496 
4497   format %{ "RAX" %}
4498   interface(REG_INTER);
4499 %}
4500 
4501 // Special Registers: int operand confined to the int_rbx_reg class
4502 operand rbx_RegI()
4503 %{
4504   constraint(ALLOC_IN_RC(int_rbx_reg));
4505   match(RegI);
4506   match(rRegI);  // a general int operand may be supplied where this one is expected
4507 
4508   format %{ "RBX" %}
4509   interface(REG_INTER);
4510 %}
4511 
4512 operand rcx_RegI()
4513 %{
4514   constraint(ALLOC_IN_RC(int_rcx_reg));  // int operand confined to the int_rcx_reg class
4515   match(RegI);
4516   match(rRegI);  // a general int operand may be supplied where this one is expected
4517 
4518   format %{ "RCX" %}
4519   interface(REG_INTER);
4520 %}
4521 
4522 operand rdx_RegI()
4523 %{
4524   constraint(ALLOC_IN_RC(int_rdx_reg));  // int operand confined to the int_rdx_reg class
4525   match(RegI);
4526   match(rRegI);  // a general int operand may be supplied where this one is expected
4527 
4528   format %{ "RDX" %}
4529   interface(REG_INTER);
4530 %}
4531 
4532 operand rdi_RegI()
4533 %{
4534   constraint(ALLOC_IN_RC(int_rdi_reg));  // int operand confined to the int_rdi_reg class
4535   match(RegI);
4536   match(rRegI);  // a general int operand may be supplied where this one is expected
4537 
4538   format %{ "RDI" %}
4539   interface(REG_INTER);
4540 %}
4541 
4542 operand no_rcx_RegI()
4543 %{
4544   constraint(ALLOC_IN_RC(int_no_rcx_reg));  // int operand allocated from the int_no_rcx_reg class
4545   match(RegI);
4546   match(rax_RegI);  // every register-specific int operand except rcx_RegI is listed
4547   match(rbx_RegI);
4548   match(rdx_RegI);
4549   match(rdi_RegI);
4550 
4551   format %{ %}
4552   interface(REG_INTER);
4553 %}
4554 
4555 operand no_rax_rdx_RegI()
4556 %{
4557   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // int operand allocated from the int_no_rax_rdx_reg class
4558   match(RegI);
4559   match(rbx_RegI);  // rax_RegI and rdx_RegI are deliberately absent from this list
4560   match(rcx_RegI);
4561   match(rdi_RegI);
4562 
4563   format %{ %}
4564   interface(REG_INTER);
4565 %}
4566 
4567 // Pointer Register: widest pointer operand, allocated from the any_reg class
4568 operand any_RegP()
4569 %{
4570   constraint(ALLOC_IN_RC(any_reg));
4571   match(RegP);  // primary rule: a plain pointer register value
4572   match(rax_RegP);  // register-specific pointer operands are accepted too
4573   match(rbx_RegP);
4574   match(rdi_RegP);
4575   match(rsi_RegP);
4576   match(rbp_RegP);
4577   match(r15_RegP);
4578   match(rRegP);  // as is the general pointer operand
4579 
4580   format %{ %}
4581   interface(REG_INTER);
4582 %}
4583 
4584 operand rRegP()
4585 %{
4586   constraint(ALLOC_IN_RC(ptr_reg));  // general pointer operand allocated from the ptr_reg class
4587   match(RegP);  // primary rule: a plain pointer register value
4588   match(rax_RegP);  // register-specific pointer operands are accepted too
4589   match(rbx_RegP);
4590   match(rdi_RegP);
4591   match(rsi_RegP);
4592   match(rbp_RegP);
4593   match(r15_RegP);  // See Q&A below about r15_RegP.
4594 
4595   format %{ %}
4596   interface(REG_INTER);
4597 %}
4598 
4599 operand rRegN() %{
4600   constraint(ALLOC_IN_RC(int_reg));  // RegN values share the int_reg class used by rRegI
4601   match(RegN);  // only rule: a RegN register value
4602 
4603   format %{ %}
4604   interface(REG_INTER);
4605 %}
4606 
4607 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4608 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4609 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4610 // The output of an instruction is controlled by the allocator, which respects
4611 // register class masks, not match rules.  Unless an instruction mentions
4612 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4613 // by the allocator as an input.
4614 
4615 operand no_rax_RegP()
4616 %{
4617   constraint(ALLOC_IN_RC(ptr_no_rax_reg));  // pointer operand allocated from the ptr_no_rax_reg class
4618   match(RegP);
4619   match(rbx_RegP);  // rax_RegP is not among the accepted register-specific operands
4620   match(rsi_RegP);
4621   match(rdi_RegP);
4622 
4623   format %{ %}
4624   interface(REG_INTER);
4625 %}
4626 
4627 operand no_rbp_RegP()
4628 %{
4629   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));  // pointer operand allocated from the ptr_no_rbp_reg class
4630   match(RegP);
4631   match(rbx_RegP);  // NOTE(review): same list as no_rax_RegP; rax_RegP is not listed although the name only excludes rbp -- confirm intended
4632   match(rsi_RegP);
4633   match(rdi_RegP);
4634 
4635   format %{ %}
4636   interface(REG_INTER);
4637 %}
4638 
4639 operand no_rax_rbx_RegP()
4640 %{
4641   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));  // pointer operand allocated from the ptr_no_rax_rbx_reg class
4642   match(RegP);
4643   match(rsi_RegP);  // neither rax_RegP nor rbx_RegP is listed
4644   match(rdi_RegP);
4645 
4646   format %{ %}
4647   interface(REG_INTER);
4648 %}
4649 
4650 // Special Registers
4651 // Return a pointer value: pointer operand confined to the ptr_rax_reg class
4652 operand rax_RegP()
4653 %{
4654   constraint(ALLOC_IN_RC(ptr_rax_reg));
4655   match(RegP);
4656   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4657 
4658   format %{ %}
4659   interface(REG_INTER);
4660 %}
4661 
4662 // Special Registers
4663 // Return a compressed pointer value: RegN operand confined to the int_rax_reg class
4664 operand rax_RegN()
4665 %{
4666   constraint(ALLOC_IN_RC(int_rax_reg));  // same register class as rax_RegI
4667   match(RegN);
4668   match(rRegN);  // a general RegN operand may be supplied where this one is expected
4669 
4670   format %{ %}
4671   interface(REG_INTER);
4672 %}
4673 
4674 // Used in AtomicAdd: pointer operand confined to the ptr_rbx_reg class
4675 operand rbx_RegP()
4676 %{
4677   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4678   match(RegP);
4679   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4680 
4681   format %{ %}
4682   interface(REG_INTER);
4683 %}
4684 
4685 operand rsi_RegP()
4686 %{
4687   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand confined to the ptr_rsi_reg class
4688   match(RegP);
4689   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4690 
4691   format %{ %}
4692   interface(REG_INTER);
4693 %}
4694 
4695 // Used in rep stosq: pointer operand confined to the ptr_rdi_reg class
4696 operand rdi_RegP()
4697 %{
4698   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4699   match(RegP);
4700   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4701 
4702   format %{ %}
4703   interface(REG_INTER);
4704 %}
4705 
4706 operand rbp_RegP()
4707 %{
4708   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand confined to the ptr_rbp_reg class
4709   match(RegP);
4710   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4711 
4712   format %{ %}
4713   interface(REG_INTER);
4714 %}
4715 
4716 operand r15_RegP()
4717 %{
4718   constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand confined to the ptr_r15_reg class (r15 is the read-only TLS register per the Q&A comment in this file)
4719   match(RegP);
4720   match(rRegP);  // a general pointer operand may be supplied where this one is expected
4721 
4722   format %{ %}
4723   interface(REG_INTER);
4724 %}
4725 
4726 operand rRegL()
4727 %{
4728   constraint(ALLOC_IN_RC(long_reg));  // general long operand allocated from the long_reg class
4729   match(RegL);  // primary rule: a plain long register value
4730   match(rax_RegL);  // only the rax and rdx long operands are listed here; rcx_RegL is not
4731   match(rdx_RegL);
4732 
4733   format %{ %}
4734   interface(REG_INTER);
4735 %}
4736 
4737 // Special Registers: long operand allocated from the long_no_rax_rdx_reg class
4738 operand no_rax_rdx_RegL()
4739 %{
4740   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4741   match(RegL);
4742   match(rRegL);  // a general long operand may be supplied where this one is expected
4743 
4744   format %{ %}
4745   interface(REG_INTER);
4746 %}
4747 
4748 operand no_rax_RegL()
4749 %{
4750   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // NOTE(review): same class as no_rax_rdx_RegL, so rdx appears excluded from allocation as well -- confirm whether a rax-only exclusion class was intended
4751   match(RegL);
4752   match(rRegL);
4753   match(rdx_RegL);  // rdx_RegL is still accepted as a match, unlike in no_rax_rdx_RegL
4754 
4755   format %{ %}
4756   interface(REG_INTER);
4757 %}
4758 
4759 operand no_rcx_RegL()
4760 %{
4761   constraint(ALLOC_IN_RC(long_no_rcx_reg));  // long operand allocated from the long_no_rcx_reg class
4762   match(RegL);
4763   match(rRegL);  // a general long operand may be supplied where this one is expected
4764 
4765   format %{ %}
4766   interface(REG_INTER);
4767 %}
4768 
4769 operand rax_RegL()
4770 %{
4771   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand confined to the long_rax_reg class
4772   match(RegL);
4773   match(rRegL);  // a general long operand may be supplied where this one is expected
4774 
4775   format %{ "RAX" %}
4776   interface(REG_INTER);
4777 %}
4778 
4779 operand rcx_RegL()
4780 %{
4781   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand confined to the long_rcx_reg class
4782   match(RegL);
4783   match(rRegL);  // a general long operand may be supplied where this one is expected
4784 
4785   format %{ %}
4786   interface(REG_INTER);
4787 %}
4788 
4789 operand rdx_RegL()
4790 %{
4791   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand confined to the long_rdx_reg class
4792   match(RegL);
4793   match(rRegL);  // a general long operand may be supplied where this one is expected
4794 
4795   format %{ %}
4796   interface(REG_INTER);
4797 %}
4798 
4799 // Flags register, used as output of compare instructions
4800 operand rFlagsReg()
4801 %{
4802   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsRegU and rFlagsRegUCF
4803   match(RegFlags);  // only rule: a flags register value
4804 
4805   format %{ "RFLAGS" %}
4806   interface(REG_INTER);
4807 %}
4808 
4809 // Flags register, used as output of FLOATING POINT compare instructions
4810 operand rFlagsRegU()
4811 %{
4812   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsReg; only the operand type differs
4813   match(RegFlags);  // only rule: a flags register value
4814 
4815   format %{ "RFLAGS_U" %}
4816   interface(REG_INTER);
4817 %}
4818 
4819 operand rFlagsRegUCF() %{
4820   constraint(ALLOC_IN_RC(int_flags));  // same int_flags class as rFlagsReg and rFlagsRegU
4821   match(RegFlags);
4822   predicate(false);  // constant-false predicate; presumably keeps this operand from being chosen by the RegFlags match itself -- TODO confirm
4823 
4824   format %{ "RFLAGS_U_CF" %}
4825   interface(REG_INTER);
4826 %}
4827 
4828 // Float register operands: allocated from the float_reg class
4829 operand regF()
4830 %{
4831   constraint(ALLOC_IN_RC(float_reg));
4832   match(RegF);  // only rule: a float register value
4833 
4834   format %{ %}
4835   interface(REG_INTER);
4836 %}
4837 
4838 // Double register operands: allocated from the double_reg class
4839 operand regD() 
4840 %{
4841   constraint(ALLOC_IN_RC(double_reg));
4842   match(RegD);  // only rule: a double register value
4843 
4844   format %{ %}
4845   interface(REG_INTER);
4846 %}
4847 
4848 
4849 //----------Memory Operands----------------------------------------------------
4850 // Direct Memory Operand
4851 // operand direct(immP addr)
4852 // %{
4853 //   match(addr);
4854 
4855 //   format %{ "[$addr]" %}
4856 //   interface(MEMORY_INTER) %{
4857 //     base(0xFFFFFFFF);
4858 //     index(0x4);
4859 //     scale(0x0);
4860 //     disp($addr);
4861 //   %}
4862 // %}
4863 
4864 // Indirect Memory Operand: address is the pointer register alone
4865 operand indirect(any_RegP reg)
4866 %{
4867   constraint(ALLOC_IN_RC(ptr_reg));
4868   match(reg);  // matches the bare pointer operand
4869 
4870   format %{ "[$reg]" %}
4871   interface(MEMORY_INTER) %{
4872     base($reg);
4873     index(0x4);  // No Index
4874     scale(0x0);  // No Scale
4875     disp(0x0);   // No Displacement
4876   %}
4877 %}
4878 
4879 // Indirect Memory Plus Short Offset Operand: base register plus an immL8 displacement
4880 operand indOffset8(any_RegP reg, immL8 off)
4881 %{
4882   constraint(ALLOC_IN_RC(ptr_reg));
4883   match(AddP reg off);  // pointer plus constant offset
4884 
4885   format %{ "[$reg + $off (8-bit)]" %}
4886   interface(MEMORY_INTER) %{
4887     base($reg);
4888     index(0x4);  // No Index
4889     scale(0x0);  // No Scale
4890     disp($off);
4891   %}
4892 %}
4893 
4894 // Indirect Memory Plus Long Offset Operand: base register plus an immL32 displacement
4895 operand indOffset32(any_RegP reg, immL32 off)
4896 %{
4897   constraint(ALLOC_IN_RC(ptr_reg));
4898   match(AddP reg off);  // pointer plus constant offset
4899 
4900   format %{ "[$reg + $off (32-bit)]" %}
4901   interface(MEMORY_INTER) %{
4902     base($reg);
4903     index(0x4);  // No Index
4904     scale(0x0);  // No Scale
4905     disp($off);
4906   %}
4907 %}
4908 
4909 // Indirect Memory Plus Index Register Plus Offset Operand
4910 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4911 %{
4912   constraint(ALLOC_IN_RC(ptr_reg));
4913   match(AddP (AddP reg lreg) off);  // (base + long index) + constant offset
4914 
4915   op_cost(10);
4916   format %{"[$reg + $off + $lreg]" %}
4917   interface(MEMORY_INTER) %{
4918     base($reg);
4919     index($lreg);
4920     scale(0x0);  // No Scale
4921     disp($off);
4922   %}
4923 %}
4924 
4925 // Indirect Memory Plus Index Register Operand (no offset: disp is 0x0)
4926 operand indIndex(any_RegP reg, rRegL lreg)
4927 %{
4928   constraint(ALLOC_IN_RC(ptr_reg));
4929   match(AddP reg lreg);  // base + long index
4930 
4931   op_cost(10);
4932   format %{"[$reg + $lreg]" %}
4933   interface(MEMORY_INTER) %{
4934     base($reg);
4935     index($lreg);
4936     scale(0x0);  // No Scale
4937     disp(0x0);   // No Displacement
4938   %}
4939 %}
4940 
4941 // Indirect Memory Plus Scaled Index Register Operand
4942 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4943 %{
4944   constraint(ALLOC_IN_RC(ptr_reg));
4945   match(AddP reg (LShiftL lreg scale));  // base + (long index << scale)
4946 
4947   op_cost(10);
4948   format %{"[$reg + $lreg << $scale]" %}
4949   interface(MEMORY_INTER) %{
4950     base($reg);
4951     index($lreg);
4952     scale($scale);  // the shift count becomes the scale field
4953     disp(0x0);      // No Displacement
4954   %}
4955 %}
4956 
4957 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4958 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4959 %{
4960   constraint(ALLOC_IN_RC(ptr_reg));
4961   match(AddP (AddP reg (LShiftL lreg scale)) off);  // (base + (long index << scale)) + constant offset
4962 
4963   op_cost(10);
4964   format %{"[$reg + $off + $lreg << $scale]" %}
4965   interface(MEMORY_INTER) %{
4966     base($reg);
4967     index($lreg);
4968     scale($scale);  // the shift count becomes the scale field
4969     disp($off);
4970   %}
4971 %}
4972 
4973 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
4974 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4975 %{
4976   constraint(ALLOC_IN_RC(ptr_reg));
4977   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // requires a non-negative lower bound (_lo) on the long type at in(2)->in(3)->in(1); presumably that node is the ConvI2L of idx -- TODO confirm
4978   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);  // int index widened by ConvI2L, then shifted
4979 
4980   op_cost(10);
4981   format %{"[$reg + $off + $idx << $scale]" %}
4982   interface(MEMORY_INTER) %{
4983     base($reg);
4984     index($idx);    // the int register itself is used as the index
4985     scale($scale);
4986     disp($off);
4987   %}
4988 %}
4989 
4990 // Indirect Narrow Oop Plus Offset Operand
4991 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
4992 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4993 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4994   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));  // only with compressed oops and a shift equal to Address::times_8
4995   constraint(ALLOC_IN_RC(ptr_reg));
4996   match(AddP (DecodeN reg) off);  // the DecodeN is folded into the addressing mode
4997 
4998   op_cost(10);
4999   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5000   interface(MEMORY_INTER) %{
5001     base(0xc); // R12
5002     index($reg);  // the narrow oop register is the index
5003     scale(0x3);   // matches the "<< 3" shown in the format string
5004     disp($off);
5005   %}
5006 %}
5007 
5008 // Indirect Memory Operand, narrow-oop base: a DecodeN used directly as the address
5009 operand indirectNarrow(rRegN reg)
5010 %{
5011   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5012   constraint(ALLOC_IN_RC(ptr_reg));
5013   match(DecodeN reg);
5014 
5015   format %{ "[$reg]" %}
5016   interface(MEMORY_INTER) %{
5017     base($reg);  // the narrow register itself is the base
5018     index(0x4);  // No Index
5019     scale(0x0);  // No Scale
5020     disp(0x0);   // No Displacement
5021   %}
5022 %}
5023 
5024 // Indirect Memory Plus Short Offset Operand, narrow-oop base
5025 operand indOffset8Narrow(rRegN reg, immL8 off)
5026 %{
5027   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5028   constraint(ALLOC_IN_RC(ptr_reg));
5029   match(AddP (DecodeN reg) off);  // decoded narrow oop plus constant offset
5030 
5031   format %{ "[$reg + $off (8-bit)]" %}
5032   interface(MEMORY_INTER) %{
5033     base($reg);  // the narrow register itself is the base
5034     index(0x4);  // No Index
5035     scale(0x0);  // No Scale
5036     disp($off);
5037   %}
5038 %}
5039 
5040 // Indirect Memory Plus Long Offset Operand, narrow-oop base
5041 operand indOffset32Narrow(rRegN reg, immL32 off)
5042 %{
5043   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5044   constraint(ALLOC_IN_RC(ptr_reg));
5045   match(AddP (DecodeN reg) off);  // decoded narrow oop plus constant offset
5046 
5047   format %{ "[$reg + $off (32-bit)]" %}
5048   interface(MEMORY_INTER) %{
5049     base($reg);  // the narrow register itself is the base
5050     index(0x4);  // No Index
5051     scale(0x0);  // No Scale
5052     disp($off);
5053   %}
5054 %}
5055 
5056 // Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop base
5057 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5058 %{
5059   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5060   constraint(ALLOC_IN_RC(ptr_reg));
5061   match(AddP (AddP (DecodeN reg) lreg) off);  // (decoded narrow oop + long index) + constant offset
5062 
5063   op_cost(10);
5064   format %{"[$reg + $off + $lreg]" %}
5065   interface(MEMORY_INTER) %{
5066     base($reg);  // the narrow register itself is the base
5067     index($lreg);
5068     scale(0x0);  // No Scale
5069     disp($off);
5070   %}
5071 %}
5072 
5073 // Indirect Memory Plus Index Register Operand, narrow-oop base (no offset: disp is 0x0)
5074 operand indIndexNarrow(rRegN reg, rRegL lreg)
5075 %{
5076   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5077   constraint(ALLOC_IN_RC(ptr_reg));
5078   match(AddP (DecodeN reg) lreg);  // decoded narrow oop + long index
5079 
5080   op_cost(10);
5081   format %{"[$reg + $lreg]" %}
5082   interface(MEMORY_INTER) %{
5083     base($reg);  // the narrow register itself is the base
5084     index($lreg);
5085     scale(0x0);  // No Scale
5086     disp(0x0);   // No Displacement
5087   %}
5088 %}
5089 
5090 // Indirect Memory Plus Scaled Index Register Operand, narrow-oop base
5091 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5092 %{
5093   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5094   constraint(ALLOC_IN_RC(ptr_reg));
5095   match(AddP (DecodeN reg) (LShiftL lreg scale));  // decoded narrow oop + (long index << scale)
5096 
5097   op_cost(10);
5098   format %{"[$reg + $lreg << $scale]" %}
5099   interface(MEMORY_INTER) %{
5100     base($reg);  // the narrow register itself is the base
5101     index($lreg);
5102     scale($scale);  // the shift count becomes the scale field
5103     disp(0x0);      // No Displacement
5104   %}
5105 %}
5106 
5107 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand, narrow-oop base
5108 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5109 %{
5110   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
5111   constraint(ALLOC_IN_RC(ptr_reg));
5112   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);  // (decoded narrow oop + (long index << scale)) + constant offset
5113 
5114   op_cost(10);
5115   format %{"[$reg + $off + $lreg << $scale]" %}
5116   interface(MEMORY_INTER) %{
5117     base($reg);  // the narrow register itself is the base
5118     index($lreg);
5119     scale($scale);  // the shift count becomes the scale field
5120     disp($off);
5121   %}
5122 %}
5123 
5124 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow-oop base
5125 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5126 %{
5127   constraint(ALLOC_IN_RC(ptr_reg));
5128   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero narrow oop shift AND a non-negative lower bound (_lo) on the long type at in(2)->in(3)->in(1); presumably the ConvI2L of idx -- TODO confirm
5129   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);  // int index widened by ConvI2L, then shifted
5130 
5131   op_cost(10);
5132   format %{"[$reg + $off + $idx << $scale]" %}
5133   interface(MEMORY_INTER) %{
5134     base($reg);     // the narrow register itself is the base
5135     index($idx);    // the int register itself is used as the index
5136     scale($scale);
5137     disp($off);
5138   %}
5139 %}
5140 
5141 
5142 //----------Special Memory Operands--------------------------------------------
5143 // Stack Slot Operand - This operand is used for loading and storing temporary
5144 //                      values on the stack where a match requires a value to
5145 //                      flow through memory.
5146 operand stackSlotP(sRegP reg)
5147 %{
5148   constraint(ALLOC_IN_RC(stack_slots));  // pointer-typed stack slot, allocated from the stack_slots class
5149   // No match rule because this operand is only generated in matching
5150 
5151   format %{ "[$reg]" %}
5152   interface(MEMORY_INTER) %{
5153     base(0x4);   // RSP
5154     index(0x4);  // No Index
5155     scale(0x0);  // No Scale
5156     disp($reg);  // Stack Offset
5157   %}
5158 %}
5159 
5160 operand stackSlotI(sRegI reg)
5161 %{
5162   constraint(ALLOC_IN_RC(stack_slots));  // int-typed stack slot, allocated from the stack_slots class
5163   // No match rule because this operand is only generated in matching
5164 
5165   format %{ "[$reg]" %}
5166   interface(MEMORY_INTER) %{
5167     base(0x4);   // RSP
5168     index(0x4);  // No Index
5169     scale(0x0);  // No Scale
5170     disp($reg);  // Stack Offset
5171   %}
5172 %}
5173 
5174 operand stackSlotF(sRegF reg)
5175 %{
5176   constraint(ALLOC_IN_RC(stack_slots));  // float-typed stack slot, allocated from the stack_slots class
5177   // No match rule because this operand is only generated in matching
5178 
5179   format %{ "[$reg]" %}
5180   interface(MEMORY_INTER) %{
5181     base(0x4);   // RSP
5182     index(0x4);  // No Index
5183     scale(0x0);  // No Scale
5184     disp($reg);  // Stack Offset
5185   %}
5186 %}
5187 
5188 operand stackSlotD(sRegD reg)
5189 %{
5190   constraint(ALLOC_IN_RC(stack_slots));  // double-typed stack slot, allocated from the stack_slots class
5191   // No match rule because this operand is only generated in matching
5192 
5193   format %{ "[$reg]" %}
5194   interface(MEMORY_INTER) %{
5195     base(0x4);   // RSP
5196     index(0x4);  // No Index
5197     scale(0x0);  // No Scale
5198     disp($reg);  // Stack Offset
5199   %}
5200 %}
5201 operand stackSlotL(sRegL reg)
5202 %{
5203   constraint(ALLOC_IN_RC(stack_slots));  // long-typed stack slot, allocated from the stack_slots class
5204   // No match rule because this operand is only generated in matching
5205 
5206   format %{ "[$reg]" %}
5207   interface(MEMORY_INTER) %{
5208     base(0x4);   // RSP
5209     index(0x4);  // No Index
5210     scale(0x0);  // No Scale
5211     disp($reg);  // Stack Offset
5212   %}
5213 %}
5214 
5215 //----------Conditional Branch Operands----------------------------------------
5216 // Comparison Op  - This is the operation of the comparison, and is limited to
5217 //                  the following set of codes:
5218 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5219 //
5220 // Other attributes of the comparison, such as unsignedness, are specified
5221 // by the comparison instruction that sets a condition code flags register.
5222 // That result is represented by a flags operand whose subtype is appropriate
5223 // to the unsignedness (etc.) of the comparison.
5224 //
5225 // Later, the instruction which matches both the Comparison Op (a Bool) and
5226 // the flags (produced by the Cmp) specifies the coding of the comparison op
5227 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5228 
5229 // Comparison Code: maps each Bool test to a signed condition code
5230 operand cmpOp()
5231 %{
5232   match(Bool);
5233 
5234   format %{ "" %}
5235   interface(COND_INTER) %{
5236     equal(0x4, "e");
5237     not_equal(0x5, "ne");
5238     less(0xC, "l");            // signed forms: l / ge / le / g
5239     greater_equal(0xD, "ge");
5240     less_equal(0xE, "le");
5241     greater(0xF, "g");
5242   %}
5243 %}
5244 
5245 // Comparison Code, unsigned compare.  Used by FP also, with
5246 // C2 (unordered) turned into GT or LT already.  The other bits
5247 // C0 and C3 are turned into Carry & Zero flags.
5248 operand cmpOpU()
5249 %{
5250   match(Bool);
5251 
5252   format %{ "" %}
5253   interface(COND_INTER) %{
5254     equal(0x4, "e");           // equal / not_equal use the same codes as cmpOp
5255     not_equal(0x5, "ne");
5256     less(0x2, "b");            // unsigned forms: b / nb / be / nbe
5257     greater_equal(0x3, "nb");
5258     less_equal(0x6, "be");
5259     greater(0x7, "nbe");
5260   %}
5261 %}
5262 
5263 
5264 // Floating comparisons that don't require any fixup for the unordered case
5265 operand cmpOpUCF() %{
5266   match(Bool);
5267   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5268             n->as_Bool()->_test._test == BoolTest::ge ||
5269             n->as_Bool()->_test._test == BoolTest::le ||
5270             n->as_Bool()->_test._test == BoolTest::gt);  // only the four ordering tests; eq and ne are handled by cmpOpUCF2
5271   format %{ "" %}
5272   interface(COND_INTER) %{
5273     equal(0x4, "e");           // condition table is identical to cmpOpU's
5274     not_equal(0x5, "ne");
5275     less(0x2, "b");
5276     greater_equal(0x3, "nb");
5277     less_equal(0x6, "be");
5278     greater(0x7, "nbe");
5279   %}
5280 %}
5281 
5282 
5283 // Floating comparisons that can be fixed up with extra conditional jumps
5284 operand cmpOpUCF2() %{
5285   match(Bool);
5286   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5287             n->as_Bool()->_test._test == BoolTest::eq);  // only the equality tests; the ordering tests are handled by cmpOpUCF
5288   format %{ "" %}
5289   interface(COND_INTER) %{
5290     equal(0x4, "e");           // condition table is identical to cmpOpU's
5291     not_equal(0x5, "ne");
5292     less(0x2, "b");
5293     greater_equal(0x3, "nb");
5294     less_equal(0x6, "be");
5295     greater(0x7, "nbe");
5296   %}
5297 %}
5298 
5299 
5300 //----------OPERAND CLASSES----------------------------------------------------
5301 // Operand Classes are groups of operands that are used to simplify
5302 // instruction definitions by not requiring the AD writer to specify separate
5303 // instructions for every form of operand when the instruction accepts
5304 // multiple operand types with the same basic encoding and format.  The classic
5305 // case of this is memory operands.
5306 
5307 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // plain-pointer addressing forms
5308                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5309                indCompressedOopOffset,  // R12-based compressed oop form
5310                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // narrow-oop-base counterparts of the forms above
5311                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5312                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
5313 
5314 //----------PIPELINE-----------------------------------------------------------
5315 // Rules which define the behavior of the target architecture's pipeline.
5316 pipeline %{
5317 
5318 //----------ATTRIBUTES---------------------------------------------------------
5319 attributes %{
5320   variable_size_instructions;        // Variable size instructions
5321   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5322   instruction_unit_size = 1;         // The instruction unit is 1 byte long
5323   instruction_fetch_unit_size = 16;  // The processor fetches one line
5324   instruction_fetch_units = 1;       // of 16 bytes
5325 
5326   // List of nop instructions
5327   nops( MachNop );
5328 %}
5329 
5330 //----------RESOURCES----------------------------------------------------------
5331 // Resources are the functional units available to the machine
5332 
5333 // Generic P2/P3 pipeline
5334 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5335 // 3 instructions decoded per cycle.
5336 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5337 // 3 ALU ops, only ALU0 handles mul instructions.  NOTE(review): MEM below lists three units (MS0-MS2) although "2 load/store ops" is stated above -- confirm.
5338 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5339            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5340            BR, FPU,
5341            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5342 
5343 //----------PIPELINE DESCRIPTION-----------------------------------------------
5344 // Pipeline Description specifies the stages in the machine's pipeline
5345 
5346 // Generic P2/P3 pipeline: six stages, S0 through S5
5347 pipe_desc(S0, S1, S2, S3, S4, S5);
5348 
5349 //----------PIPELINE CLASSES---------------------------------------------------
5350 // Pipeline Classes describe the stages in which input and output are
5351 // referenced by the hardware pipeline.
5352 
5353 // Naming convention: ialu or fpu
5354 // Then: _reg
5355 // Then: _reg if there is a 2nd register
5356 // Then: _long if it's a pair of instructions implementing a long
5357 // Then: _fat if it requires the big decoder
5358 //   Or: _mem if it requires the big decoder and a memory unit.
5359 
5360 // Integer ALU reg operation: one instruction that reads and rewrites dst
5361 pipe_class ialu_reg(rRegI dst)
5362 %{
5363     single_instruction;
5364     dst    : S4(write);  // result written in stage S4
5365     dst    : S3(read);   // same register read in stage S3
5366     DECODE : S0;        // any decoder
5367     ALU    : S3;        // any alu
5368 %}
5369 
5370 // Long ALU reg operation: counted as two instructions on a single long register
5371 pipe_class ialu_reg_long(rRegL dst)
5372 %{
5373     instruction_count(2);
5374     dst    : S4(write);  // result written in stage S4
5375     dst    : S3(read);   // same register read in stage S3
5376     DECODE : S0(2);     // any 2 decoders
5377     ALU    : S3(2);     // any 2 alus
5378 %}
5379 
5380 // Integer ALU reg operation using big decoder
5381 pipe_class ialu_reg_fat(rRegI dst)
5382 %{
5383     single_instruction;
5384     dst    : S4(write);  // result written in stage S4
5385     dst    : S3(read);   // same register read in stage S3
5386     D0     : S0;        // big decoder only
5387     ALU    : S3;        // any alu
5388 %}
5389 
5390 // Long ALU reg operation using big decoder
5391 pipe_class ialu_reg_long_fat(rRegL dst)
5392 %{
5393     instruction_count(2);
5394     dst    : S4(write);  // result written in stage S4
5395     dst    : S3(read);   // same register read in stage S3
5396     D0     : S0(2);     // big decoder only; twice
5397     ALU    : S3(2);     // any 2 alus
5398 %}
5399 
5400 // Integer ALU reg-reg operation
5401 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5402 %{
5403     single_instruction;
5404     dst    : S4(write);  // result written in stage S4
5405     src    : S3(read);   // source read in stage S3
5406     DECODE : S0;        // any decoder
5407     ALU    : S3;        // any alu
5408 %}
5409 
5410 // Long ALU reg-reg operation: counted as two instructions
5411 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5412 %{
5413     instruction_count(2);
5414     dst    : S4(write);  // result written in stage S4
5415     src    : S3(read);   // source read in stage S3
5416     DECODE : S0(2);     // any 2 decoders
5417     ALU    : S3(2);     // any 2 alus
5418 %}
5419 
5420 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand despite the reg_reg name -- confirm intended)
5421 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5422 %{
5423     single_instruction;
5424     dst    : S4(write);  // result written in stage S4
5425     src    : S3(read);   // source read in stage S3
5426     D0     : S0;        // big decoder only
5427     ALU    : S3;        // any alu
5428 %}
5429 
5430 // Long ALU reg-reg operation using big decoder
5431 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5432 %{
5433     instruction_count(2);
5434     dst    : S4(write);  // result written in stage S4
5435     src    : S3(read);   // source read in stage S3
5436     D0     : S0(2);     // big decoder only; twice
5437     ALU    : S3(2);     // any 2 alus
5438 %}
5439 
5440 // Integer ALU reg-mem operation: memory operand read first, ALU one stage later
5441 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5442 %{
5443     single_instruction;
5444     dst    : S5(write);  // result written in stage S5, one stage later than the reg-reg classes
5445     mem    : S3(read);   // memory operand read in stage S3
5446     D0     : S0;        // big decoder only
5447     ALU    : S4;        // any alu
5448     MEM    : S3;        // any mem
5449 %}
5450 
5451 // Integer mem operation (prefetch): no register operand and no ALU use
5452 pipe_class ialu_mem(memory mem)
5453 %{
5454     single_instruction;
5455     mem    : S3(read);   // memory operand read in stage S3
5456     D0     : S0;        // big decoder only
5457     MEM    : S3;        // any mem
5458 %}
5459 
5460 // Integer Store to Memory: register source stored to a memory operand
5461 pipe_class ialu_mem_reg(memory mem, rRegI src)
5462 %{
5463     single_instruction;
5464     mem    : S3(read);   // address operand read in stage S3
5465     src    : S5(read);   // value to store read in stage S5
5466     D0     : S0;        // big decoder only
5467     ALU    : S4;        // any alu
5468     MEM    : S3;        // any mem
5469 %}
5470 
5471 // // Long Store to Memory
5472 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5473 // %{
5474 //     instruction_count(2);
5475 //     mem    : S3(read);
5476 //     src    : S5(read);
5477 //     D0     : S0(2);          // big decoder only; twice
5478 //     ALU    : S4(2);     // any 2 alus
5479 //     MEM    : S3(2);  // Both mems
5480 // %}
5481 
5482 // Integer Store to Memory, immediate form: only the memory operand is declared
5483 pipe_class ialu_mem_imm(memory mem)
5484 %{
5485     single_instruction;
5486     mem    : S3(read);   // address operand read in stage S3
5487     D0     : S0;        // big decoder only
5488     ALU    : S4;        // any alu
5489     MEM    : S3;        // any mem
5490 %}
5491 
5492 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
5493 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5494 %{
5495     single_instruction;
5496     dst    : S4(write);  // result written in stage S4
5497     src    : S3(read);   // source read in stage S3
5498     D0     : S0;        // Big decoder only
5499     ALU0   : S3;        // only alu0
5500 %}
5501 
5502 // Integer ALU0 reg-mem operation: as ialu_reg_mem but restricted to ALU0
5503 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5504 %{
5505     single_instruction;
5506     dst    : S5(write);  // result written in stage S5
5507     mem    : S3(read);   // memory operand read in stage S3
5508     D0     : S0;        // big decoder only
5509     ALU0   : S4;        // ALU0 only
5510     MEM    : S3;        // any mem
5511 %}
5512 
5513 // Integer ALU reg-reg operation that writes the flags register (cr)
5514 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5515 %{
5516     single_instruction;
5517     cr     : S4(write);  // flags written in stage S4
5518     src1   : S3(read);   // both sources read in stage S3
5519     src2   : S3(read);
5520     DECODE : S0;        // any decoder
5521     ALU    : S3;        // any alu
5522 %}
5523 
5524 // Integer ALU reg-imm operation that writes the flags register (cr)
5525 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5526 %{
5527     single_instruction;
5528     cr     : S4(write);  // flags written in stage S4
5529     src1   : S3(read);   // the single register source read in stage S3
5530     DECODE : S0;        // any decoder
5531     ALU    : S3;        // any alu
5532 %}
5533 
5534 // Integer ALU reg-mem operation that writes the flags register (cr)
5535 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5536 %{
5537     single_instruction;
5538     cr     : S4(write);  // flags written in stage S4
5539     src1   : S3(read);   // register and memory sources both read in stage S3
5540     src2   : S3(read);
5541     D0     : S0;        // big decoder only
5542     ALU    : S4;        // any alu
5543     MEM    : S3;        // any mem
5544 %}
5545 
5546 // Compare-less-than sequence on three int registers, counted as four instructions (NOTE(review): header previously read "Conditional move reg-reg", same as pipe_cmov_reg -- confirm)
5547 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5548 %{
5549     instruction_count(4);
5550     y      : S4(read);   // y is read one stage after p and q
5551     q      : S3(read);
5552     p      : S3(read);
5553     DECODE : S0(4);     // any decoder
5554 %}
5555 
5556 // Conditional move reg-reg: no ALU or MEM resource is declared, only a decoder
5557 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5558 %{
5559     single_instruction;
5560     dst    : S4(write);  // result written in stage S4
5561     src    : S3(read);   // source and flags both read in stage S3
5562     cr     : S3(read);
5563     DECODE : S0;        // any decoder
5564 %}
5565 
5566 // Conditional move reg-mem: as pipe_cmov_reg with a memory source and a MEM unit
5567 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5568 %{
5569     single_instruction;
5570     dst    : S4(write);  // result written in stage S4
5571     src    : S3(read);   // memory source and flags both read in stage S3
5572     cr     : S3(read);
5573     DECODE : S0;        // any decoder
5574     MEM    : S3;        // any mem
5575 %}
5576 
5577 // Conditional move reg-reg long
5578 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5579 %{
5580     single_instruction;  // NOTE(review): declared single_instruction while DECODE below has a count of 2 -- confirm
5581     dst    : S4(write);  // result written in stage S4
5582     src    : S3(read);   // source and flags both read in stage S3
5583     cr     : S3(read);
5584     DECODE : S0(2);     // any 2 decoders
5585 %}
5586 
5587 // XXX
5588 // // Conditional move double reg-reg
5589 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5590 // %{
5591 //     single_instruction;
5592 //     dst    : S4(write);
5593 //     src    : S3(read);
5594 //     cr     : S3(read);
5595 //     DECODE : S0;     // any decoder
5596 // %}
5597 
5598 // Float reg operation: single double register, counted as two instructions
5599 pipe_class fpu_reg(regD dst)
5600 %{
5601     instruction_count(2);
5602     dst    : S3(read);   // dst is only declared as read here; no write stage is listed
5603     DECODE : S0(2);     // any 2 decoders
5604     FPU    : S3;        // the FPU unit, in stage S3
5605 %}
5606 
5607 // Float reg-reg operation: counted as two instructions
5608 pipe_class fpu_reg_reg(regD dst, regD src)
5609 %{
5610     instruction_count(2);
5611     dst    : S4(write);  // result written in stage S4
5612     src    : S3(read);   // source read in stage S3
5613     DECODE : S0(2);     // any 2 decoders
5614     FPU    : S3;        // the FPU unit, in stage S3
5615 %}
5616 
5617 // Float reg-reg-reg operation: two sources, counted as three instructions
5618 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5619 %{
5620     instruction_count(3);
5621     dst    : S4(write);  // result written in stage S4
5622     src1   : S3(read);   // both sources read in stage S3
5623     src2   : S3(read);
5624     DECODE : S0(3);     // any 3 decoders
5625     FPU    : S3(2);     // the FPU unit, count 2
5626 %}
5627 
5628 // Float reg-reg-reg-reg operation: three sources, counted as four instructions
5629 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5630 %{
5631     instruction_count(4);
5632     dst    : S4(write);  // result written in stage S4
5633     src1   : S3(read);   // all three sources read in stage S3
5634     src2   : S3(read);
5635     src3   : S3(read);
5636     DECODE : S0(4);     // any decoder, count 4 (comment previously read "any 3 decoders")
5637     FPU    : S3(2);     // the FPU unit, count 2
5638 %}
5639 
5640 // Float reg-mem-reg-reg operation (4 instructions): src1 is a memory operand.
5641 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5642 %{
5643     instruction_count(4);
5644     dst    : S4(write);
5645     src1   : S3(read);
5646     src2   : S3(read);
5647     src3   : S3(read);
5648     DECODE : S1(3);     // any 3 decoders
5649     D0     : S0;        // Big decoder only
5650     FPU    : S3(2);
5651     MEM    : S3;
5652 %}
5653 
5654 // Float reg-mem operation (2 instructions): mem read in S3, dst written in S5.
5655 pipe_class fpu_reg_mem(regD dst, memory mem)
5656 %{
5657     instruction_count(2);
5658     dst    : S5(write);
5659     mem    : S3(read);
5660     D0     : S0;        // big decoder only
5661     DECODE : S1;        // any decoder for FPU POP
5662     FPU    : S4;
5663     MEM    : S3;        // any mem
5664 %}
5665 
5666 // Float reg-reg-mem operation (3 instructions): src1 and mem read in S3, dst written in S5.
5667 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5668 %{
5669     instruction_count(3);
5670     dst    : S5(write);
5671     src1   : S3(read);
5672     mem    : S3(read);
5673     D0     : S0;        // big decoder only
5674     DECODE : S1(2);     // any decoder for FPU POP
5675     FPU    : S4;
5676     MEM    : S3;        // any mem
5677 %}
5678 
5679 // Float mem-reg operation (2 instructions): mem read in S3, src read in S5.
5680 pipe_class fpu_mem_reg(memory mem, regD src)
5681 %{
5682     instruction_count(2);
5683     src    : S5(read);
5684     mem    : S3(read);
5685     DECODE : S0;        // any decoder for FPU PUSH
5686     D0     : S1;        // big decoder only
5687     FPU    : S4;
5688     MEM    : S3;        // any mem
5689 %}
5690 
5691 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation (3 instructions)
5692 %{
5693     instruction_count(3);
5694     src1   : S3(read);
5695     src2   : S3(read);
5696     mem    : S3(read);
5697     DECODE : S0(2);     // any decoder for FPU PUSH
5698     D0     : S1;        // big decoder only
5699     FPU    : S4;
5700     MEM    : S3;        // any mem
5701 %}
5702 
5703 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation (3 instructions)
5704 %{
5705     instruction_count(3);
5706     src1   : S3(read);
5707     src2   : S3(read);
5708     mem    : S4(read);
5709     DECODE : S0;        // any decoder for FPU PUSH
5710     D0     : S0(2);     // big decoder only
5711     FPU    : S4;
5712     MEM    : S3(2);     // any mem; two memory operands
5713 %}
5714 
5715 pipe_class fpu_mem_mem(memory dst, memory src1)  // mem-mem operation (2 instructions); no FPU resource is reserved
5716 %{
5717     instruction_count(2);
5718     src1   : S3(read);
5719     dst    : S4(read);
5720     D0     : S0(2);     // big decoder only
5721     MEM    : S3(2);     // any mem
5722 %}
5723 
5724 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float mem-mem-mem operation (3 instructions)
5725 %{
5726     instruction_count(3);
5727     src1   : S3(read);
5728     src2   : S3(read);
5729     dst    : S4(read);
5730     D0     : S0(3);     // big decoder only
5731     FPU    : S4;
5732     MEM    : S3(3);     // any mem
5733 %}
5734 
5735 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float mem-reg operation with a constant (3 instructions)
5736 %{
5737     instruction_count(3);
5738     src1   : S4(read);
5739     mem    : S4(read);
5740     DECODE : S0;        // any decoder for FPU PUSH
5741     D0     : S0(2);     // big decoder only
5742     FPU    : S4;
5743     MEM    : S3(2);     // any mem
5744 %}
5745 
5746 // Float load constant (2 instructions): dst written in S5.
5747 pipe_class fpu_reg_con(regD dst)
5748 %{
5749     instruction_count(2);
5750     dst    : S5(write);
5751     D0     : S0;        // big decoder only for the load
5752     DECODE : S1;        // any decoder for FPU POP
5753     FPU    : S4;
5754     MEM    : S3;        // any mem
5755 %}
5756 
5757 // Float load constant combined with a register source (3 instructions).
5758 pipe_class fpu_reg_reg_con(regD dst, regD src)
5759 %{
5760     instruction_count(3);
5761     dst    : S5(write);
5762     src    : S3(read);
5763     D0     : S0;        // big decoder only for the load
5764     DECODE : S1(2);     // any decoder for FPU POP
5765     FPU    : S4;
5766     MEM    : S3;        // any mem
5767 %}
5768 
5769 // Unconditional branch: a single instruction that reserves BR in S3.
5770 pipe_class pipe_jmp(label labl)
5771 %{
5772     single_instruction;
5773     BR   : S3;
5774 %}
5775 
5776 // Conditional branch: flags (cr) are read in S1, BR is reserved in S3.
5777 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5778 %{
5779     single_instruction;
5780     cr    : S1(read);
5781     BR    : S3;
5782 %}
5783 
5784 // Allocation idiom: one serialized instruction with a fixed latency of 6.
5785 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5786 %{
5787     instruction_count(1); force_serialization;
5788     fixed_latency(6);
5789     heap_ptr : S3(read);
5790     DECODE   : S0(3);
5791     D0       : S2;
5792     MEM      : S3;
5793     ALU      : S3(2);
5794     dst      : S5(write);
5795     BR       : S5;
5796 %}
5797 
5798 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, serialized, fixed latency 100.
5799 pipe_class pipe_slow()
5800 %{
5801     instruction_count(10); multiple_bundles; force_serialization;
5802     fixed_latency(100);
5803     D0  : S0(2);
5804     MEM : S3(2);
5805 %}
5806 
5807 // The real do-nothing guy: zero instructions, no operands, no resources.
5808 pipe_class empty()
5809 %{
5810     instruction_count(0);
5811 %}
5812 
5813 // Define the class for the Nop node: MachNop is bound to the 'empty' pipe class.
5814 define
5815 %{
5816    MachNop = empty;
5817 %}
5818 
5819 %}
5820 
5821 //----------INSTRUCTIONS-------------------------------------------------------
5822 //
5823 // match      -- States which machine-independent subtree may be replaced
5824 //               by this instruction.
5825 // ins_cost   -- The estimated cost of this instruction is used by instruction
5826 //               selection to identify a minimum cost tree of machine
5827 //               instructions that matches a tree of machine-independent
5828 //               instructions.
5829 // format     -- A string providing the disassembly for this instruction.
5830 //               The value of an instruction's operand may be inserted
5831 //               by referring to it with a '$' prefix.
5832 // opcode     -- Three instruction opcodes may be provided.  These are referred
5833 //               to within an encode class as $primary, $secondary, and $tertiary
5834 //               respectively.  The primary opcode is commonly used to
5835 //               indicate the type of machine instruction, while secondary
5836 //               and tertiary are often used for prefix options or addressing
5837 //               modes.
5838 // ins_encode -- A list of encode classes with parameters. The encode class
5839 //               name must have been defined in an 'enc_class' specification
5840 //               in the encode section of the architecture description.
5841 
5842 
5843 //----------Load/Store/Move Instructions---------------------------------------
5844 //----------Load Instructions--------------------------------------------------
5845 
5846 // Load Byte (8 bit signed): matches LoadB and emits a sign-extending movsbl.
5847 instruct loadB(rRegI dst, memory mem)
5848 %{
5849   match(Set dst (LoadB mem));
5850
5851   ins_cost(125);
5852   format %{ "movsbl  $dst, $mem\t# byte" %}
5853
5854   ins_encode %{
5855     __ movsbl($dst$$Register, $mem$$Address);
5856   %}
5857
5858   ins_pipe(ialu_reg_mem);
5859 %}
5860 
5861 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq.
5862 instruct loadB2L(rRegL dst, memory mem)
5863 %{
5864   match(Set dst (ConvI2L (LoadB mem)));
5865
5866   ins_cost(125);
5867   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5868
5869   ins_encode %{
5870     __ movsbq($dst$$Register, $mem$$Address);
5871   %}
5872
5873   ins_pipe(ialu_reg_mem);
5874 %}
5875 
5876 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits a zero-extending movzbl.
5877 instruct loadUB(rRegI dst, memory mem)
5878 %{
5879   match(Set dst (LoadUB mem));
5880
5881   ins_cost(125);
5882   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5883
5884   ins_encode %{
5885     __ movzbl($dst$$Register, $mem$$Address);
5886   %}
5887
5888   ins_pipe(ialu_reg_mem);
5889 %}
5890 
5891 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq.
5892 instruct loadUB2L(rRegL dst, memory mem)
5893 %{
5894   match(Set dst (ConvI2L (LoadUB mem)));
5895
5896   ins_cost(125);
5897   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5898
5899   ins_encode %{
5900     __ movzbq($dst$$Register, $mem$$Address);
5901   %}
5902
5903   ins_pipe(ialu_reg_mem);
5904 %}
5905 
5906 // Unsigned byte load, AND with an 8-bit immediate, widened to long.
5907 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5908   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5909   effect(KILL cr);  // the andl below writes the flags
5910
5911   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5912             "andl    $dst, $mask" %}
5913   ins_encode %{
5914     Register result = $dst$$Register;
5915     __ movzbq(result, $mem$$Address);     // zero-extending byte load
5916     __ andl(result, $mask$$constant);     // apply the immediate mask
5917   %}
5918   ins_pipe(ialu_reg_mem);
5919 %}
5920 
5921 // Load Short (16 bit signed): matches LoadS and emits a sign-extending movswl.
5922 instruct loadS(rRegI dst, memory mem)
5923 %{
5924   match(Set dst (LoadS mem));
5925
5926   ins_cost(125);
5927   format %{ "movswl $dst, $mem\t# short" %}
5928
5929   ins_encode %{
5930     __ movswl($dst$$Register, $mem$$Address);
5931   %}
5932
5933   ins_pipe(ialu_reg_mem);
5934 %}
5935 
5936 // Load Short (16 bit signed) to Byte (8 bit signed): (x << 24) >> 24 keeps only the sign-extended low byte, so one movsbl replaces load + two shifts.
5937 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5938   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5939
5940   ins_cost(125);
5941   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5942   ins_encode %{
5943     __ movsbl($dst$$Register, $mem$$Address);
5944   %}
5945   ins_pipe(ialu_reg_mem);
5946 %}
5947 
5948 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq.
5949 instruct loadS2L(rRegL dst, memory mem)
5950 %{
5951   match(Set dst (ConvI2L (LoadS mem)));
5952
5953   ins_cost(125);
5954   format %{ "movswq $dst, $mem\t# short -> long" %}
5955
5956   ins_encode %{
5957     __ movswq($dst$$Register, $mem$$Address);
5958   %}
5959
5960   ins_pipe(ialu_reg_mem);
5961 %}
5962 
5963 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits a zero-extending movzwl.
5964 instruct loadUS(rRegI dst, memory mem)
5965 %{
5966   match(Set dst (LoadUS mem));
5967
5968   ins_cost(125);
5969   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5970
5971   ins_encode %{
5972     __ movzwl($dst$$Register, $mem$$Address);
5973   %}
5974
5975   ins_pipe(ialu_reg_mem);
5976 %}
5977 
5978 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the shift pair keeps only the sign-extended low byte, so one movsbl suffices.
5979 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5980   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5981
5982   ins_cost(125);
5983   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5984   ins_encode %{
5985     __ movsbl($dst$$Register, $mem$$Address);
5986   %}
5987   ins_pipe(ialu_reg_mem);
5988 %}
5989 
5990 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq.
5991 instruct loadUS2L(rRegL dst, memory mem)
5992 %{
5993   match(Set dst (ConvI2L (LoadUS mem)));
5994
5995   ins_cost(125);
5996   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5997
5998   ins_encode %{
5999     __ movzwq($dst$$Register, $mem$$Address);
6000   %}
6001
6002   ins_pipe(ialu_reg_mem);
6003 %}
6004 
6005 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: only the low byte survives the mask, so a zero-extending byte load (movzbq) is emitted.
6006 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6007   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6008
6009   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6010   ins_encode %{
6011     __ movzbq($dst$$Register, $mem$$Address);
6012   %}
6013   ins_pipe(ialu_reg_mem);
6014 %}
6015 
6016 // Unsigned short/char load, AND with a 16-bit immediate, widened to long.
6017 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6018   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6019   effect(KILL cr);  // the andl below writes the flags
6020
6021   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6022             "andl    $dst, $mask" %}
6023   ins_encode %{
6024     Register result = $dst$$Register;
6025     __ movzwq(result, $mem$$Address);     // zero-extending 16-bit load
6026     __ andl(result, $mask$$constant);     // apply the immediate mask
6027   %}
6028   ins_pipe(ialu_reg_mem);
6029 %}
6030 
6031 // Load Integer: matches LoadI and emits a 32-bit movl from memory.
6032 instruct loadI(rRegI dst, memory mem)
6033 %{
6034   match(Set dst (LoadI mem));
6035
6036   ins_cost(125);
6037   format %{ "movl    $dst, $mem\t# int" %}
6038
6039   ins_encode %{
6040     __ movl($dst$$Register, $mem$$Address);
6041   %}
6042
6043   ins_pipe(ialu_reg_mem);
6044 %}
6045 
6046 // Load Integer (32 bit signed) to Byte (8 bit signed): (x << 24) >> 24 keeps only the sign-extended low byte, so one movsbl suffices.
6047 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6048   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6049
6050   ins_cost(125);
6051   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6052   ins_encode %{
6053     __ movsbl($dst$$Register, $mem$$Address);
6054   %}
6055   ins_pipe(ialu_reg_mem);
6056 %}
6057 
6058 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): AndI with the immI_255 mask is folded into a zero-extending byte load.
6059 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6060   match(Set dst (AndI (LoadI mem) mask));
6061
6062   ins_cost(125);
6063   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6064   ins_encode %{
6065     __ movzbl($dst$$Register, $mem$$Address);
6066   %}
6067   ins_pipe(ialu_reg_mem);
6068 %}
6069 
6070 // Load Integer (32 bit signed) to Short (16 bit signed): (x << 16) >> 16 keeps only the sign-extended low half, so one movswl suffices.
6071 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6072   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6073
6074   ins_cost(125);
6075   format %{ "movswl  $dst, $mem\t# int -> short" %}
6076   ins_encode %{
6077     __ movswl($dst$$Register, $mem$$Address);
6078   %}
6079   ins_pipe(ialu_reg_mem);
6080 %}
6081 
6082 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): AndI with the immI_65535 mask is folded into a zero-extending 16-bit load.
6083 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6084   match(Set dst (AndI (LoadI mem) mask));
6085
6086   ins_cost(125);
6087   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6088   ins_encode %{
6089     __ movzwl($dst$$Register, $mem$$Address);
6090   %}
6091   ins_pipe(ialu_reg_mem);
6092 %}
6093 
6094 // Load Integer into Long Register: folds ConvI2L(LoadI) into one sign-extending movslq.
6095 instruct loadI2L(rRegL dst, memory mem)
6096 %{
6097   match(Set dst (ConvI2L (LoadI mem)));
6098
6099   ins_cost(125);
6100   format %{ "movslq  $dst, $mem\t# int -> long" %}
6101
6102   ins_encode %{
6103     __ movslq($dst$$Register, $mem$$Address);
6104   %}
6105
6106   ins_pipe(ialu_reg_mem);
6107 %}
6108 
6109 // Load Integer with mask 0xFF into Long Register: only the low byte survives the mask, so a zero-extending byte load (movzbq) is emitted.
6110 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6111   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6112
6113   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6114   ins_encode %{
6115     __ movzbq($dst$$Register, $mem$$Address);
6116   %}
6117   ins_pipe(ialu_reg_mem);
6118 %}
6119 
6120 // Load Integer with mask 0xFFFF into Long Register: only the low 16 bits survive the mask, so a zero-extending 16-bit load (movzwq) is emitted.
6121 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6122   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6123
6124   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6125   ins_encode %{
6126     __ movzwq($dst$$Register, $mem$$Address);
6127   %}
6128   ins_pipe(ialu_reg_mem);
6129 %}
6130 
6131 // Load Integer with a 32-bit mask into Long Register.
     // movl and andl leave the upper 32 bits of $dst zero.  That equals the
     // sign-extended ConvI2L result only while bit 31 of the masked value is
     // clear.  immI also admits masks with bit 31 set, for which the AndI
     // result can be negative, so in that case the low word is explicitly
     // sign-extended with movslq (which does not touch the flags).
6132 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6133   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6134   effect(KILL cr);  // andl writes the flags
6135
6136   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6137             "andl    $dst, $mask\n\t"
                 "movslq  $dst, $dst\t# only if mask < 0" %}
6138   ins_encode %{
6139     Register Rdst = $dst$$Register;
6140     __ movl(Rdst, $mem$$Address);
6141     __ andl(Rdst, $mask$$constant);
         if ($mask$$constant < 0) {
           // Masked value may have bit 31 set: widen it as a signed int.
           __ movslq(Rdst, Rdst);
         }
6142   %}
6143   ins_pipe(ialu_reg_mem);
6144 %}
6145 
6146 // Load Unsigned Integer into Long Register: matches LoadUI2L and emits a plain 32-bit movl.
6147 instruct loadUI2L(rRegL dst, memory mem)
6148 %{
6149   match(Set dst (LoadUI2L mem));
6150
6151   ins_cost(125);
6152   format %{ "movl    $dst, $mem\t# uint -> long" %}
6153
6154   ins_encode %{
6155     __ movl($dst$$Register, $mem$$Address);
6156   %}
6157
6158   ins_pipe(ialu_reg_mem);
6159 %}
6160 
6161 // Load Long: matches LoadL and emits a 64-bit movq from memory.
6162 instruct loadL(rRegL dst, memory mem)
6163 %{
6164   match(Set dst (LoadL mem));
6165
6166   ins_cost(125);
6167   format %{ "movq    $dst, $mem\t# long" %}
6168
6169   ins_encode %{
6170     __ movq($dst$$Register, $mem$$Address);
6171   %}
6172
6173   ins_pipe(ialu_reg_mem); // XXX
6174 %}
6175 
6176 // Load Range: matches LoadRange; movl (primary opcode 0x8B) is assembled from enc classes defined elsewhere in this file.
6177 instruct loadRange(rRegI dst, memory mem)
6178 %{
6179   match(Set dst (LoadRange mem));
6180
6181   ins_cost(125); // XXX
6182   format %{ "movl    $dst, $mem\t# range" %}
6183   opcode(0x8B);
6184   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6185   ins_pipe(ialu_reg_mem);
6186 %}
6187 
6188 // Load Pointer: matches LoadP; movq (primary opcode 0x8B) uses the REX_reg_mem_wide enc class for the prefix.
6189 instruct loadP(rRegP dst, memory mem)
6190 %{
6191   match(Set dst (LoadP mem));
6192
6193   ins_cost(125); // XXX
6194   format %{ "movq    $dst, $mem\t# ptr" %}
6195   opcode(0x8B);
6196   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6197   ins_pipe(ialu_reg_mem); // XXX
6198 %}
6199 
6200 // Load Compressed Pointer: matches LoadN and emits a 32-bit movl into an rRegN register.
6201 instruct loadN(rRegN dst, memory mem)
6202 %{
6203    match(Set dst (LoadN mem));
6204
6205    ins_cost(125); // XXX
6206    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6207    ins_encode %{
6208      __ movl($dst$$Register, $mem$$Address);
6209    %}
6210    ins_pipe(ialu_reg_mem); // XXX
6211 %}
6212 
6213 
6214 // Load Klass Pointer: matches LoadKlass; same movq (0x8B) encoding sequence as a full-width pointer load.
6215 instruct loadKlass(rRegP dst, memory mem)
6216 %{
6217   match(Set dst (LoadKlass mem));
6218
6219   ins_cost(125); // XXX
6220   format %{ "movq    $dst, $mem\t# class" %}
6221   opcode(0x8B);
6222   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6223   ins_pipe(ialu_reg_mem); // XXX
6224 %}
6225 
6226 // Load narrow Klass Pointer: matches LoadNKlass and emits a 32-bit movl into an rRegN register.
6227 instruct loadNKlass(rRegN dst, memory mem)
6228 %{
6229   match(Set dst (LoadNKlass mem));
6230
6231   ins_cost(125); // XXX
6232   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6233   ins_encode %{
6234     __ movl($dst$$Register, $mem$$Address);
6235   %}
6236   ins_pipe(ialu_reg_mem); // XXX
6237 %}
6238 
6239 // Load Float: matches LoadF; movss (F3 0F 10), with the 0xF3 prefix (OpcP) emitted ahead of the REX enc class.
6240 instruct loadF(regF dst, memory mem)
6241 %{
6242   match(Set dst (LoadF mem));
6243
6244   ins_cost(145); // XXX
6245   format %{ "movss   $dst, $mem\t# float" %}
6246   opcode(0xF3, 0x0F, 0x10);
6247   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6248   ins_pipe(pipe_slow); // XXX
6249 %}
6250 
6251 // Load Double, movlpd form (66 0F 12): selected only when UseXmmLoadAndClearUpper is false.
6252 instruct loadD_partial(regD dst, memory mem)
6253 %{
6254   predicate(!UseXmmLoadAndClearUpper);
6255   match(Set dst (LoadD mem));
6256
6257   ins_cost(145); // XXX
6258   format %{ "movlpd  $dst, $mem\t# double" %}
6259   opcode(0x66, 0x0F, 0x12);
6260   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6261   ins_pipe(pipe_slow); // XXX
6262 %}
6263 
6264 instruct loadD(regD dst, memory mem)  // Load Double, movsd form (F2 0F 10)
6265 %{
6266   predicate(UseXmmLoadAndClearUpper);  // complements the predicate of loadD_partial
6267   match(Set dst (LoadD mem));
6268
6269   ins_cost(145); // XXX
6270   format %{ "movsd   $dst, $mem\t# double" %}
6271   opcode(0xF2, 0x0F, 0x10);
6272   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6273   ins_pipe(pipe_slow); // XXX
6274 %}
6275 
6276 // Load Aligned Packed Byte to XMM register: matches Load8B; encoded by the movq_ld enc class.
6277 instruct loadA8B(regD dst, memory mem) %{
6278   match(Set dst (Load8B mem));
6279   ins_cost(125);
6280   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6281   ins_encode( movq_ld(dst, mem));
6282   ins_pipe( pipe_slow );
6283 %}
6284 
6285 // Load Aligned Packed Short to XMM register: matches Load4S; encoded by the movq_ld enc class.
6286 instruct loadA4S(regD dst, memory mem) %{
6287   match(Set dst (Load4S mem));
6288   ins_cost(125);
6289   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6290   ins_encode( movq_ld(dst, mem));
6291   ins_pipe( pipe_slow );
6292 %}
6293 
6294 // Load Aligned Packed Char to XMM register: matches Load4C; encoded by the movq_ld enc class.
6295 instruct loadA4C(regD dst, memory mem) %{
6296   match(Set dst (Load4C mem));
6297   ins_cost(125);
6298   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6299   ins_encode( movq_ld(dst, mem));
6300   ins_pipe( pipe_slow );
6301 %}
6302 
6303 // Load Aligned Packed Integer to XMM register: matches Load2I; encoded by the movq_ld enc class.
6304 instruct load2IU(regD dst, memory mem) %{
6305   match(Set dst (Load2I mem));
6306   ins_cost(125);
6307   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6308   ins_encode( movq_ld(dst, mem));
6309   ins_pipe( pipe_slow );
6310 %}
6311 
6312 // Load Aligned Packed Single to XMM: matches Load2F; encoded by the movq_ld enc class (cost 145, versus 125 for the packed integer loads).
6313 instruct loadA2F(regD dst, memory mem) %{
6314   match(Set dst (Load2F mem));
6315   ins_cost(145);
6316   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6317   ins_encode( movq_ld(dst, mem));
6318   ins_pipe( pipe_slow );
6319 %}
6320 
6321 // Load Effective Address: matches the indOffset8 address operand itself and materializes it in dst with leaq (0x8D).
6322 instruct leaP8(rRegP dst, indOffset8 mem)
6323 %{
6324   match(Set dst mem);
6325
6326   ins_cost(110); // XXX
6327   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6328   opcode(0x8D);
6329   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6330   ins_pipe(ialu_reg_reg_fat);
6331 %}
6332 
6333 instruct leaP32(rRegP dst, indOffset32 mem)  // leaq (0x8D) of an indOffset32 address operand
6334 %{
6335   match(Set dst mem);
6336
6337   ins_cost(110);
6338   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6339   opcode(0x8D);
6340   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6341   ins_pipe(ialu_reg_reg_fat);
6342 %}
6343 
6344 // instruct leaPIdx(rRegP dst, indIndex mem)
6345 // %{
6346 //   match(Set dst mem);
6347 
6348 //   ins_cost(110);
6349 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6350 //   opcode(0x8D);
6351 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6352 //   ins_pipe(ialu_reg_reg_fat);
6353 // %}
6354 
6355 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)  // leaq (0x8D) of an indIndexOffset address operand
6356 %{
6357   match(Set dst mem);
6358
6359   ins_cost(110);
6360   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6361   opcode(0x8D);
6362   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6363   ins_pipe(ialu_reg_reg_fat);
6364 %}
6365 
6366 instruct leaPIdxScale(rRegP dst, indIndexScale mem)  // leaq (0x8D) of an indIndexScale address operand
6367 %{
6368   match(Set dst mem);
6369
6370   ins_cost(110);
6371   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6372   opcode(0x8D);
6373   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6374   ins_pipe(ialu_reg_reg_fat);
6375 %}
6376 
6377 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)  // leaq (0x8D) of an indIndexScaleOffset address operand
6378 %{
6379   match(Set dst mem);
6380
6381   ins_cost(110);
6382   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6383   opcode(0x8D);
6384   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6385   ins_pipe(ialu_reg_reg_fat);
6386 %}
6387 
6388 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)  // leaq (0x8D) of an indPosIndexScaleOffset address operand
6389 %{
6390   match(Set dst mem);
6391
6392   ins_cost(110);
6393   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6394   opcode(0x8D);
6395   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6396   ins_pipe(ialu_reg_reg_fat);
6397 %}
6398 
6399 // Load Effective Address which uses Narrow (32-bits) oop; enabled only with UseCompressedOops and a non-zero narrow oop shift.
6400 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6401 %{
6402   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
6403   match(Set dst mem);
6404
6405   ins_cost(110);
6406   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6407   opcode(0x8D);
6408   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6409   ins_pipe(ialu_reg_reg_fat);
6410 %}
6411 
6412 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)  // leaq (0x8D) of an indOffset8Narrow address operand
6413 %{
6414   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6415   match(Set dst mem);
6416
6417   ins_cost(110); // XXX
6418   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6419   opcode(0x8D);
6420   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6421   ins_pipe(ialu_reg_reg_fat);
6422 %}
6423 
6424 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)  // leaq (0x8D) of an indOffset32Narrow address operand
6425 %{
6426   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6427   match(Set dst mem);
6428
6429   ins_cost(110);
6430   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6431   opcode(0x8D);
6432   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6433   ins_pipe(ialu_reg_reg_fat);
6434 %}
6435 
6436 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)  // leaq (0x8D) of an indIndexOffsetNarrow address operand
6437 %{
6438   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6439   match(Set dst mem);
6440
6441   ins_cost(110);
6442   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6443   opcode(0x8D);
6444   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6445   ins_pipe(ialu_reg_reg_fat);
6446 %}
6447 
6448 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)  // leaq (0x8D) of an indIndexScaleNarrow address operand
6449 %{
6450   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6451   match(Set dst mem);
6452
6453   ins_cost(110);
6454   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6455   opcode(0x8D);
6456   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6457   ins_pipe(ialu_reg_reg_fat);
6458 %}
6459 
6460 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)  // leaq (0x8D) of an indIndexScaleOffsetNarrow address operand
6461 %{
6462   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6463   match(Set dst mem);
6464
6465   ins_cost(110);
6466   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6467   opcode(0x8D);
6468   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6469   ins_pipe(ialu_reg_reg_fat);
6470 %}
6471 
6472 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)  // leaq (0x8D) of an indPosIndexScaleOffsetNarrow address operand
6473 %{
6474   predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
6475   match(Set dst mem);
6476
6477   ins_cost(110);
6478   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6479   opcode(0x8D);
6480   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6481   ins_pipe(ialu_reg_reg_fat);
6482 %}
6483 
6484 instruct loadConI(rRegI dst, immI src)  // Load int constant: movl of an immediate via the load_immI enc class
6485 %{
6486   match(Set dst src);
6487
6488   format %{ "movl    $dst, $src\t# int" %}
6489   ins_encode(load_immI(dst, src));
6490   ins_pipe(ialu_reg_fat); // XXX
6491 %}
6492 
6493 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)  // Load int zero: xorl dst, dst (opcode 0x33)
6494 %{
6495   match(Set dst src);
6496   effect(KILL cr);  // xor writes the flags
6497
6498   ins_cost(50);
6499   format %{ "xorl    $dst, $dst\t# int" %}
6500   opcode(0x33); /* + rd */
6501   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6502   ins_pipe(ialu_reg);
6503 %}
6504 
6505 instruct loadConL(rRegL dst, immL src)  // Load long constant via the load_immL enc class; cost 150 is the highest of the long-constant forms visible here
6506 %{
6507   match(Set dst src);
6508
6509   ins_cost(150);
6510   format %{ "movq    $dst, $src\t# long" %}
6511   ins_encode(load_immL(dst, src));
6512   ins_pipe(ialu_reg);
6513 %}
6514 
6515 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)  // Load long zero: xorl dst, dst (opcode 0x33) with the non-wide REX_reg_reg enc class
6516 %{
6517   match(Set dst src);
6518   effect(KILL cr);  // xor writes the flags
6519
6520   ins_cost(50);
6521   format %{ "xorl    $dst, $dst\t# long" %}
6522   opcode(0x33); /* + rd */
6523   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6524   ins_pipe(ialu_reg); // XXX
6525 %}
6526 
6527 instruct loadConUL32(rRegL dst, immUL32 src)  // Load long constant that fits in unsigned 32 bits: movl via load_immUL32
6528 %{
6529   match(Set dst src);
6530
6531   ins_cost(60);
6532   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6533   ins_encode(load_immUL32(dst, src));
6534   ins_pipe(ialu_reg);
6535 %}
6536 
6537 instruct loadConL32(rRegL dst, immL32 src)  // Load long constant that fits in 32 bits: movq via load_immL32
6538 %{
6539   match(Set dst src);
6540
6541   ins_cost(70);
6542   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6543   ins_encode(load_immL32(dst, src));
6544   ins_pipe(ialu_reg);
6545 %}
6546 
6547 instruct loadConP(rRegP dst, immP con) %{
6548   match(Set dst con);  // Load pointer constant: movq of an immediate via the load_immP enc class
6549
6550   format %{ "movq    $dst, $con\t# ptr" %}
6551   ins_encode(load_immP(dst, con));
6552   ins_pipe(ialu_reg_fat); // XXX
6553 %}
6554 
6555 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)  // Load NULL pointer: xorl dst, dst (opcode 0x33)
6556 %{
6557   match(Set dst src);
6558   effect(KILL cr);  // xor writes the flags
6559
6560   ins_cost(50);
6561   format %{ "xorl    $dst, $dst\t# ptr" %}
6562   opcode(0x33); /* + rd */
6563   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6564   ins_pipe(ialu_reg);
6565 %}
6566 
6567 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)  // Load pointer constant that is a positive 32-bit value: movl via load_immP31
6568 %{
6569   match(Set dst src);
6570   effect(KILL cr);  // NOTE(review): flags are declared killed although the format shows a plain movl - confirm against load_immP31
6571
6572   ins_cost(60);
6573   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6574   ins_encode(load_immP31(dst, src));
6575   ins_pipe(ialu_reg);
6576 %}
6577 
6578 instruct loadConF(regF dst, immF con) %{
6579   match(Set dst con);  // Load float constant from the constant table
6580   ins_cost(125);
6581   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6582   ins_encode %{
6583     __ movflt($dst$$XMMRegister, $constantaddress($con));
6584   %}
6585   ins_pipe(pipe_slow);
6586 %}
6587 
6588 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6589   match(Set dst src);  // Load compressed NULL: clear dst by xor-ing it with itself
6590   effect(KILL cr);     // xor writes the flags
6591   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6592   ins_encode %{
6593     __ xorq($dst$$Register, $dst$$Register);
6594   %}
6595   ins_pipe(ialu_reg);
6596 %}
6597 
6598 instruct loadConN(rRegN dst, immN src) %{
6599   match(Set dst src);  // Materialize a non-NULL compressed oop constant
6600
6601   ins_cost(125);
6602   format %{ "movl    $dst, $src\t# compressed ptr" %}
6603   ins_encode %{
6604     jobject narrow_con = (jobject)$src$$constant;
6605     if (narrow_con != NULL) {
6606       __ set_narrow_oop($dst$$Register, narrow_con);
6607     } else {
6608       ShouldNotReachHere();  // a NULL constant must never reach this encoding
6609     }
6610   %}
6611   ins_pipe(ialu_reg_fat); // XXX
6612 %}
6613 
6614 instruct loadConF0(regF dst, immF0 src)  // Load float 0.0: xorps dst, dst (0F 57) instead of a constant-table load
6615 %{
6616   match(Set dst src);
6617   ins_cost(100);
6618
6619   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6620   opcode(0x0F, 0x57);
6621   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6622   ins_pipe(pipe_slow);
6623 %}
6624 
6625 // Load double constant from the constant table.  Use the same format since predicate() can not be used here.
6626 instruct loadConD(regD dst, immD con) %{
6627   match(Set dst con);
6628   ins_cost(125);
6629   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6630   ins_encode %{
6631     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6632   %}
6633   ins_pipe(pipe_slow);
6634 %}
6635 
6636 instruct loadConD0(regD dst, immD0 src)  // Load double 0.0: xorpd dst, dst (66 0F 57) instead of a constant-table load
6637 %{
6638   match(Set dst src);
6639   ins_cost(100);
6640
6641   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6642   opcode(0x66, 0x0F, 0x57);
6643   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6644   ins_pipe(pipe_slow);
6645 %}
6646 
6647 instruct loadSSI(rRegI dst, stackSlotI src)  // Load int from a stack slot: movl (0x8B)
6648 %{
6649   match(Set dst src);
6650
6651   ins_cost(125);
6652   format %{ "movl    $dst, $src\t# int stk" %}
6653   opcode(0x8B);
6654   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6655   ins_pipe(ialu_reg_mem);
6656 %}
6657 
6658 instruct loadSSL(rRegL dst, stackSlotL src)  // Load long from a stack slot: movq (0x8B with the wide REX enc class)
6659 %{
6660   match(Set dst src);
6661
6662   ins_cost(125);
6663   format %{ "movq    $dst, $src\t# long stk" %}
6664   opcode(0x8B);
6665   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6666   ins_pipe(ialu_reg_mem);
6667 %}
6668 
6669 instruct loadSSP(rRegP dst, stackSlotP src)  // Load pointer from a stack slot: movq (0x8B with the wide REX enc class)
6670 %{
6671   match(Set dst src);
6672
6673   ins_cost(125);
6674   format %{ "movq    $dst, $src\t# ptr stk" %}
6675   opcode(0x8B);
6676   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6677   ins_pipe(ialu_reg_mem);
6678 %}
6679 
6680 instruct loadSSF(regF dst, stackSlotF src)  // Load float from a stack slot: movss (F3 0F 10)
6681 %{
6682   match(Set dst src);
6683
6684   ins_cost(125);
6685   format %{ "movss   $dst, $src\t# float stk" %}
6686   opcode(0xF3, 0x0F, 0x10);
6687   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6688   ins_pipe(pipe_slow); // XXX
6689 %}
6690 
6691 // Load double from a stack slot.  Use the same format since predicate() can not be used here.
6692 instruct loadSSD(regD dst, stackSlotD src)
6693 %{
6694   match(Set dst src);
6695
6696   ins_cost(125);
6697   format %{ "movsd   $dst, $src\t# double stk" %}
6698   ins_encode  %{
6699     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot is addressed as rsp + displacement
6700   %}
6701   ins_pipe(pipe_slow); // XXX
6702 %}
6703 
6704 // Prefetch instructions.
6705 // Must be safe to execute with invalid address (cannot fault).
6706 
// Read prefetch, variant selected when ReadPrefetchInstr == 3.
// Emits opcode 0F 0D with ModRM reg field /0 (printed as PREFETCHR).
6707 instruct prefetchr( memory mem ) %{
6708   predicate(ReadPrefetchInstr==3);
6709   match(PrefetchRead mem);
6710   ins_cost(125);
6711 
6712   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6713   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6714   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6715   ins_pipe(ialu_mem);
6716 %}
6717 
// Read prefetch, variant selected when ReadPrefetchInstr == 0.
// Emits opcode 0F 18 with ModRM reg field /0 (PREFETCHNTA).
6718 instruct prefetchrNTA( memory mem ) %{
6719   predicate(ReadPrefetchInstr==0);
6720   match(PrefetchRead mem);
6721   ins_cost(125);
6722 
6723   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6724   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6725   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6726   ins_pipe(ialu_mem);
6727 %}
6728 
// Read prefetch, variant selected when ReadPrefetchInstr == 1.
// Emits opcode 0F 18 with ModRM reg field /1 (PREFETCHT0).
6729 instruct prefetchrT0( memory mem ) %{
6730   predicate(ReadPrefetchInstr==1);
6731   match(PrefetchRead mem);
6732   ins_cost(125);
6733 
6734   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6735   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6736   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6737   ins_pipe(ialu_mem);
6738 %}
6739 
// Read prefetch, variant selected when ReadPrefetchInstr == 2.
// Emits opcode 0F 18 with ModRM reg field /3 (PREFETCHT2).
6740 instruct prefetchrT2( memory mem ) %{
6741   predicate(ReadPrefetchInstr==2);
6742   match(PrefetchRead mem);
6743   ins_cost(125);
6744 
6745   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6746   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6747   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6748   ins_pipe(ialu_mem);
6749 %}
6750 
// Write (allocation) prefetch, variant selected when AllocatePrefetchInstr == 3.
// Emits opcode 0F 0D with ModRM reg field /1 (PREFETCHW).
6751 instruct prefetchw( memory mem ) %{
6752   predicate(AllocatePrefetchInstr==3);
6753   match(PrefetchWrite mem);
6754   ins_cost(125);
6755 
6756   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6757   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6758   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6759   ins_pipe(ialu_mem);
6760 %}
6761 
// Write (allocation) prefetch, variant selected when AllocatePrefetchInstr == 0.
// Emits opcode 0F 18 with ModRM reg field /0 (PREFETCHNTA), the same bytes as
// prefetchrNTA but matched against PrefetchWrite.
6762 instruct prefetchwNTA( memory mem ) %{
6763   predicate(AllocatePrefetchInstr==0);
6764   match(PrefetchWrite mem);
6765   ins_cost(125);
6766 
6767   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6768   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6769   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6770   ins_pipe(ialu_mem);
6771 %}
6772 
// Write (allocation) prefetch, variant selected when AllocatePrefetchInstr == 1.
// Emits opcode 0F 18 with ModRM reg field /1 (PREFETCHT0).
6773 instruct prefetchwT0( memory mem ) %{
6774   predicate(AllocatePrefetchInstr==1);
6775   match(PrefetchWrite mem);
6776   ins_cost(125);
6777 
6778   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6779   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6780   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6781   ins_pipe(ialu_mem);
6782 %}
6783 
// Write (allocation) prefetch, variant selected when AllocatePrefetchInstr == 2.
// Emits opcode 0F 18 with ModRM reg field /3 (PREFETCHT2).
6784 instruct prefetchwT2( memory mem ) %{
6785   predicate(AllocatePrefetchInstr==2);
6786   match(PrefetchWrite mem);
6787   ins_cost(125);
6788 
6789   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6790   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6791   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6792   ins_pipe(ialu_mem);
6793 %}
6794 
6795 //----------Store Instructions-------------------------------------------------
6796 
6797 // Store Byte
// Stores the low byte of an int register to memory (movb, opcode 0x88).
// Uses the byte-register REX encoder (REX_breg_mem) rather than REX_reg_mem.
6798 instruct storeB(memory mem, rRegI src)
6799 %{
6800   match(Set mem (StoreB mem src));
6801 
6802   ins_cost(125); // XXX
6803   format %{ "movb    $mem, $src\t# byte" %}
6804   opcode(0x88);
6805   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6806   ins_pipe(ialu_mem_reg);
6807 %}
6808 
6809 // Store Char/Short
// Stores the low 16 bits of an int register to memory (movw).
// Same 0x89 opcode as storeI, preceded by the SizePrefix encoding.
6810 instruct storeC(memory mem, rRegI src)
6811 %{
6812   match(Set mem (StoreC mem src));
6813 
6814   ins_cost(125); // XXX
6815   format %{ "movw    $mem, $src\t# char/short" %}
6816   opcode(0x89);
6817   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6818   ins_pipe(ialu_mem_reg);
6819 %}
6820 
6821 // Store Integer
// Stores a 32-bit int register to memory (movl, opcode 0x89).
6822 instruct storeI(memory mem, rRegI src)
6823 %{
6824   match(Set mem (StoreI mem src));
6825 
6826   ins_cost(125); // XXX
6827   format %{ "movl    $mem, $src\t# int" %}
6828   opcode(0x89);
6829   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6830   ins_pipe(ialu_mem_reg);
6831 %}
6832 
6833 // Store Long
// Stores a 64-bit long register to memory (movq).
// Same 0x89 opcode as storeI, encoded through REX_reg_mem_wide.
6834 instruct storeL(memory mem, rRegL src)
6835 %{
6836   match(Set mem (StoreL mem src));
6837 
6838   ins_cost(125); // XXX
6839   format %{ "movq    $mem, $src\t# long" %}
6840   opcode(0x89);
6841   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6842   ins_pipe(ialu_mem_reg); // XXX
6843 %}
6844 
6845 // Store Pointer
// Stores a pointer register to memory (movq, opcode 0x89, wide REX form).
// The source operand class is any_RegP rather than rRegP.
6846 instruct storeP(memory mem, any_RegP src)
6847 %{
6848   match(Set mem (StoreP mem src));
6849 
6850   ins_cost(125); // XXX
6851   format %{ "movq    $mem, $src\t# ptr" %}
6852   opcode(0x89);
6853   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6854   ins_pipe(ialu_mem_reg);
6855 %}
6856 
// Store a NULL pointer by writing r12 instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
6857 instruct storeImmP0(memory mem, immP0 zero)
6858 %{
6859   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6860   match(Set mem (StoreP mem zero));
6861 
6862   ins_cost(125); // XXX
6863   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6864   ins_encode %{
6865     __ movq($mem$$Address, r12);
6866   %}
6867   ins_pipe(ialu_mem_reg);
6868 %}
6869 
6870 // Store NULL Pointer, mark word, or other simple pointer constant.
// The constant must fit the immP31 operand; it is emitted as a 32-bit
// immediate (Con32) of a wide-REX C7 /0 store, printed as movq.
6871 instruct storeImmP(memory mem, immP31 src)
6872 %{
6873   match(Set mem (StoreP mem src));
6874 
6875   ins_cost(150); // XXX
6876   format %{ "movq    $mem, $src\t# ptr" %}
6877   opcode(0xC7); /* C7 /0 */
6878   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6879   ins_pipe(ialu_mem_imm);
6880 %}
6881 
6882 // Store Compressed Pointer
// Stores a narrow-oop register to memory with a 32-bit movl.
6883 instruct storeN(memory mem, rRegN src)
6884 %{
6885   match(Set mem (StoreN mem src));
6886 
6887   ins_cost(125); // XXX
6888   format %{ "movl    $mem, $src\t# compressed ptr" %}
6889   ins_encode %{
6890     __ movl($mem$$Address, $src$$Register);
6891   %}
6892   ins_pipe(ialu_mem_reg);
6893 %}
6894 
// Store a compressed NULL by writing the low 32 bits of r12.
// Only selected when the narrow-oop base is NULL; the format string records
// the assumption that R12 (heap base) is 0.  Unlike the other R12-based zero
// stores here, the predicate does not also test UseCompressedOops.
6895 instruct storeImmN0(memory mem, immN0 zero)
6896 %{
6897   predicate(Universe::narrow_oop_base() == NULL);
6898   match(Set mem (StoreN mem zero));
6899 
6900   ins_cost(125); // XXX
6901   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6902   ins_encode %{
6903     __ movl($mem$$Address, r12);
6904   %}
6905   ins_pipe(ialu_mem_reg);
6906 %}
6907 
// Store a compressed-pointer constant to memory.
// A NULL constant is written as a plain 32-bit zero; any other constant is
// handed to set_narrow_oop together with the destination address.
6908 instruct storeImmN(memory mem, immN src)
6909 %{
6910   match(Set mem (StoreN mem src));
6911 
6912   ins_cost(150); // XXX
6913   format %{ "movl    $mem, $src\t# compressed ptr" %}
6914   ins_encode %{
6915     address oop_const = (address)$src$$constant;
6916     if (oop_const != NULL) {
6917       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6918     } else {
6919       __ movl($mem$$Address, (int32_t)0);
6920     }
6921   %}
6922   ins_pipe(ialu_mem_imm);
6923 %}
6924 
6925 // Store Integer Immediate
// Zero variant: stores the low 32 bits of r12 instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
6926 instruct storeImmI0(memory mem, immI0 zero)
6927 %{
6928   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6929   match(Set mem (StoreI mem zero));
6930 
6931   ins_cost(125); // XXX
6932   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6933   ins_encode %{
6934     __ movl($mem$$Address, r12);
6935   %}
6936   ins_pipe(ialu_mem_reg);
6937 %}
6938 
// Store an arbitrary int immediate to memory (movl, C7 /0 with a 32-bit
// immediate).  Costed at 150, above the 125 of the R12-based zero store.
6939 instruct storeImmI(memory mem, immI src)
6940 %{
6941   match(Set mem (StoreI mem src));
6942 
6943   ins_cost(150);
6944   format %{ "movl    $mem, $src\t# int" %}
6945   opcode(0xC7); /* C7 /0 */
6946   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6947   ins_pipe(ialu_mem_imm);
6948 %}
6949 
6950 // Store Long Immediate
// Zero variant: stores all 64 bits of r12 instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
6951 instruct storeImmL0(memory mem, immL0 zero)
6952 %{
6953   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6954   match(Set mem (StoreL mem zero));
6955 
6956   ins_cost(125); // XXX
6957   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6958   ins_encode %{
6959     __ movq($mem$$Address, r12);
6960   %}
6961   ins_pipe(ialu_mem_reg);
6962 %}
6963 
// Store a long immediate that fits the immL32 operand (movq, wide-REX C7 /0).
// The value is emitted as a 32-bit immediate via Con32.
6964 instruct storeImmL(memory mem, immL32 src)
6965 %{
6966   match(Set mem (StoreL mem src));
6967 
6968   ins_cost(150);
6969   format %{ "movq    $mem, $src\t# long" %}
6970   opcode(0xC7); /* C7 /0 */
6971   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6972   ins_pipe(ialu_mem_imm);
6973 %}
6974 
6975 // Store Short/Char Immediate
// Zero variant: stores the low 16 bits of r12 instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
6976 instruct storeImmC0(memory mem, immI0 zero)
6977 %{
6978   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6979   match(Set mem (StoreC mem zero));
6980 
6981   ins_cost(125); // XXX
6982   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6983   ins_encode %{
6984     __ movw($mem$$Address, r12);
6985   %}
6986   ins_pipe(ialu_mem_reg);
6987 %}
6988 
// Store a 16-bit immediate to memory (movw).
// Only available when UseStoreImmI16 is set.  Encoded as the 32-bit C7 /0
// store behind SizePrefix, with a 16-bit immediate (Con16).
6989 instruct storeImmI16(memory mem, immI16 src)
6990 %{
6991   predicate(UseStoreImmI16);
6992   match(Set mem (StoreC mem src));
6993 
6994   ins_cost(150);
6995   format %{ "movw    $mem, $src\t# short/char" %}
6996   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6997   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6998   ins_pipe(ialu_mem_imm);
6999 %}
7000 
7001 // Store Byte Immediate
// Zero variant: stores the low byte of r12 instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
// The format annotation reads "byte" to match the StoreB/movb being emitted
// (it previously carried the "short/char" text of the 16-bit variant).
7002 instruct storeImmB0(memory mem, immI0 zero)
7003 %{
7004   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7005   match(Set mem (StoreB mem zero));
7006 
7007   ins_cost(125); // XXX
7008   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7009   ins_encode %{
7010     __ movb($mem$$Address, r12);
7011   %}
7012   ins_pipe(ialu_mem_reg);
7013 %}
7014 
// Store an 8-bit immediate to memory (movb, C6 /0).
// The immediate is emitted through the Con8or32 encoding class.
7015 instruct storeImmB(memory mem, immI8 src)
7016 %{
7017   match(Set mem (StoreB mem src));
7018 
7019   ins_cost(150); // XXX
7020   format %{ "movb    $mem, $src\t# byte" %}
7021   opcode(0xC6); /* C6 /0 */
7022   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7023   ins_pipe(ialu_mem_imm);
7024 %}
7025 
7026 // Store Aligned Packed Byte XMM register to memory
// Matches Store8B and emits the shared movq_st encoding (printed as MOVQ).
7027 instruct storeA8B(memory mem, regD src) %{
7028   match(Set mem (Store8B mem src));
7029   ins_cost(145);
7030   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7031   ins_encode( movq_st(mem, src));
7032   ins_pipe( pipe_slow );
7033 %}
7034 
7035 // Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C and emits the shared movq_st encoding (printed as MOVQ).
7036 instruct storeA4C(memory mem, regD src) %{
7037   match(Set mem (Store4C mem src));
7038   ins_cost(145);
7039   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7040   ins_encode( movq_st(mem, src));
7041   ins_pipe( pipe_slow );
7042 %}
7043 
7044 // Store Aligned Packed Integer XMM register to memory
// Matches Store2I and emits the shared movq_st encoding (printed as MOVQ).
7045 instruct storeA2I(memory mem, regD src) %{
7046   match(Set mem (Store2I mem src));
7047   ins_cost(145);
7048   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7049   ins_encode( movq_st(mem, src));
7050   ins_pipe( pipe_slow );
7051 %}
7052 
7053 // Store CMS card-mark Immediate
// Zero variant: writes the card-mark byte from the low byte of r12.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
7054 instruct storeImmCM0_reg(memory mem, immI0 zero)
7055 %{
7056   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7057   match(Set mem (StoreCM mem zero));
7058 
7059   ins_cost(125); // XXX
7060   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7061   ins_encode %{
7062     __ movb($mem$$Address, r12);
7063   %}
7064   ins_pipe(ialu_mem_reg);
7065 %}
7066 
// Store a zero card-mark byte as an immediate (movb, C6 /0).
// Has no predicate, so it is the StoreCM-of-zero rule available when the
// R12-based variant's predicate does not hold; costed at 150 versus 125.
7067 instruct storeImmCM0(memory mem, immI0 src)
7068 %{
7069   match(Set mem (StoreCM mem src));
7070 
7071   ins_cost(150); // XXX
7072   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7073   opcode(0xC6); /* C6 /0 */
7074   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7075   ins_pipe(ialu_mem_imm);
7076 %}
7077 
7078 // Store Aligned Packed Single Float XMM register to memory
// Matches Store2F and emits the shared movq_st encoding (printed as MOVQ).
7079 instruct storeA2F(memory mem, regD src) %{
7080   match(Set mem (Store2F mem src));
7081   ins_cost(145);
7082   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7083   ins_encode( movq_st(mem, src));
7084   ins_pipe( pipe_slow );
7085 %}
7086 
7087 // Store Float
// Stores a float from an XMM register to memory (movss, F3 0F 11).
7088 instruct storeF(memory mem, regF src)
7089 %{
7090   match(Set mem (StoreF mem src));
7091 
7092   ins_cost(95); // XXX
7093   format %{ "movss   $mem, $src\t# float" %}
7094   opcode(0xF3, 0x0F, 0x11);
7095   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7096   ins_pipe(pipe_slow); // XXX
7097 %}
7098 
7099 // Store immediate Float value (it is faster than store from XMM register)
// Zero variant: float 0.0 is written as the low 32 bits of r12.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
// Cost 25 makes it cheaper than both storeF (95) and storeF_imm (50).
7100 instruct storeF0(memory mem, immF0 zero)
7101 %{
7102   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7103   match(Set mem (StoreF mem zero));
7104 
7105   ins_cost(25); // XXX
7106   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7107   ins_encode %{
7108     __ movl($mem$$Address, r12);
7109   %}
7110   ins_pipe(ialu_mem_reg);
7111 %}
7112 
// Store a float constant to memory as an integer immediate (movl, C7 /0).
// The constant is emitted through Con32F_as_bits, i.e. as its 32-bit pattern
// (per the encoding-class name; its definition is outside this section).
7113 instruct storeF_imm(memory mem, immF src)
7114 %{
7115   match(Set mem (StoreF mem src));
7116 
7117   ins_cost(50);
7118   format %{ "movl    $mem, $src\t# float" %}
7119   opcode(0xC7); /* C7 /0 */
7120   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7121   ins_pipe(ialu_mem_imm);
7122 %}
7123 
7124 // Store Double
// Stores a double from an XMM register to memory (movsd, F2 0F 11).
7125 instruct storeD(memory mem, regD src)
7126 %{
7127   match(Set mem (StoreD mem src));
7128 
7129   ins_cost(95); // XXX
7130   format %{ "movsd   $mem, $src\t# double" %}
7131   opcode(0xF2, 0x0F, 0x11);
7132   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7133   ins_pipe(pipe_slow); // XXX
7134 %}
7135 
7136 // Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form (movq, wide-REX C7 /0) used when the R12-based variant is
// not applicable: the predicate is the exact negation of storeD0's.
// The immD0 constant goes through the same Con32F_as_bits encoding class as
// storeF_imm.
7137 instruct storeD0_imm(memory mem, immD0 src)
7138 %{
7139   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7140   match(Set mem (StoreD mem src));
7141 
7142   ins_cost(50);
7143   format %{ "movq    $mem, $src\t# double 0." %}
7144   opcode(0xC7); /* C7 /0 */
7145   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7146   ins_pipe(ialu_mem_imm);
7147 %}
7148 
// Store double 0.0 by writing all 64 bits of r12.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL; the format string records the assumption that R12 (heap base) is 0.
7149 instruct storeD0(memory mem, immD0 zero)
7150 %{
7151   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7152   match(Set mem (StoreD mem zero));
7153 
7154   ins_cost(25); // XXX
7155   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7156   ins_encode %{
7157     __ movq($mem$$Address, r12);
7158   %}
7159   ins_pipe(ialu_mem_reg);
7160 %}
7161 
// Store an int register into a stack slot (movl, opcode 0x89).
7162 instruct storeSSI(stackSlotI dst, rRegI src)
7163 %{
7164   match(Set dst src);
7165 
7166   ins_cost(100);
7167   format %{ "movl    $dst, $src\t# int stk" %}
7168   opcode(0x89);
7169   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7170   ins_pipe( ialu_mem_reg );
7171 %}
7172 
// Store a long register into a stack slot (movq, opcode 0x89, wide REX form).
7173 instruct storeSSL(stackSlotL dst, rRegL src)
7174 %{
7175   match(Set dst src);
7176 
7177   ins_cost(100);
7178   format %{ "movq    $dst, $src\t# long stk" %}
7179   opcode(0x89);
7180   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7181   ins_pipe(ialu_mem_reg);
7182 %}
7183 
// Store a pointer register into a stack slot (movq, opcode 0x89, wide REX
// form).  Encoded exactly like storeSSL; only the operand classes differ.
7184 instruct storeSSP(stackSlotP dst, rRegP src)
7185 %{
7186   match(Set dst src);
7187 
7188   ins_cost(100);
7189   format %{ "movq    $dst, $src\t# ptr stk" %}
7190   opcode(0x89);
7191   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7192   ins_pipe(ialu_mem_reg);
7193 %}
7194 
// Store a float XMM register into a stack slot (movss, F3 0F 11).
7195 instruct storeSSF(stackSlotF dst, regF src)
7196 %{
7197   match(Set dst src);
7198 
7199   ins_cost(95); // XXX
7200   format %{ "movss   $dst, $src\t# float stk" %}
7201   opcode(0xF3, 0x0F, 0x11);
7202   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7203   ins_pipe(pipe_slow); // XXX
7204 %}
7205 
// Store a double XMM register into a stack slot (movsd, F2 0F 11).
7206 instruct storeSSD(stackSlotD dst, regD src)
7207 %{
7208   match(Set dst src);
7209 
7210   ins_cost(95); // XXX
7211   format %{ "movsd   $dst, $src\t# double stk" %}
7212   opcode(0xF2, 0x0F, 0x11);
7213   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7214   ins_pipe(pipe_slow); // XXX
7215 %}
7216 
7217 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int in place (bswapl).
// dst is both the input and the output of ReverseBytesI.
7218 instruct bytes_reverse_int(rRegI dst) %{
7219   match(Set dst (ReverseBytesI dst));
7220 
7221   format %{ "bswapl  $dst" %}
7222   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7223   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7224   ins_pipe( ialu_reg );
7225 %}
7226 
// Reverse the byte order of a long in place (bswapq).
// Same opcode bytes as bytes_reverse_int, encoded through REX_reg_wide.
7227 instruct bytes_reverse_long(rRegL dst) %{
7228   match(Set dst (ReverseBytesL dst));
7229 
7230   format %{ "bswapq  $dst" %}
7231 
7232   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7233   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7234   ins_pipe( ialu_reg);
7235 %}
7236 
// Reverse the bytes of an unsigned 16-bit value held in an int register.
// A full 32-bit bswap moves the swapped halfword into the upper 16 bits; the
// logical right shift by 16 brings it back down with zero fill.
7237 instruct bytes_reverse_unsigned_short(rRegI dst) %{
7238   match(Set dst (ReverseBytesUS dst));
7239 
7240   format %{ "bswapl  $dst\n\t" 
7241             "shrl    $dst,16\n\t" %}
7242   ins_encode %{
7243     __ bswapl($dst$$Register);
7244     __ shrl($dst$$Register, 16); 
7245   %}
7246   ins_pipe( ialu_reg );
7247 %}
7248 
// Reverse the bytes of a signed 16-bit value held in an int register.
// A full 32-bit bswap moves the swapped halfword into the upper 16 bits; the
// arithmetic right shift by 16 brings it back down with sign extension.
// The format prints "sarl" so the listing names the instruction actually
// emitted, consistent with the "shrl" shown by the unsigned variant.
7249 instruct bytes_reverse_short(rRegI dst) %{
7250   match(Set dst (ReverseBytesS dst));
7251 
7252   format %{ "bswapl  $dst\n\t" 
7253             "sarl    $dst,16\n\t" %}
7254   ins_encode %{
7255     __ bswapl($dst$$Register);
7256     __ sarl($dst$$Register, 16); 
7257   %}
7258   ins_pipe( ialu_reg );
7259 %}
7260 
7261 //---------- Zeros Count Instructions ------------------------------------------
7262 
// Count leading zeros of an int with a single lzcntl.
// Selected only when UseCountLeadingZerosInstruction is set; the flags
// register is declared killed.
7263 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7264   predicate(UseCountLeadingZerosInstruction);
7265   match(Set dst (CountLeadingZerosI src));
7266   effect(KILL cr);
7267 
7268   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7269   ins_encode %{
7270     __ lzcntl($dst$$Register, $src$$Register);
7271   %}
7272   ins_pipe(ialu_reg);
7273 %}
7274 
// Fallback leading-zero count for int, used when
// UseCountLeadingZerosInstruction is off.  bsrl produces the index of the
// highest set bit, and (31 - index) is the count.  If the branch on notZero
// is not taken, dst is first forced to -1 so the negate/add yields 32.
7275 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7276   predicate(!UseCountLeadingZerosInstruction);
7277   match(Set dst (CountLeadingZerosI src));
7278   effect(KILL cr);
7279 
7280   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7281             "jnz     skip\n\t"
7282             "movl    $dst, -1\n"
7283       "skip:\n\t"
7284             "negl    $dst\n\t"
7285             "addl    $dst, 31" %}
7286   ins_encode %{
7287     Register result = $dst$$Register;
7288     Register value  = $src$$Register;
7289     Label have_bit_index;
7290     __ bsrl(result, value);
7291     __ jccb(Assembler::notZero, have_bit_index);
7292     __ movl(result, -1);
7293     __ bind(have_bit_index);
7294     __ negl(result);
7295     __ addl(result, BitsPerInt - 1);
7296   %}
7297   ins_pipe(ialu_reg);
7298 %}
7299 
// Count leading zeros of a long with a single lzcntq; the result is an int.
// Selected only when UseCountLeadingZerosInstruction is set; the flags
// register is declared killed.
7300 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7301   predicate(UseCountLeadingZerosInstruction);
7302   match(Set dst (CountLeadingZerosL src));
7303   effect(KILL cr);
7304 
7305   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7306   ins_encode %{
7307     __ lzcntq($dst$$Register, $src$$Register);
7308   %}
7309   ins_pipe(ialu_reg);
7310 %}
7311 
// Fallback leading-zero count for long, used when
// UseCountLeadingZerosInstruction is off.  bsrq produces the index of the
// highest set bit, and (63 - index) is the count.  If the branch on notZero
// is not taken, dst is first forced to -1 so the negate/add yields 64.
7312 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7313   predicate(!UseCountLeadingZerosInstruction);
7314   match(Set dst (CountLeadingZerosL src));
7315   effect(KILL cr);
7316 
7317   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7318             "jnz     skip\n\t"
7319             "movl    $dst, -1\n"
7320       "skip:\n\t"
7321             "negl    $dst\n\t"
7322             "addl    $dst, 63" %}
7323   ins_encode %{
7324     Register result = $dst$$Register;
7325     Register value  = $src$$Register;
7326     Label have_bit_index;
7327     __ bsrq(result, value);
7328     __ jccb(Assembler::notZero, have_bit_index);
7329     __ movl(result, -1);
7330     __ bind(have_bit_index);
7331     __ negl(result);
7332     __ addl(result, BitsPerLong - 1);
7333   %}
7334   ins_pipe(ialu_reg);
7335 %}
7336 
// Count trailing zeros of an int.
// bsfl produces the index of the lowest set bit, which is the answer; if the
// branch on notZero is not taken, dst is loaded with BitsPerInt (32) instead.
7337 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7338   match(Set dst (CountTrailingZerosI src));
7339   effect(KILL cr);
7340 
7341   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7342             "jnz     done\n\t"
7343             "movl    $dst, 32\n"
7344       "done:" %}
7345   ins_encode %{
7346     Register result = $dst$$Register;
7347     Label finished;
7348     __ bsfl(result, $src$$Register);
7349     __ jccb(Assembler::notZero, finished);
7350     __ movl(result, BitsPerInt);
7351     __ bind(finished);
7352   %}
7353   ins_pipe(ialu_reg);
7354 %}
7355 
// Count trailing zeros of a long; the result is an int.
// bsfq produces the index of the lowest set bit, which is the answer; if the
// branch on notZero is not taken, dst is loaded with BitsPerLong (64) instead.
7356 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7357   match(Set dst (CountTrailingZerosL src));
7358   effect(KILL cr);
7359 
7360   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7361             "jnz     done\n\t"
7362             "movl    $dst, 64\n"
7363       "done:" %}
7364   ins_encode %{
7365     Register result = $dst$$Register;
7366     Label finished;
7367     __ bsfq(result, $src$$Register);
7368     __ jccb(Assembler::notZero, finished);
7369     __ movl(result, BitsPerLong);
7370     __ bind(finished);
7371   %}
7372   ins_pipe(ialu_reg);
7373 %}
7374 
7375 
7376 //---------- Population Count Instructions -------------------------------------
7377 
// Population count of an int register (popcntl).
// Selected only when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared as
// killed, the same way the lzcnt/bsr/bsf based instructs in this file do.
// Without it the allocator may keep a live compare result across this node.
7378 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
7379   predicate(UsePopCountInstruction);
7380   match(Set dst (PopCountI src));
       effect(KILL cr);
7381 
7382   format %{ "popcnt  $dst, $src" %}
7383   ins_encode %{
7384     __ popcntl($dst$$Register, $src$$Register);
7385   %}
7386   ins_pipe(ialu_reg);
7387 %}
7388 
// Population count of an int loaded from memory (popcntl with a memory
// operand); folds the LoadI into the instruction.
// Selected only when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared as
// killed, the same way the lzcnt/bsr/bsf based instructs in this file do.
7389 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7390   predicate(UsePopCountInstruction);
7391   match(Set dst (PopCountI (LoadI mem)));
       effect(KILL cr);
7392 
7393   format %{ "popcnt  $dst, $mem" %}
7394   ins_encode %{
7395     __ popcntl($dst$$Register, $mem$$Address);
7396   %}
7397   ins_pipe(ialu_reg);
7398 %}
7399 
7400 // Note: Long.bitCount(long) returns an int.
// Population count of a long register (popcntq) into an int register.
// Selected only when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared as
// killed, the same way the lzcnt/bsr/bsf based instructs in this file do.
7401 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
7402   predicate(UsePopCountInstruction);
7403   match(Set dst (PopCountL src));
       effect(KILL cr);
7404 
7405   format %{ "popcnt  $dst, $src" %}
7406   ins_encode %{
7407     __ popcntq($dst$$Register, $src$$Register);
7408   %}
7409   ins_pipe(ialu_reg);
7410 %}
7411 
7412 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory (popcntq with a memory
// operand) into an int register; folds the LoadL into the instruction.
// Selected only when UsePopCountInstruction is set.
// POPCNT rewrites the condition flags, so the flags register is declared as
// killed, the same way the lzcnt/bsr/bsf based instructs in this file do.
7413 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
7414   predicate(UsePopCountInstruction);
7415   match(Set dst (PopCountL (LoadL mem)));
       effect(KILL cr);
7416 
7417   format %{ "popcnt  $dst, $mem" %}
7418   ins_encode %{
7419     __ popcntq($dst$$Register, $mem$$Address);
7420   %}
7421   ins_pipe(ialu_reg);
7422 %}
7423 
7424 
7425 //----------MemBar Instructions-----------------------------------------------
7426 // Memory barrier flavors
7427 
// Acquire barrier: matched but emits nothing (size 0, empty encoding, cost 0).
7428 instruct membar_acquire()
7429 %{
7430   match(MemBarAcquire);
7431   ins_cost(0);
7432 
7433   size(0);
7434   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7435   ins_encode();
7436   ins_pipe(empty);
7437 %}
7438 
// Acquire barrier for which Matcher::prior_fast_lock(n) holds.
// Emits nothing; the format text attributes this to the CMPXCHG already
// issued by the preceding FastLock.
7439 instruct membar_acquire_lock()
7440 %{
7441   match(MemBarAcquire);
7442   predicate(Matcher::prior_fast_lock(n));
7443   ins_cost(0);
7444 
7445   size(0);
7446   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7447   ins_encode();
7448   ins_pipe(empty);
7449 %}
7450 
// Release barrier: matched but emits nothing (size 0, empty encoding, cost 0).
7451 instruct membar_release()
7452 %{
7453   match(MemBarRelease);
7454   ins_cost(0);
7455 
7456   size(0);
7457   format %{ "MEMBAR-release ! (empty encoding)" %}
7458   ins_encode();
7459   ins_pipe(empty);
7460 %}
7461 
// Release barrier for which Matcher::post_fast_unlock(n) holds.
// Emits nothing; the format text attributes this to the FastUnlock that
// follows.
7462 instruct membar_release_lock()
7463 %{
7464   match(MemBarRelease);
7465   predicate(Matcher::post_fast_unlock(n));
7466   ins_cost(0);
7467 
7468   size(0);
7469   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7470   ins_encode();
7471   ins_pipe(empty);
7472 %}
7473 
// Volatile (StoreLoad) barrier.
// The encoding asks the macro assembler for a StoreLoad membar.  The format
// template prints a "lock addl [rsp + #0], 0" when os::is_MP() and an empty
// encoding otherwise.  The flags register is declared killed, and the high
// cost (400) favors the zero-cost unnecessary_membar_volatile rule when its
// predicate applies.
7474 instruct membar_volatile(rFlagsReg cr) %{
7475   match(MemBarVolatile);
7476   effect(KILL cr);
7477   ins_cost(400);
7478 
7479   format %{ 
7480     $$template
7481     if (os::is_MP()) {
7482       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7483     } else {
7484       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7485     }
7486   %}
7487   ins_encode %{
7488     __ membar(Assembler::StoreLoad);
7489   %}
7490   ins_pipe(pipe_slow);
7491 %}
7492 
// Volatile barrier for which Matcher::post_store_load_barrier(n) holds.
// Emits nothing (size 0, empty encoding, cost 0).
7493 instruct unnecessary_membar_volatile()
7494 %{
7495   match(MemBarVolatile);
7496   predicate(Matcher::post_store_load_barrier(n));
7497   ins_cost(0);
7498 
7499   size(0);
7500   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7501   ins_encode();
7502   ins_pipe(empty);
7503 %}
7504 
7505 //----------Move Instructions--------------------------------------------------
7506 
// Reinterpret a long as a pointer: a register-to-register copy emitted via
// enc_copy_wide (printed as movq).
7507 instruct castX2P(rRegP dst, rRegL src)
7508 %{
7509   match(Set dst (CastX2P src));
7510 
7511   format %{ "movq    $dst, $src\t# long->ptr" %}
7512   ins_encode(enc_copy_wide(dst, src));
7513   ins_pipe(ialu_reg_reg); // XXX
7514 %}
7515 
// Reinterpret a pointer as a long: a register-to-register copy emitted via
// enc_copy_wide (printed as movq).
7516 instruct castP2X(rRegL dst, rRegP src)
7517 %{
7518   match(Set dst (CastP2X src));
7519 
7520   format %{ "movq    $dst, $src\t# ptr -> long" %}
7521   ins_encode(enc_copy_wide(dst, src));
7522   ins_pipe(ialu_reg_reg); // XXX
7523 %}
7524 
7525 
7526 // Convert oop pointer into compressed form
// General variant, chosen when the node's pointer type is not known to be
// NotNull.  The source is copied into dst when the two registers differ, and
// dst is then compressed in place by encode_heap_oop.  Flags are killed.
7527 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7528   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7529   match(Set dst (EncodeP src));
7530   effect(KILL cr);
7531   format %{ "encode_heap_oop $dst,$src" %}
7532   ins_encode %{
7533     Register wide_oop = $src$$Register;
7534     Register narrow   = $dst$$Register;
7535     if (narrow != wide_oop) {
7536       __ movq(narrow, wide_oop);
7537     }
7538     __ encode_heap_oop(narrow);
7539   %}
7540   ins_pipe(ialu_reg_long);
7541 %}
7542 
// Compress an oop whose node type is known to be NotNull.
// Delegates to the two-register encode_heap_oop_not_null; flags are killed.
7543 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7544   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7545   match(Set dst (EncodeP src));
7546   effect(KILL cr);
7547   format %{ "encode_heap_oop_not_null $dst,$src" %}
7548   ins_encode %{
7549     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7550   %}
7551   ins_pipe(ialu_reg_long);
7552 %}
7553 
// Expand a compressed oop into a full pointer.
// General variant, chosen when the node's oop type is neither NotNull nor
// Constant.  The source is copied into dst when the two registers differ, and
// dst is then expanded in place by decode_heap_oop.  Flags are killed.
7554 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7555   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7556             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7557   match(Set dst (DecodeN src));
7558   effect(KILL cr);
7559   format %{ "decode_heap_oop $dst,$src" %}
7560   ins_encode %{
7561     Register narrow   = $src$$Register;
7562     Register wide_oop = $dst$$Register;
7563     if (wide_oop != narrow) {
7564       __ movq(wide_oop, narrow);
7565     }
7566     __ decode_heap_oop(wide_oop);
7567   %}
7568   ins_pipe(ialu_reg_long);
7569 %}
7570 
// Expand a compressed oop whose node type is NotNull or Constant.
// Uses the in-place decode_heap_oop_not_null when source and destination are
// the same register, and the two-register form otherwise.  Flags are killed.
7571 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7572   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7573             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7574   match(Set dst (DecodeN src));
7575   effect(KILL cr);
7576   format %{ "decode_heap_oop_not_null $dst,$src" %}
7577   ins_encode %{
7578     Register narrow   = $src$$Register;
7579     Register wide_oop = $dst$$Register;
7580     if (narrow == wide_oop) {
7581       __ decode_heap_oop_not_null(wide_oop);
7582     } else {
7583       __ decode_heap_oop_not_null(wide_oop, narrow);
7584     }
7585   %}
7586   ins_pipe(ialu_reg_long);
7587 %}
7588 
7589 
7590 //----------Conditional Move---------------------------------------------------
7591 // Jump
7592 // dummy instruction for generating temp registers
// Indirect jump through a table, indexed by a left-shifted switch value.
// predicate(false) disables this rule, so the matcher never selects it.
// dest is a TEMP register: it receives the table address ($constantaddress)
// and then serves as the base of the jump's memory operand.
7593 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7594   match(Jump (LShiftL switch_val shift));
7595   ins_cost(350);
7596   predicate(false);
7597   effect(TEMP dest);
7598 
7599   format %{ "leaq    $dest, [$constantaddress]\n\t"
7600             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7601   ins_encode %{
7602     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7603     // to do that and the compiler is using that register as one it can allocate.
7604     // So we build it all by hand.
7605     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7606     // ArrayAddress dispatch(table, index);
7607     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7608     __ lea($dest$$Register, $constantaddress);
7609     __ jmp(dispatch);
7610   %}
7611   ins_pipe(pipe_jmp);
7612   ins_pc_relative(1);
7613 %}
7614 
// Indirect jump through a table, indexed by a left-shifted switch value plus
// a constant offset that fits immL32.
// dest is a TEMP register: it receives the table address ($constantaddress)
// and then serves as the base of the jump's memory operand; the offset is
// narrowed to int and becomes the displacement.
7615 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7616   match(Jump (AddL (LShiftL switch_val shift) offset));
7617   ins_cost(350);
7618   effect(TEMP dest);
7619 
7620   format %{ "leaq    $dest, [$constantaddress]\n\t"
7621             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7622   ins_encode %{
7623     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7624     // to do that and the compiler is using that register as one it can allocate.
7625     // So we build it all by hand.
7626     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7627     // ArrayAddress dispatch(table, index);
7628     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7629     __ lea($dest$$Register, $constantaddress);
7630     __ jmp(dispatch);
7631   %}
7632   ins_pipe(pipe_jmp);
7633   ins_pc_relative(1);
7634 %}
7635 
// Indirect jump through a table, indexed directly by the switch value
// (scale factor times_1, no displacement).
// dest is a TEMP register: it receives the table address ($constantaddress)
// and then serves as the base of the jump's memory operand.
7636 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7637   match(Jump switch_val);
7638   ins_cost(350);
7639   effect(TEMP dest);
7640 
7641   format %{ "leaq    $dest, [$constantaddress]\n\t"
7642             "jmp     [$dest + $switch_val]\n\t" %}
7643   ins_encode %{
7644     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7645     // to do that and the compiler is using that register as one it can allocate.
7646     // So we build it all by hand.
7647     // Address index(noreg, switch_reg, Address::times_1);
7648     // ArrayAddress dispatch(table, index);
7649     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7650     __ lea($dest$$Register, $constantaddress);
7651     __ jmp(dispatch);
7652   %}
7653   ins_pipe(pipe_jmp);
7654   ins_pc_relative(1);
7655 %}
7656 
7657 // Conditional move
// int, register source, signed condition (cmpOp / rFlagsReg).
// dst appears on both sides of the match: it keeps its value unless the
// condition selects src.  The condition is encoded by enc_cmov(cop) on top of
// the 0F 40 base opcode.
7658 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7659 %{
7660   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7661 
7662   ins_cost(200); // XXX
7663   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7664   opcode(0x0F, 0x40);
7665   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7666   ins_pipe(pipe_cmov_reg);
7667 %}
7668 
// Conditional move: int, register source, unsigned condition
// (cmpOpU / rFlagsRegU).  Encoded identically to cmovI_reg.
7669 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7670   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7671 
7672   ins_cost(200); // XXX
7673   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7674   opcode(0x0F, 0x40);
7675   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7676   ins_pipe(pipe_cmov_reg);
7677 %}
7678 
// Conditional move: int, register source, cmpOpUCF / rFlagsRegUCF operands.
// Has no encoding of its own; it expands to cmovI_regU with the same operands.
7679 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7680   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7681   ins_cost(200);
7682   expand %{
7683     cmovI_regU(cop, cr, dst, src);
7684   %}
7685 %}
7686 
7687 // Conditional move
// int, memory source (folds the LoadI into the cmov), signed condition.
// Costed at 250 versus 200 for the register form.
7688 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7689   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7690 
7691   ins_cost(250); // XXX
7692   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7693   opcode(0x0F, 0x40);
7694   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7695   ins_pipe(pipe_cmov_mem);
7696 %}
7697 
7698 // Conditional move
// int, memory source (folds the LoadI into the cmov), unsigned condition
// (cmpOpU / rFlagsRegU).  Encoded identically to cmovI_mem.
7699 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
7700 %{
7701   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7702 
7703   ins_cost(250); // XXX
7704   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7705   opcode(0x0F, 0x40);
7706   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7707   ins_pipe(pipe_cmov_mem);
7708 %}
7709 
7710 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{ // Conditional move: int, memory source, cmpOpUCF / rFlagsRegUCF flags.
7711   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7712   ins_cost(250);
7713   expand %{ // no encoding of its own: expands to the unsigned memory variant
7714     cmovI_memU(cop, cr, dst, src);
7715   %}
7716 %}
7717 
7718 // Conditional move: compressed pointer (rRegN), register source, signed-compare flags.
7719 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
7720 %{
7721   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7722 
7723   ins_cost(200); // XXX
7724   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
7725   opcode(0x0F, 0x40);
7726   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // non-wide REX helper, i.e. the same encode sequence as the int cmov
7727   ins_pipe(pipe_cmov_reg);
7728 %}
7729 
7730 // Conditional move: compressed pointer (rRegN), register source, unsigned-compare flags.
7731 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
7732 %{
7733   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7734 
7735   ins_cost(200); // XXX
7736   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
7737   opcode(0x0F, 0x40);
7738   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // non-wide REX helper, i.e. the same encode sequence as the int cmov
7739   ins_pipe(pipe_cmov_reg);
7740 %}
7741 
7742 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{ // Conditional move: compressed pointer, register source, cmpOpUCF / rFlagsRegUCF flags.
7743   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
7744   ins_cost(200);
7745   expand %{ // no encoding of its own: expands to the unsigned variant
7746     cmovN_regU(cop, cr, dst, src);
7747   %}
7748 %}
7749 
7750 // Conditional move: pointer (rRegP), register source, signed-compare flags.
7751 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
7752 %{
7753   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7754 
7755   ins_cost(200); // XXX
7756   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
7757   opcode(0x0F, 0x40);
7758   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX helper: 64-bit cmovq per the format string
7759   ins_pipe(pipe_cmov_reg);  // XXX
7760 %}
7761 
7762 // Conditional move: pointer (rRegP), register source, unsigned-compare flags.
7763 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
7764 %{
7765   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7766 
7767   ins_cost(200); // XXX
7768   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
7769   opcode(0x0F, 0x40);
7770   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX helper: 64-bit cmovq per the format string
7771   ins_pipe(pipe_cmov_reg); // XXX
7772 %}
7773 
7774 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ // Conditional move: pointer, register source, cmpOpUCF / rFlagsRegUCF flags.
7775   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7776   ins_cost(200);
7777   expand %{ // no encoding of its own: expands to the unsigned variant
7778     cmovP_regU(cop, cr, dst, src);
7779   %}
7780 %}
7781 
7782 // DISABLED: Requires the ADLC to emit a bottom_type call that
7783 // correctly meets the two pointer arguments; one is an incoming
7784 // register but the other is a memory operand.  ALSO appears to
7785 // be buggy with implicit null checks.
7786 //
7787 //// Conditional move
7788 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7789 //%{
7790 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7791 //  ins_cost(250);
7792 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7793 //  opcode(0x0F,0x40);
7794 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7795 //  ins_pipe( pipe_cmov_mem );
7796 //%}
7797 //
7798 //// Conditional move
7799 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7800 //%{
7801 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7802 //  ins_cost(250);
7803 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7804 //  opcode(0x0F,0x40);
7805 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7806 //  ins_pipe( pipe_cmov_mem );
7807 //%}
7808 
7809 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
7810 %{ // Conditional move: long, register source, signed-compare flags.
7811   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7812 
7813   ins_cost(200); // XXX
7814   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7815   opcode(0x0F, 0x40);
7816   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX helper: 64-bit cmovq per the format string
7817   ins_pipe(pipe_cmov_reg);  // XXX
7818 %}
7819 
7820 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
7821 %{ // Conditional move: long, memory source (the LoadL is folded into the match), signed-compare flags.
7822   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7823 
7824   ins_cost(200); // XXX NOTE(review): the int memory form cmovI_mem is costed 250 - confirm 200 is intended here
7825   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7826   opcode(0x0F, 0x40);
7827   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7828   ins_pipe(pipe_cmov_mem);  // XXX
7829 %}
7830 
7831 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
7832 %{ // Conditional move: long, register source, unsigned-compare flags.
7833   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7834 
7835   ins_cost(200); // XXX
7836   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7837   opcode(0x0F, 0x40);
7838   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX helper: 64-bit cmovq per the format string
7839   ins_pipe(pipe_cmov_reg); // XXX
7840 %}
7841 
7842 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ // Conditional move: long, register source, cmpOpUCF / rFlagsRegUCF flags.
7843   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7844   ins_cost(200);
7845   expand %{ // no encoding of its own: expands to the unsigned variant
7846     cmovL_regU(cop, cr, dst, src);
7847   %}
7848 %}
7849 
7850 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
7851 %{ // Conditional move: long, memory source (the LoadL is folded into the match), unsigned-compare flags.
7852   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7853 
7854   ins_cost(200); // XXX NOTE(review): the int memory form cmovI_memU is costed 250 - confirm 200 is intended here
7855   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7856   opcode(0x0F, 0x40);
7857   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
7858   ins_pipe(pipe_cmov_mem); // XXX
7859 %}
7860 
7861 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ // Conditional move: long, memory source, cmpOpUCF / rFlagsRegUCF flags.
7862   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7863   ins_cost(200); // NOTE(review): the int memory form cmovI_memUCF is costed 250 - confirm 200 is intended here
7864   expand %{ // no encoding of its own: expands to the unsigned memory variant
7865     cmovL_memU(cop, cr, dst, src);
7866   %}
7867 %}
7868 
7869 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
7870 %{ // Conditional move: float (regF), register source, signed-compare flags.
7871   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7872 
7873   ins_cost(200); // XXX
7874   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7875             "movss     $dst, $src\n"
7876     "skip:" %} // per this format: a branch on the negated condition around a movss, not a cmov instruction
7877   ins_encode(enc_cmovf_branch(cop, dst, src));
7878   ins_pipe(pipe_slow);
7879 %}
7880 
7881 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7882 // %{
7883 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7884 
7885 //   ins_cost(200); // XXX
7886 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7887 //             "movss     $dst, $src\n"
7888 //     "skip:" %}
7889 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7890 //   ins_pipe(pipe_slow);
7891 // %}
7892 
7893 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7894 %{ // Conditional move: float (regF), register source, unsigned-compare flags.
7895   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7896 
7897   ins_cost(200); // XXX
7898   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7899             "movss     $dst, $src\n"
7900     "skip:" %} // per this format: a branch on the negated condition around a movss, not a cmov instruction
7901   ins_encode(enc_cmovf_branch(cop, dst, src)); // same encode class as the signed variant cmovF_reg
7902   ins_pipe(pipe_slow);
7903 %}
7904 
7905 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ // Conditional move: float, register source, cmpOpUCF / rFlagsRegUCF flags.
7906   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7907   ins_cost(200);
7908   expand %{ // no encoding of its own: expands to the unsigned variant
7909     cmovF_regU(cop, cr, dst, src);
7910   %}
7911 %}
7912 
7913 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7914 %{ // Conditional move: double (regD), register source, signed-compare flags.
7915   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7916 
7917   ins_cost(200); // XXX
7918   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7919             "movsd     $dst, $src\n"
7920     "skip:" %} // per this format: a branch on the negated condition around a movsd, not a cmov instruction
7921   ins_encode(enc_cmovd_branch(cop, dst, src));
7922   ins_pipe(pipe_slow);
7923 %}
7924 
7925 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7926 %{ // Conditional move: double (regD), register source, unsigned-compare flags.
7927   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7928 
7929   ins_cost(200); // XXX
7930   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7931             "movsd     $dst, $src\n"
7932     "skip:" %} // per this format: a branch on the negated condition around a movsd, not a cmov instruction
7933   ins_encode(enc_cmovd_branch(cop, dst, src)); // same encode class as the signed variant cmovD_reg
7934   ins_pipe(pipe_slow);
7935 %}
7936 
7937 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ // Conditional move: double, register source, cmpOpUCF / rFlagsRegUCF flags.
7938   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7939   ins_cost(200);
7940   expand %{ // no encoding of its own: expands to the unsigned variant
7941     cmovD_regU(cop, cr, dst, src);
7942   %}
7943 %}
7944 
7945 //----------Arithmetic Instructions--------------------------------------------
7946 //----------Addition Instructions----------------------------------------------
7947 
7948 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7949 %{ // Int add, register += register (two-address form: dst is also the first input).
7950   match(Set dst (AddI dst src));
7951   effect(KILL cr); // flags are clobbered
7952 
7953   format %{ "addl    $dst, $src\t# int" %}
7954   opcode(0x03);
7955   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7956   ins_pipe(ialu_reg_reg);
7957 %}
7958 
7959 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7960 %{ // Int add, register += immediate.
7961   match(Set dst (AddI dst src));
7962   effect(KILL cr); // flags are clobbered
7963 
7964   format %{ "addl    $dst, $src\t# int" %}
7965   opcode(0x81, 0x00); /* /0 id */
7966   ins_encode(OpcSErm(dst, src), Con8or32(src)); // NOTE(review): OpcSErm/Con8or32 presumably select the short sign-extended imm8 form when the constant fits - confirm in the encode block
7967   ins_pipe( ialu_reg );
7968 %}
7969 
7970 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7971 %{ // Int add, register += memory (the LoadI is folded into the match).
7972   match(Set dst (AddI dst (LoadI src)));
7973   effect(KILL cr); // flags are clobbered
7974 
7975   ins_cost(125); // XXX
7976   format %{ "addl    $dst, $src\t# int" %}
7977   opcode(0x03);
7978   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7979   ins_pipe(ialu_reg_mem);
7980 %}
7981 
7982 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7983 %{ // Int add, memory += register: matches a load/add/store of the same memory operand as one instruction.
7984   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7985   effect(KILL cr); // flags are clobbered
7986 
7987   ins_cost(150); // XXX
7988   format %{ "addl    $dst, $src\t# int" %}
7989   opcode(0x01); /* Opcode 01 /r */
7990   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // helper arguments are (register, memory), so src is passed first
7991   ins_pipe(ialu_mem_reg);
7992 %}
7993 
7994 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7995 %{ // Int add, memory += immediate: matches a load/add/store of the same memory operand as one instruction.
7996   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7997   effect(KILL cr); // flags are clobbered
7998 
7999   ins_cost(125); // XXX
8000   format %{ "addl    $dst, $src\t# int" %}
8001   opcode(0x81); /* Opcode 81 /0 id */
8002   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension passed to the ModRM helper
8003   ins_pipe(ialu_mem_imm);
8004 %}
8005 
8006 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
8007 %{ // Int increment of a register: AddI with the immI1 operand, formatted as incl.
8008   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8009   match(Set dst (AddI dst src));
8010   effect(KILL cr); // flags are clobbered
8011 
8012   format %{ "incl    $dst\t# int" %}
8013   opcode(0xFF, 0x00); // FF /0
8014   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded
8015   ins_pipe(ialu_reg);
8016 %}
8017 
8018 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
8019 %{ // Int increment in memory: load/AddI(immI1)/store of the same memory operand, formatted as incl.
8020   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8021   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8022   effect(KILL cr); // flags are clobbered
8023 
8024   ins_cost(125); // XXX
8025   format %{ "incl    $dst\t# int" %}
8026   opcode(0xFF); /* Opcode FF /0 */
8027   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst)); // src is not encoded
8028   ins_pipe(ialu_mem_imm);
8029 %}
8030 
8031 // XXX why does that use AddI -- NOTE(review): the decrement is matched as AddI with the immI_M1 operand (presumably the constant -1; confirm in the operand definition)
8032 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
8033 %{ // Int decrement of a register, formatted as decl.
8034   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8035   match(Set dst (AddI dst src));
8036   effect(KILL cr); // flags are clobbered
8037 
8038   format %{ "decl    $dst\t# int" %}
8039   opcode(0xFF, 0x01); // FF /1
8040   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded
8041   ins_pipe(ialu_reg);
8042 %}
8043 
8044 // XXX why does that use AddI -- NOTE(review): the decrement is matched as AddI with the immI_M1 operand (presumably the constant -1; confirm in the operand definition)
8045 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
8046 %{ // Int decrement in memory: load/AddI(immI_M1)/store of the same memory operand, formatted as decl.
8047   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8048   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8049   effect(KILL cr); // flags are clobbered
8050 
8051   ins_cost(125); // XXX
8052   format %{ "decl    $dst\t# int" %}
8053   opcode(0xFF); /* Opcode FF /1 */
8054   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst)); // src is not encoded
8055   ins_pipe(ialu_mem_imm);
8056 %}
8057 
8058 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
8059 %{ // Int add of register + immediate into a separate destination, formatted as leal; declares no flags operand or KILL effect.
8060   match(Set dst (AddI src0 src1));
8061 
8062   ins_cost(110);
8063   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
8064   opcode(0x8D); /* 0x8D /r */
8065   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX 0x67 is emitted first; it is the "addr32" prefix shown in the format
8066   ins_pipe(ialu_reg_reg);
8067 %}
8068 
8069 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8070 %{ // Long add, register += register (two-address form: dst is also the first input).
8071   match(Set dst (AddL dst src));
8072   effect(KILL cr); // flags are clobbered
8073 
8074   format %{ "addq    $dst, $src\t# long" %}
8075   opcode(0x03);
8076   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as addI_rReg, with the wide REX helper
8077   ins_pipe(ialu_reg_reg);
8078 %}
8079 
8080 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8081 %{ // Long add, register += immediate (immL32 operand).
8082   match(Set dst (AddL dst src));
8083   effect(KILL cr); // flags are clobbered
8084 
8085   format %{ "addq    $dst, $src\t# long" %}
8086   opcode(0x81, 0x00); /* /0 id */
8087   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8088   ins_pipe( ialu_reg );
8089 %}
8090 
8091 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8092 %{ // Long add, register += memory (the LoadL is folded into the match).
8093   match(Set dst (AddL dst (LoadL src)));
8094   effect(KILL cr); // flags are clobbered
8095 
8096   ins_cost(125); // XXX
8097   format %{ "addq    $dst, $src\t# long" %}
8098   opcode(0x03);
8099   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8100   ins_pipe(ialu_reg_mem);
8101 %}
8102 
8103 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8104 %{ // Long add, memory += register: matches a load/add/store of the same memory operand as one instruction.
8105   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8106   effect(KILL cr); // flags are clobbered
8107 
8108   ins_cost(150); // XXX
8109   format %{ "addq    $dst, $src\t# long" %}
8110   opcode(0x01); /* Opcode 01 /r */
8111   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // helper arguments are (register, memory), so src is passed first
8112   ins_pipe(ialu_mem_reg);
8113 %}
8114 
8115 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8116 %{ // Long add, memory += immediate: matches a load/add/store of the same memory operand as one instruction.
8117   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8118   effect(KILL cr); // flags are clobbered
8119 
8120   ins_cost(125); // XXX
8121   format %{ "addq    $dst, $src\t# long" %}
8122   opcode(0x81); /* Opcode 81 /0 id */
8123   ins_encode(REX_mem_wide(dst),
8124              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension passed to the ModRM helper
8125   ins_pipe(ialu_mem_imm);
8126 %}
8127 
8128 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
8129 %{ // Long increment of a register: AddL with the immL1 operand, formatted as incq. dst is a long register class (rRegL), as in decL_rReg and the other AddL rules.
8130   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8131   match(Set dst (AddL dst src));
8132   effect(KILL cr); // flags are clobbered
8133 
8134   format %{ "incq    $dst\t# long" %}
8135   opcode(0xFF, 0x00); // FF /0
8136   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // src is not encoded
8137   ins_pipe(ialu_reg);
8138 %}
8139 
8140 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
8141 %{ // Long increment in memory: load/AddL(immL1)/store of the same memory operand, formatted as incq.
8142   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8143   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8144   effect(KILL cr); // flags are clobbered
8145 
8146   ins_cost(125); // XXX
8147   format %{ "incq    $dst\t# long" %}
8148   opcode(0xFF); /* Opcode FF /0 */
8149   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst)); // src is not encoded
8150   ins_pipe(ialu_mem_imm);
8151 %}
8152 
8153 // XXX why does that use AddL -- NOTE(review): the decrement is matched as AddL with the immL_M1 operand (presumably the constant -1; confirm in the operand definition)
8154 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
8155 %{ // Long decrement of a register, formatted as decq.
8156   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8157   match(Set dst (AddL dst src));
8158   effect(KILL cr); // flags are clobbered
8159 
8160   format %{ "decq    $dst\t# long" %}
8161   opcode(0xFF, 0x01); // FF /1
8162   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // src is not encoded
8163   ins_pipe(ialu_reg);
8164 %}
8165 
8166 // XXX why does that use AddL -- NOTE(review): the decrement is matched as AddL with the immL_M1 operand (presumably the constant -1; confirm in the operand definition)
8167 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
8168 %{ // Long decrement in memory: load/AddL(immL_M1)/store of the same memory operand, formatted as decq.
8169   predicate(UseIncDec); // rule is only eligible when the UseIncDec flag is set
8170   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
8171   effect(KILL cr); // flags are clobbered
8172 
8173   ins_cost(125); // XXX
8174   format %{ "decq    $dst\t# long" %}
8175   opcode(0xFF); /* Opcode FF /1 */
8176   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst)); // src is not encoded
8177   ins_pipe(ialu_mem_imm);
8178 %}
8179 
8180 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
8181 %{ // Long add of register + immediate into a separate destination, formatted as leaq; declares no flags operand or KILL effect.
8182   match(Set dst (AddL src0 src1));
8183 
8184   ins_cost(110);
8185   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
8186   opcode(0x8D); /* 0x8D /r */
8187   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
8188   ins_pipe(ialu_reg_reg);
8189 %}
8190 
8191 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
8192 %{ // Pointer add, pointer register += long register offset.
8193   match(Set dst (AddP dst src));
8194   effect(KILL cr); // flags are clobbered
8195 
8196   format %{ "addq    $dst, $src\t# ptr" %}
8197   opcode(0x03);
8198   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same encode sequence as addL_rReg
8199   ins_pipe(ialu_reg_reg);
8200 %}
8201 
8202 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
8203 %{ // Pointer add, pointer register += immediate (immL32 operand).
8204   match(Set dst (AddP dst src));
8205   effect(KILL cr); // flags are clobbered
8206 
8207   format %{ "addq    $dst, $src\t# ptr" %}
8208   opcode(0x81, 0x00); /* /0 id */
8209   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // same encode sequence as addL_rReg_imm
8210   ins_pipe( ialu_reg );
8211 %}
8212 
8213 // XXX addP mem ops ????
8214 
8215 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
8216 %{ // Pointer add of register + immediate into a separate destination, formatted as leaq; declares no flags operand or KILL effect.
8217   match(Set dst (AddP src0 src1));
8218 
8219   ins_cost(110);
8220   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
8221   opcode(0x8D); /* 0x8D /r */
8222   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
8223   ins_pipe(ialu_reg_reg);
8224 %}
8225 
8226 instruct checkCastPP(rRegP dst)
8227 %{ // CheckCastPP on a pointer register: a zero-size rule with an empty encoding.
8228   match(Set dst (CheckCastPP dst));
8229 
8230   size(0); // declared size is zero bytes
8231   format %{ "# checkcastPP of $dst" %}
8232   ins_encode(/* empty encoding */);
8233   ins_pipe(empty);
8234 %}
8235 
8236 instruct castPP(rRegP dst)
8237 %{ // CastPP on a pointer register: a zero-size rule with an empty encoding.
8238   match(Set dst (CastPP dst));
8239 
8240   size(0); // declared size is zero bytes
8241   format %{ "# castPP of $dst" %}
8242   ins_encode(/* empty encoding */);
8243   ins_pipe(empty);
8244 %}
8245 
8246 instruct castII(rRegI dst)
8247 %{ // CastII on an int register: a zero-size, zero-cost rule with an empty encoding.
8248   match(Set dst (CastII dst));
8249 
8250   size(0); // declared size is zero bytes
8251   format %{ "# castII of $dst" %}
8252   ins_encode(/* empty encoding */);
8253   ins_cost(0); // unlike checkCastPP/castPP, an explicit zero cost is given here
8254   ins_pipe(empty);
8255 %}
8256 
8257 // LoadP-locked: encoded the same as a regular 64-bit pointer load (movq) when used with compare-swap
8258 instruct loadPLocked(rRegP dst, memory mem)
8259 %{
8260   match(Set dst (LoadPLocked mem));
8261 
8262   ins_cost(125); // XXX
8263   format %{ "movq    $dst, $mem\t# ptr locked" %}
8264   opcode(0x8B);
8265   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // no lock_prefix is emitted, unlike the conditional-store rules in this file
8266   ins_pipe(ialu_reg_mem); // XXX
8267 %}
8268 
8269 // LoadL-locked: encoded the same as a regular 64-bit long load (movq) when used with compare-swap
8270 instruct loadLLocked(rRegL dst, memory mem)
8271 %{
8272   match(Set dst (LoadLLocked mem));
8273 
8274   ins_cost(125); // XXX
8275   format %{ "movq    $dst, $mem\t# long locked" %}
8276   opcode(0x8B);
8277   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // no lock_prefix is emitted, unlike the conditional-store rules in this file
8278   ins_pipe(ialu_reg_mem); // XXX
8279 %}
8280 
8281 // Conditional-store of the updated heap-top.
8282 // Used during allocation of the shared heap.
8283 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8284 
8285 instruct storePConditional(memory heap_top_ptr,
8286                            rax_RegP oldval, rRegP newval,
8287                            rFlagsReg cr)
8288 %{ // Conditional pointer store; the rule's result is the flags register (Set cr ...). oldval uses the rax_RegP operand class.
8289   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8290  
8291   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
8292             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
8293   opcode(0x0F, 0xB1); // NOTE(review): unlike storeIConditional/storeLConditional, no effect(KILL oldval) is declared for this rule - confirm intentional
8294   ins_encode(lock_prefix,
8295              REX_reg_mem_wide(newval, heap_top_ptr),
8296              OpcP, OpcS,
8297              reg_mem(newval, heap_top_ptr));
8298   ins_pipe(pipe_cmpxchg);
8299 %}
8300 
8301 // Conditional-store of an int value; the rule's result is the flags register (Set cr ...).
8302 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8303 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
8304 %{
8305   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8306   effect(KILL oldval); // oldval (rax_RegI operand class) is declared killed by the instruction
8307 
8308   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8309   opcode(0x0F, 0xB1);
8310   ins_encode(lock_prefix,
8311              REX_reg_mem(newval, mem),
8312              OpcP, OpcS,
8313              reg_mem(newval, mem));
8314   ins_pipe(pipe_cmpxchg);
8315 %}
8316 
8317 // Conditional-store of a long value; the rule's result is the flags register (Set cr ...).
8318 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
8319 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
8320 %{
8321   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8322   effect(KILL oldval); // oldval (rax_RegL operand class) is declared killed by the instruction
8323 
8324   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
8325   opcode(0x0F, 0xB1);
8326   ins_encode(lock_prefix,
8327              REX_reg_mem_wide(newval, mem),
8328              OpcP, OpcS,
8329              reg_mem(newval, mem));
8330   ins_pipe(pipe_cmpxchg);
8331 %}
8332 
8333 
8334 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them; each rule instead produces an int result in $res.
8335 instruct compareAndSwapP(rRegI res,
8336                          memory mem_ptr,
8337                          rax_RegP oldval, rRegP newval,
8338                          rFlagsReg cr)
8339 %{ // Pointer compare-and-swap: lock cmpxchgq, then sete/movzbl to put the outcome in $res (per the format below).
8340   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8341   effect(KILL cr, KILL oldval); // flags and oldval are both declared killed
8342 
8343   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8344             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8345             "sete    $res\n\t"
8346             "movzbl  $res, $res" %}
8347   opcode(0x0F, 0xB1);
8348   ins_encode(lock_prefix,
8349              REX_reg_mem_wide(newval, mem_ptr),
8350              OpcP, OpcS,
8351              reg_mem(newval, mem_ptr),
8352              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8353              REX_reg_breg(res, res), // movzbl
8354              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8355   ins_pipe( pipe_cmpxchg );
8356 %}
8357 
8358 instruct compareAndSwapL(rRegI res,
8359                          memory mem_ptr,
8360                          rax_RegL oldval, rRegL newval,
8361                          rFlagsReg cr)
8362 %{ // Long compare-and-swap: lock cmpxchgq, then sete/movzbl to put the outcome in $res (per the format below).
8363   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8364   effect(KILL cr, KILL oldval); // flags and oldval are both declared killed
8365 
8366   format %{ "cmpxchgq $mem_ptr,$newval\t# "
8367             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8368             "sete    $res\n\t"
8369             "movzbl  $res, $res" %}
8370   opcode(0x0F, 0xB1);
8371   ins_encode(lock_prefix,
8372              REX_reg_mem_wide(newval, mem_ptr),
8373              OpcP, OpcS,
8374              reg_mem(newval, mem_ptr),
8375              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8376              REX_reg_breg(res, res), // movzbl
8377              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8378   ins_pipe( pipe_cmpxchg );
8379 %}
8380 
8381 instruct compareAndSwapI(rRegI res,
8382                          memory mem_ptr,
8383                          rax_RegI oldval, rRegI newval,
8384                          rFlagsReg cr)
8385 %{ // Int compare-and-swap: lock cmpxchgl, then sete/movzbl to put the outcome in $res (per the format below).
8386   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8387   effect(KILL cr, KILL oldval); // flags and oldval are both declared killed
8388 
8389   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8390             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8391             "sete    $res\n\t"
8392             "movzbl  $res, $res" %}
8393   opcode(0x0F, 0xB1);
8394   ins_encode(lock_prefix,
8395              REX_reg_mem(newval, mem_ptr),
8396              OpcP, OpcS,
8397              reg_mem(newval, mem_ptr),
8398              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8399              REX_reg_breg(res, res), // movzbl
8400              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8401   ins_pipe( pipe_cmpxchg );
8402 %}
8403 
8404 
8405 instruct compareAndSwapN(rRegI res,
8406                           memory mem_ptr,
8407                           rax_RegN oldval, rRegN newval,
8408                           rFlagsReg cr) %{ // Compressed-pointer (rRegN) compare-and-swap: lock cmpxchgl, then sete/movzbl to put the outcome in $res (per the format below).
8409   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
8410   effect(KILL cr, KILL oldval); // flags and oldval are both declared killed
8411 
8412   format %{ "cmpxchgl $mem_ptr,$newval\t# "
8413             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
8414             "sete    $res\n\t"
8415             "movzbl  $res, $res" %}
8416   opcode(0x0F, 0xB1);
8417   ins_encode(lock_prefix,
8418              REX_reg_mem(newval, mem_ptr),
8419              OpcP, OpcS,
8420              reg_mem(newval, mem_ptr),
8421              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
8422              REX_reg_breg(res, res), // movzbl
8423              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
8424   ins_pipe( pipe_cmpxchg );
8425 %}
8426 
8427 //----------Subtraction Instructions-------------------------------------------
8428 
8429 // Integer Subtraction Instructions. First form: int subtract, register -= register.
8430 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8431 %{
8432   match(Set dst (SubI dst src));
8433   effect(KILL cr); // flags are clobbered
8434 
8435   format %{ "subl    $dst, $src\t# int" %}
8436   opcode(0x2B);
8437   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8438   ins_pipe(ialu_reg_reg);
8439 %}
8440 
8441 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8442 %{ // Int subtract, register -= immediate.
8443   match(Set dst (SubI dst src));
8444   effect(KILL cr); // flags are clobbered
8445 
8446   format %{ "subl    $dst, $src\t# int" %}
8447   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8448   ins_encode(OpcSErm(dst, src), Con8or32(src));
8449   ins_pipe(ialu_reg);
8450 %}
8451 
8452 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8453 %{ // Int subtract, register -= memory (the LoadI is folded into the match).
8454   match(Set dst (SubI dst (LoadI src)));
8455   effect(KILL cr); // flags are clobbered
8456 
8457   ins_cost(125);
8458   format %{ "subl    $dst, $src\t# int" %}
8459   opcode(0x2B);
8460   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8461   ins_pipe(ialu_reg_mem);
8462 %}
8463 
8464 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8465 %{ // Int subtract, memory -= register: matches a load/sub/store of the same memory operand as one instruction.
8466   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8467   effect(KILL cr); // flags are clobbered
8468 
8469   ins_cost(150);
8470   format %{ "subl    $dst, $src\t# int" %}
8471   opcode(0x29); /* Opcode 29 /r */
8472   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // helper arguments are (register, memory), so src is passed first
8473   ins_pipe(ialu_mem_reg);
8474 %}
8475 
8476 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8477 %{ // Int subtract, memory -= immediate: matches a load/sub/store of the same memory operand as one instruction.
8478   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8479   effect(KILL cr); // flags are clobbered
8480 
8481   ins_cost(125); // XXX
8482   format %{ "subl    $dst, $src\t# int" %}
8483   opcode(0x81); /* Opcode 81 /5 id */
8484   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src)); // 0x05 is the /5 opcode extension passed to the ModRM helper
8485   ins_pipe(ialu_mem_imm);
8486 %}
8487 
8488 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8489 %{ // Long subtract, register -= register.
8490   match(Set dst (SubL dst src));
8491   effect(KILL cr); // flags are clobbered
8492 
8493   format %{ "subq    $dst, $src\t# long" %}
8494   opcode(0x2B);
8495   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same opcode as subI_rReg, with the wide REX helper
8496   ins_pipe(ialu_reg_reg);
8497 %}
8498 
8499 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8500 %{ // Long subtract, register -= immediate (immL32 operand). dst is a long register class (rRegL), as in subL_rReg and subL_rReg_mem.
8501   match(Set dst (SubL dst src));
8502   effect(KILL cr); // flags are clobbered
8503 
8504   format %{ "subq    $dst, $src\t# long" %}
8505   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8506   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8507   ins_pipe(ialu_reg);
8508 %}
8509 
8510 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8511 %{ // Long subtract, register -= memory (the LoadL is folded into the match).
8512   match(Set dst (SubL dst (LoadL src)));
8513   effect(KILL cr); // flags are clobbered
8514 
8515   ins_cost(125);
8516   format %{ "subq    $dst, $src\t# long" %}
8517   opcode(0x2B);
8518   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8519   ins_pipe(ialu_reg_mem);
8520 %}
8521 
8522 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8523 %{ // Long subtract, memory -= register: matches a load/sub/store of the same memory operand as one instruction.
8524   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8525   effect(KILL cr); // flags are clobbered
8526 
8527   ins_cost(150);
8528   format %{ "subq    $dst, $src\t# long" %}
8529   opcode(0x29); /* Opcode 29 /r */
8530   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // helper arguments are (register, memory), so src is passed first
8531   ins_pipe(ialu_mem_reg);
8532 %}
8533 
8534 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8535 %{ // Long subtract, memory -= immediate: matches a load/sub/store of the same memory operand as one instruction.
8536   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8537   effect(KILL cr); // flags are clobbered
8538 
8539   ins_cost(125); // XXX
8540   format %{ "subq    $dst, $src\t# long" %}
8541   opcode(0x81); /* Opcode 81 /5 id */
8542   ins_encode(REX_mem_wide(dst),
8543              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src)); // 0x05 is the /5 opcode extension passed to the ModRM helper
8544   ins_pipe(ialu_mem_imm);
8545 %}
8546 
8547 // Subtract from a pointer: matches AddP of dst with a negated value (zero - src).
8548 // XXX hmpf??? NOTE(review): the negation is an int SubI of an rRegI, yet the encoding is the 64-bit subq on the same register - confirm this rule is intended and reachable.
8549 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8550 %{
8551   match(Set dst (AddP dst (SubI zero src)));
8552   effect(KILL cr); // flags are clobbered
8553 
8554   format %{ "subq    $dst, $src\t# ptr - int" %}
8555   opcode(0x2B);
8556   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // the zero operand is not encoded
8557   ins_pipe(ialu_reg_reg);
8558 %}
8559 
8560 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8561 %{ // Int negate of a register: matches (zero - dst) and formats it as negl.
8562   match(Set dst (SubI zero dst));
8563   effect(KILL cr); // flags are clobbered
8564 
8565   format %{ "negl    $dst\t# int" %}
8566   opcode(0xF7, 0x03);  // Opcode F7 /3
8567   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // the zero operand is not encoded
8568   ins_pipe(ialu_reg);
8569 %}
8570 
8571 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8572 %{ // Int negate in memory: matches a load/(zero - value)/store of the same memory operand and formats it as negl.
8573   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8574   effect(KILL cr); // flags are clobbered
8575 
8576   format %{ "negl    $dst\t# int" %}
8577   opcode(0xF7, 0x03);  // Opcode F7 /3
8578   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value (0x03) as the opcode extension
8579   ins_pipe(ialu_reg); // NOTE(review): other memory-destination rules in this file use ialu_mem_* pipes and an explicit ins_cost - confirm this is intended
8580 %}
8581 
8582 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8583 %{ // Long negate of a register: matches (zero - dst) and formats it as negq.
8584   match(Set dst (SubL zero dst));
8585   effect(KILL cr); // flags are clobbered
8586 
8587   format %{ "negq    $dst\t# long" %}
8588   opcode(0xF7, 0x03);  // Opcode F7 /3
8589   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // the zero operand is not encoded
8590   ins_pipe(ialu_reg);
8591 %}
8592 
8593 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8594 %{ // Long negate in memory: matches a load/(zero - value)/store of the same memory operand and formats it as negq.
8595   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8596   effect(KILL cr); // flags are clobbered
8597 
8598   format %{ "negq    $dst\t# long" %}
8599   opcode(0xF7, 0x03);  // Opcode F7 /3
8600   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst)); // "secondary" refers to the second opcode() value (0x03) as the opcode extension
8601   ins_pipe(ialu_reg); // NOTE(review): other memory-destination rules in this file use ialu_mem_* pipes and an explicit ins_cost - confirm this is intended
8602 %}
8603 
8604 
8605 //----------Multiplication/Division Instructions-------------------------------
8606 // Integer Multiplication Instructions
8607 // Multiply Register
8608 
8609 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8610 %{ // Int multiply, register *= register (two-operand imull).
8611   match(Set dst (MulI dst src));
8612   effect(KILL cr); // flags are clobbered
8613 
8614   ins_cost(300);
8615   format %{ "imull   $dst, $src\t# int" %}
8616   opcode(0x0F, 0xAF);
8617   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src)); // OpcP/OpcS emit the two opcode() bytes in order
8618   ins_pipe(ialu_reg_reg_alu0);
8619 %}
8620 
8621 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8622 %{ // Int multiply, dst = register * immediate (three-operand imull; dst is not an input).
8623   match(Set dst (MulI src imm));
8624   effect(KILL cr); // flags are clobbered
8625 
8626   ins_cost(300);
8627   format %{ "imull   $dst, $src, $imm\t# int" %}
8628   opcode(0x69); /* 69 /r id */
8629   ins_encode(REX_reg_reg(dst, src),
8630              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8631   ins_pipe(ialu_reg_reg_alu0);
8632 %}
8633 
8634 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8635 %{ // Int multiply, register *= memory (the LoadI is folded into the match).
8636   match(Set dst (MulI dst (LoadI src)));
8637   effect(KILL cr); // flags are clobbered
8638 
8639   ins_cost(350); // costed above the register form (300)
8640   format %{ "imull   $dst, $src\t# int" %}
8641   opcode(0x0F, 0xAF);
8642   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8643   ins_pipe(ialu_reg_mem_alu0);
8644 %}
8645 
8646 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8647 %{ // Int multiply, dst = memory * immediate (three-operand imull with the LoadI folded into the match).
8648   match(Set dst (MulI (LoadI src) imm));
8649   effect(KILL cr); // flags are clobbered
8650 
8651   ins_cost(300);
8652   format %{ "imull   $dst, $src, $imm\t# int" %}
8653   opcode(0x69); /* 69 /r id */
8654   ins_encode(REX_reg_mem(dst, src),
8655              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8656   ins_pipe(ialu_reg_mem_alu0);
8657 %}
8658 
8659 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8660 %{ // Long multiply, register *= register (two-operand imulq).
8661   match(Set dst (MulL dst src));
8662   effect(KILL cr); // flags are clobbered
8663 
8664   ins_cost(300);
8665   format %{ "imulq   $dst, $src\t# long" %}
8666   opcode(0x0F, 0xAF);
8667   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src)); // same opcode bytes as mulI_rReg, with the wide REX helper
8668   ins_pipe(ialu_reg_reg_alu0);
8669 %}
8670 
8671 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8672 %{
       // Long multiply of a register by an immediate, three-operand form:
       // dst = src * imm. The immediate operand class is immL32. Flags are killed.
8673   match(Set dst (MulL src imm));
8674   effect(KILL cr);
8675 
8676   ins_cost(300);
8677   format %{ "imulq   $dst, $src, $imm\t# long" %}
8678   opcode(0x69); /* 69 /r id */
8679   ins_encode(REX_reg_reg_wide(dst, src),
8680              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8681   ins_pipe(ialu_reg_reg_alu0);
8682 %}
8683 
8684 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8685 %{
       // Long multiply with the second factor loaded from memory: the LoadL is
       // folded into the instruction (dst = dst * [src]). Flags are killed.
8686   match(Set dst (MulL dst (LoadL src)));
8687   effect(KILL cr);
8688 
8689   ins_cost(350);
8690   format %{ "imulq   $dst, $src\t# long" %}
8691   opcode(0x0F, 0xAF);
8692   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8693   ins_pipe(ialu_reg_mem_alu0);
8694 %}
8695 
8696 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8697 %{
       // Long multiply of a memory operand by an immediate: dst = [src] * imm.
       // Flags are killed.
8698   match(Set dst (MulL (LoadL src) imm));
8699   effect(KILL cr);
8700 
       // NOTE(review): cost is 300 here but 350 on mulL_mem, although both
       // fold a load -- confirm this is intended.
8701   ins_cost(300);
8702   format %{ "imulq   $dst, $src, $imm\t# long" %}
8703   opcode(0x69); /* 69 /r id */
8704   ins_encode(REX_reg_mem_wide(dst, src),
8705              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8706   ins_pipe(ialu_reg_mem_alu0);
8707 %}
8708 
8709 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8710 %{
       // High half of a long multiply (MulHiL). The result operand is of class
       // rdx_RegL; rax is consumed as an input and then killed (USE_KILL), and
       // the flags are killed.
8711   match(Set dst (MulHiL src rax));
8712   effect(USE_KILL rax, KILL cr);
8713 
8714   ins_cost(300);
8715   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
       // One-operand form: only src appears in the encoding.
8716   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8717   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8718   ins_pipe(ialu_reg_reg_alu0);
8719 %}
8720 
8721 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8722                    rFlagsReg cr)
8723 %{
       // Int division. The dividend and the quotient both use the rax operand;
       // rdx and the flags are killed. The divisor uses class no_rax_rdx_RegI.
8724   match(Set rax (DivI rax div));
8725   effect(KILL rdx, KILL cr);
8726 
8727   ins_cost(30*100+10*100); // XXX
       // The format text shows a guard: when rax == 0x80000000 and $div == -1
       // the idivl is skipped and rdx is zeroed. cdql_enc is defined elsewhere;
       // presumably it emits that guard plus the cdql -- confirm.
8728   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8729             "jne,s   normal\n\t"
8730             "xorl    rdx, rdx\n\t"
8731             "cmpl    $div, -1\n\t"
8732             "je,s    done\n"
8733     "normal: cdql\n\t"
8734             "idivl   $div\n"
8735     "done:"        %}
8736   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8737   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8738   ins_pipe(ialu_reg_reg_alu0);
8739 %}
8740 
8741 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8742                    rFlagsReg cr)
8743 %{
       // Long division. The dividend and the quotient both use the rax operand;
       // rdx and the flags are killed. The divisor uses class no_rax_rdx_RegL.
8744   match(Set rax (DivL rax div));
8745   effect(KILL rdx, KILL cr);
8746 
8747   ins_cost(30*100+10*100); // XXX
       // The format text shows a guard: when rax == 0x8000000000000000 and
       // $div == -1 the idivq is skipped and rdx is zeroed. cdqq_enc is defined
       // elsewhere; presumably it emits that guard plus the cdqq -- confirm.
8748   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8749             "cmpq    rax, rdx\n\t"
8750             "jne,s   normal\n\t"
8751             "xorl    rdx, rdx\n\t"
8752             "cmpq    $div, -1\n\t"
8753             "je,s    done\n"
8754     "normal: cdqq\n\t"
8755             "idivq   $div\n"
8756     "done:"        %}
8757   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8758   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8759   ins_pipe(ialu_reg_reg_alu0);
8760 %}
8761 
8762 // Integer DIVMOD with Register, both quotient and mod results
8763 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8764                              rFlagsReg cr)
8765 %{
       // Combined int divide/modulo node (DivModI). The match rule has no
       // "Set"; rdx is declared as an operand and, unlike in divI_rReg, is not
       // killed -- only the flags are.
8766   match(DivModI rax div);
8767   effect(KILL cr);
8768 
8769   ins_cost(30*100+10*100); // XXX
       // Same format text and encoding list as divI_rReg; only the pipeline
       // class differs (pipe_slow).
8770   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8771             "jne,s   normal\n\t"
8772             "xorl    rdx, rdx\n\t"
8773             "cmpl    $div, -1\n\t"
8774             "je,s    done\n"
8775     "normal: cdql\n\t"
8776             "idivl   $div\n"
8777     "done:"        %}
8778   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8779   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8780   ins_pipe(pipe_slow);
8781 %}
8782 
8783 // Long DIVMOD with Register, both quotient and mod results
8784 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8785                              rFlagsReg cr)
8786 %{
       // Combined long divide/modulo node (DivModL). The match rule has no
       // "Set"; rdx is declared as an operand and, unlike in divL_rReg, is not
       // killed -- only the flags are.
8787   match(DivModL rax div);
8788   effect(KILL cr);
8789 
8790   ins_cost(30*100+10*100); // XXX
       // Same format text and encoding list as divL_rReg; only the pipeline
       // class differs (pipe_slow).
8791   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8792             "cmpq    rax, rdx\n\t"
8793             "jne,s   normal\n\t"
8794             "xorl    rdx, rdx\n\t"
8795             "cmpq    $div, -1\n\t"
8796             "je,s    done\n"
8797     "normal: cdqq\n\t"
8798             "idivq   $div\n"
8799     "done:"        %}
8800   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8801   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8802   ins_pipe(pipe_slow);
8803 %}
8804 
8805 //----------- DivL-By-Constant-Expansions--------------------------------------
8806 // DivI cases are handled by the compiler
8807 
8808 // Magic constant, reciprocal of 10
8809 instruct loadConL_0x6666666666666667(rRegL dst)
8810 %{
       // Expand-only helper (no match rule): defines dst with the 64-bit
       // constant 0x6666666666666667, the multiplier used by divL_10.
8811   effect(DEF dst);
8812 
       // The printed constant now has all 16 hex digits, matching the value
       // handed to load_immL below.
8813   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8814   ins_encode(load_immL(dst, 0x6666666666666667));
8815   ins_pipe(ialu_reg);
8816 %}
8817 
8818 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8819 %{
       // Expand-only helper (no match rule), used by divL_10. Defines dst
       // (class rdx_RegL), reads src, consumes and kills rax, kills the flags.
       // Opcode and encoding list are the same as in mulHiL_rReg.
8820   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8821 
8822   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8823   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8824   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8825   ins_pipe(ialu_reg_reg_alu0);
8826 %}
8827 
8828 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8829 %{
       // Expand-only helper (no match rule), used by divL_10: arithmetic shift
       // right of dst by a hard-coded 63 (0x3F). dst is read and written in
       // place; the flags are killed.
8830   effect(USE_DEF dst, KILL cr);
8831 
8832   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8833   opcode(0xC1, 0x7); /* C1 /7 ib */
8834   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8835   ins_pipe(ialu_reg);
8836 %}
8837 
8838 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8839 %{
       // Expand-only helper (no match rule), used by divL_10: arithmetic shift
       // right of dst by a hard-coded 2. dst is read and written in place; the
       // flags are killed.
8840   effect(USE_DEF dst, KILL cr);
8841 
8842   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8843   opcode(0xC1, 0x7); /* C1 /7 ib */
8844   ins_encode(reg_opc_imm_wide(dst, 0x2));
8845   ins_pipe(ialu_reg);
8846 %}
8847 
8848 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8849 %{
       // Long division by the constant operand immL10, expanded into a
       // multiply-high and shifts instead of an idivq:
       //   dst = (high64(src * 0x6666666666666667) >> 2) - (src >> 63)
       // 0x6666666666666667 equals ceil(2^66 / 10), so the high half shifted
       // right by 2 is src * M >> 66; subtracting the sign word (src >> 63,
       // i.e. 0 or -1) adds one for negative dividends.
8850   match(Set dst (DivL src div));
8851 
8852   ins_cost((5+8)*100);
8853   expand %{
8854     rax_RegL rax;                     // Killed temp
8855     rFlagsReg cr;                     // Killed
8856     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8857     mul_hi(dst, src, rax, cr);        // imulq rdx:rax = rax * $src (dst gets the high half)
         // NOTE(review): sarL_rReg_63 declares its operand USE_DEF, so this
         // step overwrites $src in place -- confirm clobbering the input is intended.
8858     sarL_rReg_63(src, cr);            // sarq  src, 63
8859     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8860     subL_rReg(dst, src, cr);          // long subtract: rdx -= src (subL_rReg is defined elsewhere)
8861   %}
8862 %}
8863 
8864 //-----------------------------------------------------------------------------
8865 
8866 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8867                    rFlagsReg cr)
8868 %{
       // Int remainder. The dividend is the rax operand and the result is set
       // in the rdx operand; rax and the flags are killed.
8869   match(Set rdx (ModI rax div));
8870   effect(KILL rax, KILL cr);
8871 
       // NOTE(review): cost is 300 here, while divI_rReg uses 30*100+10*100
       // for the same format text and encoding list -- confirm intended.
8872   ins_cost(300); // XXX
8873   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8874             "jne,s   normal\n\t"
8875             "xorl    rdx, rdx\n\t"
8876             "cmpl    $div, -1\n\t"
8877             "je,s    done\n"
8878     "normal: cdql\n\t"
8879             "idivl   $div\n"
8880     "done:"        %}
8881   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8882   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8883   ins_pipe(ialu_reg_reg_alu0);
8884 %}
8885 
8886 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8887                    rFlagsReg cr)
8888 %{
       // Long remainder. The dividend is the rax operand and the result is set
       // in the rdx operand; rax and the flags are killed.
8889   match(Set rdx (ModL rax div));
8890   effect(KILL rax, KILL cr);
8891 
       // NOTE(review): cost is 300 here, while divL_rReg uses 30*100+10*100
       // for the same format text and encoding list -- confirm intended.
8892   ins_cost(300); // XXX
8893   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8894             "cmpq    rax, rdx\n\t"
8895             "jne,s   normal\n\t"
8896             "xorl    rdx, rdx\n\t"
8897             "cmpq    $div, -1\n\t"
8898             "je,s    done\n"
8899     "normal: cdqq\n\t"
8900             "idivq   $div\n"
8901     "done:"        %}
8902   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8903   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8904   ins_pipe(ialu_reg_reg_alu0);
8905 %}
8906 
8907 // Integer Shift Instructions
8908 // Shift Left by one
8909 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8910 %{
       // Int left shift of a register by an immI1 count, in place on dst.
       // The encoding list carries no immediate; the flags are killed.
8911   match(Set dst (LShiftI dst shift));
8912   effect(KILL cr);
8913 
8914   format %{ "sall    $dst, $shift" %}
8915   opcode(0xD1, 0x4); /* D1 /4 */
8916   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8917   ins_pipe(ialu_reg);
8918 %}
8919 
8920 // Shift Left by one
8921 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8922 %{
       // Read-modify-write form: matches a StoreI to dst of (LoadI dst) shifted
       // left by an immI1 count, i.e. the int in memory is shifted in place.
       // The flags are killed.
8923   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8924   effect(KILL cr);
8925 
8926   format %{ "sall    $dst, $shift\t" %}
8927   opcode(0xD1, 0x4); /* D1 /4 */
       // The secondary opcode value (0x4) is passed to RM_opc_mem as the /4 field.
8928   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8929   ins_pipe(ialu_mem_imm);
8930 %}
8931 
8932 // Shift Left by 8-bit immediate
8933 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8934 %{
       // Int left shift of a register by an 8-bit immediate (immI8), in place
       // on dst. The flags are killed.
8935   match(Set dst (LShiftI dst shift));
8936   effect(KILL cr);
8937 
8938   format %{ "sall    $dst, $shift" %}
8939   opcode(0xC1, 0x4); /* C1 /4 ib */
       // reg_opc_imm (defined elsewhere) is the only encoder listed.
8940   ins_encode(reg_opc_imm(dst, shift));
8941   ins_pipe(ialu_reg);
8942 %}
8943 
8944 // Shift Left by 8-bit immediate
8945 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8946 %{
       // Read-modify-write form: the int at dst is loaded, shifted left by an
       // 8-bit immediate and stored back to the same address. Flags are killed.
8947   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8948   effect(KILL cr);
8949 
8950   format %{ "sall    $dst, $shift" %}
8951   opcode(0xC1, 0x4); /* C1 /4 ib */
       // The shift count is appended last through Con8or32(shift).
8952   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8953   ins_pipe(ialu_mem_imm);
8954 %}
8955 
8956 // Shift Left by variable
8957 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8958 %{
       // Int left shift of a register by a variable count. The count operand
       // is of class rcx_RegI and does not appear in the encoding list, which
       // names only dst. The flags are killed.
8959   match(Set dst (LShiftI dst shift));
8960   effect(KILL cr);
8961 
8962   format %{ "sall    $dst, $shift" %}
8963   opcode(0xD3, 0x4); /* D3 /4 */
8964   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8965   ins_pipe(ialu_reg_reg);
8966 %}
8967 
8968 // Shift Left by variable
8969 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8970 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the int
       // at dst is loaded, shifted left and stored back. Flags are killed.
8971   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8972   effect(KILL cr);
8973 
8974   format %{ "sall    $dst, $shift" %}
8975   opcode(0xD3, 0x4); /* D3 /4 */
8976   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8977   ins_pipe(ialu_mem_reg);
8978 %}
8979 
8980 // Arithmetic shift right by one
8981 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8982 %{
       // Int arithmetic right shift (RShiftI) of a register by an immI1 count,
       // in place on dst. No immediate in the encoding list; flags are killed.
8983   match(Set dst (RShiftI dst shift));
8984   effect(KILL cr);
8985 
8986   format %{ "sarl    $dst, $shift" %}
8987   opcode(0xD1, 0x7); /* D1 /7 */
8988   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8989   ins_pipe(ialu_reg);
8990 %}
8991 
8992 // Arithmetic shift right by one
8993 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8994 %{
       // Read-modify-write form: the int at dst is loaded, arithmetically
       // shifted right by an immI1 count and stored back. Flags are killed.
8995   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8996   effect(KILL cr);
8997 
8998   format %{ "sarl    $dst, $shift" %}
8999   opcode(0xD1, 0x7); /* D1 /7 */
9000   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9001   ins_pipe(ialu_mem_imm);
9002 %}
9003 
9004 // Arithmetic Shift Right by 8-bit immediate
9005 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9006 %{
       // Int arithmetic right shift (RShiftI) of a register by an 8-bit
       // immediate (immI8), in place on dst. The flags are killed.
9007   match(Set dst (RShiftI dst shift));
9008   effect(KILL cr);
9009 
9010   format %{ "sarl    $dst, $shift" %}
9011   opcode(0xC1, 0x7); /* C1 /7 ib */
9012   ins_encode(reg_opc_imm(dst, shift));
       // This instruction has no memory operand, so it uses the register
       // pipeline class, like salI_rReg_imm and shrI_rReg_imm.
9013   ins_pipe(ialu_reg);
9014 %}
9015 
9016 // Arithmetic Shift Right by 8-bit immediate
9017 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9018 %{
       // Read-modify-write form: the int at dst is loaded, arithmetically
       // shifted right by an 8-bit immediate and stored back. Flags are killed.
9019   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9020   effect(KILL cr);
9021 
9022   format %{ "sarl    $dst, $shift" %}
9023   opcode(0xC1, 0x7); /* C1 /7 ib */
       // The shift count is appended last through Con8or32(shift).
9024   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9025   ins_pipe(ialu_mem_imm);
9026 %}
9027 
9028 // Arithmetic Shift Right by variable
9029 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9030 %{
       // Int arithmetic right shift of a register by a variable count. The
       // count operand is of class rcx_RegI and is not named in the encoding
       // list. The flags are killed.
9031   match(Set dst (RShiftI dst shift));
9032   effect(KILL cr);
9033 
9034   format %{ "sarl    $dst, $shift" %}
9035   opcode(0xD3, 0x7); /* D3 /7 */
9036   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9037   ins_pipe(ialu_reg_reg);
9038 %}
9039 
9040 // Arithmetic Shift Right by variable
9041 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9042 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the int
       // at dst is loaded, arithmetically shifted right and stored back.
       // The flags are killed.
9043   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9044   effect(KILL cr);
9045 
9046   format %{ "sarl    $dst, $shift" %}
9047   opcode(0xD3, 0x7); /* D3 /7 */
9048   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9049   ins_pipe(ialu_mem_reg);
9050 %}
9051 
9052 // Logical shift right by one
9053 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9054 %{
       // Int logical (unsigned) right shift (URShiftI) of a register by an
       // immI1 count, in place on dst. No immediate in the encoding list;
       // the flags are killed.
9055   match(Set dst (URShiftI dst shift));
9056   effect(KILL cr);
9057 
9058   format %{ "shrl    $dst, $shift" %}
9059   opcode(0xD1, 0x5); /* D1 /5 */
9060   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9061   ins_pipe(ialu_reg);
9062 %}
9063 
9064 // Logical shift right by one
9065 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9066 %{
       // Read-modify-write form: the int at dst is loaded, logically shifted
       // right by an immI1 count and stored back. Flags are killed.
9067   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9068   effect(KILL cr);
9069 
9070   format %{ "shrl    $dst, $shift" %}
9071   opcode(0xD1, 0x5); /* D1 /5 */
9072   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9073   ins_pipe(ialu_mem_imm);
9074 %}
9075 
9076 // Logical Shift Right by 8-bit immediate
9077 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9078 %{
       // Int logical right shift of a register by an 8-bit immediate (immI8),
       // in place on dst. The flags are killed.
9079   match(Set dst (URShiftI dst shift));
9080   effect(KILL cr);
9081 
9082   format %{ "shrl    $dst, $shift" %}
9083   opcode(0xC1, 0x5); /* C1 /5 ib */
9084   ins_encode(reg_opc_imm(dst, shift));
9085   ins_pipe(ialu_reg);
9086 %}
9087 
9088 // Logical Shift Right by 8-bit immediate
9089 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9090 %{
       // Read-modify-write form: the int at dst is loaded, logically shifted
       // right by an 8-bit immediate and stored back. Flags are killed.
9091   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9092   effect(KILL cr);
9093 
9094   format %{ "shrl    $dst, $shift" %}
9095   opcode(0xC1, 0x5); /* C1 /5 ib */
       // The shift count is appended last through Con8or32(shift).
9096   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9097   ins_pipe(ialu_mem_imm);
9098 %}
9099 
9100 // Logical Shift Right by variable
9101 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9102 %{
       // Int logical right shift of a register by a variable count. The count
       // operand is of class rcx_RegI and is not named in the encoding list.
       // The flags are killed.
9103   match(Set dst (URShiftI dst shift));
9104   effect(KILL cr);
9105 
9106   format %{ "shrl    $dst, $shift" %}
9107   opcode(0xD3, 0x5); /* D3 /5 */
9108   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9109   ins_pipe(ialu_reg_reg);
9110 %}
9111 
9112 // Logical Shift Right by variable
9113 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9114 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the int
       // at dst is loaded, logically shifted right and stored back.
       // The flags are killed.
9115   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9116   effect(KILL cr);
9117 
9118   format %{ "shrl    $dst, $shift" %}
9119   opcode(0xD3, 0x5); /* D3 /5 */
9120   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9121   ins_pipe(ialu_mem_reg);
9122 %}
9123 
9124 // Long Shift Instructions
9125 // Shift Left by one
9126 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9127 %{
       // Long left shift of a register by an immI1 count, in place on dst.
       // Same opcode as salI_rReg_1 but with the _wide REX encoder.
       // The flags are killed.
9128   match(Set dst (LShiftL dst shift));
9129   effect(KILL cr);
9130 
9131   format %{ "salq    $dst, $shift" %}
9132   opcode(0xD1, 0x4); /* D1 /4 */
9133   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9134   ins_pipe(ialu_reg);
9135 %}
9136 
9137 // Shift Left by one
9138 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9139 %{
       // Read-modify-write form: the long at dst is loaded, shifted left by an
       // immI1 count and stored back to the same address. Flags are killed.
9140   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9141   effect(KILL cr);
9142 
9143   format %{ "salq    $dst, $shift" %}
9144   opcode(0xD1, 0x4); /* D1 /4 */
9145   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9146   ins_pipe(ialu_mem_imm);
9147 %}
9148 
9149 // Shift Left by 8-bit immediate
9150 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9151 %{
       // Long left shift of a register by an 8-bit immediate (immI8), in place
       // on dst. The flags are killed.
9152   match(Set dst (LShiftL dst shift));
9153   effect(KILL cr);
9154 
9155   format %{ "salq    $dst, $shift" %}
9156   opcode(0xC1, 0x4); /* C1 /4 ib */
       // reg_opc_imm_wide (defined elsewhere) is the only encoder listed.
9157   ins_encode(reg_opc_imm_wide(dst, shift));
9158   ins_pipe(ialu_reg);
9159 %}
9160 
9161 // Shift Left by 8-bit immediate
9162 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9163 %{
       // Read-modify-write form: the long at dst is loaded, shifted left by an
       // 8-bit immediate and stored back. Flags are killed.
9164   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9165   effect(KILL cr);
9166 
9167   format %{ "salq    $dst, $shift" %}
9168   opcode(0xC1, 0x4); /* C1 /4 ib */
       // The shift count is appended last through Con8or32(shift).
9169   ins_encode(REX_mem_wide(dst), OpcP,
9170              RM_opc_mem(secondary, dst), Con8or32(shift));
9171   ins_pipe(ialu_mem_imm);
9172 %}
9173 
9174 // Shift Left by variable
9175 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9176 %{
       // Long left shift of a register by a variable count. The count is an
       // int operand of class rcx_RegI and is not named in the encoding list.
       // The flags are killed.
9177   match(Set dst (LShiftL dst shift));
9178   effect(KILL cr);
9179 
9180   format %{ "salq    $dst, $shift" %}
9181   opcode(0xD3, 0x4); /* D3 /4 */
9182   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9183   ins_pipe(ialu_reg_reg);
9184 %}
9185 
9186 // Shift Left by variable
9187 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9188 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the
       // long at dst is loaded, shifted left and stored back. Flags are killed.
9189   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9190   effect(KILL cr);
9191 
9192   format %{ "salq    $dst, $shift" %}
9193   opcode(0xD3, 0x4); /* D3 /4 */
9194   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9195   ins_pipe(ialu_mem_reg);
9196 %}
9197 
9198 // Arithmetic shift right by one
9199 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9200 %{
       // Long arithmetic right shift (RShiftL) of a register by an immI1 count,
       // in place on dst. No immediate in the encoding list; flags are killed.
9201   match(Set dst (RShiftL dst shift));
9202   effect(KILL cr);
9203 
9204   format %{ "sarq    $dst, $shift" %}
9205   opcode(0xD1, 0x7); /* D1 /7 */
9206   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9207   ins_pipe(ialu_reg);
9208 %}
9209 
9210 // Arithmetic shift right by one
9211 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9212 %{
       // Read-modify-write form: the long at dst is loaded, arithmetically
       // shifted right by an immI1 count and stored back. Flags are killed.
9213   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9214   effect(KILL cr);
9215 
9216   format %{ "sarq    $dst, $shift" %}
9217   opcode(0xD1, 0x7); /* D1 /7 */
9218   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9219   ins_pipe(ialu_mem_imm);
9220 %}
9221 
9222 // Arithmetic Shift Right by 8-bit immediate
9223 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9224 %{
       // Long arithmetic right shift (RShiftL) of a register by an 8-bit
       // immediate (immI8), in place on dst. The flags are killed.
9225   match(Set dst (RShiftL dst shift));
9226   effect(KILL cr);
9227 
9228   format %{ "sarq    $dst, $shift" %}
9229   opcode(0xC1, 0x7); /* C1 /7 ib */
9230   ins_encode(reg_opc_imm_wide(dst, shift));
       // This instruction has no memory operand, so it uses the register
       // pipeline class, like salL_rReg_imm and shrL_rReg_imm.
9231   ins_pipe(ialu_reg);
9232 %}
9233 
9234 // Arithmetic Shift Right by 8-bit immediate
9235 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9236 %{
       // Read-modify-write form: the long at dst is loaded, arithmetically
       // shifted right by an 8-bit immediate and stored back. Flags are killed.
9237   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9238   effect(KILL cr);
9239 
9240   format %{ "sarq    $dst, $shift" %}
9241   opcode(0xC1, 0x7); /* C1 /7 ib */
       // The shift count is appended last through Con8or32(shift).
9242   ins_encode(REX_mem_wide(dst), OpcP,
9243              RM_opc_mem(secondary, dst), Con8or32(shift));
9244   ins_pipe(ialu_mem_imm);
9245 %}
9246 
9247 // Arithmetic Shift Right by variable
9248 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9249 %{
       // Long arithmetic right shift of a register by a variable count. The
       // count is an int operand of class rcx_RegI and is not named in the
       // encoding list. The flags are killed.
9250   match(Set dst (RShiftL dst shift));
9251   effect(KILL cr);
9252 
9253   format %{ "sarq    $dst, $shift" %}
9254   opcode(0xD3, 0x7); /* D3 /7 */
9255   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9256   ins_pipe(ialu_reg_reg);
9257 %}
9258 
9259 // Arithmetic Shift Right by variable
9260 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9261 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the
       // long at dst is loaded, arithmetically shifted right and stored back.
       // The flags are killed.
9262   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9263   effect(KILL cr);
9264 
9265   format %{ "sarq    $dst, $shift" %}
9266   opcode(0xD3, 0x7); /* D3 /7 */
9267   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9268   ins_pipe(ialu_mem_reg);
9269 %}
9270 
9271 // Logical shift right by one
9272 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9273 %{
       // Long logical (unsigned) right shift (URShiftL) of a register by an
       // immI1 count, in place on dst. No immediate in the encoding list;
       // the flags are killed.
9274   match(Set dst (URShiftL dst shift));
9275   effect(KILL cr);
9276 
9277   format %{ "shrq    $dst, $shift" %}
9278   opcode(0xD1, 0x5); /* D1 /5 */
9279   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9280   ins_pipe(ialu_reg);
9281 %}
9282 
9283 // Logical shift right by one
9284 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9285 %{
       // Read-modify-write form: the long at dst is loaded, logically shifted
       // right by an immI1 count and stored back. Flags are killed.
9286   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9287   effect(KILL cr);
9288 
9289   format %{ "shrq    $dst, $shift" %}
9290   opcode(0xD1, 0x5); /* D1 /5 */
9291   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9292   ins_pipe(ialu_mem_imm);
9293 %}
9294 
9295 // Logical Shift Right by 8-bit immediate
9296 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9297 %{
       // Long logical right shift of a register by an 8-bit immediate (immI8),
       // in place on dst. The flags are killed.
9298   match(Set dst (URShiftL dst shift));
9299   effect(KILL cr);
9300 
9301   format %{ "shrq    $dst, $shift" %}
9302   opcode(0xC1, 0x5); /* C1 /5 ib */
9303   ins_encode(reg_opc_imm_wide(dst, shift));
9304   ins_pipe(ialu_reg);
9305 %}
9306 
9307 
9308 // Logical Shift Right by 8-bit immediate
9309 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9310 %{
       // Read-modify-write form: the long at dst is loaded, logically shifted
       // right by an 8-bit immediate and stored back. Flags are killed.
9311   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9312   effect(KILL cr);
9313 
9314   format %{ "shrq    $dst, $shift" %}
9315   opcode(0xC1, 0x5); /* C1 /5 ib */
       // The shift count is appended last through Con8or32(shift).
9316   ins_encode(REX_mem_wide(dst), OpcP,
9317              RM_opc_mem(secondary, dst), Con8or32(shift));
9318   ins_pipe(ialu_mem_imm);
9319 %}
9320 
9321 // Logical Shift Right by variable
9322 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9323 %{
       // Long logical right shift of a register by a variable count. The count
       // is an int operand of class rcx_RegI and is not named in the encoding
       // list. The flags are killed.
9324   match(Set dst (URShiftL dst shift));
9325   effect(KILL cr);
9326 
9327   format %{ "shrq    $dst, $shift" %}
9328   opcode(0xD3, 0x5); /* D3 /5 */
9329   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9330   ins_pipe(ialu_reg_reg);
9331 %}
9332 
9333 // Logical Shift Right by variable
9334 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9335 %{
       // Read-modify-write form with a variable count (class rcx_RegI): the
       // long at dst is loaded, logically shifted right and stored back.
       // The flags are killed.
9336   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9337   effect(KILL cr);
9338 
9339   format %{ "shrq    $dst, $shift" %}
9340   opcode(0xD3, 0x5); /* D3 /5 */
9341   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9342   ins_pipe(ialu_mem_reg);
9343 %}
9344 
9345 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9346 // This idiom is used by the compiler for the i2b bytecode.
9347 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9348 %{
       // Matches an int left shift by the immI_24 operand followed by an
       // arithmetic right shift (RShiftI) by the same operand, and replaces the
       // pair with a single movsbl. No flags operand is declared.
9349   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9350 
9351   format %{ "movsbl  $dst, $src\t# i2b" %}
9352   opcode(0x0F, 0xBE);
       // Uses the REX_reg_breg prefix encoder, where i2s uses REX_reg_reg.
9353   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9354   ins_pipe(ialu_reg_reg);
9355 %}
9356 
9357 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9358 // This idiom is used by the compiler for the i2s bytecode.
9359 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9360 %{
       // Matches an int left shift by the immI_16 operand followed by an
       // arithmetic right shift (RShiftI) by the same operand, and replaces the
       // pair with a single movswl. No flags operand is declared.
9361   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9362 
9363   format %{ "movswl  $dst, $src\t# i2s" %}
9364   opcode(0x0F, 0xBF);
9365   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9366   ins_pipe(ialu_reg_reg);
9367 %}
9368 
9369 // ROL/ROR instructions
9370 
9371 // ROL expand
9372 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate left of dst, encoded
       // without an immediate (D1 /0). dst is read and written; flags are killed.
9373   effect(KILL cr, USE_DEF dst);
9374 
9375   format %{ "roll    $dst" %}
9376   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9377   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9378   ins_pipe(ialu_reg);
9379 %}
9380 
9381 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate left of dst by an
       // 8-bit immediate. dst is read and written; flags are killed.
9382   effect(USE_DEF dst, USE shift, KILL cr);
9383 
9384   format %{ "roll    $dst, $shift" %}
9385   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9386   ins_encode( reg_opc_imm(dst, shift) );
9387   ins_pipe(ialu_reg);
9388 %}
9389 
9390 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9391 %{
       // Expand-only helper (no match rule): int rotate left of dst by a
       // variable count. The count operand is of class rcx_RegI and dst of
       // class no_rcx_RegI; only dst is named in the encoding list.
9392   effect(USE_DEF dst, USE shift, KILL cr);
9393 
9394   format %{ "roll    $dst, $shift" %}
9395   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9396   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9397   ins_pipe(ialu_reg_reg);
9398 %}
9399 // end of ROL expand
9400 
9401 // Rotate Left by one
9402 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9403 %{
       // Recognises (dst << lshift) | (dst >>> rshift) where lshift is an immI1
       // and rshift an immI_M1 operand (classes defined elsewhere; presumably
       // the constants 1 and -1 -- confirm), and expands to rolI_rReg_imm1.
9404   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9405 
9406   expand %{
9407     rolI_rReg_imm1(dst, cr);
9408   %}
9409 %}
9410 
9411 // Rotate Left by 8-bit immediate
9412 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9413 %{
       // Recognises (dst << lshift) | (dst >>> rshift) with two immediate
       // counts. The predicate reads the second input of each shift node under
       // the Or and requires the two counts to sum to 0 modulo 32 (mask 0x1f).
9414   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9415   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9416 
       // Only the left-shift count is passed on to the rotate.
9417   expand %{
9418     rolI_rReg_imm8(dst, lshift, cr);
9419   %}
9420 %}
9421 
9422 // Rotate Left by variable
9423 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9424 %{
       // Recognises (dst << shift) | (dst >>> (zero - shift)) with a variable
       // count, zero being an immI0 operand, and expands to rolI_rReg_CL.
9425   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9426 
9427   expand %{
9428     rolI_rReg_CL(dst, shift, cr);
9429   %}
9430 %}
9431 
9432 // Rotate Left by variable
9433 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9434 %{
       // Recognises (dst << shift) | (dst >>> (c32 - shift)) with a variable
       // count, c32 being an immI_32 operand, and expands to rolI_rReg_CL.
9435   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9436 
9437   expand %{
9438     rolI_rReg_CL(dst, shift, cr);
9439   %}
9440 %}
9441 
9442 // ROR expand
9443 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9444 %{
       // Expand-only helper (no match rule): int rotate right of dst, encoded
       // without an immediate (D1 /1). dst is read and written; flags are killed.
9445   effect(USE_DEF dst, KILL cr);
9446 
9447   format %{ "rorl    $dst" %}
9448   opcode(0xD1, 0x1); /* D1 /1 */
9449   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9450   ins_pipe(ialu_reg);
9451 %}
9452 
9453 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9454 %{
       // Expand-only helper (no match rule): int rotate right of dst by an
       // 8-bit immediate. dst is read and written; flags are killed.
9455   effect(USE_DEF dst, USE shift, KILL cr);
9456 
9457   format %{ "rorl    $dst, $shift" %}
9458   opcode(0xC1, 0x1); /* C1 /1 ib */
9459   ins_encode(reg_opc_imm(dst, shift));
9460   ins_pipe(ialu_reg);
9461 %}
9462 
9463 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9464 %{
       // Expand-only helper (no match rule): int rotate right of dst by a
       // variable count. The count operand is of class rcx_RegI and dst of
       // class no_rcx_RegI; only dst is named in the encoding list.
9465   effect(USE_DEF dst, USE shift, KILL cr);
9466 
9467   format %{ "rorl    $dst, $shift" %}
9468   opcode(0xD3, 0x1); /* D3 /1 */
9469   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9470   ins_pipe(ialu_reg_reg);
9471 %}
9472 // end of ROR expand
9473 
9474 // Rotate Right by one
9475 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9476 %{
       // Recognises (dst >>> rshift) | (dst << lshift) where rshift is an immI1
       // and lshift an immI_M1 operand (classes defined elsewhere; presumably
       // the constants 1 and -1 -- confirm), and expands to rorI_rReg_imm1.
9477   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9478 
9479   expand %{
9480     rorI_rReg_imm1(dst, cr);
9481   %}
9482 %}
9483 
9484 // Rotate Right by 8-bit immediate
9485 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9486 %{
       // Recognises (dst >>> rshift) | (dst << lshift) with two immediate
       // counts. The predicate reads the second input of each shift node under
       // the Or and requires the two counts to sum to 0 modulo 32 (mask 0x1f).
9487   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9488   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9489 
       // Only the right-shift count is passed on to the rotate.
9490   expand %{
9491     rorI_rReg_imm8(dst, rshift, cr);
9492   %}
9493 %}
9494 
9495 // Rotate Right by variable
9496 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9497 %{
       // Recognises (dst >>> shift) | (dst << (zero - shift)) with a variable
       // count, zero being an immI0 operand, and expands to rorI_rReg_CL.
9498   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9499 
9500   expand %{
9501     rorI_rReg_CL(dst, shift, cr);
9502   %}
9503 %}
9504 
9505 // Rotate Right by variable
9506 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9507 %{
       // Recognises (dst >>> shift) | (dst << (c32 - shift)) with a variable
       // count, c32 being an immI_32 operand, and expands to rorI_rReg_CL.
9508   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9509 
9510   expand %{
9511     rorI_rReg_CL(dst, shift, cr);
9512   %}
9513 %}
9514 
9515 // for long rotate
9516 // ROL expand
9517 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate left of dst, encoded
       // without an immediate (D1 /0) and with the _wide REX encoder.
       // dst is read and written; flags are killed.
9518   effect(USE_DEF dst, KILL cr);
9519 
9520   format %{ "rolq    $dst" %}
9521   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9522   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9523   ins_pipe(ialu_reg);
9524 %}
9525 
9526 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate left of dst by an
       // 8-bit immediate. dst is read and written; flags are killed.
9527   effect(USE_DEF dst, USE shift, KILL cr);
9528 
9529   format %{ "rolq    $dst, $shift" %}
9530   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9531   ins_encode( reg_opc_imm_wide(dst, shift) );
9532   ins_pipe(ialu_reg);
9533 %}
9534 
9535 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9536 %{
       // Expand-only helper (no match rule): long rotate left of dst by a
       // variable count. The count is an int operand of class rcx_RegI and dst
       // is of class no_rcx_RegL; only dst is named in the encoding list.
9537   effect(USE_DEF dst, USE shift, KILL cr);
9538 
9539   format %{ "rolq    $dst, $shift" %}
9540   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9541   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9542   ins_pipe(ialu_reg_reg);
9543 %}
9544 // end of ROL expand
9545 
9546 // Rotate Left by one
9547 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9548 %{
       // Recognises (dst << lshift) | (dst >>> rshift) on longs where lshift is
       // an immI1 and rshift an immI_M1 operand (classes defined elsewhere;
       // presumably the constants 1 and -1 -- confirm), and expands to
       // rolL_rReg_imm1.
9549   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9550 
9551   expand %{
9552     rolL_rReg_imm1(dst, cr);
9553   %}
9554 %}
9555 
9556 // Rotate Left by 8-bit immediate
9557 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9558 %{
       // Recognises (dst << lshift) | (dst >>> rshift) on longs with two
       // immediate counts. The predicate reads the second input of each shift
       // node under the Or and requires the two counts to sum to 0 modulo 64
       // (mask 0x3f).
9559   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9560   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9561 
       // Only the left-shift count is passed on to the rotate.
9562   expand %{
9563     rolL_rReg_imm8(dst, lshift, cr);
9564   %}
9565 %}
9566 
9567 // Rotate Left by variable
9568 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9569 %{
       // Recognises (dst << shift) | (dst >>> (zero - shift)) on longs with a
       // variable int count, zero being an immI0 operand, and expands to
       // rolL_rReg_CL.
9570   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9571 
9572   expand %{
9573     rolL_rReg_CL(dst, shift, cr);
9574   %}
9575 %}
9576 
9577 // Rotate Left by variable
9578 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9579 %{
       // Recognises (dst << shift) | (dst >>> (c64 - shift)) on longs with a
       // variable int count, c64 being an immI_64 operand, and expands to
       // rolL_rReg_CL.
9580   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9581 
9582   expand %{
9583     rolL_rReg_CL(dst, shift, cr);
9584   %}
9585 %}
9586 
9587 // ROR expand
9588 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9589 %{
       // Expand-only helper (no match rule): long rotate right of dst, encoded
       // without an immediate (D1 /1) and with the _wide REX encoder.
       // dst is read and written; flags are killed.
9590   effect(USE_DEF dst, KILL cr);
9591 
9592   format %{ "rorq    $dst" %}
9593   opcode(0xD1, 0x1); /* D1 /1 */
9594   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9595   ins_pipe(ialu_reg);
9596 %}
9597 
9598 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9599 %{
       // Expand-only helper (no match rule): long rotate right of dst by an
       // 8-bit immediate. dst is read and written; flags are killed.
9600   effect(USE_DEF dst, USE shift, KILL cr);
9601 
9602   format %{ "rorq    $dst, $shift" %}
9603   opcode(0xC1, 0x1); /* C1 /1 ib */
9604   ins_encode(reg_opc_imm_wide(dst, shift));
9605   ins_pipe(ialu_reg);
9606 %}
9607 
9608 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9609 %{
       // Expand-only helper (no match rule): long rotate right of dst by a
       // variable count. The count is an int operand of class rcx_RegI and dst
       // is of class no_rcx_RegL; only dst is named in the encoding list.
9610   effect(USE_DEF dst, USE shift, KILL cr);
9611 
9612   format %{ "rorq    $dst, $shift" %}
9613   opcode(0xD3, 0x1); /* D3 /1 */
9614   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9615   ins_pipe(ialu_reg_reg);
9616 %}
9617 // end of ROR expand
9618 
9619 // Rotate Right by one
9620 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9621 %{
       // Recognises (dst >>> rshift) | (dst << lshift) on longs where rshift is
       // an immI1 and lshift an immI_M1 operand (classes defined elsewhere;
       // presumably the constants 1 and -1 -- confirm), and expands to
       // rorL_rReg_imm1.
9622   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9623 
9624   expand %{
9625     rorL_rReg_imm1(dst, cr);
9626   %}
9627 %}
9628 
9629 // Rotate Right by 8-bit immediate
9630 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9631 %{
       // Recognises (dst >>> rshift) | (dst << lshift) on longs with two
       // immediate counts. The predicate reads the second input of each shift
       // node under the Or and requires the two counts to sum to 0 modulo 64
       // (mask 0x3f).
9632   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9633   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9634 
       // Only the right-shift count is passed on to the rotate.
9635   expand %{
9636     rorL_rReg_imm8(dst, rshift, cr);
9637   %}
9638 %}
9639 
9640 // Rotate Right by variable
9641 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9642 %{
       // Recognises (dst >>> shift) | (dst << (zero - shift)) on longs with a
       // variable int count, zero being an immI0 operand, and expands to
       // rorL_rReg_CL.
9643   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9644 
9645   expand %{
9646     rorL_rReg_CL(dst, shift, cr);
9647   %}
9648 %}
9649 
9650 // Rotate Right by variable
9651 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9652 %{
       // Recognises (dst >>> shift) | (dst << (c64 - shift)) on longs with a
       // variable int count, c64 being an immI_64 operand, and expands to
       // rorL_rReg_CL.
9653   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9654 
9655   expand %{
9656     rorL_rReg_CL(dst, shift, cr);
9657   %}
9658 %}
9659 
9660 // Logical Instructions
9661 
9662 // Integer Logical Instructions
9663 
9664 // And Instructions
// And Register with Register (int): dst = dst & src. Flags are killed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9676 
// And Register with Immediate 255 (int): implemented as a byte zero-extend of
// dst onto itself, so no flags operand is declared.
instruct andI_rReg_imm255(rRegI dst, immI_255 src) %{
  match(Set dst (AndI dst src));

  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9687 
// (long)(src & 255): the mask and the ConvI2L are folded into one byte
// zero-extend from src into dst.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9698 
// And Register with Immediate 65535 (int): implemented as a 16-bit
// zero-extend of dst onto itself, so no flags operand is declared.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src) %{
  match(Set dst (AndI dst src));

  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9709 
// (long)(src & 65535): the mask and the ConvI2L are folded into one 16-bit
// zero-extend from src into dst.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src),
             OpcP, OpcS,
             reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9720 
// And Register with Immediate (int): dst = dst & src for a general immI
// constant. Flags are killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04); // Opcode 81 /4
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9732 
// And Register with Memory (int): dst = dst & LoadI(src), with the load
// folded into the instruction. Flags are killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9745 
// And Memory with Register (int): read-modify-write of the int at dst,
// storing LoadI(dst) & src back to the same address. Flags are killed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x21); // Opcode 21 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9758 
// And Memory with Immediate (int): read-modify-write of the int at dst,
// storing LoadI(dst) & src back to the same address. Flags are killed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x4); // Opcode 81 /4 id
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9772 
9773 // Or Instructions
// Or Register with Register (int): dst = dst | src. Flags are killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9785 
// Or Register with Immediate (int): dst = dst | src for an immI constant.
// Flags are killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x01); // Opcode 81 /1 id
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9797 
// Or Register with Memory (int): dst = dst | LoadI(src), with the load folded
// into the instruction. Flags are killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9810 
// Or Memory with Register (int): read-modify-write of the int at dst,
// storing LoadI(dst) | src back to the same address. Flags are killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x09); // Opcode 09 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9823 
// Or Memory with Immediate (int): read-modify-write of the int at dst,
// storing LoadI(dst) | src back to the same address. Flags are killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x1); // Opcode 81 /1 id
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9837 
9838 // Xor Instructions
// Xor Register with Register (int): dst = dst ^ src. Flags are killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9850 
// Xor Register with Immediate -1 (int): dst ^ -1 is a bitwise complement, so
// it is emitted as notl. No flags operand is declared for this rule.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  // Print the 32-bit mnemonic that the encoding below actually emits
  // (the long variant of this rule prints notq).
  format %{ "notl   $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9861 
// Xor Register with Immediate (int): dst = dst ^ src for an immI constant.
// Flags are killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06); // Opcode 81 /6 id
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9873 
// Xor Register with Memory (int): dst = dst ^ LoadI(src), with the load
// folded into the instruction. Flags are killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9886 
// Xor Memory with Register (int): read-modify-write of the int at dst,
// storing LoadI(dst) ^ src back to the same address. Flags are killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(150);
  opcode(0x31); // Opcode 31 /r
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9899 
// Xor Memory with Immediate (int): read-modify-write of the int at dst,
// storing LoadI(dst) ^ src back to the same address. Flags are killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_cost(125);
  opcode(0x81, 0x6); // Opcode 81 /6 id
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9913 
9914 
9915 // Long Logical Instructions
9916 
9917 // And Instructions
// And Register with Register (long): dst = dst & src. Flags are killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9929 
// And Register with Immediate 255 (long): implemented as a byte zero-extend
// of dst onto itself, so no flags operand is declared.
instruct andL_rReg_imm255(rRegL dst, immL_255 src) %{
  match(Set dst (AndL dst src));

  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9940 
// And Register with Immediate 65535 (long): implemented as a 16-bit
// zero-extend of dst onto itself, so no flags operand is declared.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src) %{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst),
             OpcP, OpcS,
             reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9951 
// And Register with Immediate (long): dst = dst & src for an immL32
// constant. Flags are killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04); // Opcode 81 /4
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
9963 
// And Register with Memory (long): dst = dst & LoadL(src), with the load
// folded into the instruction. Flags are killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9976 
// And Memory with Register (long): read-modify-write of the long at dst,
// storing LoadL(dst) & src back to the same address. Flags are killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x21); // Opcode 21 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9989 
// And Memory with Immediate (long): read-modify-write of the long at dst,
// storing LoadL(dst) & src back to the same address. Flags are killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x4); // Opcode 81 /4 id
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10003 
10004 // Or Instructions
// Or Register with Register (long): dst = dst | src. Flags are killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10016 
// Or a long register with a pointer reinterpreted as a long (CastP2X).
// Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10027 
10028 
// Or Register with Immediate (long): dst = dst | src for an immL32 constant.
// Flags are killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01); // Opcode 81 /1 id
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10040 
// Or Register with Memory (long): dst = dst | LoadL(src), with the load
// folded into the instruction. Flags are killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10053 
// Or Memory with Register (long): read-modify-write of the long at dst,
// storing LoadL(dst) | src back to the same address. Flags are killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x09); // Opcode 09 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10066 
// Or Memory with Immediate (long): read-modify-write of the long at dst,
// storing LoadL(dst) | src back to the same address. Flags are killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x1); // Opcode 81 /1 id
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10080 
10081 // Xor Instructions
// Xor Register with Register (long): dst = dst ^ src. Flags are killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
10093 
// Xor Register with Immediate -1 (long): dst ^ -1 is a bitwise complement, so
// it is emitted as notq. No flags operand is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10104 
// Xor Register with Immediate (long): dst = dst ^ src for an immL32
// constant. Flags are killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); // Opcode 81 /6 id
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
10116 
// Xor Register with Memory (long): dst = dst ^ LoadL(src), with the load
// folded into the instruction. Flags are killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
10129 
// Xor Memory with Register (long): read-modify-write of the long at dst,
// storing LoadL(dst) ^ src back to the same address. Flags are killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(150);
  opcode(0x31); // Opcode 31 /r
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
10142 
// Xor Memory with Immediate (long): read-modify-write of the long at dst,
// storing LoadL(dst) ^ src back to the same address. Flags are killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_cost(125);
  opcode(0x81, 0x6); // Opcode 81 /6 id
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(secondary, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10156 
// Convert Int to Boolean: test src against itself, set the low byte of dst
// when the result is non-zero, then zero-extend that byte. Flags are killed.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(
    // testl
    REX_reg_reg(src, src), opc_reg_reg(0x85, src, src),
    // setnz
    setNZ_reg(dst),
    // movzbl
    REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)
  );
  ins_pipe(pipe_slow); // XXX
%}
10172 
// Convert Pointer to Boolean: 64-bit test of src against itself, set the low
// byte of dst when the result is non-zero, then zero-extend that byte.
// Flags are killed.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(
    // testq
    REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src),
    // setnz
    setNZ_reg(dst),
    // movzbl
    REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst)
  );
  ins_pipe(pipe_slow); // XXX
%}
10188 
// CmpLTMask p q: compare, set the low byte of dst on less-than, zero-extend
// it and negate, giving -1 when p < q and 0 otherwise. Flags are killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_cost(400); // XXX
  ins_encode(
    // cmpl
    REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q),
    setLT_reg(dst),
    // movzbl
    REX_reg_breg(dst, dst), Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
    neg_reg(dst)
  );
  ins_pipe(pipe_slow);
%}
10206 
// CmpLTMask against the constant zero: a single arithmetic right shift by 31
// done in place on dst. Flags are killed.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_cost(100); // XXX
  opcode(0xC1, 0x7); // C1 /7 ib
  ins_encode(
    reg_opc_imm(dst, 0x1F)
  );
  ins_pipe(ialu_reg);
%}
10218 
10219 
// Conditional add: p = ((p < q ? -1 : 0) & y) + (p - q), matched as a single
// tree. Uses tmp as a scratch register and kills the flags.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rRegI tmp, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(TEMP tmp, KILL cr);

  format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
            "sbbl    $tmp, $tmp\n\t"
            "andl    $tmp, $y\n\t"
            "addl    $p, $tmp" %}
  ins_cost(400); // XXX
  ins_encode(
    enc_cmpLTP(p, q, y, tmp)
  );
  ins_pipe(pipe_cmplt);
%}
10235 
10236 /* If I enable this, I encourage spilling in the inner loop of compress.
10237 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10238 %{
10239   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10240   effect( TEMP tmp, KILL cr );
10241   ins_cost(400);
10242 
10243   format %{ "SUB    $p,$q\n\t"
10244             "SBB    RCX,RCX\n\t"
10245             "AND    RCX,$y\n\t"
10246             "ADD    $p,RCX" %}
10247   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10248 %}
10249 */
10250 
10251 //---------- FP Instructions------------------------------------------------
10252 
// Float compare into rFlagsRegU, register-register. The ucomiss is followed
// by the cmpfp_fixup sequence shown in the format, which rewrites the flags
// only on the parity (NaN) path.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10269 
// Float compare into rFlagsRegUCF, register-register: a bare ucomiss with no
// NaN flag fixup.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  // Same cost as the other rFlagsRegUCF compare rules (mem/imm float and all
  // double variants use 100); 145 is the cost of the variants that carry the
  // extra fixup sequence.
  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10280 
// Float compare into rFlagsRegU with the second operand loaded from memory.
// The ucomiss is followed by the cmpfp_fixup sequence shown in the format,
// which rewrites the flags only on the parity (NaN) path.
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10297 
// Float compare into rFlagsRegUCF with the second operand loaded from memory:
// a bare ucomiss with no NaN flag fixup.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2" %}
  ins_cost(100);
  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10307 
// Float compare into rFlagsRegU against a constant-table float. After the
// ucomiss, the parity (NaN) path pushes the flags, masks the saved image and
// pops it back, as documented by the format string.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(145);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_encode %{
    Label L_exit;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    __ jcc(Assembler::noParity, L_exit);
    __ pushf();
    // Mask the flags image that pushf just stored at [rsp]. The operand must
    // be the memory slot, not the rsp register: and-ing the register would
    // corrupt the stack pointer and popf would then read the wrong slot.
    // The mask clears bits 2, 4, 6 and 7 (PF, AF, ZF, SF) and keeps CF.
    // NOTE(review): relies on an Assembler andq(Address, int32_t) overload.
    __ andq(Address(rsp, 0), 0xffffff2b);
    __ popf();
    __ bind(L_exit);
    __ nop();
  %}
  ins_pipe(pipe_slow);
%}
10330 
// Float compare into rFlagsRegUCF against a constant-table float: a bare
// ucomiss with no NaN flag fixup.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10340 
// Double compare into rFlagsRegU, register-register. The ucomisd is followed
// by the cmpfp_fixup sequence shown in the format, which rewrites the flags
// only on the parity (NaN) path.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10357 
// Double compare into rFlagsRegUCF, register-register: a bare ucomisd with no
// NaN flag fixup.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing shows only the instruction and its operands; a stray trailing
  // word that was not part of the instruction has been removed.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10368 
// Double compare into rFlagsRegU with the second operand loaded from memory.
// The ucomisd is followed by the cmpfp_fixup sequence shown in the format,
// which rewrites the flags only on the parity (NaN) path.
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:   nop\t# avoid branch to branch" %}
  ins_cost(145);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp_fixup);
  ins_pipe(pipe_slow);
%}
10385 
// Double compare into rFlagsRegUCF with the second operand loaded from
// memory: a bare ucomisd with no NaN flag fixup.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2" %}
  ins_cost(100);
  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2));
  ins_pipe(pipe_slow);
%}
10395 
// Double compare into rFlagsRegU against a constant-table double. After the
// ucomisd, the parity (NaN) path pushes the flags, masks the saved image and
// pops it back, as documented by the format string.
instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
  match(Set cr (CmpD src con));

  ins_cost(145);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:   nop\t# avoid branch to branch" %}
  ins_encode %{
    Label L_exit;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    __ jcc(Assembler::noParity, L_exit);
    __ pushf();
    // Mask the flags image that pushf just stored at [rsp]. The operand must
    // be the memory slot, not the rsp register: and-ing the register would
    // corrupt the stack pointer and popf would then read the wrong slot.
    // The mask clears bits 2, 4, 6 and 7 (PF, AF, ZF, SF) and keeps CF.
    // NOTE(review): relies on an Assembler andq(Address, int32_t) overload.
    __ andq(Address(rsp, 0), 0xffffff2b);
    __ popf();
    __ bind(L_exit);
    __ nop();
  %}
  ins_pipe(pipe_slow);
%}
10418 
// Double compare into rFlagsRegUCF against a constant-table double: a bare
// ucomisd with no NaN flag fixup.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(100);
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10428 
// Three-way float compare (CmpF3), register-register. dst is preset to -1 and
// kept on the parity or below paths; otherwise it becomes the zero-extended
// not-equal result (0 or 1). Flags are killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_reg(src1, src2),
             OpcP, OpcS,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10449 
// Three-way float compare (CmpF3) with the second operand loaded from memory.
// dst is preset to -1 and kept on the parity or below paths; otherwise it
// becomes the zero-extended not-equal result (0 or 1). Flags are killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x0F, 0x2E);
  ins_encode(REX_reg_mem(src1, src2),
             OpcP, OpcS,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10470 
// Three-way float compare (CmpF3) against a constant-table float. dst is
// preset to -1 and kept on the parity or below paths; otherwise it becomes
// the zero-extended not-equal result (0 or 1). Flags are killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    Register result = $dst$$Register;
    Label finished;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    // Assume the -1 answer; both early exits below keep it.
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    // Equal gives 0, anything else remaining gives 1.
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10497 
// Three-way double compare (CmpD3), register-register. dst is preset to -1
// and kept on the parity or below paths; otherwise it becomes the
// zero-extended not-equal result (0 or 1). Flags are killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_reg(src1, src2),
             OpcS, OpcT,
             reg_reg(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10518 
// Three-way double compare (CmpD3) with the second operand loaded from
// memory. dst is preset to -1 and kept on the parity or below paths;
// otherwise it becomes the zero-extended not-equal result (0 or 1).
// Flags are killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);

  opcode(0x66, 0x0F, 0x2E);
  ins_encode(OpcP,
             REX_reg_mem(src1, src2),
             OpcS, OpcT,
             reg_mem(src1, src2),
             cmpfp3(dst));
  ins_pipe(pipe_slow);
%}
10539 
// Three-way double compare (CmpD3) against a constant-table double. dst is
// preset to -1 and kept on the parity or below paths; otherwise it becomes
// the zero-extended not-equal result (0 or 1). Flags are killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    Label finished;
    Register result = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    // Assume the -1 answer; both early exits below keep it.
    __ movl(result, -1);
    __ jcc(Assembler::parity, finished);
    __ jcc(Assembler::below, finished);
    // Equal gives 0, anything else remaining gives 1.
    __ setb(Assembler::notEqual, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
10566 
// Float add, register-register: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10577 
// Float add with the second operand loaded from memory: dst = dst + LoadF(src).
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150); // XXX
  format %{ "addss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10588 
// Float add of a constant: dst = dst + con, with con read from the constant
// table.
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));

  ins_cost(150); // XXX
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10598 
// Double add, register-register: dst = dst + src.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10609 
// Double add with the second operand loaded from memory:
// dst = dst + LoadD(src).
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, $src" %}
  opcode(0xF2, 0x0F, 0x58);
  ins_encode(OpcP,
             REX_reg_mem(dst, src),
             OpcS, OpcT,
             reg_mem(dst, src));
  ins_pipe(pipe_slow);
%}
10620 
// Double add of a constant: dst = dst + con, with con read from the constant
// table.
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));

  ins_cost(150); // XXX
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10630 
// Float subtract, register-register: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));

  ins_cost(150); // XXX
  format %{ "subss   $dst, $src" %}
  opcode(0xF3, 0x0F, 0x5C);
  ins_encode(OpcP,
             REX_reg_reg(dst, src),
             OpcS, OpcT,
             reg_reg(dst, src));
  ins_pipe(pipe_slow);
%}
10641 
10642 instruct subF_mem(regF dst, memory src) // float subtract whose subtrahend is read directly from memory
10643 %{
10644   match(Set dst (SubF dst (LoadF src))); // dst is the minuend and the result; the LoadF is folded into this rule
10645 
10646   format %{ "subss   $dst, $src" %}
10647   ins_cost(150); // XXX
10648   opcode(0xF3, 0x0F, 0x5C); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10649   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10650   ins_pipe(pipe_slow);
10651 %}
10652 
10653 instruct subF_imm(regF dst, immF con) %{ // float subtract of a compile-time float constant: dst = dst - con
10654   match(Set dst (SubF dst con)); // dst is the minuend and the result
10655   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10656   ins_cost(150); // XXX
10657   ins_encode %{
10658     __ subss($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10659   %}
10660   ins_pipe(pipe_slow);
10661 %}
10662 
10663 instruct subD_reg(regD dst, regD src) // double subtract, register-register form: dst = dst - src
10664 %{
10665   match(Set dst (SubD dst src)); // dst is the minuend and the result; src is the subtrahend
10666 
10667   format %{ "subsd   $dst, $src" %}
10668   ins_cost(150); // XXX
10669   opcode(0xF2, 0x0F, 0x5C); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10670   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10671   ins_pipe(pipe_slow);
10672 %}
10673 
10674 instruct subD_mem(regD dst, memory src) // double subtract whose subtrahend is read directly from memory
10675 %{
10676   match(Set dst (SubD dst (LoadD src))); // dst is the minuend and the result; the LoadD is folded into this rule
10677 
10678   format %{ "subsd   $dst, $src" %}
10679   ins_cost(150); // XXX
10680   opcode(0xF2, 0x0F, 0x5C); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10681   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10682   ins_pipe(pipe_slow);
10683 %}
10684 
10685 instruct subD_imm(regD dst, immD con) %{ // double subtract of a compile-time double constant: dst = dst - con
10686   match(Set dst (SubD dst con)); // dst is the minuend and the result
10687   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10688   ins_cost(150); // XXX
10689   ins_encode %{
10690     __ subsd($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10691   %}
10692   ins_pipe(pipe_slow);
10693 %}
10694 
10695 instruct mulF_reg(regF dst, regF src) // float multiply, register-register form: dst = dst * src
10696 %{
10697   match(Set dst (MulF dst src)); // dst is both first input and result
10698 
10699   format %{ "mulss   $dst, $src" %}
10700   ins_cost(150); // XXX
10701   opcode(0xF3, 0x0F, 0x59); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10702   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10703   ins_pipe(pipe_slow);
10704 %}
10705 
10706 instruct mulF_mem(regF dst, memory src) // float multiply whose second operand is read directly from memory
10707 %{
10708   match(Set dst (MulF dst (LoadF src))); // dst is both first input and result; the LoadF is folded into this rule
10709 
10710   format %{ "mulss   $dst, $src" %}
10711   ins_cost(150); // XXX
10712   opcode(0xF3, 0x0F, 0x59); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10713   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10714   ins_pipe(pipe_slow);
10715 %}
10716 
10717 instruct mulF_imm(regF dst, immF con) %{ // float multiply by a compile-time float constant
10718   match(Set dst (MulF dst con)); // dst is both first input and result
10719   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10720   ins_cost(150); // XXX
10721   ins_encode %{
10722     __ mulss($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10723   %}
10724   ins_pipe(pipe_slow);
10725 %}
10726 
10727 instruct mulD_reg(regD dst, regD src) // double multiply, register-register form: dst = dst * src
10728 %{
10729   match(Set dst (MulD dst src)); // dst is both first input and result
10730 
10731   format %{ "mulsd   $dst, $src" %}
10732   ins_cost(150); // XXX
10733   opcode(0xF2, 0x0F, 0x59); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10734   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10735   ins_pipe(pipe_slow);
10736 %}
10737 
10738 instruct mulD_mem(regD dst, memory src) // double multiply whose second operand is read directly from memory
10739 %{
10740   match(Set dst (MulD dst (LoadD src))); // dst is both first input and result; the LoadD is folded into this rule
10741 
10742   format %{ "mulsd   $dst, $src" %}
10743   ins_cost(150); // XXX
10744   opcode(0xF2, 0x0F, 0x59); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10745   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10746   ins_pipe(pipe_slow);
10747 %}
10748 
10749 instruct mulD_imm(regD dst, immD con) %{ // double multiply by a compile-time double constant
10750   match(Set dst (MulD dst con)); // dst is both first input and result
10751   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10752   ins_cost(150); // XXX
10753   ins_encode %{
10754     __ mulsd($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10755   %}
10756   ins_pipe(pipe_slow);
10757 %}
10758 
10759 instruct divF_reg(regF dst, regF src) // float divide, register-register form: dst = dst / src
10760 %{
10761   match(Set dst (DivF dst src)); // dst is the dividend and the result; src is the divisor
10762 
10763   format %{ "divss   $dst, $src" %}
10764   ins_cost(150); // XXX
10765   opcode(0xF3, 0x0F, 0x5E); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10766   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10767   ins_pipe(pipe_slow);
10768 %}
10769 
10770 instruct divF_mem(regF dst, memory src) // float divide whose divisor is read directly from memory
10771 %{
10772   match(Set dst (DivF dst (LoadF src))); // dst is the dividend and the result; the LoadF is folded into this rule
10773 
10774   format %{ "divss   $dst, $src" %}
10775   ins_cost(150); // XXX
10776   opcode(0xF3, 0x0F, 0x5E); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10777   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10778   ins_pipe(pipe_slow);
10779 %}
10780 
10781 instruct divF_imm(regF dst, immF con) %{ // float divide by a compile-time float constant: dst = dst / con
10782   match(Set dst (DivF dst con)); // dst is the dividend and the result
10783   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10784   ins_cost(150); // XXX
10785   ins_encode %{
10786     __ divss($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10787   %}
10788   ins_pipe(pipe_slow);
10789 %}
10790 
10791 instruct divD_reg(regD dst, regD src) // double divide, register-register form: dst = dst / src
10792 %{
10793   match(Set dst (DivD dst src)); // dst is the dividend and the result; src is the divisor
10794 
10795   format %{ "divsd   $dst, $src" %}
10796   ins_cost(150); // XXX
10797   opcode(0xF2, 0x0F, 0x5E); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10798   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10799   ins_pipe(pipe_slow);
10800 %}
10801 
10802 instruct divD_mem(regD dst, memory src) // double divide whose divisor is read directly from memory
10803 %{
10804   match(Set dst (DivD dst (LoadD src))); // dst is the dividend and the result; the LoadD is folded into this rule
10805 
10806   format %{ "divsd   $dst, $src" %}
10807   ins_cost(150); // XXX
10808   opcode(0xF2, 0x0F, 0x5E); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10809   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10810   ins_pipe(pipe_slow);
10811 %}
10812 
10813 instruct divD_imm(regD dst, immD con) %{ // double divide by a compile-time double constant: dst = dst / con
10814   match(Set dst (DivD dst con)); // dst is the dividend and the result
10815   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10816   ins_cost(150); // XXX
10817   ins_encode %{
10818     __ divsd($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10819   %}
10820   ins_pipe(pipe_slow);
10821 %}
10822 
10823 instruct sqrtF_reg(regF dst, regF src) // float square root, register-register form; dst and src may be different registers
10824 %{
10825   match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); // the whole float->double, sqrt, double->float chain is matched by this one rule
10826 
10827   format %{ "sqrtss  $dst, $src" %}
10828   ins_cost(150); // XXX
10829   opcode(0xF3, 0x0F, 0x51); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10830   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10831   ins_pipe(pipe_slow);
10832 %}
10833 
10834 instruct sqrtF_mem(regF dst, memory src) // float square root of a value read directly from memory
10835 %{
10836   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); // matches the float->double, sqrt, double->float chain with the LoadF folded in
10837 
10838   format %{ "sqrtss  $dst, $src" %}
10839   ins_cost(150); // XXX
10840   opcode(0xF3, 0x0F, 0x51); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10841   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10842   ins_pipe(pipe_slow);
10843 %}
10844 
10845 instruct sqrtF_imm(regF dst, immF con) %{ // float square root of a compile-time float constant
10846   match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); // matches the float->double, sqrt, double->float chain applied to a constant
10847   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10848   ins_cost(150); // XXX
10849   ins_encode %{
10850     __ sqrtss($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10851   %}
10852   ins_pipe(pipe_slow);
10853 %}
10854 
10855 instruct sqrtD_reg(regD dst, regD src) // double square root, register-register form; dst and src may be different registers
10856 %{
10857   match(Set dst (SqrtD src));
10858 
10859   format %{ "sqrtsd  $dst, $src" %}
10860   ins_cost(150); // XXX
10861   opcode(0xF2, 0x0F, 0x51); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10862   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10863   ins_pipe(pipe_slow);
10864 %}
10865 
10866 instruct sqrtD_mem(regD dst, memory src) // double square root of a value read directly from memory
10867 %{
10868   match(Set dst (SqrtD (LoadD src))); // the LoadD is folded into this rule
10869 
10870   format %{ "sqrtsd  $dst, $src" %}
10871   ins_cost(150); // XXX
10872   opcode(0xF2, 0x0F, 0x51); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
10873   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
10874   ins_pipe(pipe_slow);
10875 %}
10876 
10877 instruct sqrtD_imm(regD dst, immD con) %{ // double square root of a compile-time double constant
10878   match(Set dst (SqrtD con));
10879   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10880   ins_cost(150); // XXX
10881   ins_encode %{
10882     __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); // con is read from its constant-table address, not encoded inline
10883   %}
10884   ins_pipe(pipe_slow);
10885 %}
10886 
10887 instruct absF_reg(regF dst) // float absolute value computed in place on dst
10888 %{
10889   match(Set dst (AbsF dst)); // single operand: dst is input and result
10890 
10891   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10892   ins_encode(absF_encoding(dst)); // encoding lives in the absF_encoding enc_class; per the format it ANDs dst with a 0x7fffffff mask
10893   ins_pipe(pipe_slow);
10894 %}
10895 
10896 instruct absD_reg(regD dst) // double absolute value computed in place on dst
10897 %{
10898   match(Set dst (AbsD dst)); // single operand: dst is input and result
10899 
10900   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10901             "# abs double by sign masking" %}
10902   ins_encode(absD_encoding(dst)); // encoding lives in the absD_encoding enc_class; per the format it ANDs dst with a 0x7fffffffffffffff mask
10903   ins_pipe(pipe_slow);
10904 %}
10905 
10906 instruct negF_reg(regF dst) // float negation computed in place on dst
10907 %{
10908   match(Set dst (NegF dst)); // single operand: dst is input and result
10909 
10910   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10911   ins_encode(negF_encoding(dst)); // encoding lives in the negF_encoding enc_class; per the format it XORs dst with a 0x80000000 mask
10912   ins_pipe(pipe_slow);
10913 %}
10914 
10915 instruct negD_reg(regD dst) // double negation computed in place on dst
10916 %{
10917   match(Set dst (NegD dst)); // single operand: dst is input and result
10918 
10919   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10920             "# neg double by sign flipping" %}
10921   ins_encode(negD_encoding(dst)); // encoding lives in the negD_encoding enc_class; per the format it XORs dst with a 0x8000000000000000 mask
10922   ins_pipe(pipe_slow);
10923 %}
10924 
10925 // -----------Trig and Transcendental Instructions-----------------------------
10926 instruct cosD_reg(regD dst) %{ // double cosine computed in place on dst
10927   match(Set dst (CosD dst)); // single operand: dst is input and result
10928 
10929   format %{ "dcos   $dst\n\t" %}
10930   opcode(0xD9, 0xFF); // two opcode bytes, emitted below via OpcP and OpcS
10931   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // NOTE(review): Push_SrcXD/Push_ResultXD presumably move dst to and from the x87 stack -- confirm in their enc_class
10932   ins_pipe( pipe_slow );
10933 %}
10934 
10935 instruct sinD_reg(regD dst) %{ // double sine computed in place on dst
10936   match(Set dst (SinD dst)); // single operand: dst is input and result
10937 
10938   format %{ "dsin   $dst\n\t" %}
10939   opcode(0xD9, 0xFE); // two opcode bytes, emitted below via OpcP and OpcS
10940   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); // NOTE(review): Push_SrcXD/Push_ResultXD presumably move dst to and from the x87 stack -- confirm in their enc_class
10941   ins_pipe( pipe_slow );
10942 %}
10943 
10944 instruct tanD_reg(regD dst) %{ // double tangent computed in place on dst
10945   match(Set dst (TanD dst)); // single operand: dst is input and result
10946 
10947   format %{ "dtan   $dst\n\t" %}
10948   ins_encode( Push_SrcXD(dst), // no opcode() here: the raw bytes are given inline with Opcode(...)
10949               Opcode(0xD9), Opcode(0xF2),   //fptan
10950               Opcode(0xDD), Opcode(0xD8),   //fstp st -- NOTE(review): presumably discards the extra value fptan pushes; confirm
10951               Push_ResultXD(dst) );
10952   ins_pipe( pipe_slow );
10953 %}
10954 
10955 instruct log10D_reg(regD dst) %{ // base-10 logarithm of a double, computed in place on dst
10956   // The source and result Double operands are in XMM registers
10957   match(Set dst (Log10D dst));
10958   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10959   // fyl2x        ; compute log_10(2) * log_2(x)
10960   format %{ "fldlg2\t\t\t#Log10\n\t"
10961             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10962          %}
10963    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2 -- emitted first, before the operand is pushed
10964               Push_SrcXD(dst),
10965               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10966               Push_ResultXD(dst));
10967 
10968   ins_pipe( pipe_slow );
10969 %}
10970 
10971 instruct logD_reg(regD dst) %{ // natural logarithm of a double, computed in place on dst
10972   // The source and result Double operands are in XMM registers
10973   match(Set dst (LogD dst));
10974   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10975   // fyl2x        ; compute log_e(2) * log_2(x)
10976   format %{ "fldln2\t\t\t#Log_e\n\t"
10977             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10978          %}
10979   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2 -- emitted first, before the operand is pushed
10980               Push_SrcXD(dst),
10981               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10982               Push_ResultXD(dst));
10983   ins_pipe( pipe_slow );
10984 %}
10985 
10986 
10987 
10988 //----------Arithmetic Conversion Instructions---------------------------------
10989 
10990 instruct roundFloat_nop(regF dst) // matches RoundFloat and emits no code
10991 %{
10992   match(Set dst (RoundFloat dst)); // input and result are the same register
10993 
10994   ins_cost(0); // zero cost: nothing is generated
10995   ins_encode(); // empty encoding
10996   ins_pipe(empty);
10997 %}
10998 
10999 instruct roundDouble_nop(regD dst) // matches RoundDouble and emits no code
11000 %{
11001   match(Set dst (RoundDouble dst)); // input and result are the same register
11002 
11003   ins_cost(0); // zero cost: nothing is generated
11004   ins_encode(); // empty encoding
11005   ins_pipe(empty);
11006 %}
11007 
11008 instruct convF2D_reg_reg(regD dst, regF src) // float-to-double conversion, register source
11009 %{
11010   match(Set dst (ConvF2D src));
11011 
11012   format %{ "cvtss2sd $dst, $src" %}
11013   opcode(0xF3, 0x0F, 0x5A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11014   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11015   ins_pipe(pipe_slow); // XXX
11016 %}
11017 
11018 instruct convF2D_reg_mem(regD dst, memory src) // float-to-double conversion of a value read directly from memory
11019 %{
11020   match(Set dst (ConvF2D (LoadF src))); // the LoadF is folded into this rule
11021 
11022   format %{ "cvtss2sd $dst, $src" %}
11023   opcode(0xF3, 0x0F, 0x5A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11024   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11025   ins_pipe(pipe_slow); // XXX
11026 %}
11027 
11028 instruct convD2F_reg_reg(regF dst, regD src) // double-to-float conversion, register source
11029 %{
11030   match(Set dst (ConvD2F src));
11031 
11032   format %{ "cvtsd2ss $dst, $src" %}
11033   opcode(0xF2, 0x0F, 0x5A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11034   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11035   ins_pipe(pipe_slow); // XXX
11036 %}
11037 
11038 instruct convD2F_reg_mem(regF dst, memory src) // double-to-float conversion of a value read directly from memory
11039 %{
11040   match(Set dst (ConvD2F (LoadD src))); // the LoadD is folded into this rule
11041 
11042   format %{ "cvtsd2ss $dst, $src" %}
11043   opcode(0xF2, 0x0F, 0x5A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11044   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11045   ins_pipe(pipe_slow); // XXX
11046 %}
11047 
11048 // XXX do mem variants
11049 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) // float-to-int conversion with a fixup path for the 0x80000000 result
11050 %{
11051   match(Set dst (ConvF2I src));
11052   effect(KILL cr); // the emitted sequence contains a compare (see format), so flags are clobbered
11053 
11054   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11055             "cmpl    $dst, #0x80000000\n\t"
11056             "jne,s   done\n\t"
11057             "subq    rsp, #8\n\t"
11058             "movss   [rsp], $src\n\t"
11059             "call    f2i_fixup\n\t"
11060             "popq    $dst\n"
11061     "done:   "%}
11062   opcode(0xF3, 0x0F, 0x2C); // opcode bytes of the convert itself (used below via OpcP, OpcS, OpcT)
11063   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), // the convert instruction
11064              f2i_fixup(dst, src)); // compare-and-call tail shown in the format; only taken when the convert produced 0x80000000
11065   ins_pipe(pipe_slow);
11066 %}
11067 
11068 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) // float-to-long conversion with a fixup path for the 0x8000000000000000 result
11069 %{
11070   match(Set dst (ConvF2L src));
11071   effect(KILL cr); // the emitted sequence contains a compare (see format), so flags are clobbered
11072 
11073   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11074             "cmpq    $dst, [0x8000000000000000]\n\t"
11075             "jne,s   done\n\t"
11076             "subq    rsp, #8\n\t"
11077             "movss   [rsp], $src\n\t"
11078             "call    f2l_fixup\n\t"
11079             "popq    $dst\n"
11080     "done:   "%}
11081   opcode(0xF3, 0x0F, 0x2C); // same opcode bytes as the int form; the 64-bit result comes from the _wide REX below
11082   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), // the convert instruction (q-suffixed form in the format)
11083              f2l_fixup(dst, src)); // compare-and-call tail shown in the format; only taken when the convert produced 0x8000000000000000
11084   ins_pipe(pipe_slow);
11085 %}
11086 
11087 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) // double-to-int conversion with a fixup path for the 0x80000000 result
11088 %{
11089   match(Set dst (ConvD2I src));
11090   effect(KILL cr); // the emitted sequence contains a compare (see format), so flags are clobbered
11091 
11092   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11093             "cmpl    $dst, #0x80000000\n\t"
11094             "jne,s   done\n\t"
11095             "subq    rsp, #8\n\t"
11096             "movsd   [rsp], $src\n\t"
11097             "call    d2i_fixup\n\t"
11098             "popq    $dst\n"
11099     "done:   "%}
11100   opcode(0xF2, 0x0F, 0x2C); // opcode bytes of the convert itself (used below via OpcP, OpcS, OpcT)
11101   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), // the convert instruction
11102              d2i_fixup(dst, src)); // compare-and-call tail shown in the format; only taken when the convert produced 0x80000000
11103   ins_pipe(pipe_slow);
11104 %}
11105 
11106 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) // double-to-long conversion with a fixup path for the 0x8000000000000000 result
11107 %{
11108   match(Set dst (ConvD2L src));
11109   effect(KILL cr); // the emitted sequence contains a compare (see format), so flags are clobbered
11110 
11111   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11112             "cmpq    $dst, [0x8000000000000000]\n\t"
11113             "jne,s   done\n\t"
11114             "subq    rsp, #8\n\t"
11115             "movsd   [rsp], $src\n\t"
11116             "call    d2l_fixup\n\t"
11117             "popq    $dst\n"
11118     "done:   "%}
11119   opcode(0xF2, 0x0F, 0x2C); // same opcode bytes as the int form; the 64-bit result comes from the _wide REX below
11120   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), // the convert instruction (q-suffixed form in the format)
11121              d2l_fixup(dst, src)); // compare-and-call tail shown in the format; only taken when the convert produced 0x8000000000000000
11122   ins_pipe(pipe_slow);
11123 %}
11124 
11125 instruct convI2F_reg_reg(regF dst, rRegI src) // int-to-float conversion from a general register (cvtsi2ss form)
11126 %{
11127   predicate(!UseXmmI2F); // only selected when UseXmmI2F is off; convXI2F_reg covers the other case
11128   match(Set dst (ConvI2F src));
11129 
11130   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11131   opcode(0xF3, 0x0F, 0x2A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11132   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11133   ins_pipe(pipe_slow); // XXX
11134 %}
11135 
11136 instruct convI2F_reg_mem(regF dst, memory src) // int-to-float conversion of an int read directly from memory
11137 %{
11138   match(Set dst (ConvI2F (LoadI src))); // the LoadI is folded into this rule; note there is no UseXmmI2F predicate on this form
11139 
11140   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11141   opcode(0xF3, 0x0F, 0x2A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11142   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11143   ins_pipe(pipe_slow); // XXX
11144 %}
11145 
11146 instruct convI2D_reg_reg(regD dst, rRegI src) // int-to-double conversion from a general register (cvtsi2sd form)
11147 %{
11148   predicate(!UseXmmI2D); // only selected when UseXmmI2D is off; convXI2D_reg covers the other case
11149   match(Set dst (ConvI2D src));
11150 
11151   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11152   opcode(0xF2, 0x0F, 0x2A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11153   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11154   ins_pipe(pipe_slow); // XXX
11155 %}
11156 
11157 instruct convI2D_reg_mem(regD dst, memory src) // int-to-double conversion of an int read directly from memory
11158 %{
11159   match(Set dst (ConvI2D (LoadI src))); // the LoadI is folded into this rule; note there is no UseXmmI2D predicate on this form
11160 
11161   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11162   opcode(0xF2, 0x0F, 0x2A); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11163   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11164   ins_pipe(pipe_slow); // XXX
11165 %}
11166 
11167 instruct convXI2F_reg(regF dst, rRegI src) // int-to-float conversion done as a move into XMM followed by a packed convert
11168 %{
11169   predicate(UseXmmI2F); // only selected when UseXmmI2F is on; convI2F_reg_reg covers the other case
11170   match(Set dst (ConvI2F src));
11171 
11172   format %{ "movdl $dst, $src\n\t"
11173             "cvtdq2psl $dst, $dst\t# i2f" %}
11174   ins_encode %{
11175     __ movdl($dst$$XMMRegister, $src$$Register); // copy the int from the general register into dst
11176     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; dst is both source and destination
11177   %}
11178   ins_pipe(pipe_slow); // XXX
11179 %}
11180 
11181 instruct convXI2D_reg(regD dst, rRegI src) // int-to-double conversion done as a move into XMM followed by a packed convert
11182 %{
11183   predicate(UseXmmI2D); // only selected when UseXmmI2D is on; convI2D_reg_reg covers the other case
11184   match(Set dst (ConvI2D src));
11185 
11186   format %{ "movdl $dst, $src\n\t"
11187             "cvtdq2pdl $dst, $dst\t# i2d" %}
11188   ins_encode %{
11189     __ movdl($dst$$XMMRegister, $src$$Register); // copy the int from the general register into dst
11190     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; dst is both source and destination
11191   %}
11192   ins_pipe(pipe_slow); // XXX
11193 %}
11194 
11195 instruct convL2F_reg_reg(regF dst, rRegL src) // long-to-float conversion from a general register
11196 %{
11197   match(Set dst (ConvL2F src));
11198 
11199   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11200   opcode(0xF3, 0x0F, 0x2A); // same opcode bytes as the int form; the _wide REX below selects the q-suffixed form
11201   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11202   ins_pipe(pipe_slow); // XXX
11203 %}
11204 
11205 instruct convL2F_reg_mem(regF dst, memory src) // long-to-float conversion of a long read directly from memory
11206 %{
11207   match(Set dst (ConvL2F (LoadL src))); // the LoadL is folded into this rule
11208 
11209   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11210   opcode(0xF3, 0x0F, 0x2A); // same opcode bytes as the int form; the _wide REX below selects the q-suffixed form
11211   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11212   ins_pipe(pipe_slow); // XXX
11213 %}
11214 
11215 instruct convL2D_reg_reg(regD dst, rRegL src) // long-to-double conversion from a general register
11216 %{
11217   match(Set dst (ConvL2D src));
11218 
11219   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11220   opcode(0xF2, 0x0F, 0x2A); // same opcode bytes as the int form; the _wide REX below selects the q-suffixed form
11221   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11222   ins_pipe(pipe_slow); // XXX
11223 %}
11224 
11225 instruct convL2D_reg_mem(regD dst, memory src) // long-to-double conversion of a long read directly from memory
11226 %{
11227   match(Set dst (ConvL2D (LoadL src))); // the LoadL is folded into this rule
11228 
11229   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11230   opcode(0xF2, 0x0F, 0x2A); // same opcode bytes as the int form; the _wide REX below selects the q-suffixed form
11231   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); // order: first opcode byte, REX, remaining opcode bytes, operands
11232   ins_pipe(pipe_slow); // XXX
11233 %}
11234 
11235 instruct convI2L_reg_reg(rRegL dst, rRegI src) // sign-extending int-to-long conversion between general registers
11236 %{
11237   match(Set dst (ConvI2L src));
11238 
11239   ins_cost(125);
11240   format %{ "movslq  $dst, $src\t# i2l" %}
11241   ins_encode %{
11242     __ movslq($dst$$Register, $src$$Register); // emitted through the assembler rather than raw opcode bytes
11243   %}
11244   ins_pipe(ialu_reg_reg);
11245 %}
11246 
11247 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11248 // %{
11249 //   match(Set dst (ConvI2L src));
11250 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11251 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11252 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11253 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11254 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11255 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11256 
11257 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11258 //   ins_encode(enc_copy(dst, src));
11259 // //   opcode(0x63); // needs REX.W
11260 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11261 //   ins_pipe(ialu_reg_reg);
11262 // %}
11263 
11264 // Zero-extend convert int to long
11265 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) // int-to-long followed by a 32-bit mask, done as one 32-bit move
11266 %{
11267   match(Set dst (AndL (ConvI2L src) mask)); // mask takes part in matching only; it is not passed to the encoding
11268 
11269   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11270   ins_encode(enc_copy(dst, src)); // NOTE(review): enc_copy (unlike enc_copy_always used by zerox_long_reg_reg) may skip the move when dst == src -- confirm in its enc_class
11271   ins_pipe(ialu_reg_reg);
11272 %}
11273 
11274 // Zero-extend convert int to long
11275 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) // zero-extending int load: LoadI + ConvI2L + 32-bit mask as one movl
11276 %{
11277   match(Set dst (AndL (ConvI2L (LoadI src)) mask)); // mask takes part in matching only; it is not passed to the encoding
11278 
11279   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11280   opcode(0x8B); // single opcode byte, emitted below via OpcP
11281   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); // here REX precedes the opcode byte
11282   ins_pipe(ialu_reg_mem);
11283 %}
11284 
11285 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) // long AND with a 32-bit mask, done as one 32-bit move
11286 %{
11287   match(Set dst (AndL src mask)); // mask takes part in matching only; it is not passed to the encoding
11288 
11289   format %{ "movl    $dst, $src\t# zero-extend long" %}
11290   ins_encode(enc_copy_always(dst, src)); // NOTE(review): the _always variant presumably emits the move even when dst == src so the upper half is still cleared -- confirm
11291   ins_pipe(ialu_reg_reg);
11292 %}
11293 
11294 instruct convL2I_reg_reg(rRegI dst, rRegL src) // long-to-int conversion done as a 32-bit move
11295 %{
11296   match(Set dst (ConvL2I src));
11297 
11298   format %{ "movl    $dst, $src\t# l2i" %}
11299   ins_encode(enc_copy_always(dst, src)); // NOTE(review): the _always variant presumably emits the move even when dst == src -- confirm
11300   ins_pipe(ialu_reg_reg);
11301 %}
11302 
11303 
11304 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ // MoveF2I: load a float stack slot into an int register with a 32-bit move
11305   match(Set dst (MoveF2I src));
11306   effect(DEF dst, USE src); // dst is only written, src is only read
11307 
11308   ins_cost(125);
11309   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11310   opcode(0x8B); // single opcode byte, emitted below via OpcP
11311   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
11312   ins_pipe(ialu_reg_mem);
11313 %}
11314 
11315 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ // MoveI2F: load an int stack slot into a float register with movss
11316   match(Set dst (MoveI2F src));
11317   effect(DEF dst, USE src); // dst is only written, src is only read
11318 
11319   ins_cost(125);
11320   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11321   opcode(0xF3, 0x0F, 0x10); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11322   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
11323   ins_pipe(pipe_slow);
11324 %}
11325 
11326 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{ // MoveD2L: load a double stack slot into a long register with a 64-bit move
11327   match(Set dst (MoveD2L src));
11328   effect(DEF dst, USE src); // dst is only written, src is only read
11329 
11330   ins_cost(125);
11331   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11332   opcode(0x8B); // same opcode byte as the 32-bit form; the _wide REX below selects the movq shown in the format
11333   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
11334   ins_pipe(ialu_reg_mem);
11335 %}
11336 
11337 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{ // MoveL2D from a long stack slot using movlpd
11338   predicate(!UseXmmLoadAndClearUpper); // alternative to MoveL2D_stack_reg, chosen when UseXmmLoadAndClearUpper is off
11339   match(Set dst (MoveL2D src));
11340   effect(DEF dst, USE src); // dst is only written, src is only read
11341 
11342   ins_cost(125);
11343   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11344   opcode(0x66, 0x0F, 0x12); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11345   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
11346   ins_pipe(pipe_slow);
11347 %}
11348 
11349 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ // MoveL2D from a long stack slot using movsd
11350   predicate(UseXmmLoadAndClearUpper); // alternative to MoveL2D_stack_reg_partial, chosen when UseXmmLoadAndClearUpper is on
11351   match(Set dst (MoveL2D src));
11352   effect(DEF dst, USE src); // dst is only written, src is only read
11353 
11354   ins_cost(125);
11355   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11356   opcode(0xF2, 0x0F, 0x10); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11357   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
11358   ins_pipe(pipe_slow);
11359 %}
11360 
11361 
11362 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ // MoveF2I: store a float register into an int stack slot with movss
11363   match(Set dst (MoveF2I src));
11364   effect(DEF dst, USE src); // dst is only written, src is only read
11365 
11366   ins_cost(95); // XXX
11367   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11368   opcode(0xF3, 0x0F, 0x11); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11369   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // store form: src is the register operand, dst the memory operand
11370   ins_pipe(pipe_slow);
11371 %}
11372 
11373 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ // MoveI2F: store an int register into a float stack slot with a 32-bit move
11374   match(Set dst (MoveI2F src));
11375   effect(DEF dst, USE src); // dst is only written, src is only read
11376 
11377   ins_cost(100);
11378   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11379   opcode(0x89); // single opcode byte, emitted below via OpcP
11380   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // store form: src is the register operand, dst the memory operand
11381   ins_pipe( ialu_mem_reg );
11382 %}
11383 
11384 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ // MoveD2L: store a double register into a long stack slot with movsd
11385   match(Set dst (MoveD2L src));
11386   effect(DEF dst, USE src); // dst is only written, src is only read
11387 
11388   ins_cost(95); // XXX
11389   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11390   opcode(0xF2, 0x0F, 0x11); // primary, secondary, tertiary opcode bytes (used below via OpcP, OpcS, OpcT)
11391   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); // store form: src is the register operand, dst the memory operand
11392   ins_pipe(pipe_slow);
11393 %}
11394 
11395 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{ // MoveL2D: store a long register into a double stack slot with a 64-bit move
11396   match(Set dst (MoveL2D src));
11397   effect(DEF dst, USE src); // dst is only written, src is only read
11398 
11399   ins_cost(100);
11400   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11401   opcode(0x89); // same opcode byte as the 32-bit store; the _wide REX below selects the movq shown in the format
11402   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // store form: src is the register operand, dst the memory operand
11403   ins_pipe(ialu_mem_reg);
11404 %}
11405 
11406 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{ // MoveF2I directly from an XMM register to a general register
11407   match(Set dst (MoveF2I src));
11408   effect(DEF dst, USE src); // dst is only written, src is only read
11409   ins_cost(85); // lower than the stack-based MoveF2I forms (95 and 125)
11410   format %{ "movd    $dst,$src\t# MoveF2I" %}
11411   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11412   ins_pipe( pipe_slow );
11413 %}
11414 
11415 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{ // MoveD2L directly from an XMM register to a general register
11416   match(Set dst (MoveD2L src));
11417   effect(DEF dst, USE src); // dst is only written, src is only read
11418   ins_cost(85); // lower than the stack-based MoveD2L forms (95 and 125)
11419   format %{ "movd    $dst,$src\t# MoveD2L" %}
11420   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %} // the format prints "movd" but the assembler call is movdq
11421   ins_pipe( pipe_slow );
11422 %}
11423 
11424 // The next instructions have long latency and use Int unit. Set high cost.
11425 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{ // MoveI2F directly from a general register to an XMM register
11426   match(Set dst (MoveI2F src));
11427   effect(DEF dst, USE src); // dst is only written, src is only read
11428   ins_cost(300); // deliberately high, so the stack-based MoveI2F forms (100 and 125) are cheaper
11429   format %{ "movd    $dst,$src\t# MoveI2F" %}
11430   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11431   ins_pipe( pipe_slow );
11432 %}
11433 
11434 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ // MoveL2D directly from a general register to an XMM register
11435   match(Set dst (MoveL2D src));
11436   effect(DEF dst, USE src); // dst is only written, src is only read
11437   ins_cost(300); // deliberately high, so the stack-based MoveL2D forms (100 and 125) are cheaper
11438   format %{ "movd    $dst,$src\t# MoveL2D" %}
11439   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %} // the format prints "movd" but the assembler call is movdq
11440   ins_pipe( pipe_slow );
11441 %}
11442 
11443 // Replicate scalar to packed byte (1 byte) values in xmm
11444 instruct Repl8B_reg(regD dst, regD src) %{ // Replicate8B with the scalar already in an XMM register
11445   match(Set dst (Replicate8B src));
11446   format %{ "MOVDQA  $dst,$src\n\t"
11447             "PUNPCKLBW $dst,$dst\n\t"
11448             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11449   ins_encode( pshufd_8x8(dst, src)); // NOTE(review): the single pshufd_8x8 enc_class is expected to emit all three instructions listed in the format -- confirm
11450   ins_pipe( pipe_slow );
11451 %}
11452 
11453 // Replicate scalar to packed byte (1 byte) values in xmm
11454 instruct Repl8B_rRegI(regD dst, rRegI src) %{ // Replicate8B with the scalar in a general register
11455   match(Set dst (Replicate8B src));
11456   format %{ "MOVD    $dst,$src\n\t"
11457             "PUNPCKLBW $dst,$dst\n\t"
11458             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11459   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); // first move src into dst, then replicate within dst
11460   ins_pipe( pipe_slow );
11461 %}
11462 
11463 // Replicate scalar zero to packed byte (1 byte) values in xmm
11464 instruct Repl8B_immI0(regD dst, immI0 zero) %{ // Replicate8B of constant zero
11465   match(Set dst (Replicate8B zero)); // zero takes part in matching only; it is not passed to the encoding
11466   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11467   ins_encode( pxor(dst, dst)); // xor of dst with itself produces the all-zero vector
11468   ins_pipe( fpu_reg_reg );
11469 %}
11470 
11471 // Replicate scalar to packed short (2 byte) values in xmm
11472 instruct Repl4S_reg(regD dst, regD src) %{ // Replicate4S with the scalar already in an XMM register
11473   match(Set dst (Replicate4S src));
11474   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11475   ins_encode( pshufd_4x16(dst, src)); // the format shows this as a single PSHUFLW with selector 0x00
11476   ins_pipe( fpu_reg_reg );
11477 %}
11478 
11479 // Replicate scalar to packed short (2 byte) values in xmm
11480 instruct Repl4S_rRegI(regD dst, rRegI src) %{ // Replicate4S with the scalar in a general register
11481   match(Set dst (Replicate4S src));
11482   format %{ "MOVD    $dst,$src\n\t"
11483             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11484   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // first move src into dst, then replicate within dst
11485   ins_pipe( fpu_reg_reg );
11486 %}
11487 
11488 // Replicate scalar zero to packed short (2 byte) values in xmm
11489 instruct Repl4S_immI0(regD dst, immI0 zero) %{ // Replicate4S of constant zero
11490   match(Set dst (Replicate4S zero)); // zero takes part in matching only; it is not passed to the encoding
11491   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11492   ins_encode( pxor(dst, dst)); // xor of dst with itself produces the all-zero vector
11493   ins_pipe( fpu_reg_reg );
11494 %}
11495 
11496 // Replicate scalar to packed char (2 byte) values in xmm
11497 instruct Repl4C_reg(regD dst, regD src) %{ // Replicate4C with the scalar already in an XMM register
11498   match(Set dst (Replicate4C src));
11499   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11500   ins_encode( pshufd_4x16(dst, src)); // same encoding as the Replicate4S register form
11501   ins_pipe( fpu_reg_reg );
11502 %}
11503 
11504 // Replicate scalar to packed char (2 byte) values in xmm
11505 instruct Repl4C_rRegI(regD dst, rRegI src) %{ // Replicate4C with the scalar in a general register
11506   match(Set dst (Replicate4C src));
11507   format %{ "MOVD    $dst,$src\n\t"
11508             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11509   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); // first move src into dst, then replicate within dst
11510   ins_pipe( fpu_reg_reg );
11511 %}
11512 
11513 // Replicate scalar zero to packed char (2 byte) values in xmm
11514 instruct Repl4C_immI0(regD dst, immI0 zero) %{ // Replicate4C of constant zero
11515   match(Set dst (Replicate4C zero)); // zero takes part in matching only; it is not passed to the encoding
11516   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11517   ins_encode( pxor(dst, dst)); // xor of dst with itself produces the all-zero vector
11518   ins_pipe( fpu_reg_reg );
11519 %}
11520 
11521 // Replicate scalar to packed integer (4 byte) values in xmm
11522 instruct Repl2I_reg(regD dst, regD src) %{ // Replicate2I with the scalar already in an XMM register
11523   match(Set dst (Replicate2I src));
11524   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11525   ins_encode( pshufd(dst, src, 0x00)); // selector 0x00 picks source element 0 for every 32-bit result element
11526   ins_pipe( fpu_reg_reg );
11527 %}
11528 
11529 // Replicate scalar to packed integer (4 byte) values in xmm
11530 instruct Repl2I_rRegI(regD dst, rRegI src) %{ // Replicate2I with the scalar in a general register
11531   match(Set dst (Replicate2I src));
11532   format %{ "MOVD   $dst,$src\n\t"
11533             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11534   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); // first move src into dst, then replicate element 0 within dst
11535   ins_pipe( fpu_reg_reg );
11536 %}
11537 
11538 // Replicate scalar zero to packed integer (4 byte) values in xmm
11539 instruct Repl2I_immI0(regD dst, immI0 zero) %{ // Replicate2I of constant zero
11540   match(Set dst (Replicate2I zero)); // zero takes part in matching only; it is not passed to the encoding
11541   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11542   ins_encode( pxor(dst, dst)); // xor of dst with itself produces the all-zero vector
11543   ins_pipe( fpu_reg_reg );
11544 %}
11545 
11546 // Replicate scalar to packed single precision floating point values in xmm
11547 instruct Repl2F_reg(regD dst, regD src) %{ // Replicate2F with the scalar in a regD-class register
11548   match(Set dst (Replicate2F src));
11549   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11550   ins_encode( pshufd(dst, src, 0xe0)); // selector 0xe0: result elements 0 and 1 both take source element 0; elements 2 and 3 keep their position
11551   ins_pipe( fpu_reg_reg );
11552 %}
11553 
11554 // Replicate scalar to packed single precision floating point values in xmm
11555 instruct Repl2F_regF(regD dst, regF src) %{ // Replicate2F with the scalar in a regF-class register; otherwise identical to Repl2F_reg
11556   match(Set dst (Replicate2F src));
11557   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11558   ins_encode( pshufd(dst, src, 0xe0)); // selector 0xe0: result elements 0 and 1 both take source element 0; elements 2 and 3 keep their position
11559   ins_pipe( fpu_reg_reg );
11560 %}
11561 
11562 // Replicate scalar zero to packed single precision floating point values in xmm
11563 instruct Repl2F_immF0(regD dst, immF0 zero) %{ // Replicate2F of the float constant zero
11564   match(Set dst (Replicate2F zero)); // zero takes part in matching only; it is not passed to the encoding
11565   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11566   ins_encode( pxor(dst, dst)); // xor of dst with itself produces the all-zero vector
11567   ins_pipe( fpu_reg_reg );
11568 %}
11569 
11570 
11571 // =======================================================================
11572 // fast clearing of an array
11573 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, // ClearArray via "rep stosq"; operand classes pin cnt/base/zero to rcx/rdi/rax
11574                   rFlagsReg cr)
11575 %{
11576   match(Set dummy (ClearArray cnt base)); // dummy only gives the Set a destination; it is not used in the encoding
11577   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); // cnt and base are consumed and modified; zero (rax) and the flags are clobbered
11578 
11579   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11580             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11581   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11582              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11583   ins_pipe(pipe_slow);
11584 %}
11585 
11586 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11587                         rax_RegI result, regD tmp1, rFlagsReg cr)
11588 %{
        // Intrinsic for the StrComp ideal node.  All integer operands are
        // pinned to fixed registers; the result is produced in RAX.
11589   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // Both string pointers and both counts are destroyed, tmp1 is an XMM
        // scratch register, and the flags are clobbered.
11590   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11591 
11592   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11593   ins_encode %{
          // Note the argument order: both pointers first, then both counts.
11594     __ string_compare($str1$$Register, $str2$$Register,
11595                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11596                       $tmp1$$XMMRegister);
11597   %}
11598   ins_pipe( pipe_slow );
11599 %}
11600 
11601 // fast search of substring with known size.
      // Selected only when UseSSE42Intrinsics is set.  The substring length is
      // a compile-time constant (immI int_cnt2); RAX (cnt2) and RCX (tmp) are
      // not inputs here, they are only handed to the assembler routine as
      // scratch registers and are therefore declared KILL.
11602 instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11603                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11604 %{
11605   predicate(UseSSE42Intrinsics);
11606   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11607   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11608 
11609   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11610   ins_encode %{
          // Choose the code generator from the constant substring length.
11611     int icnt2 = (int)$int_cnt2$$constant;
11612     if (icnt2 >= 8) {
11613       // IndexOf for constant substrings with size >= 8 elements
11614       // which don't need to be loaded through stack.
11615       __ string_indexofC8($str1$$Register, $str2$$Register,
11616                           $cnt1$$Register, $cnt2$$Register,
11617                           icnt2, $result$$Register,
11618                           $vec$$XMMRegister, $tmp$$Register);
11619     } else {
11620       // Small strings are loaded through stack if they cross page boundary.
11621       __ string_indexof($str1$$Register, $str2$$Register,
11622                         $cnt1$$Register, $cnt2$$Register,
11623                         icnt2, $result$$Register,
11624                         $vec$$XMMRegister, $tmp$$Register);
11625     }
11626   %}
11627   ins_pipe( pipe_slow );
11628 %}
11629 
11630 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11631                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
11632 %{
        // General StrIndexOf intrinsic: the substring length arrives in a
        // register (cnt2, RAX) rather than as a constant.  Selected only when
        // UseSSE42Intrinsics is set; the result is produced in RBX.
11633   predicate(UseSSE42Intrinsics);
11634   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11635   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11636 
11637   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11638   ins_encode %{
          // The constant-length argument is passed as -1 here.
          // NOTE(review): presumably -1 tells string_indexof that the length
          // is only known at run time -- confirm against the assembler.
11639     __ string_indexof($str1$$Register, $str2$$Register,
11640                       $cnt1$$Register, $cnt2$$Register,
11641                       (-1), $result$$Register,
11642                       $vec$$XMMRegister, $tmp$$Register);
11643   %}
11644   ins_pipe( pipe_slow );
11645 %}
11646 
11647 // fast string equals
      // Matches StrEquals with an explicit element count in RCX.  Shares the
      // char_arrays_equals code generator with array_equals; the leading
      // 'false' argument selects the string flavour.
11648 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11649                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11650 %{
11651   match(Set result (StrEquals (Binary str1 str2) cnt));
        // Inputs are destroyed; tmp1/tmp2 are XMM scratch, RBX (tmp3) and the
        // flags are clobbered.
11652   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11653 
11654   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11655   ins_encode %{
11656     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11657                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11658                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11659   %}
11660   ins_pipe( pipe_slow );
11661 %}
11662 
11663 // fast array equals
      // Matches AryEq, which carries no count operand: RCX (tmp3) is passed in
      // the count position of char_arrays_equals purely as a scratch register,
      // and the leading 'true' argument selects the array flavour.
11664 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11665                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11666 %{
11667   match(Set result (AryEq ary1 ary2));
11668   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11669   //ins_cost(300);
11670 
11671   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11672   ins_encode %{
11673     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11674                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11675                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11676   %}
11677   ins_pipe( pipe_slow );
11678 %}
11679 
11680 //----------Control Flow Instructions------------------------------------------
11681 // Signed compare Instructions
11682 
11683 // XXX more variants!!
      // Signed 32-bit register/register compare.  Besides matching CmpI, this
      // rule is also instantiated by the minI_rReg and maxI_rReg expansions.
11684 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11685 %{
11686   match(Set cr (CmpI op1 op2));
        // Operand roles are spelled out explicitly: flags are defined, both
        // inputs are only read.
11687   effect(DEF cr, USE op1, USE op2);
11688 
11689   format %{ "cmpl    $op1, $op2" %}
11690   opcode(0x3B);  /* Opcode 3B /r */
11691   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11692   ins_pipe(ialu_cr_reg_reg);
11693 %}
11694 
11695 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11696 %{
        // Signed 32-bit compare of a register against an int immediate.
        // NOTE(review): OpcSErm/Con8or32 are defined elsewhere; by their names
        // they presumably pick the sign-extended imm8 form when the constant
        // fits -- confirm in the encoding block.
11697   match(Set cr (CmpI op1 op2));
11698 
11699   format %{ "cmpl    $op1, $op2" %}
11700   opcode(0x81, 0x07); /* Opcode 81 /7 */
11701   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11702   ins_pipe(ialu_cr_reg_imm);
11703 %}
11704 
11705 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11706 %{
        // Signed 32-bit compare of a register against an int loaded from
        // memory; the LoadI is folded into the cmpl's memory operand.
11707   match(Set cr (CmpI op1 (LoadI op2)));
11708 
11709   ins_cost(500); // XXX
11710   format %{ "cmpl    $op1, $op2" %}
11711   opcode(0x3B); /* Opcode 3B /r */
11712   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11713   ins_pipe(ialu_cr_reg_mem);
11714 %}
11715 
11716 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11717 %{
        // Compare of an int register against constant zero, implemented as
        // "testl src, src" (opcode 85 /r) so no immediate is encoded.
11718   match(Set cr (CmpI src zero));
11719 
11720   format %{ "testl   $src, $src" %}
11721   opcode(0x85);
11722   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11723   ins_pipe(ialu_cr_reg_imm);
11724 %}
11725 
11726 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11727 %{
        // Folds (src & con) compared against zero into a single testl with a
        // 32-bit immediate (opcode F7 /0); no register receives the AND result.
11728   match(Set cr (CmpI (AndI src con) zero));
11729 
11730   format %{ "testl   $src, $con" %}
11731   opcode(0xF7, 0x00);
11732   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11733   ins_pipe(ialu_cr_reg_imm);
11734 %}
11735 
11736 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11737 %{
        // Folds (src & LoadI(mem)) compared against zero into one testl with a
        // memory operand (opcode 85 /r).
11738   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11739 
11740   format %{ "testl   $src, $mem" %}
11741   opcode(0x85);
11742   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11743   ins_pipe(ialu_cr_reg_mem);
11744 %}
11745 
11746 // Unsigned compare Instructions; really, same as signed except they
11747 // produce an rFlagsRegU instead of rFlagsReg.
      // Register/register form: identical bytes to compI_rReg (3B /r); only
      // the flags operand class and the matched ideal node (CmpU) differ.
11748 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11749 %{
11750   match(Set cr (CmpU op1 op2));
11751 
11752   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11753   opcode(0x3B); /* Opcode 3B /r */
11754   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11755   ins_pipe(ialu_cr_reg_reg);
11756 %}
11757 
11758 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11759 %{
        // Unsigned 32-bit compare of a register against an immediate; same
        // encoding as compI_rReg_imm (81 /7), result typed as rFlagsRegU.
11760   match(Set cr (CmpU op1 op2));
11761 
11762   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11763   opcode(0x81,0x07); /* Opcode 81 /7 */
11764   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11765   ins_pipe(ialu_cr_reg_imm);
11766 %}
11767 
11768 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11769 %{
        // Unsigned 32-bit compare of a register against an int loaded from
        // memory; same encoding as compI_rReg_mem (3B /r).
11770   match(Set cr (CmpU op1 (LoadI op2)));
11771 
11772   ins_cost(500); // XXX
11773   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11774   opcode(0x3B); /* Opcode 3B /r */
11775   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11776   ins_pipe(ialu_cr_reg_mem);
11777 %}
11778 
11779 // // // Cisc-spilled version of cmpU_rReg
11780 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11781 // //%{
11782 // //  match(Set cr (CmpU (LoadI op1) op2));
11783 // //
11784 // //  format %{ "CMPu   $op1,$op2" %}
11785 // //  ins_cost(500);
11786 // //  opcode(0x39);  /* Opcode 39 /r */
11787 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11788 // //%}
11789 
11790 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11791 %{
        // Unsigned compare of an int register against constant zero,
        // implemented as "testl src, src" (opcode 85 /r).
11792   match(Set cr (CmpU src zero));
11793 
11794   format %{ "testl  $src, $src\t# unsigned" %}
11795   opcode(0x85);
11796   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11797   ins_pipe(ialu_cr_reg_imm);
11798 %}
11799 
11800 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11801 %{
        // 64-bit pointer compare, register/register.  The result is typed as
        // unsigned flags (rFlagsRegU); the _wide REX encoder is used for the
        // cmpq form shown in the format.
11802   match(Set cr (CmpP op1 op2));
11803 
11804   format %{ "cmpq    $op1, $op2\t# ptr" %}
11805   opcode(0x3B); /* Opcode 3B /r */
11806   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11807   ins_pipe(ialu_cr_reg_reg);
11808 %}
11809 
11810 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11811 %{
        // 64-bit pointer compare of a register against a pointer loaded from
        // memory.  Has no predicate and an explicit cost of 500, unlike the
        // raw-pointer rule compP_mem_rReg that matches the same tree.
11812   match(Set cr (CmpP op1 (LoadP op2)));
11813 
11814   ins_cost(500); // XXX
11815   format %{ "cmpq    $op1, $op2\t# ptr" %}
11816   opcode(0x3B); /* Opcode 3B /r */
11817   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11818   ins_pipe(ialu_cr_reg_mem);
11819 %}
11820 
11821 // // // Cisc-spilled version of cmpP_rReg
11822 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11823 // //%{
11824 // //  match(Set cr (CmpP (LoadP op1) op2));
11825 // //
11826 // //  format %{ "CMPu   $op1,$op2" %}
11827 // //  ins_cost(500);
11828 // //  opcode(0x39);  /* Opcode 39 /r */
11829 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11830 // //%}
11831 
11832 // XXX this is generalized by compP_rReg_mem???
11833 // Compare raw pointer (used in out-of-heap check).
11834 // Only works because non-oop pointers must be raw pointers
11835 // and raw pointers have no anti-dependencies.
      // Same tree and same bytes as compP_rReg_mem, but without an explicit
      // ins_cost and restricted by the predicate to loads whose type is not an
      // oop pointer.
11836 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11837 %{
        // n->in(2) is the LoadP; its in(2) is inspected for a non-oop type.
11838   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11839   match(Set cr (CmpP op1 (LoadP op2)));
11840 
11841   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11842   opcode(0x3B); /* Opcode 3B /r */
11843   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11844   ins_pipe(ialu_cr_reg_mem);
11845 %}
11846 
11847 // This will generate a signed flags result. This should be OK since
11848 // any compare to a zero should be eq/neq.
      // Null check of a pointer register, implemented as "testq src, src"
      // (opcode 85 /r with the wide REX prefix).
11849 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11850 %{
11851   match(Set cr (CmpP src zero));
11852 
11853   format %{ "testq   $src, $src\t# ptr" %}
11854   opcode(0x85);
11855   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11856   ins_pipe(ialu_cr_reg_imm);
11857 %}
11858 
11859 // This will generate a signed flags result. This should be OK since
11860 // any compare to a zero should be eq/neq.
      // Null check of a pointer held in memory, without loading it into a
      // register.  Used when R12 cannot serve as a zero register, i.e. when
      // compressed oops are off or the narrow-oop base is non-NULL; the
      // complementary case is handled by testP_mem_reg0.
11861 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11862 %{
11863   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11864   match(Set cr (CmpP (LoadP op) zero));
11865 
11866   ins_cost(500); // XXX
11867   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11868   opcode(0xF7); /* Opcode F7 /0 */
        // Only a 32-bit immediate is emitted; the format shows it as 64 bits
        // of ones because TEST r/m64, imm32 sign-extends the immediate.
11869   ins_encode(REX_mem_wide(op),
11870              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11871   ins_pipe(ialu_cr_reg_imm);
11872 %}
11873 
11874 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11875 %{
        // Null check of a pointer held in memory for the configuration where
        // compressed oops are on and the narrow-oop base is NULL: per the
        // format string R12 (heap base) then holds zero, so comparing R12 with
        // the memory operand avoids encoding an immediate.
11876   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11877   match(Set cr (CmpP (LoadP mem) zero));
11878 
11879   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11880   ins_encode %{
11881     __ cmpq(r12, $mem$$Address);
11882   %}
11883   ins_pipe(ialu_cr_reg_mem);
11884 %}
11885 
11886 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11887 %{
        // Compare of two compressed (narrow) oops held in registers; emitted
        // as a 32-bit cmpl, result typed as unsigned flags.
11888   match(Set cr (CmpN op1 op2));
11889 
11890   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11891   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11892   ins_pipe(ialu_cr_reg_reg);
11893 %}
11894 
11895 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11896 %{
        // Compare of a narrow-oop register against a narrow oop loaded from
        // memory; the LoadN is folded into the cmpl's memory operand.
11897   match(Set cr (CmpN src (LoadN mem)));
11898 
11899   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11900   ins_encode %{
11901     __ cmpl($src$$Register, $mem$$Address);
11902   %}
11903   ins_pipe(ialu_cr_reg_mem);
11904 %}
11905 
11906 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a narrow-oop register against a narrow-oop constant.  The
        // constant is handed to cmp_narrow_oop as a jobject.
11907   match(Set cr (CmpN op1 op2));
11908 
11909   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11910   ins_encode %{
11911     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11912   %}
11913   ins_pipe(ialu_cr_reg_imm);
11914 %}
11915 
11916 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11917 %{
        // Compare of a narrow oop in memory against a narrow-oop constant.
        // The match tree lists the constant first, while the emitted compare
        // has the memory operand on the left.
        // NOTE(review): the operand order is swapped between the ideal node
        // and the instruction -- presumably harmless because only eq/ne is
        // tested on narrow oops; confirm.
11918   match(Set cr (CmpN src (LoadN mem)));
11919 
11920   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11921   ins_encode %{
11922     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11923   %}
11924   ins_pipe(ialu_cr_reg_mem);
11925 %}
11926 
11927 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a narrow-oop register, implemented as a 32-bit
        // "testl src, src".
11928   match(Set cr (CmpN src zero));
11929 
11930   format %{ "testl   $src, $src\t# compressed ptr" %}
11931   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11932   ins_pipe(ialu_cr_reg_imm);
11933 %}
11934 
11935 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11936 %{
        // Null check of a narrow oop held in memory, for the configuration
        // where the narrow-oop base is non-NULL (the NULL-base case is covered
        // by testN_mem_reg0).
        //
        // The matched tree compares the loaded value against zero, so the
        // emitted compare must set ZF exactly when the 32-bit slot is 0.  The
        // previous encoding compared against 0xFFFFFFFF, which sets ZF only
        // when the slot holds -1, and its format string advertised a testl
        // that was never emitted.  Comparing against 0 uses the same
        // cmpl(Address, imm32) form, so the instruction length is unchanged.
11937   predicate(Universe::narrow_oop_base() != NULL);
11938   match(Set cr (CmpN (LoadN mem) zero));
11939 
11940   ins_cost(500); // XXX
11941   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11942   ins_encode %{
11943     __ cmpl($mem$$Address, (int)0);
11944   %}
11945   ins_pipe(ialu_cr_reg_mem);
11946 %}
11947 
11948 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11949 %{
        // Null check of a narrow oop held in memory when the narrow-oop base
        // is NULL: per the format string R12 (heap base) then holds zero, so a
        // 32-bit compare of R12 with the memory operand needs no immediate.
11950   predicate(Universe::narrow_oop_base() == NULL);
11951   match(Set cr (CmpN (LoadN mem) zero));
11952 
11953   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11954   ins_encode %{
11955     __ cmpl(r12, $mem$$Address);
11956   %}
11957   ins_pipe(ialu_cr_reg_mem);
11958 %}
11959 
11960 // Yanked all unsigned pointer compare operations.
11961 // Pointer compares are done with CmpP which is already unsigned.
11962 
11963 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11964 %{
        // Signed 64-bit register/register compare (3B /r with the wide REX
        // prefix).
11965   match(Set cr (CmpL op1 op2));
11966 
11967   format %{ "cmpq    $op1, $op2" %}
11968   opcode(0x3B);  /* Opcode 3B /r */
11969   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11970   ins_pipe(ialu_cr_reg_reg);
11971 %}
11972 
11973 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11974 %{
        // Signed 64-bit compare of a register against a constant.  The operand
        // class immL32 limits the rule to long constants that can be carried
        // by the immediate field of 81 /7.
11975   match(Set cr (CmpL op1 op2));
11976 
11977   format %{ "cmpq    $op1, $op2" %}
11978   opcode(0x81, 0x07); /* Opcode 81 /7 */
11979   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11980   ins_pipe(ialu_cr_reg_imm);
11981 %}
11982 
11983 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11984 %{
        // Signed 64-bit compare of a register against a long loaded from
        // memory.  Unlike the int and pointer reg/mem rules, this one declares
        // no explicit ins_cost.
11985   match(Set cr (CmpL op1 (LoadL op2)));
11986 
11987   format %{ "cmpq    $op1, $op2" %}
11988   opcode(0x3B); /* Opcode 3B /r */
11989   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11990   ins_pipe(ialu_cr_reg_mem);
11991 %}
11992 
11993 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11994 %{
        // Compare of a long register against constant zero, implemented as
        // "testq src, src" (opcode 85 /r with the wide REX prefix).
11995   match(Set cr (CmpL src zero));
11996 
11997   format %{ "testq   $src, $src" %}
11998   opcode(0x85);
11999   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12000   ins_pipe(ialu_cr_reg_imm);
12001 %}
12002 
12003 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12004 %{
        // Folds (src & con) compared against zero into a single testq with a
        // 32-bit immediate (opcode F7 /0); the mask must be an immL32.
12005   match(Set cr (CmpL (AndL src con) zero));
12006 
12007   format %{ "testq   $src, $con\t# long" %}
12008   opcode(0xF7, 0x00);
12009   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
12010   ins_pipe(ialu_cr_reg_imm);
12011 %}
12012 
12013 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12014 %{
        // Folds (src & LoadL(mem)) compared against zero into one testq with a
        // memory operand (opcode 85 /r with the wide REX prefix).
12015   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12016 
12017   format %{ "testq   $src, $mem" %}
12018   opcode(0x85);
12019   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12020   ins_pipe(ialu_cr_reg_mem);
12021 %}
12022 
12023 // Manifest a CmpL result in an integer register.  Very painful.
12024 // This is the test to avoid.
      // Going by the format listing, $dst is preset to -1 and kept if
      // src1 < src2; otherwise setne/movzbl leave 0 (equal) or 1 (greater).
      // The actual bytes come from the cmpl3_flag enc class, which is defined
      // outside this section.
12025 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12026 %{
12027   match(Set dst (CmpL3 src1 src2));
        // The flags are only an internal by-product of the sequence.
12028   effect(KILL flags);
12029 
12030   ins_cost(275); // XXX
12031   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12032             "movl    $dst, -1\n\t"
12033             "jl,s    done\n\t"
12034             "setne   $dst\n\t"
12035             "movzbl  $dst, $dst\n\t"
12036     "done:" %}
12037   ins_encode(cmpl3_flag(src1, src2, dst));
12038   ins_pipe(pipe_slow);
12039 %}
12040 
12041 //----------Max and Min--------------------------------------------------------
12042 // Min Instructions
12043 
12044 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12045 %{
        // Helper with no match rule: it is only instantiated by the expand
        // block of minI_rReg.  Conditional move (0F 4F /r) that replaces $dst
        // with $src when the preceding compare found dst greater than src.
12046   effect(USE_DEF dst, USE src, USE cr);
12047 
12048   format %{ "cmovlgt $dst, $src\t# min" %}
12049   opcode(0x0F, 0x4F);
12050   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12051   ins_pipe(pipe_cmov_reg);
12052 %}
12053 
12054 
12055 instruct minI_rReg(rRegI dst, rRegI src)
12056 %{
        // Integer minimum, computed in place in $dst.  Expands into a compare
        // followed by a conditional move; cr is a flags temporary that only
        // links the two generated instructions.
12057   match(Set dst (MinI dst src));
12058 
12059   ins_cost(200);
12060   expand %{
12061     rFlagsReg cr;
12062     compI_rReg(cr, dst, src);
12063     cmovI_reg_g(dst, src, cr);
12064   %}
12065 %}
12066 
12067 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12068 %{
        // Helper with no match rule: it is only instantiated by the expand
        // block of maxI_rReg.  Conditional move (0F 4C /r) that replaces $dst
        // with $src when the preceding compare found dst less than src.
12069   effect(USE_DEF dst, USE src, USE cr);
12070 
12071   format %{ "cmovllt $dst, $src\t# max" %}
12072   opcode(0x0F, 0x4C);
12073   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12074   ins_pipe(pipe_cmov_reg);
12075 %}
12076 
12077 
12078 instruct maxI_rReg(rRegI dst, rRegI src)
12079 %{
        // Integer maximum, computed in place in $dst.  Expands into a compare
        // followed by a conditional move; cr is a flags temporary that only
        // links the two generated instructions.
12080   match(Set dst (MaxI dst src));
12081 
12082   ins_cost(200);
12083   expand %{
12084     rFlagsReg cr;
12085     compI_rReg(cr, dst, src);
12086     cmovI_reg_l(dst, src, cr);
12087   %}
12088 %}
12089 
12090 // ============================================================================
12091 // Branch Instructions
12092 
12093 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional jump: opcode E9 followed by a 32-bit
      // displacement, 5 bytes in total.  jmpDir_short is the 2-byte
      // replacement for nearby targets.
12094 instruct jmpDir(label labl)
12095 %{
12096   match(Goto);
12097   effect(USE labl);
12098 
12099   ins_cost(300);
12100   format %{ "jmp     $labl" %}
12101   size(5);
12102   opcode(0xE9);
12103   ins_encode(OpcP, Lbl(labl));
12104   ins_pipe(pipe_jmp);
12105   ins_pc_relative(1);
12106 %}
12107 
12108 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form for signed conditions: two opcode bytes (0F 80 combined
      // with the condition code by Jcc) plus a 32-bit displacement, 6 bytes.
12109 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12110 %{
12111   match(If cop cr);
12112   effect(USE labl);
12113 
12114   ins_cost(300);
12115   format %{ "j$cop     $labl" %}
12116   size(6);
12117   opcode(0x0F, 0x80);
12118   ins_encode(Jcc(cop, labl));
12119   ins_pipe(pipe_jcc);
12120   ins_pc_relative(1);
12121 %}
12122 
12123 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matches the CountedLoopEnd node so the
      // back branch of a counted loop keeps its own machine node type.
12124 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12125 %{
12126   match(CountedLoopEnd cop cr);
12127   effect(USE labl);
12128 
12129   ins_cost(300);
12130   format %{ "j$cop     $labl\t# loop end" %}
12131   size(6);
12132   opcode(0x0F, 0x80);
12133   ins_encode(Jcc(cop, labl));
12134   ins_pipe(pipe_jcc);
12135   ins_pc_relative(1);
12136 %}
12137 
12138 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Counted-loop back branch on an unsigned comparison: takes a cmpOpU
      // condition and unsigned flags, otherwise encoded like jmpLoopEnd.
12139 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12140   match(CountedLoopEnd cop cmp);
12141   effect(USE labl);
12142 
12143   ins_cost(300);
12144   format %{ "j$cop,u   $labl\t# loop end" %}
12145   size(6);
12146   opcode(0x0F, 0x80);
12147   ins_encode(Jcc(cop, labl));
12148   ins_pipe(pipe_jcc);
12149   ins_pc_relative(1);
12150 %}
12151 
12152 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Counted-loop back branch for the cmpOpUCF / rFlagsRegUCF operand
        // classes.  Same bytes as jmpLoopEndU, but with a lower cost (200).
12153   match(CountedLoopEnd cop cmp);
12154   effect(USE labl);
12155 
12156   ins_cost(200);
12157   format %{ "j$cop,u   $labl\t# loop end" %}
12158   size(6);
12159   opcode(0x0F, 0x80);
12160   ins_encode(Jcc(cop, labl));
12161   ins_pipe(pipe_jcc);
12162   ins_pc_relative(1);
12163 %}
12164 
12165 // Jump Direct Conditional - using unsigned comparison
      // Long (6-byte) conditional branch for an If node whose condition is a
      // cmpOpU evaluated on unsigned flags.
12166 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12167   match(If cop cmp);
12168   effect(USE labl);
12169 
12170   ins_cost(300);
12171   format %{ "j$cop,u  $labl" %}
12172   size(6);
12173   opcode(0x0F, 0x80);
12174   ins_encode(Jcc(cop, labl));
12175   ins_pipe(pipe_jcc);
12176   ins_pc_relative(1);
12177 %}
12178 
12179 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Long conditional branch for the cmpOpUCF / rFlagsRegUCF operand
        // classes.  Same bytes as jmpConU, but with a lower cost (200).
12180   match(If cop cmp);
12181   effect(USE labl);
12182 
12183   ins_cost(200);
12184   format %{ "j$cop,u  $labl" %}
12185   size(6);
12186   opcode(0x0F, 0x80);
12187   ins_encode(Jcc(cop, labl));
12188   ins_pipe(pipe_jcc);
12189   ins_pc_relative(1);
12190 %}
12191 
12192 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch that needs two jcc instructions: a jump on the
        // parity flag followed by the jump on the requested condition.  Only
        // eq and ne are handled (anything else hits ShouldNotReachHere).
        // Total size is two 6-byte long-form jumps.
12193   match(If cop cmp);
12194   effect(USE labl);
12195 
12196   ins_cost(200);
12197   format %{ $$template
12198     if ($cop$$cmpcode == Assembler::notEqual) {
12199       $$emit$$"jp,u   $labl\n\t"
12200       $$emit$$"j$cop,u   $labl"
12201     } else {
12202       $$emit$$"jp,u   done\n\t"
12203       $$emit$$"j$cop,u   $labl\n\t"
12204       $$emit$$"done:"
12205     }
12206   %}
12207   size(12);
12208   opcode(0x0F, 0x80);
12209   ins_encode %{
12210     Label* l = $labl$$label;
          // First jump: 0F 8x with the parity condition.
12211     $$$emit8$primary;
12212     emit_cc(cbuf, $secondary, Assembler::parity);
12213     int parity_disp = -1;
12214     if ($cop$$cmpcode == Assembler::notEqual) {
12215        // ne: parity also branches to $labl; displacement is relative to the end of this jcc (offset + 4-byte disp32)
12216        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12217     } else if ($cop$$cmpcode == Assembler::equal) {
             // eq: parity skips over the second 6-byte jump ("done" in the format).
12218        parity_disp = 6;
12219     } else {
12220        ShouldNotReachHere();
12221     }
12222     emit_d32(cbuf, parity_disp);
          // Second jump: 0F 8x with the requested condition, targeting $labl.
12223     $$$emit8$primary;
12224     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12225     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12226     emit_d32(cbuf, disp);
12227   %}
12228   ins_pipe(pipe_jcc);
12229   ins_pc_relative(1);
12230 %}
12231 
12232 // ============================================================================
12233 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12234 // superklass array for an instance of the superklass.  Set a hidden
12235 // internal cache on a hit (cache is checked with exposed code in
12236 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12237 // encoding ALSO sets flags.
12238 
12239 instruct partialSubtypeCheck(rdi_RegP result,
12240                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12241                              rFlagsReg cr)
12242 %{
        // Slow half of a subtype check that delivers its answer in a register:
        // per the format listing RDI ends up zero on a hit and non-zero on a
        // miss.  Operands are pinned (sub in RSI, super in RAX, result in RDI)
        // and RCX plus the flags are clobbered.
        //
        // The format text below is debug output only.  Two typos in it are
        // fixed here: "arrayOopDex" -> "arrayOopDesc", and the missing '#'
        // that marks "Hit: rdi zero" as an annotation like the other lines.
12243   match(Set result (PartialSubtypeCheck sub super));
12244   effect(KILL rcx, KILL cr);
12245 
12246   ins_cost(1100);  // slightly larger than the next version
12247   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12248             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12249             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12250             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12251             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12252             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12253             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12254     "miss:\t" %}
12255 
        // The primary opcode is used as a flag for the shared enc class:
        // 1 requests the trailing XOR of RDI.
12256   opcode(0x1); // Force a XOR of RDI
12257   ins_encode(enc_PartialSubtypeCheck());
12258   ins_pipe(pipe_slow);
12259 %}
12260 
12261 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12262                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12263                                      immP0 zero,
12264                                      rdi_RegP result)
12265 %{
        // Variant for a PartialSubtypeCheck whose result is immediately
        // compared against NULL: the answer is left in the flags, so RDI is
        // merely clobbered (KILL result) and the final XOR is skipped.
        // Slightly cheaper (1000) than the register-result version.
        //
        // The format text below is debug output only.  Two typos in it are
        // fixed here: "arrayOopDex" -> "arrayOopDesc", and "cx--" -> "rcx--"
        // to name the register consistently with the sibling rule.
12266   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12267   effect(KILL rcx, KILL result);
12268 
12269   ins_cost(1000);
12270   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12271             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12272             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12273             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx-- != 0\n\t"
12274             "jne,s   miss\t\t# Missed: flags nz\n\t"
12275             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12276     "miss:\t" %}
12277 
        // The primary opcode is used as a flag for the shared enc class:
        // 0 suppresses the trailing XOR of RDI.
12278   opcode(0x0); // No need to XOR RDI
12279   ins_encode(enc_PartialSubtypeCheck());
12280   ins_pipe(pipe_slow);
12281 %}
12282 
12283 // ============================================================================
12284 // Branch Instructions -- short offset versions
12285 //
12286 // These instructions are used to replace jumps of a long offset (the default
12287 // match) with jumps of a shorter offset.  These instructions are all tagged
12288 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12289 // match rules in general matching.  Instead, the ADLC generates a conversion
12290 // method in the MachNode which can be used to do in-place replacement of the
12291 // long variant with the shorter variant.  The compiler will determine if a
12292 // branch can be taken by the is_short_branch_offset() predicate in the machine
12293 // specific code section of the file.
12294 
12295 // Jump Direct - Label defines a relative address from JMP+1
      // Short form of jmpDir: opcode EB plus an 8-bit displacement, 2 bytes.
      // Tagged ins_short_branch, so it is substituted for the long form
      // rather than selected by general matching.
12296 instruct jmpDir_short(label labl) %{
12297   match(Goto);
12298   effect(USE labl);
12299 
12300   ins_cost(300);
12301   format %{ "jmp,s   $labl" %}
12302   size(2);
12303   opcode(0xEB);
12304   ins_encode(OpcP, LblShort(labl));
12305   ins_pipe(pipe_jmp);
12306   ins_pc_relative(1);
12307   ins_short_branch(1);
12308 %}
12309 
12310 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short form of jmpCon: one opcode byte (0x70 combined with the
      // condition code by JccShort) plus an 8-bit displacement, 2 bytes.
12311 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12312   match(If cop cr);
12313   effect(USE labl);
12314 
12315   ins_cost(300);
12316   format %{ "j$cop,s   $labl" %}
12317   size(2);
12318   opcode(0x70);
12319   ins_encode(JccShort(cop, labl));
12320   ins_pipe(pipe_jcc);
12321   ins_pc_relative(1);
12322   ins_short_branch(1);
12323 %}
12324 
12325 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) form of jmpLoopEnd, the signed counted-loop back
      // branch.
12326 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12327   match(CountedLoopEnd cop cr);
12328   effect(USE labl);
12329 
12330   ins_cost(300);
12331   format %{ "j$cop,s   $labl\t# loop end" %}
12332   size(2);
12333   opcode(0x70);
12334   ins_encode(JccShort(cop, labl));
12335   ins_pipe(pipe_jcc);
12336   ins_pc_relative(1);
12337   ins_short_branch(1);
12338 %}
12339 
12340 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) form of jmpLoopEndU, the unsigned counted-loop back
      // branch.
12341 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12342   match(CountedLoopEnd cop cmp);
12343   effect(USE labl);
12344 
12345   ins_cost(300);
12346   format %{ "j$cop,us  $labl\t# loop end" %}
12347   size(2);
12348   opcode(0x70);
12349   ins_encode(JccShort(cop, labl));
12350   ins_pipe(pipe_jcc);
12351   ins_pc_relative(1);
12352   ins_short_branch(1);
12353 %}
12354 
12355 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) form of jmpLoopEndUCF.  Note that its cost is 300
        // while the long form declares 200.
12356   match(CountedLoopEnd cop cmp);
12357   effect(USE labl);
12358 
12359   ins_cost(300);
12360   format %{ "j$cop,us  $labl\t# loop end" %}
12361   size(2);
12362   opcode(0x70);
12363   ins_encode(JccShort(cop, labl));
12364   ins_pipe(pipe_jcc);
12365   ins_pc_relative(1);
12366   ins_short_branch(1);
12367 %}
12368 
12369 // Jump Direct Conditional - using unsigned comparison
      // Short (2-byte) form of jmpConU.
12370 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12371   match(If cop cmp);
12372   effect(USE labl);
12373 
12374   ins_cost(300);
12375   format %{ "j$cop,us  $labl" %}
12376   size(2);
12377   opcode(0x70);
12378   ins_encode(JccShort(cop, labl));
12379   ins_pipe(pipe_jcc);
12380   ins_pc_relative(1);
12381   ins_short_branch(1);
12382 %}
12383 
12384 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) form of jmpConUCF.  Note that its cost is 300 while
        // the long form declares 200.
12385   match(If cop cmp);
12386   effect(USE labl);
12387 
12388   ins_cost(300);
12389   format %{ "j$cop,us  $labl" %}
12390   size(2);
12391   opcode(0x70);
12392   ins_encode(JccShort(cop, labl));
12393   ins_pipe(pipe_jcc);
12394   ins_pc_relative(1);
12395   ins_short_branch(1);
12396 %}
12397 
12398 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short form of jmpConUCF2: two 2-byte jcc instructions (parity, then
        // the requested condition), 4 bytes in total.  Only eq and ne are
        // handled.
12399   match(If cop cmp);
12400   effect(USE labl);
12401 
12402   ins_cost(300);
12403   format %{ $$template
12404     if ($cop$$cmpcode == Assembler::notEqual) {
12405       $$emit$$"jp,u,s   $labl\n\t"
12406       $$emit$$"j$cop,u,s   $labl"
12407     } else {
12408       $$emit$$"jp,u,s   done\n\t"
12409       $$emit$$"j$cop,u,s  $labl\n\t"
12410       $$emit$$"done:"
12411     }
12412   %}
12413   size(4);
12414   opcode(0x70);
12415   ins_encode %{
12416     Label* l = $labl$$label;
          // First jump: 0x7x with the parity condition.
12417     emit_cc(cbuf, $primary, Assembler::parity);
12418     int parity_disp = -1;
12419     if ($cop$$cmpcode == Assembler::notEqual) {
            // ne: parity also branches to $labl, relative to the end of this
            // jcc (current offset plus the 1-byte displacement).
12420       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12421     } else if ($cop$$cmpcode == Assembler::equal) {
            // eq: parity skips over the second 2-byte jump.
12422       parity_disp = 2;
12423     } else {
12424       ShouldNotReachHere();
12425     }
12426     emit_d8(cbuf, parity_disp);
          // Second jump: 0x7x with the requested condition, targeting $labl.
12427     emit_cc(cbuf, $primary, $cop$$cmpcode);
12428     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12429     emit_d8(cbuf, disp);
          // Range checks run after the bytes have already been emitted.
12430     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12431     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12432   %}
12433   ins_pipe(pipe_jcc);
12434   ins_pc_relative(1);
12435   ins_short_branch(1);
12436 %}
12437 
12438 // ============================================================================
12439 // inlined locking and unlocking
12440 
12441 instruct cmpFastLock(rFlagsReg cr,
12442                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12443 %{
        // Inlined monitor-enter fast path.  The outcome is delivered in the
        // flags register; the code itself comes from the Fast_Lock enc class.
        // tmp is pinned to RAX and, like scr, is only a scratch register.
12444   match(Set cr (FastLock object box));
12445   effect(TEMP tmp, TEMP scr);
12446 
12447   ins_cost(300);
12448   format %{ "fastlock $object,$box,$tmp,$scr" %}
12449   ins_encode(Fast_Lock(object, box, tmp, scr));
12450   ins_pipe(pipe_slow);
12451   ins_pc_relative(1);
12452 %}
12453 
12454 instruct cmpFastUnlock(rFlagsReg cr,
12455                        rRegP object, rax_RegP box, rRegP tmp)
12456 %{
        // Inlined monitor-exit fast path.  The outcome is delivered in the
        // flags register; the code itself comes from the Fast_Unlock enc
        // class.  Here the box operand is the one pinned to RAX, and tmp is a
        // scratch register.
12457   match(Set cr (FastUnlock object box));
12458   effect(TEMP tmp);
12459 
12460   ins_cost(300);
12461   format %{ "fastunlock $object, $box, $tmp" %}
12462   ins_encode(Fast_Unlock(object, box, tmp));
12463   ins_pipe(pipe_slow);
12464   ins_pc_relative(1);
12465 %}
12466 
12467 
12468 // ============================================================================
12469 // Safepoint Instructions
12470 instruct safePoint_poll(rFlagsReg cr)
12471 %{
        // Selected for the ideal SafePoint node. The rule defines no value;
        // its only declared effect is that the flags operand cr is killed.
12472   match(SafePoint);
12473   effect(KILL cr);
12474 
        // Per the format string, the poll is a RIP-relative testl against the
        // polling page.
12475   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12476             "# Safepoint: poll for GC" %}
        // The size is declared as a fixed 6 bytes, so enc_safepoint_poll
        // (defined elsewhere) must emit exactly that many bytes.
12477   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12478   ins_cost(125);
12479   ins_encode(enc_safepoint_poll);
12480   ins_pipe(ialu_reg_mem);
12481 %}
12482 
12483 // ============================================================================
12484 // Procedure Call/Return Instructions
12485 // Call Java Static Instruction
12486 // Note: If this code changes, the corresponding ret_addr_offset() and
12487 //       compute_padding() functions will have to be adjusted.
12488 instruct CallStaticJavaDirect(method meth) %{
        // Selected for CallStaticJava nodes that are NOT method-handle
        // invokes; the complementary case is handled by CallStaticJavaHandle.
12489   match(CallStaticJava);
12490   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12491   effect(USE meth);
12492 
12493   ins_cost(300);
12494   format %{ "call,static " %}
12495   opcode(0xE8); /* E8 cd */
        // Encoding is the static-call sequence followed by call_epilog; both
        // encoding classes are defined elsewhere in this file.
12496   ins_encode(Java_Static_Call(meth), call_epilog);
12497   ins_pipe(pipe_slow);
12498   ins_pc_relative(1);
        // NOTE(review): the 4-byte alignment request is presumably what the
        // compute_padding() mentioned in the header comment cooperates with --
        // confirm before changing either.
12499   ins_alignment(4);
12500 %}
12501 
12502 // Call Java Static Instruction (method handle version)
12503 // Note: If this code changes, the corresponding ret_addr_offset() and
12504 //       compute_padding() functions will have to be adjusted.
12505 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
        // Selected for CallStaticJava nodes that ARE method-handle invokes
        // (the predicate is the exact negation of CallStaticJavaDirect's).
12506   match(CallStaticJava);
12507   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
12508   effect(USE meth);
12509   // RBP is saved by all callees (for interpreter stack correction).
12510   // We use it here for a similar purpose, in {preserve,restore}_SP.
12511 
12512   ins_cost(300);
12513   format %{ "call,static/MethodHandle " %}
12514   opcode(0xE8); /* E8 cd */
        // Same call sequence as the non-handle variant, but bracketed by
        // preserve_SP before the call and restore_SP after it. The
        // rbp_mh_SP_save operand is not referenced by name in the encoding
        // list.
        // NOTE(review): presumably it exists to reserve RBP for
        // preserve_SP/restore_SP -- confirm against those encoding classes.
12515   ins_encode(preserve_SP,
12516              Java_Static_Call(meth),
12517              restore_SP,
12518              call_epilog);
12519   ins_pipe(pipe_slow);
12520   ins_pc_relative(1);
12521   ins_alignment(4);
12522 %}
12523 
12524 // Call Java Dynamic Instruction
12525 // Note: If this code changes, the corresponding ret_addr_offset() and
12526 //       compute_padding() functions will have to be adjusted.
12527 instruct CallDynamicJavaDirect(method meth)
12528 %{
        // Selected for the ideal CallDynamicJava node; no predicate, so this
        // is the only rule shown here for that node.
12529   match(CallDynamicJava);
12530   effect(USE meth);
12531 
12532   ins_cost(300);
        // Per the format string, the call is preceded by a movq that loads
        // Universe::non_oop_word() into rax.
12533   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12534             "call,dynamic " %}
12535   opcode(0xE8); /* E8 cd */
        // Encoding is Java_Dynamic_Call followed by call_epilog; both encoding
        // classes are defined elsewhere in this file.
12536   ins_encode(Java_Dynamic_Call(meth), call_epilog);
12537   ins_pipe(pipe_slow);
12538   ins_pc_relative(1);
12539   ins_alignment(4);
12540 %}
12541 
12542 // Call Runtime Instruction
12543 instruct CallRuntimeDirect(method meth)
12544 %{
        // Selected for the ideal CallRuntime node.
12545   match(CallRuntime);
12546   effect(USE meth);
12547 
12548   ins_cost(300);
12549   format %{ "call,runtime " %}
12550   opcode(0xE8); /* E8 cd */
        // Uses the Java_To_Runtime encoding shared with CallLeafDirect and
        // CallLeafNoFPDirect. Unlike the Java call rules, there is no
        // call_epilog and no ins_alignment request here.
12551   ins_encode(Java_To_Runtime(meth));
12552   ins_pipe(pipe_slow);
12553   ins_pc_relative(1);
12554 %}
12555 
12556 // Call runtime without safepoint
12557 instruct CallLeafDirect(method meth)
12558 %{
        // Selected for the ideal CallLeaf node.
12559   match(CallLeaf);
12560   effect(USE meth);
12561 
12562   ins_cost(300);
12563   format %{ "call_leaf,runtime " %}
12564   opcode(0xE8); /* E8 cd */
        // Same Java_To_Runtime encoding as CallRuntimeDirect; the two rules
        // differ only in the matched node and the format string.
12565   ins_encode(Java_To_Runtime(meth));
12566   ins_pipe(pipe_slow);
12567   ins_pc_relative(1);
12568 %}
12569 
12570 // Call runtime without safepoint
12571 instruct CallLeafNoFPDirect(method meth)
12572 %{
        // Selected for the ideal CallLeafNoFP node.
12573   match(CallLeafNoFP);
12574   effect(USE meth);
12575 
12576   ins_cost(300);
12577   format %{ "call_leaf_nofp,runtime " %}
12578   opcode(0xE8); /* E8 cd */
        // Same Java_To_Runtime encoding as CallLeafDirect; the two rules
        // differ only in the matched node and the format string.
12579   ins_encode(Java_To_Runtime(meth));
12580   ins_pipe(pipe_slow);
12581   ins_pc_relative(1);
12582 %}
12583 
12584 // Return Instruction
12585 // Remove the return address & jump to it.
12586 // Notice: We always emit a nop after a ret to make sure there is room
12587 // for safepoint patching
12588 instruct Ret()
12589 %{
        // Selected for the ideal Return node; takes no operands.
12590   match(Return);
12591 
12592   format %{ "ret" %}
12593   opcode(0xC3);
        // NOTE(review): the encoding list contains only OpcP with primary
        // opcode 0xC3; no trailing nop is requested in this instruct. Confirm
        // whether the "always emit a nop after a ret" remark in the comment
        // preceding this rule is still accurate.
12594   ins_encode(OpcP);
12595   ins_pipe(pipe_jmp);
12596 %}
12597 
12598 // Tail Call; Jump from runtime stub to Java code.
12599 // Also known as an 'interprocedural jump'.
12600 // Target of jump will eventually return to caller.
12601 // TailJump below removes the return address.
12602 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12603 %{
        // Selected for the ideal TailCall node. method_oop is part of the
        // match rule but is not referenced by the encoding; per the format
        // string it is expected to be sitting in rbx when the jump is taken.
12604   match(TailCall jump_target method_oop);
12605 
12606   ins_cost(300);
12607   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12608   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Register-indirect jump: REX prefix for jump_target, the primary
        // opcode, then a ModRM byte built from jump_target and the secondary
        // opcode (/4).
12609   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12610   ins_pipe(pipe_jmp);
12611 %}
12612 
12613 // Tail Jump; remove the return address; jump to target.
12614 // TailCall above leaves the return address around.
12615 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12616 %{
        // Selected for the ideal TailJump node. ex_oop is part of the match
        // rule (operand class rax_RegP) but is not referenced by the encoding.
12617   match(TailJump jump_target ex_oop);
12618 
12619   ins_cost(300);
12620   format %{ "popq    rdx\t# pop return address\n\t"
12621             "jmp     $jump_target" %}
12622   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Two instructions are emitted: a fixed one-byte pop (0x5a) that
        // discards the return address into rdx, then the same
        // register-indirect jump sequence used by TailCalljmpInd.
12623   ins_encode(Opcode(0x5a), // popq rdx
12624              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12625   ins_pipe(pipe_jmp);
12626 %}
12627 
12628 // Create exception oop: created by stack-crawling runtime code.
12629 // Created exception is now available to this handler, and is setup
12630 // just prior to jumping to this handler.  No code emitted.
12631 instruct CreateException(rax_RegP ex_oop)
12632 %{
        // Selected for the ideal CreateEx node; the result operand ex_oop uses
        // the rax_RegP operand class.
12633   match(Set ex_oop (CreateEx));
12634 
        // Declared as zero bytes with an empty encoding list: this rule only
        // tells the allocator where the exception oop lives, it emits nothing.
12635   size(0);
12636   // use the following format syntax
12637   format %{ "# exception oop is in rax; no code emitted" %}
12638   ins_encode();
12639   ins_pipe(empty);
12640 %}
12641 
12642 // Rethrow exception:
12643 // The exception oop will come in the first argument position.
12644 // Then JUMP (not call) to the rethrow stub code.
12645 instruct RethrowException()
12646 %{
        // Selected for the ideal Rethrow node; takes no operands.
12647   match(Rethrow);
12648 
12649   // use the following format syntax
12650   format %{ "jmp     rethrow_stub" %}
        // All emitted code comes from the enc_rethrow encoding class, which is
        // defined elsewhere in this file.
12651   ins_encode(enc_rethrow);
12652   ins_pipe(pipe_jmp);
12653 %}
12654 
12655 
12656 //----------PEEPHOLE RULES-----------------------------------------------------
12657 // These must follow all instruction definitions as they use the names
12658 // defined in the instructions definitions.
12659 //
12660 // peepmatch ( root_instr_name [preceding_instruction]* );
12661 //
12662 // peepconstraint %{
12663 // (instruction_number.operand_name relational_op instruction_number.operand_name
12664 //  [, ...] );
12665 // // instruction numbers are zero-based using left to right order in peepmatch
12666 //
12667 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12668 // // provide an instruction_number.operand_name for each operand that appears
12669 // // in the replacement instruction's match rule
12670 //
12671 // ---------VM FLAGS---------------------------------------------------------
12672 //
12673 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12674 //
12675 // Each peephole rule is given an identifying number starting with zero and
12676 // increasing by one in the order seen by the parser.  An individual peephole
12677 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12678 // on the command-line.
12679 //
12680 // ---------CURRENT LIMITATIONS----------------------------------------------
12681 //
12682 // Only match adjacent instructions in same basic block
12683 // Only equality constraints
12684 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12685 // Only one replacement instruction
12686 //
12687 // ---------EXAMPLE----------------------------------------------------------
12688 //
12689 // // pertinent parts of existing instructions in architecture description
12690 // instruct movI(rRegI dst, rRegI src)
12691 // %{
12692 //   match(Set dst (CopyI src));
12693 // %}
12694 //
12695 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12696 // %{
12697 //   match(Set dst (AddI dst src));
12698 //   effect(KILL cr);
12699 // %}
12700 //
12701 // // Change (inc mov) to lea
12702 // peephole %{
12703 //   // increment preceded by register-register move
12704 //   peepmatch ( incI_rReg movI );
12705 //   // require that the destination register of the increment
12706 //   // match the destination register of the move
12707 //   peepconstraint ( 0.dst == 1.dst );
12708 //   // construct a replacement instruction that sets
12709 //   // the destination to ( move's source register + one )
12710 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12711 // %}
12712 //
12713 
12714 // Implementation no longer uses movX instructions since
12715 // machine-independent system no longer uses CopyX nodes.
12716 //
12717 // peephole
12718 // %{
12719 //   peepmatch (incI_rReg movI);
12720 //   peepconstraint (0.dst == 1.dst);
12721 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12722 // %}
12723 
12724 // peephole
12725 // %{
12726 //   peepmatch (decI_rReg movI);
12727 //   peepconstraint (0.dst == 1.dst);
12728 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12729 // %}
12730 
12731 // peephole
12732 // %{
12733 //   peepmatch (addI_rReg_imm movI);
12734 //   peepconstraint (0.dst == 1.dst);
12735 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12736 // %}
12737 
12738 // peephole
12739 // %{
12740 //   peepmatch (incL_rReg movL);
12741 //   peepconstraint (0.dst == 1.dst);
12742 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12743 // %}
12744 
12745 // peephole
12746 // %{
12747 //   peepmatch (decL_rReg movL);
12748 //   peepconstraint (0.dst == 1.dst);
12749 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12750 // %}
12751 
12752 // peephole
12753 // %{
12754 //   peepmatch (addL_rReg_imm movL);
12755 //   peepconstraint (0.dst == 1.dst);
12756 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12757 // %}
12758 
12759 // peephole
12760 // %{
12761 //   peepmatch (addP_rReg_imm movP);
12762 //   peepconstraint (0.dst == 1.dst);
12763 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12764 // %}
12765 
12766 // // Change load of spilled value to only a spill
12767 // instruct storeI(memory mem, rRegI src)
12768 // %{
12769 //   match(Set mem (StoreI mem src));
12770 // %}
12771 //
12772 // instruct loadI(rRegI dst, memory mem)
12773 // %{
12774 //   match(Set dst (LoadI mem));
12775 // %}
12776 //
12777 
12778 peephole
12779 %{
        // Instruction 0 is the root (loadI); instruction 1 is the storeI that
        // immediately precedes it.
12780   peepmatch (loadI storeI);
        // Fire only when the load targets the register that was just stored
        // and both instructions use the same memory operand.
12781   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeI built from the store's own
        // operands, i.e. the redundant reload is dropped.
12782   peepreplace (storeI(1.mem 1.mem 1.src));
12783 %}
12784 
12785 peephole
12786 %{
        // Long (64-bit) counterpart of the loadI/storeI rule: instruction 0 is
        // the root (loadL); instruction 1 is the storeL that immediately
        // precedes it.
12787   peepmatch (loadL storeL);
        // Fire only when the load targets the register that was just stored
        // and both instructions use the same memory operand.
12788   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Replace the pair with a single storeL built from the store's own
        // operands, i.e. the redundant reload is dropped.
12789   peepreplace (storeL(1.mem 1.mem 1.src));
12790 %}
12791 
12792 //----------SMARTSPILL RULES---------------------------------------------------
12793 // These must follow all instruction definitions as they use the names
12794 // defined in the instructions definitions.