1 //
   2 // Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
     // NOTE(review): the definitions below give RSI and RDI a register save
     // type of SOC (first column); SOE appears only in the C convention
     // column under _WIN64.  The history above may be stale -- confirm.
  69 
     // Each general register is described by a pair of reg_defs sharing one
     // encoding: NAME maps to the register's VMReg and NAME_H maps to the
     // following VMReg (as_VMReg()->next()).
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
     // RBX: save-on-call for the register save type, save-on-entry in the
     // C convention.
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
     // RSP: no-save in both columns.
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
     // RSI and RDI differ only in the C convention column: save-on-entry
     // when _WIN64 is defined, save-on-call otherwise.
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
     // R8-R11: save-on-call in both columns.
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
     // R12-R15: save-on-call for the register save type, save-on-entry in
     // the C convention.
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // XMM registers.  128-bit registers or 4 words each, labeled a-d.
 135 // Word a in each register holds a Float, words ab hold a Double.  We
 136 // currently do not use the SIMD capabilities, so registers cd are
 137 // unused at the moment.
 138 // XMM8-XMM15 must be encoded with REX.
 139 // Linux ABI:   No register preserved across function calls
 140 //              XMM0-XMM7 might hold parameters
 141 // Windows ABI: XMM6-XMM15 preserved across function calls
 142 //              XMM0-XMM3 might hold parameters
 143 
     // Each XMM register is described by two reg_defs sharing one encoding:
     // XMMn maps to the register's VMReg and XMMn_H to the following VMReg
     // (as_VMReg()->next()).  XMM0-XMM5 are save-on-call in both columns.
 144 reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
 145 reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
 146 
 147 reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
 148 reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
 149 
 150 reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
 151 reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
 152 
 153 reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
 154 reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
 155 
 156 reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
 157 reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
 158 
 159 reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
 160 reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
 161 
     // XMM6-XMM15 differ only in the C convention column: save-on-entry when
     // _WIN64 is defined, save-on-call otherwise.
 162 #ifdef _WIN64
 163 
 164 reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
 165 reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
 166 
 167 reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
 168 reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
 169 
 170 reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
 171 reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
 172 
 173 reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
 174 reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
 175 
 176 reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
 177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
 178 
 179 reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
 180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
 181 
 182 reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
 183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
 184 
 185 reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
 186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
 187 
 188 reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
 189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
 190 
 191 reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
 192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
 193 
 194 #else
 195 
 196 reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
 197 reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
 198 
 199 reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
 200 reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
 201 
 202 reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
 203 reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
 204 
 205 reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
 206 reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
 207 
 208 reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
 209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
 210 
 211 reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
 212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
 213 
 214 reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
 215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
 216 
 217 reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
 218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
 219 
 220 reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
 221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
 222 
 223 reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
 224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
 225 
 226 #endif // _WIN64
 227 
     // Flags register: ideal register type 0, encoding 16, and no backing
     // VMReg (VMRegImpl::Bad()).
 228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
 229 
 230 // Specify priority of register selection within phases of register
 231 // allocation.  Highest priority is first.  A useful heuristic is to
 232 // give registers a low priority when they are required by machine
 233 // instructions, like EAX and EDX on I486, and choose no-save registers
 234 // before save-on-call, & save-on-call before save-on-entry.  Registers
 235 // which participate in fixed calling sequences should come last.
 236 // Registers which are used as pairs must fall on an even boundary.
 237 
     // chunk0: all general registers, each listed as a (NAME, NAME_H) pair.
     // R10/R11/R8/R9 come first and RSP comes last.
 238 alloc_class chunk0(R10,         R10_H,
 239                    R11,         R11_H,
 240                    R8,          R8_H,
 241                    R9,          R9_H,
 242                    R12,         R12_H,
 243                    RCX,         RCX_H,
 244                    RBX,         RBX_H,
 245                    RDI,         RDI_H,
 246                    RDX,         RDX_H,
 247                    RSI,         RSI_H,
 248                    RAX,         RAX_H,
 249                    RBP,         RBP_H,
 250                    R13,         R13_H,
 251                    R14,         R14_H,
 252                    R15,         R15_H,
 253                    RSP,         RSP_H);
 254 
     // chunk1: all XMM registers in numeric order, each as a (XMMn, XMMn_H) pair.
 255 // XXX probably use 8-15 first on Linux
 256 alloc_class chunk1(XMM0,  XMM0_H,
 257                    XMM1,  XMM1_H,
 258                    XMM2,  XMM2_H,
 259                    XMM3,  XMM3_H,
 260                    XMM4,  XMM4_H,
 261                    XMM5,  XMM5_H,
 262                    XMM6,  XMM6_H,
 263                    XMM7,  XMM7_H,
 264                    XMM8,  XMM8_H,
 265                    XMM9,  XMM9_H,
 266                    XMM10, XMM10_H,
 267                    XMM11, XMM11_H,
 268                    XMM12, XMM12_H,
 269                    XMM13, XMM13_H,
 270                    XMM14, XMM14_H,
 271                    XMM15, XMM15_H);
 272 
     // chunk2: the flags register only.
 273 alloc_class chunk2(RFLAGS);
 274 
 275 
 276 //----------Architecture Description Register Classes--------------------------
 277 // Several register classes are automatically defined based upon information in
 278 // this architecture description.
 279 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 280 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 281 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 282 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 283 //
 284 
 285 // Class for all pointer registers (including RSP)
     // Lists every general register pair, RAX through R15.
 286 reg_class any_reg(RAX, RAX_H,
 287                   RDX, RDX_H,
 288                   RBP, RBP_H,
 289                   RDI, RDI_H,
 290                   RSI, RSI_H,
 291                   RCX, RCX_H,
 292                   RBX, RBX_H,
 293                   RSP, RSP_H,
 294                   R8,  R8_H,
 295                   R9,  R9_H,
 296                   R10, R10_H,
 297                   R11, R11_H,
 298                   R12, R12_H,
 299                   R13, R13_H,
 300                   R14, R14_H,
 301                   R15, R15_H);
 302 
 303 // Class for all pointer registers except RSP, R12 and R15
 304 reg_class ptr_reg(RAX, RAX_H,
 305                   RDX, RDX_H,
 306                   RBP, RBP_H,
 307                   RDI, RDI_H,
 308                   RSI, RSI_H,
 309                   RCX, RCX_H,
 310                   RBX, RBX_H,
 311                   R8,  R8_H,
 312                   R9,  R9_H,
 313                   R10, R10_H,
 314                   R11, R11_H,
 315                   R13, R13_H,
 316                   R14, R14_H);
 317 
 318 // Class for all pointer registers except RAX, RSP, R12 and R15
 319 reg_class ptr_no_rax_reg(RDX, RDX_H,
 320                          RBP, RBP_H,
 321                          RDI, RDI_H,
 322                          RSI, RSI_H,
 323                          RCX, RCX_H,
 324                          RBX, RBX_H,
 325                          R8,  R8_H,
 326                          R9,  R9_H,
 327                          R10, R10_H,
 328                          R11, R11_H,
 329                          R13, R13_H,
 330                          R14, R14_H);
 331 
     // Class for all pointer registers except RBP, RSP, R12 and R15
 332 reg_class ptr_no_rbp_reg(RDX, RDX_H,
 333                          RAX, RAX_H,
 334                          RDI, RDI_H,
 335                          RSI, RSI_H,
 336                          RCX, RCX_H,
 337                          RBX, RBX_H,
 338                          R8,  R8_H,
 339                          R9,  R9_H,
 340                          R10, R10_H,
 341                          R11, R11_H,
 342                          R13, R13_H,
 343                          R14, R14_H);
 344 
 345 // Class for all pointer registers except RAX, RBX, RSP, R12 and R15
 346 reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
 347                              RBP, RBP_H,
 348                              RDI, RDI_H,
 349                              RSI, RSI_H,
 350                              RCX, RCX_H,
 351                              R8,  R8_H,
 352                              R9,  R9_H,
 353                              R10, R10_H,
 354                              R11, R11_H,
 355                              R13, R13_H,
 356                              R14, R14_H);
 357 
 358 // Singleton class for RAX pointer register
 359 reg_class ptr_rax_reg(RAX, RAX_H);
 360 
 361 // Singleton class for RBX pointer register
 362 reg_class ptr_rbx_reg(RBX, RBX_H);
 363 
 364 // Singleton class for RSI pointer register
 365 reg_class ptr_rsi_reg(RSI, RSI_H);
 366 
 367 // Singleton class for RDI pointer register
 368 reg_class ptr_rdi_reg(RDI, RDI_H);
 369 
 370 // Singleton class for RBP pointer register
 371 reg_class ptr_rbp_reg(RBP, RBP_H);
 372 
 373 // Singleton class for stack pointer
 374 reg_class ptr_rsp_reg(RSP, RSP_H);
 375 
 376 // Singleton class for TLS pointer
 377 reg_class ptr_r15_reg(R15, R15_H);
 378 
 379 // Class for all long registers (except RSP, R12 and R15)
     // Long classes list both halves (NAME, NAME_H) of each register.
 380 reg_class long_reg(RAX, RAX_H,
 381                    RDX, RDX_H,
 382                    RBP, RBP_H,
 383                    RDI, RDI_H,
 384                    RSI, RSI_H,
 385                    RCX, RCX_H,
 386                    RBX, RBX_H,
 387                    R8,  R8_H,
 388                    R9,  R9_H,
 389                    R10, R10_H,
 390                    R11, R11_H,
 391                    R13, R13_H,
 392                    R14, R14_H);
 393 
 394 // Class for all long registers except RAX, RDX (and RSP, R12, R15)
 395 reg_class long_no_rax_rdx_reg(RBP, RBP_H,
 396                               RDI, RDI_H,
 397                               RSI, RSI_H,
 398                               RCX, RCX_H,
 399                               RBX, RBX_H,
 400                               R8,  R8_H,
 401                               R9,  R9_H,
 402                               R10, R10_H,
 403                               R11, R11_H,
 404                               R13, R13_H,
 405                               R14, R14_H);
 406 
 407 // Class for all long registers except RCX (and RSP, R12, R15)
 408 reg_class long_no_rcx_reg(RBP, RBP_H,
 409                           RDI, RDI_H,
 410                           RSI, RSI_H,
 411                           RAX, RAX_H,
 412                           RDX, RDX_H,
 413                           RBX, RBX_H,
 414                           R8,  R8_H,
 415                           R9,  R9_H,
 416                           R10, R10_H,
 417                           R11, R11_H,
 418                           R13, R13_H,
 419                           R14, R14_H);
 420 
 421 // Class for all long registers except RAX (and RSP, R12, R15)
 422 reg_class long_no_rax_reg(RBP, RBP_H,
 423                           RDX, RDX_H,
 424                           RDI, RDI_H,
 425                           RSI, RSI_H,
 426                           RCX, RCX_H,
 427                           RBX, RBX_H,
 428                           R8,  R8_H,
 429                           R9,  R9_H,
 430                           R10, R10_H,
 431                           R11, R11_H,
 432                           R13, R13_H,
 433                           R14, R14_H);
 434 
 435 // Singleton class for RAX long register
 436 reg_class long_rax_reg(RAX, RAX_H);
 437 
 438 // Singleton class for RCX long register
 439 reg_class long_rcx_reg(RCX, RCX_H);
 440 
 441 // Singleton class for RDX long register
 442 reg_class long_rdx_reg(RDX, RDX_H);
 443 
 444 // Class for all int registers (except RSP, R12 and R15)
     // Int classes list only the first half of each register (no _H entry).
 445 reg_class int_reg(RAX,
 446                   RDX,
 447                   RBP,
 448                   RDI,
 449                   RSI,
 450                   RCX,
 451                   RBX,
 452                   R8,
 453                   R9,
 454                   R10,
 455                   R11,
 456                   R13,
 457                   R14);
 458 
 459 // Class for all int registers except RCX (and RSP, R12, R15)
 460 reg_class int_no_rcx_reg(RAX,
 461                          RDX,
 462                          RBP,
 463                          RDI,
 464                          RSI,
 465                          RBX,
 466                          R8,
 467                          R9,
 468                          R10,
 469                          R11,
 470                          R13,
 471                          R14);
 472 
 473 // Class for all int registers except RAX, RDX (and RSP, R12, R15)
 474 reg_class int_no_rax_rdx_reg(RBP,
 475                              RDI,
 476                              RSI,
 477                              RCX,
 478                              RBX,
 479                              R8,
 480                              R9,
 481                              R10,
 482                              R11,
 483                              R13,
 484                              R14);
 485 
 486 // Singleton class for RAX int register
 487 reg_class int_rax_reg(RAX);
 488 
 489 // Singleton class for RBX int register
 490 reg_class int_rbx_reg(RBX);
 491 
 492 // Singleton class for RCX int register
 493 reg_class int_rcx_reg(RCX);
 494 
 495 // Singleton class for RDX int register
 496 reg_class int_rdx_reg(RDX);
 497 
 498 // Singleton class for RDI int register
 499 reg_class int_rdi_reg(RDI);
 500 
 501 // Singleton class for instruction pointer
 502 // reg_class ip_reg(RIP);
 503 
 504 // Singleton class for condition codes
 505 reg_class int_flags(RFLAGS);
 506 
 507 // Class for all float registers
     // Lists only the first half (XMMn) of each XMM register.
 508 reg_class float_reg(XMM0,
 509                     XMM1,
 510                     XMM2,
 511                     XMM3,
 512                     XMM4,
 513                     XMM5,
 514                     XMM6,
 515                     XMM7,
 516                     XMM8,
 517                     XMM9,
 518                     XMM10,
 519                     XMM11,
 520                     XMM12,
 521                     XMM13,
 522                     XMM14,
 523                     XMM15);
 524 
 525 // Class for all double registers
     // Lists both halves (XMMn, XMMn_H) of each XMM register.
 526 reg_class double_reg(XMM0,  XMM0_H,
 527                      XMM1,  XMM1_H,
 528                      XMM2,  XMM2_H,
 529                      XMM3,  XMM3_H,
 530                      XMM4,  XMM4_H,
 531                      XMM5,  XMM5_H,
 532                      XMM6,  XMM6_H,
 533                      XMM7,  XMM7_H,
 534                      XMM8,  XMM8_H,
 535                      XMM9,  XMM9_H,
 536                      XMM10, XMM10_H,
 537                      XMM11, XMM11_H,
 538                      XMM12, XMM12_H,
 539                      XMM13, XMM13_H,
 540                      XMM14, XMM14_H,
 541                      XMM15, XMM15_H);
 542 %}
 543 
 544 
 545 //----------SOURCE BLOCK-------------------------------------------------------
 546 // This is a block of C++ code which provides values, functions, and
 547 // definitions necessary in the rest of the architecture description
 548 source %{
 549 #define   RELOC_IMM64    Assembler::imm_operand
     // RELOC_IMM64 (above) and RELOC_DISP32 (below) are short names for the
     // Assembler operand-format constants; RELOC_DISP32 is what this file
     // passes as the `format` argument when relocating a 32-bit displacement.
 550 #define   RELOC_DISP32   Assembler::disp32_operand
 551 
     // `__ insn(...)` expands to `_masm.insn(...)`, so any code using `__`
     // needs a local object named _masm in scope.
 552 #define __ _masm.
 553 
 554 static int preserve_SP_size() {
 555   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 556 }
 557 
 558 // !!!!! Special hack to get all types of calls to specify the byte offset
 559 //       from the start of the call to the point where the return address
 560 //       will point.
 561 int MachCallStaticJavaNode::ret_addr_offset()
 562 {
 563   int offset = 5; // 5 bytes from start of call to where return address points
 564   if (_method_handle_invoke)
 565     offset += preserve_SP_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   return 15; // 15 bytes from start of call to where return address points
 572 }
 573 
 574 // In os_cpu .ad file
 575 // int MachCallRuntimeNode::ret_addr_offset()
 576 
 577 // Indicate if the safepoint node needs the polling page as an input.
 578 // Since amd64 does not have absolute addressing but RIP-relative
 579 // addressing and the polling page is within 2G, it doesn't.
 580 bool SafePointNode::needs_polling_address_input()
 581 {
 582   return false;
 583 }
 584 
 585 //
 586 // Compute padding required for nodes which need alignment
 587 //
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += 1; // skip call opcode byte
 594   return round_to(current_offset, alignment_required()) - current_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallStaticJavaHandleNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 602   current_offset += 1; // skip call opcode byte
 603   return round_to(current_offset, alignment_required()) - current_offset;
 604 }
 605 
 606 // The address of the call instruction needs to be 4-byte aligned to
 607 // ensure that it does not span a cache line so that it can be patched.
 608 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 609 {
 610   current_offset += 11; // skip movq instruction + call opcode byte
 611   return round_to(current_offset, alignment_required()) - current_offset;
 612 }
 613 
 614 #ifndef PRODUCT
 615 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
 616 {
 617   st->print("INT3");
 618 }
 619 #endif
 620 
 621 // EMIT_RM()
 622 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 623   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 624   cbuf.insts()->emit_int8(c);
 625 }
 626 
 627 // EMIT_CC()
 628 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 629   unsigned char c = (unsigned char) (f1 | f2);
 630   cbuf.insts()->emit_int8(c);
 631 }
 632 
 633 // EMIT_OPCODE()
 634 void emit_opcode(CodeBuffer &cbuf, int code) {
 635   cbuf.insts()->emit_int8((unsigned char) code);
 636 }
 637 
 638 // EMIT_OPCODE() w/ relocation information
 639 void emit_opcode(CodeBuffer &cbuf,
 640                  int code, relocInfo::relocType reloc, int offset, int format)
 641 {
 642   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 643   emit_opcode(cbuf, code);
 644 }
 645 
 646 // EMIT_D8()
 647 void emit_d8(CodeBuffer &cbuf, int d8) {
 648   cbuf.insts()->emit_int8((unsigned char) d8);
 649 }
 650 
 651 // EMIT_D16()
 652 void emit_d16(CodeBuffer &cbuf, int d16) {
 653   cbuf.insts()->emit_int16(d16);
 654 }
 655 
 656 // EMIT_D32()
 657 void emit_d32(CodeBuffer &cbuf, int d32) {
 658   cbuf.insts()->emit_int32(d32);
 659 }
 660 
 661 // EMIT_D64()
 662 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 663   cbuf.insts()->emit_int64(d64);
 664 }
 665 
 666 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 667 void emit_d32_reloc(CodeBuffer& cbuf,
 668                     int d32,
 669                     relocInfo::relocType reloc,
 670                     int format)
 671 {
 672   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 673   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 674   cbuf.insts()->emit_int32(d32);
 675 }
 676 
 677 // emit 32 bit value and construct relocation entry from RelocationHolder
 678 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 679 #ifdef ASSERT
 680   if (rspec.reloc()->type() == relocInfo::oop_type &&
 681       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 682     assert(oop((intptr_t)d32)->is_oop() && (ScavengeRootsInCode || !oop((intptr_t)d32)->is_scavengable()), "cannot embed scavengable oops in code");
 683   }
 684 #endif
 685   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 686   cbuf.insts()->emit_int32(d32);
 687 }
 688 
 689 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 690   address next_ip = cbuf.insts_end() + 4;
 691   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 692                  external_word_Relocation::spec(addr),
 693                  RELOC_DISP32);
 694 }
 695 
 696 
 697 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 698 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 699   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 700   cbuf.insts()->emit_int64(d64);
 701 }
 702 
 703 // emit 64 bit value and construct relocation entry from RelocationHolder
 704 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 705 #ifdef ASSERT
 706   if (rspec.reloc()->type() == relocInfo::oop_type &&
 707       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 708     assert(oop(d64)->is_oop() && (ScavengeRootsInCode || !oop(d64)->is_scavengable()),
 709            "cannot embed scavengable oops in code");
 710   }
 711 #endif
 712   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 713   cbuf.insts()->emit_int64(d64);
 714 }
 715 
 716 // Access stack slot for load or store
 717 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 718 {
 719   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 720   if (-0x80 <= disp && disp < 0x80) {
 721     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 722     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 723     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 724   } else {
 725     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 726     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 727     emit_d32(cbuf, disp);     // Displacement // R/M byte
 728   }
 729 }
 730 
 731    // rRegI ereg, memory mem) %{    // emit_reg_mem
 732 void encode_RegMem(CodeBuffer &cbuf,
 733                    int reg,
 734                    int base, int index, int scale, int disp, bool disp_is_oop)
 735 {
 736   assert(!disp_is_oop, "cannot have disp");
 737   int regenc = reg & 7;
 738   int baseenc = base & 7;
 739   int indexenc = index & 7;
 740 
 741   // There is no index & no scale, use form without SIB byte
 742   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 743     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 744     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 745       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 746     } else if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 747       // If 8-bit displacement, mode 0x1
 748       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 749       emit_d8(cbuf, disp);
 750     } else {
 751       // If 32-bit displacement
 752       if (base == -1) { // Special flag for absolute address
 753         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 754         if (disp_is_oop) {
 755           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 756         } else {
 757           emit_d32(cbuf, disp);
 758         }
 759       } else {
 760         // Normal base + offset
 761         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 762         if (disp_is_oop) {
 763           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 764         } else {
 765           emit_d32(cbuf, disp);
 766         }
 767       }
 768     }
 769   } else {
 770     // Else, encode with the SIB byte
 771     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 772     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 773       // If no displacement
 774       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 775       emit_rm(cbuf, scale, indexenc, baseenc);
 776     } else {
 777       if (-0x80 <= disp && disp < 0x80 && !disp_is_oop) {
 778         // If 8-bit displacement, mode 0x1
 779         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 780         emit_rm(cbuf, scale, indexenc, baseenc);
 781         emit_d8(cbuf, disp);
 782       } else {
 783         // If 32-bit displacement
 784         if (base == 0x04 ) {
 785           emit_rm(cbuf, 0x2, regenc, 0x4);
 786           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 787         } else {
 788           emit_rm(cbuf, 0x2, regenc, 0x4);
 789           emit_rm(cbuf, scale, indexenc, baseenc); // *
 790         }
 791         if (disp_is_oop) {
 792           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 793         } else {
 794           emit_d32(cbuf, disp);
 795         }
 796       }
 797     }
 798   }
 799 }
 800 
 801 void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
 802 {
 803   if (dstenc != srcenc) {
 804     if (dstenc < 8) {
 805       if (srcenc >= 8) {
 806         emit_opcode(cbuf, Assembler::REX_B);
 807         srcenc -= 8;
 808       }
 809     } else {
 810       if (srcenc < 8) {
 811         emit_opcode(cbuf, Assembler::REX_R);
 812       } else {
 813         emit_opcode(cbuf, Assembler::REX_RB);
 814         srcenc -= 8;
 815       }
 816       dstenc -= 8;
 817     }
 818 
 819     emit_opcode(cbuf, 0x8B);
 820     emit_rm(cbuf, 0x3, dstenc, srcenc);
 821   }
 822 }
 823 
 824 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 825   if( dst_encoding == src_encoding ) {
 826     // reg-reg copy, use an empty encoding
 827   } else {
 828     MacroAssembler _masm(&cbuf);
 829 
 830     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 831   }
 832 }
 833 
 834 
 835 //=============================================================================
 836 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 837 
 838 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 839   emit_constant_table(cbuf);
 840   set_table_base_offset(0);
 841   // Empty encoding
 842 }
 843 
 844 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 845   // Compute the size (even if it's zero) since
 846   // Compile::Shorten_branches needs the table to be emitted (which
 847   // happens in Compile::scratch_emit_size) to calculate the size for
 848   // MachConstantNodes.
 849   return MachNode::size(ra_);
 850 }
 851 
 852 #ifndef PRODUCT
 853 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 854   st->print("# MachConstantBase (empty encoding)");
 855 }
 856 #endif
 857 
 858 
 859 //=============================================================================
 860 #ifndef PRODUCT
 861 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 862 {
 863   Compile* C = ra_->C;
 864 
 865   int framesize = C->frame_slots() << LogBytesPerInt;
 866   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 867   // Remove wordSize for return adr already pushed
 868   // and another for the RBP we are going to save
 869   framesize -= 2*wordSize;
 870   bool need_nop = true;
 871 
 872   // Calls to C2R adapters often do not accept exceptional returns.
 873   // We require that their callers must bang for them.  But be
 874   // careful, because some VM calls (such as call site linkage) can
 875   // use several kilobytes of stack.  But the stack safety zone should
 876   // account for that.  See bugs 4446381, 4468289, 4497237.
 877   if (C->need_stack_bang(framesize)) {
 878     st->print_cr("# stack bang"); st->print("\t");
 879     need_nop = false;
 880   }
 881   st->print_cr("pushq   rbp"); st->print("\t");
 882 
 883   if (VerifyStackAtCalls) {
 884     // Majik cookie to verify stack depth
 885     st->print_cr("pushq   0xffffffffbadb100d"
 886                   "\t# Majik cookie for stack depth check");
 887     st->print("\t");
 888     framesize -= wordSize; // Remove 2 for cookie
 889     need_nop = false;
 890   }
 891 
 892   if (framesize) {
 893     st->print("subq    rsp, #%d\t# Create frame", framesize);
 894     if (framesize < 0x80 && need_nop) {
 895       st->print("\n\tnop\t# nop for patch_verified_entry");
 896     }
 897   }
 898 }
 899 #endif
 900 
 901 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
 902 {
 903   Compile* C = ra_->C;
 904 
 905   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 906   // NativeJump::patch_verified_entry will be able to patch out the entry
 907   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 908   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 909   // 6 bytes. So if we don't do the fldcw or the push then we must
 910   // use the 6 byte frame allocation even if we have no frame. :-(
 911   // If method sets FPU control word do it now
 912 
 913   int framesize = C->frame_slots() << LogBytesPerInt;
 914   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 915   // Remove wordSize for return adr already pushed
 916   // and another for the RBP we are going to save
 917   framesize -= 2*wordSize;
 918   bool need_nop = true;
 919 
 920   // Calls to C2R adapters often do not accept exceptional returns.
 921   // We require that their callers must bang for them.  But be
 922   // careful, because some VM calls (such as call site linkage) can
 923   // use several kilobytes of stack.  But the stack safety zone should
 924   // account for that.  See bugs 4446381, 4468289, 4497237.
 925   if (C->need_stack_bang(framesize)) {
 926     MacroAssembler masm(&cbuf);
 927     masm.generate_stack_overflow_check(framesize);
 928     need_nop = false;
 929   }
 930 
 931   // We always push rbp so that on return to interpreter rbp will be
 932   // restored correctly and we can correct the stack.
 933   emit_opcode(cbuf, 0x50 | RBP_enc);
 934 
 935   if (VerifyStackAtCalls) {
 936     // Majik cookie to verify stack depth
 937     emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
 938     emit_d32(cbuf, 0xbadb100d);
 939     framesize -= wordSize; // Remove 2 for cookie
 940     need_nop = false;
 941   }
 942 
 943   if (framesize) {
 944     emit_opcode(cbuf, Assembler::REX_W);
 945     if (framesize < 0x80) {
 946       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 947       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 948       emit_d8(cbuf, framesize);
 949       if (need_nop) {
 950         emit_opcode(cbuf, 0x90); // nop
 951       }
 952     } else {
 953       emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 954       emit_rm(cbuf, 0x3, 0x05, RSP_enc);
 955       emit_d32(cbuf, framesize);
 956     }
 957   }
 958 
 959   C->set_frame_complete(cbuf.insts_size());
 960 
 961 #ifdef ASSERT
 962   if (VerifyStackAtCalls) {
 963     Label L;
 964     MacroAssembler masm(&cbuf);
 965     masm.push(rax);
 966     masm.mov(rax, rsp);
 967     masm.andptr(rax, StackAlignmentInBytes-1);
 968     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 969     masm.pop(rax);
 970     masm.jcc(Assembler::equal, L);
 971     masm.stop("Stack is not properly aligned!");
 972     masm.bind(L);
 973   }
 974 #endif
 975 }
 976 
 977 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 978 {
 979   return MachNode::size(ra_); // too many variables; just compute it
 980                               // the hard way
 981 }
 982 
 983 int MachPrologNode::reloc() const
 984 {
 985   return 0; // a large enough number
 986 }
 987 
 988 //=============================================================================
 989 #ifndef PRODUCT
 990 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 991 {
 992   Compile* C = ra_->C;
 993   int framesize = C->frame_slots() << LogBytesPerInt;
 994   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 995   // Remove word for return adr already pushed
 996   // and RBP
 997   framesize -= 2*wordSize;
 998 
 999   if (framesize) {
1000     st->print_cr("addq\trsp, %d\t# Destroy frame", framesize);
1001     st->print("\t");
1002   }
1003 
1004   st->print_cr("popq\trbp");
1005   if (do_polling() && C->is_method_compilation()) {
1006     st->print_cr("\ttestl\trax, [rip + #offset_to_poll_page]\t"
1007                   "# Safepoint: poll for GC");
1008     st->print("\t");
1009   }
1010 }
1011 #endif
1012 
1013 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1014 {
1015   Compile* C = ra_->C;
1016   int framesize = C->frame_slots() << LogBytesPerInt;
1017   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1018   // Remove word for return adr already pushed
1019   // and RBP
1020   framesize -= 2*wordSize;
1021 
1022   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1023 
1024   if (framesize) {
1025     emit_opcode(cbuf, Assembler::REX_W);
1026     if (framesize < 0x80) {
1027       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
1028       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1029       emit_d8(cbuf, framesize);
1030     } else {
1031       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1032       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1033       emit_d32(cbuf, framesize);
1034     }
1035   }
1036 
1037   // popq rbp
1038   emit_opcode(cbuf, 0x58 | RBP_enc);
1039 
1040   if (do_polling() && C->is_method_compilation()) {
1041     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
1042     // XXX reg_mem doesn't support RIP-relative addressing yet
1043     cbuf.set_insts_mark();
1044     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_return_type, 0); // XXX
1045     emit_opcode(cbuf, 0x85); // testl
1046     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
1047     // cbuf.insts_mark() is beginning of instruction
1048     emit_d32_reloc(cbuf, os::get_polling_page());
1049 //                    relocInfo::poll_return_type,
1050   }
1051 }
1052 
1053 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1054 {
1055   Compile* C = ra_->C;
1056   int framesize = C->frame_slots() << LogBytesPerInt;
1057   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1058   // Remove word for return adr already pushed
1059   // and RBP
1060   framesize -= 2*wordSize;
1061 
1062   uint size = 0;
1063 
1064   if (do_polling() && C->is_method_compilation()) {
1065     size += 6;
1066   }
1067 
1068   // count popq rbp
1069   size++;
1070 
1071   if (framesize) {
1072     if (framesize < 0x80) {
1073       size += 4;
1074     } else if (framesize) {
1075       size += 7;
1076     }
1077   }
1078 
1079   return size;
1080 }
1081 
1082 int MachEpilogNode::reloc() const
1083 {
1084   return 2; // a large enough number
1085 }
1086 
1087 const Pipeline* MachEpilogNode::pipeline() const
1088 {
1089   return MachNode::pipeline_class();
1090 }
1091 
1092 int MachEpilogNode::safepoint_offset() const
1093 {
1094   return 0;
1095 }
1096 
1097 //=============================================================================
1098 
// Register class of a spill-copy operand, as returned by rc_class().
enum RC {
  rc_bad   = 0,  // not a valid register
  rc_int   = 1,  // general-purpose register
  rc_float = 2,  // XMM register
  rc_stack = 3   // stack slot
};
1105 
1106 static enum RC rc_class(OptoReg::Name reg)
1107 {
1108   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1109 
1110   if (OptoReg::is_stack(reg)) return rc_stack;
1111 
1112   VMReg r = OptoReg::as_VMReg(reg);
1113 
1114   if (r->is_Register()) return rc_int;
1115 
1116   assert(r->is_XMMRegister(), "must be");
1117   return rc_float;
1118 }
1119 
1120 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1121                                        PhaseRegAlloc* ra_,
1122                                        bool do_size,
1123                                        outputStream* st) const
1124 {
1125 
1126   // Get registers to move
1127   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1128   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1129   OptoReg::Name dst_second = ra_->get_reg_second(this);
1130   OptoReg::Name dst_first = ra_->get_reg_first(this);
1131 
1132   enum RC src_second_rc = rc_class(src_second);
1133   enum RC src_first_rc = rc_class(src_first);
1134   enum RC dst_second_rc = rc_class(dst_second);
1135   enum RC dst_first_rc = rc_class(dst_first);
1136 
1137   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1138          "must move at least 1 register" );
1139 
1140   if (src_first == dst_first && src_second == dst_second) {
1141     // Self copy, no move
1142     return 0;
1143   } else if (src_first_rc == rc_stack) {
1144     // mem ->
1145     if (dst_first_rc == rc_stack) {
1146       // mem -> mem
1147       assert(src_second != dst_first, "overlap");
1148       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1149           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1150         // 64-bit
1151         int src_offset = ra_->reg2offset(src_first);
1152         int dst_offset = ra_->reg2offset(dst_first);
1153         if (cbuf) {
1154           emit_opcode(*cbuf, 0xFF);
1155           encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
1156 
1157           emit_opcode(*cbuf, 0x8F);
1158           encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1159 
1160 #ifndef PRODUCT
1161         } else if (!do_size) {
1162           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1163                      "popq    [rsp + #%d]",
1164                      src_offset,
1165                      dst_offset);
1166 #endif
1167         }
1168         return
1169           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1170           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1171       } else {
1172         // 32-bit
1173         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1174         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1175         // No pushl/popl, so:
1176         int src_offset = ra_->reg2offset(src_first);
1177         int dst_offset = ra_->reg2offset(dst_first);
1178         if (cbuf) {
1179           emit_opcode(*cbuf, Assembler::REX_W);
1180           emit_opcode(*cbuf, 0x89);
1181           emit_opcode(*cbuf, 0x44);
1182           emit_opcode(*cbuf, 0x24);
1183           emit_opcode(*cbuf, 0xF8);
1184 
1185           emit_opcode(*cbuf, 0x8B);
1186           encode_RegMem(*cbuf,
1187                         RAX_enc,
1188                         RSP_enc, 0x4, 0, src_offset,
1189                         false);
1190 
1191           emit_opcode(*cbuf, 0x89);
1192           encode_RegMem(*cbuf,
1193                         RAX_enc,
1194                         RSP_enc, 0x4, 0, dst_offset,
1195                         false);
1196 
1197           emit_opcode(*cbuf, Assembler::REX_W);
1198           emit_opcode(*cbuf, 0x8B);
1199           emit_opcode(*cbuf, 0x44);
1200           emit_opcode(*cbuf, 0x24);
1201           emit_opcode(*cbuf, 0xF8);
1202 
1203 #ifndef PRODUCT
1204         } else if (!do_size) {
1205           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1206                      "movl    rax, [rsp + #%d]\n\t"
1207                      "movl    [rsp + #%d], rax\n\t"
1208                      "movq    rax, [rsp - #8]",
1209                      src_offset,
1210                      dst_offset);
1211 #endif
1212         }
1213         return
1214           5 + // movq
1215           3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1216           3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1217           5; // movq
1218       }
1219     } else if (dst_first_rc == rc_int) {
1220       // mem -> gpr
1221       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1222           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1223         // 64-bit
1224         int offset = ra_->reg2offset(src_first);
1225         if (cbuf) {
1226           if (Matcher::_regEncode[dst_first] < 8) {
1227             emit_opcode(*cbuf, Assembler::REX_W);
1228           } else {
1229             emit_opcode(*cbuf, Assembler::REX_WR);
1230           }
1231           emit_opcode(*cbuf, 0x8B);
1232           encode_RegMem(*cbuf,
1233                         Matcher::_regEncode[dst_first],
1234                         RSP_enc, 0x4, 0, offset,
1235                         false);
1236 #ifndef PRODUCT
1237         } else if (!do_size) {
1238           st->print("movq    %s, [rsp + #%d]\t# spill",
1239                      Matcher::regName[dst_first],
1240                      offset);
1241 #endif
1242         }
1243         return
1244           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1245       } else {
1246         // 32-bit
1247         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1248         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1249         int offset = ra_->reg2offset(src_first);
1250         if (cbuf) {
1251           if (Matcher::_regEncode[dst_first] >= 8) {
1252             emit_opcode(*cbuf, Assembler::REX_R);
1253           }
1254           emit_opcode(*cbuf, 0x8B);
1255           encode_RegMem(*cbuf,
1256                         Matcher::_regEncode[dst_first],
1257                         RSP_enc, 0x4, 0, offset,
1258                         false);
1259 #ifndef PRODUCT
1260         } else if (!do_size) {
1261           st->print("movl    %s, [rsp + #%d]\t# spill",
1262                      Matcher::regName[dst_first],
1263                      offset);
1264 #endif
1265         }
1266         return
1267           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1268           ((Matcher::_regEncode[dst_first] < 8)
1269            ? 3
1270            : 4); // REX
1271       }
1272     } else if (dst_first_rc == rc_float) {
1273       // mem-> xmm
1274       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1275           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1276         // 64-bit
1277         int offset = ra_->reg2offset(src_first);
1278         if (cbuf) {
1279           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
1280           if (Matcher::_regEncode[dst_first] >= 8) {
1281             emit_opcode(*cbuf, Assembler::REX_R);
1282           }
1283           emit_opcode(*cbuf, 0x0F);
1284           emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
1285           encode_RegMem(*cbuf,
1286                         Matcher::_regEncode[dst_first],
1287                         RSP_enc, 0x4, 0, offset,
1288                         false);
1289 #ifndef PRODUCT
1290         } else if (!do_size) {
1291           st->print("%s  %s, [rsp + #%d]\t# spill",
1292                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1293                      Matcher::regName[dst_first],
1294                      offset);
1295 #endif
1296         }
1297         return
1298           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1299           ((Matcher::_regEncode[dst_first] < 8)
1300            ? 5
1301            : 6); // REX
1302       } else {
1303         // 32-bit
1304         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1305         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1306         int offset = ra_->reg2offset(src_first);
1307         if (cbuf) {
1308           emit_opcode(*cbuf, 0xF3);
1309           if (Matcher::_regEncode[dst_first] >= 8) {
1310             emit_opcode(*cbuf, Assembler::REX_R);
1311           }
1312           emit_opcode(*cbuf, 0x0F);
1313           emit_opcode(*cbuf, 0x10);
1314           encode_RegMem(*cbuf,
1315                         Matcher::_regEncode[dst_first],
1316                         RSP_enc, 0x4, 0, offset,
1317                         false);
1318 #ifndef PRODUCT
1319         } else if (!do_size) {
1320           st->print("movss   %s, [rsp + #%d]\t# spill",
1321                      Matcher::regName[dst_first],
1322                      offset);
1323 #endif
1324         }
1325         return
1326           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1327           ((Matcher::_regEncode[dst_first] < 8)
1328            ? 5
1329            : 6); // REX
1330       }
1331     }
1332   } else if (src_first_rc == rc_int) {
1333     // gpr ->
1334     if (dst_first_rc == rc_stack) {
1335       // gpr -> mem
1336       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1337           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1338         // 64-bit
1339         int offset = ra_->reg2offset(dst_first);
1340         if (cbuf) {
1341           if (Matcher::_regEncode[src_first] < 8) {
1342             emit_opcode(*cbuf, Assembler::REX_W);
1343           } else {
1344             emit_opcode(*cbuf, Assembler::REX_WR);
1345           }
1346           emit_opcode(*cbuf, 0x89);
1347           encode_RegMem(*cbuf,
1348                         Matcher::_regEncode[src_first],
1349                         RSP_enc, 0x4, 0, offset,
1350                         false);
1351 #ifndef PRODUCT
1352         } else if (!do_size) {
1353           st->print("movq    [rsp + #%d], %s\t# spill",
1354                      offset,
1355                      Matcher::regName[src_first]);
1356 #endif
1357         }
1358         return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1359       } else {
1360         // 32-bit
1361         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1362         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1363         int offset = ra_->reg2offset(dst_first);
1364         if (cbuf) {
1365           if (Matcher::_regEncode[src_first] >= 8) {
1366             emit_opcode(*cbuf, Assembler::REX_R);
1367           }
1368           emit_opcode(*cbuf, 0x89);
1369           encode_RegMem(*cbuf,
1370                         Matcher::_regEncode[src_first],
1371                         RSP_enc, 0x4, 0, offset,
1372                         false);
1373 #ifndef PRODUCT
1374         } else if (!do_size) {
1375           st->print("movl    [rsp + #%d], %s\t# spill",
1376                      offset,
1377                      Matcher::regName[src_first]);
1378 #endif
1379         }
1380         return
1381           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1382           ((Matcher::_regEncode[src_first] < 8)
1383            ? 3
1384            : 4); // REX
1385       }
1386     } else if (dst_first_rc == rc_int) {
1387       // gpr -> gpr
1388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1390         // 64-bit
1391         if (cbuf) {
1392           if (Matcher::_regEncode[dst_first] < 8) {
1393             if (Matcher::_regEncode[src_first] < 8) {
1394               emit_opcode(*cbuf, Assembler::REX_W);
1395             } else {
1396               emit_opcode(*cbuf, Assembler::REX_WB);
1397             }
1398           } else {
1399             if (Matcher::_regEncode[src_first] < 8) {
1400               emit_opcode(*cbuf, Assembler::REX_WR);
1401             } else {
1402               emit_opcode(*cbuf, Assembler::REX_WRB);
1403             }
1404           }
1405           emit_opcode(*cbuf, 0x8B);
1406           emit_rm(*cbuf, 0x3,
1407                   Matcher::_regEncode[dst_first] & 7,
1408                   Matcher::_regEncode[src_first] & 7);
1409 #ifndef PRODUCT
1410         } else if (!do_size) {
1411           st->print("movq    %s, %s\t# spill",
1412                      Matcher::regName[dst_first],
1413                      Matcher::regName[src_first]);
1414 #endif
1415         }
1416         return 3; // REX
1417       } else {
1418         // 32-bit
1419         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1420         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1421         if (cbuf) {
1422           if (Matcher::_regEncode[dst_first] < 8) {
1423             if (Matcher::_regEncode[src_first] >= 8) {
1424               emit_opcode(*cbuf, Assembler::REX_B);
1425             }
1426           } else {
1427             if (Matcher::_regEncode[src_first] < 8) {
1428               emit_opcode(*cbuf, Assembler::REX_R);
1429             } else {
1430               emit_opcode(*cbuf, Assembler::REX_RB);
1431             }
1432           }
1433           emit_opcode(*cbuf, 0x8B);
1434           emit_rm(*cbuf, 0x3,
1435                   Matcher::_regEncode[dst_first] & 7,
1436                   Matcher::_regEncode[src_first] & 7);
1437 #ifndef PRODUCT
1438         } else if (!do_size) {
1439           st->print("movl    %s, %s\t# spill",
1440                      Matcher::regName[dst_first],
1441                      Matcher::regName[src_first]);
1442 #endif
1443         }
1444         return
1445           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1446           ? 2
1447           : 3; // REX
1448       }
1449     } else if (dst_first_rc == rc_float) {
1450       // gpr -> xmm
1451       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1452           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1453         // 64-bit
1454         if (cbuf) {
1455           emit_opcode(*cbuf, 0x66);
1456           if (Matcher::_regEncode[dst_first] < 8) {
1457             if (Matcher::_regEncode[src_first] < 8) {
1458               emit_opcode(*cbuf, Assembler::REX_W);
1459             } else {
1460               emit_opcode(*cbuf, Assembler::REX_WB);
1461             }
1462           } else {
1463             if (Matcher::_regEncode[src_first] < 8) {
1464               emit_opcode(*cbuf, Assembler::REX_WR);
1465             } else {
1466               emit_opcode(*cbuf, Assembler::REX_WRB);
1467             }
1468           }
1469           emit_opcode(*cbuf, 0x0F);
1470           emit_opcode(*cbuf, 0x6E);
1471           emit_rm(*cbuf, 0x3,
1472                   Matcher::_regEncode[dst_first] & 7,
1473                   Matcher::_regEncode[src_first] & 7);
1474 #ifndef PRODUCT
1475         } else if (!do_size) {
1476           st->print("movdq   %s, %s\t# spill",
1477                      Matcher::regName[dst_first],
1478                      Matcher::regName[src_first]);
1479 #endif
1480         }
1481         return 5; // REX
1482       } else {
1483         // 32-bit
1484         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1485         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1486         if (cbuf) {
1487           emit_opcode(*cbuf, 0x66);
1488           if (Matcher::_regEncode[dst_first] < 8) {
1489             if (Matcher::_regEncode[src_first] >= 8) {
1490               emit_opcode(*cbuf, Assembler::REX_B);
1491             }
1492           } else {
1493             if (Matcher::_regEncode[src_first] < 8) {
1494               emit_opcode(*cbuf, Assembler::REX_R);
1495             } else {
1496               emit_opcode(*cbuf, Assembler::REX_RB);
1497             }
1498           }
1499           emit_opcode(*cbuf, 0x0F);
1500           emit_opcode(*cbuf, 0x6E);
1501           emit_rm(*cbuf, 0x3,
1502                   Matcher::_regEncode[dst_first] & 7,
1503                   Matcher::_regEncode[src_first] & 7);
1504 #ifndef PRODUCT
1505         } else if (!do_size) {
1506           st->print("movdl   %s, %s\t# spill",
1507                      Matcher::regName[dst_first],
1508                      Matcher::regName[src_first]);
1509 #endif
1510         }
1511         return
1512           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1513           ? 4
1514           : 5; // REX
1515       }
1516     }
1517   } else if (src_first_rc == rc_float) {
1518     // xmm ->
1519     if (dst_first_rc == rc_stack) {
1520       // xmm -> mem
1521       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1522           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1523         // 64-bit
1524         int offset = ra_->reg2offset(dst_first);
1525         if (cbuf) {
1526           emit_opcode(*cbuf, 0xF2);
1527           if (Matcher::_regEncode[src_first] >= 8) {
1528               emit_opcode(*cbuf, Assembler::REX_R);
1529           }
1530           emit_opcode(*cbuf, 0x0F);
1531           emit_opcode(*cbuf, 0x11);
1532           encode_RegMem(*cbuf,
1533                         Matcher::_regEncode[src_first],
1534                         RSP_enc, 0x4, 0, offset,
1535                         false);
1536 #ifndef PRODUCT
1537         } else if (!do_size) {
1538           st->print("movsd   [rsp + #%d], %s\t# spill",
1539                      offset,
1540                      Matcher::regName[src_first]);
1541 #endif
1542         }
1543         return
1544           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1545           ((Matcher::_regEncode[src_first] < 8)
1546            ? 5
1547            : 6); // REX
1548       } else {
1549         // 32-bit
1550         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1551         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1552         int offset = ra_->reg2offset(dst_first);
1553         if (cbuf) {
1554           emit_opcode(*cbuf, 0xF3);
1555           if (Matcher::_regEncode[src_first] >= 8) {
1556               emit_opcode(*cbuf, Assembler::REX_R);
1557           }
1558           emit_opcode(*cbuf, 0x0F);
1559           emit_opcode(*cbuf, 0x11);
1560           encode_RegMem(*cbuf,
1561                         Matcher::_regEncode[src_first],
1562                         RSP_enc, 0x4, 0, offset,
1563                         false);
1564 #ifndef PRODUCT
1565         } else if (!do_size) {
1566           st->print("movss   [rsp + #%d], %s\t# spill",
1567                      offset,
1568                      Matcher::regName[src_first]);
1569 #endif
1570         }
1571         return
1572           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1573           ((Matcher::_regEncode[src_first] < 8)
1574            ? 5
1575            : 6); // REX
1576       }
1577     } else if (dst_first_rc == rc_int) {
1578       // xmm -> gpr
1579       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1580           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1581         // 64-bit
1582         if (cbuf) {
1583           emit_opcode(*cbuf, 0x66);
1584           if (Matcher::_regEncode[dst_first] < 8) {
1585             if (Matcher::_regEncode[src_first] < 8) {
1586               emit_opcode(*cbuf, Assembler::REX_W);
1587             } else {
1588               emit_opcode(*cbuf, Assembler::REX_WR); // attention!
1589             }
1590           } else {
1591             if (Matcher::_regEncode[src_first] < 8) {
1592               emit_opcode(*cbuf, Assembler::REX_WB); // attention!
1593             } else {
1594               emit_opcode(*cbuf, Assembler::REX_WRB);
1595             }
1596           }
1597           emit_opcode(*cbuf, 0x0F);
1598           emit_opcode(*cbuf, 0x7E);
1599           emit_rm(*cbuf, 0x3,
1600                   Matcher::_regEncode[src_first] & 7,
1601                   Matcher::_regEncode[dst_first] & 7);
1602 #ifndef PRODUCT
1603         } else if (!do_size) {
1604           st->print("movdq   %s, %s\t# spill",
1605                      Matcher::regName[dst_first],
1606                      Matcher::regName[src_first]);
1607 #endif
1608         }
1609         return 5; // REX
1610       } else {
1611         // 32-bit
1612         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1613         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1614         if (cbuf) {
1615           emit_opcode(*cbuf, 0x66);
1616           if (Matcher::_regEncode[dst_first] < 8) {
1617             if (Matcher::_regEncode[src_first] >= 8) {
1618               emit_opcode(*cbuf, Assembler::REX_R); // attention!
1619             }
1620           } else {
1621             if (Matcher::_regEncode[src_first] < 8) {
1622               emit_opcode(*cbuf, Assembler::REX_B); // attention!
1623             } else {
1624               emit_opcode(*cbuf, Assembler::REX_RB);
1625             }
1626           }
1627           emit_opcode(*cbuf, 0x0F);
1628           emit_opcode(*cbuf, 0x7E);
1629           emit_rm(*cbuf, 0x3,
1630                   Matcher::_regEncode[src_first] & 7,
1631                   Matcher::_regEncode[dst_first] & 7);
1632 #ifndef PRODUCT
1633         } else if (!do_size) {
1634           st->print("movdl   %s, %s\t# spill",
1635                      Matcher::regName[dst_first],
1636                      Matcher::regName[src_first]);
1637 #endif
1638         }
1639         return
1640           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1641           ? 4
1642           : 5; // REX
1643       }
1644     } else if (dst_first_rc == rc_float) {
1645       // xmm -> xmm
1646       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1647           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1648         // 64-bit
1649         if (cbuf) {
1650           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
1651           if (Matcher::_regEncode[dst_first] < 8) {
1652             if (Matcher::_regEncode[src_first] >= 8) {
1653               emit_opcode(*cbuf, Assembler::REX_B);
1654             }
1655           } else {
1656             if (Matcher::_regEncode[src_first] < 8) {
1657               emit_opcode(*cbuf, Assembler::REX_R);
1658             } else {
1659               emit_opcode(*cbuf, Assembler::REX_RB);
1660             }
1661           }
1662           emit_opcode(*cbuf, 0x0F);
1663           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1664           emit_rm(*cbuf, 0x3,
1665                   Matcher::_regEncode[dst_first] & 7,
1666                   Matcher::_regEncode[src_first] & 7);
1667 #ifndef PRODUCT
1668         } else if (!do_size) {
1669           st->print("%s  %s, %s\t# spill",
1670                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1671                      Matcher::regName[dst_first],
1672                      Matcher::regName[src_first]);
1673 #endif
1674         }
1675         return
1676           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1677           ? 4
1678           : 5; // REX
1679       } else {
1680         // 32-bit
1681         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1682         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1683         if (cbuf) {
1684           if (!UseXmmRegToRegMoveAll)
1685             emit_opcode(*cbuf, 0xF3);
1686           if (Matcher::_regEncode[dst_first] < 8) {
1687             if (Matcher::_regEncode[src_first] >= 8) {
1688               emit_opcode(*cbuf, Assembler::REX_B);
1689             }
1690           } else {
1691             if (Matcher::_regEncode[src_first] < 8) {
1692               emit_opcode(*cbuf, Assembler::REX_R);
1693             } else {
1694               emit_opcode(*cbuf, Assembler::REX_RB);
1695             }
1696           }
1697           emit_opcode(*cbuf, 0x0F);
1698           emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
1699           emit_rm(*cbuf, 0x3,
1700                   Matcher::_regEncode[dst_first] & 7,
1701                   Matcher::_regEncode[src_first] & 7);
1702 #ifndef PRODUCT
1703         } else if (!do_size) {
1704           st->print("%s  %s, %s\t# spill",
1705                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1706                      Matcher::regName[dst_first],
1707                      Matcher::regName[src_first]);
1708 #endif
1709         }
1710         return
1711           (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1712           ? (UseXmmRegToRegMoveAll ? 3 : 4)
1713           : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
1714       }
1715     }
1716   }
1717 
1718   assert(0," foo ");
1719   Unimplemented();
1720 
1721   return 0;
1722 }
1723 
1724 #ifndef PRODUCT
1725 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
1726 {
1727   implementation(NULL, ra_, false, st);
1728 }
1729 #endif
1730 
1731 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
1732 {
1733   implementation(&cbuf, ra_, false, NULL);
1734 }
1735 
1736 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
1737 {
1738   return implementation(NULL, ra_, true, NULL);
1739 }
1740 
1741 //=============================================================================
1742 #ifndef PRODUCT
1743 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
1744 {
1745   st->print("nop \t# %d bytes pad for loops and calls", _count);
1746 }
1747 #endif
1748 
1749 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
1750 {
1751   MacroAssembler _masm(&cbuf);
1752   __ nop(_count);
1753 }
1754 
1755 uint MachNopNode::size(PhaseRegAlloc*) const
1756 {
1757   return _count;
1758 }
1759 
1760 
1761 //=============================================================================
1762 #ifndef PRODUCT
1763 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1764 {
1765   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1766   int reg = ra_->get_reg_first(this);
1767   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1768             Matcher::regName[reg], offset);
1769 }
1770 #endif
1771 
1772 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1773 {
1774   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1775   int reg = ra_->get_encode(this);
1776   if (offset >= 0x80) {
1777     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1778     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1779     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1780     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1781     emit_d32(cbuf, offset);
1782   } else {
1783     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1784     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1785     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1786     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1787     emit_d8(cbuf, offset);
1788   }
1789 }
1790 
1791 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1792 {
1793   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1794   return (offset < 0x80) ? 5 : 8; // REX
1795 }
1796 
1797 //=============================================================================
1798 
1799 // emit call stub, compiled java to interpreter
1800 void emit_java_to_interp(CodeBuffer& cbuf)
1801 {
1802   // Stub is fixed up when the corresponding call is converted from
1803   // calling compiled code to calling interpreted code.
1804   // movq rbx, 0
1805   // jmp -5 # to self
1806 
1807   address mark = cbuf.insts_mark();  // get mark within main instrs section
1808 
1809   // Note that the code buffer's insts_mark is always relative to insts.
1810   // That's why we must use the macroassembler to generate a stub.
1811   MacroAssembler _masm(&cbuf);
1812 
1813   address base =
1814   __ start_a_stub(Compile::MAX_stubs_size);
1815   if (base == NULL)  return;  // CodeBuffer::expand failed
1816   // static stub relocation stores the instruction address of the call
1817   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM64);
1818   // static stub relocation also tags the methodOop in the code-stream.
1819   __ movoop(rbx, (jobject) NULL);  // method is zapped till fixup time
1820   // This is recognized as unresolved by relocs/nativeinst/ic code
1821   __ jump(RuntimeAddress(__ pc()));
1822 
1823   // Update current stubs pointer and restore insts_end.
1824   __ end_a_stub();
1825 }
1826 
1827 // size of call stub, compiled java to interpretor
1828 uint size_java_to_interp()
1829 {
1830   return 15;  // movq (1+1+8); jmp (1+4)
1831 }
1832 
1833 // relocation entries for call stub, compiled java to interpretor
1834 uint reloc_java_to_interp()
1835 {
1836   return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1837 }
1838 
1839 //=============================================================================
1840 #ifndef PRODUCT
1841 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1842 {
1843   if (UseCompressedOops) {
1844     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1845     if (Universe::narrow_oop_shift() != 0) {
1846       st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
1847     }
1848     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1849   } else {
1850     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1851                  "# Inline cache check");
1852   }
1853   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1854   st->print_cr("\tnop\t# nops to align entry point");
1855 }
1856 #endif
1857 
1858 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1859 {
1860   MacroAssembler masm(&cbuf);
1861   uint insts_size = cbuf.insts_size();
1862   if (UseCompressedOops) {
1863     masm.load_klass(rscratch1, j_rarg0);
1864     masm.cmpptr(rax, rscratch1);
1865   } else {
1866     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1867   }
1868 
1869   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1870 
1871   /* WARNING these NOPs are critical so that verified entry point is properly
1872      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1873   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1874   if (OptoBreakpoint) {
1875     // Leave space for int3
1876     nops_cnt -= 1;
1877   }
1878   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1879   if (nops_cnt > 0)
1880     masm.nop(nops_cnt);
1881 }
1882 
1883 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1884 {
1885   return MachNode::size(ra_); // too many variables; just compute it
1886                               // the hard way
1887 }
1888 
1889 
1890 //=============================================================================
1891 uint size_exception_handler()
1892 {
1893   // NativeCall instruction size is the same as NativeJump.
1894   // Note that this value is also credited (in output.cpp) to
1895   // the size of the code section.
1896   return NativeJump::instruction_size;
1897 }
1898 
1899 // Emit exception handler code.
1900 int emit_exception_handler(CodeBuffer& cbuf)
1901 {
1902 
1903   // Note that the code buffer's insts_mark is always relative to insts.
1904   // That's why we must use the macroassembler to generate a handler.
1905   MacroAssembler _masm(&cbuf);
1906   address base =
1907   __ start_a_stub(size_exception_handler());
1908   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1909   int offset = __ offset();
1910   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1911   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1912   __ end_a_stub();
1913   return offset;
1914 }
1915 
1916 uint size_deopt_handler()
1917 {
1918   // three 5 byte instructions
1919   return 15;
1920 }
1921 
1922 // Emit deopt handler code.
1923 int emit_deopt_handler(CodeBuffer& cbuf)
1924 {
1925 
1926   // Note that the code buffer's insts_mark is always relative to insts.
1927   // That's why we must use the macroassembler to generate a handler.
1928   MacroAssembler _masm(&cbuf);
1929   address base =
1930   __ start_a_stub(size_deopt_handler());
1931   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1932   int offset = __ offset();
1933   address the_pc = (address) __ pc();
1934   Label next;
1935   // push a "the_pc" on the stack without destroying any registers
1936   // as they all may be live.
1937 
1938   // push address of "next"
1939   __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1940   __ bind(next);
1941   // adjust it so it matches "the_pc"
1942   __ subptr(Address(rsp, 0), __ offset() - offset);
1943   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1944   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1945   __ end_a_stub();
1946   return offset;
1947 }
1948 
1949 
1950 const bool Matcher::match_rule_supported(int opcode) {
1951   if (!has_match_rule(opcode))
1952     return false;
1953 
1954   return true;  // Per default match rules are supported.
1955 }
1956 
1957 int Matcher::regnum_to_fpu_offset(int regnum)
1958 {
1959   return regnum - 32; // The FP registers are in the second chunk
1960 }
1961 
1962 // This is UltraSparc specific, true just means we have fast l2f conversion
1963 const bool Matcher::convL2FSupported(void) {
1964   return true;
1965 }
1966 
1967 // Vector width in bytes
1968 const uint Matcher::vector_width_in_bytes(void) {
1969   return 8;
1970 }
1971 
1972 // Vector ideal reg
1973 const uint Matcher::vector_ideal_reg(void) {
1974   return Op_RegD;
1975 }
1976 
1977 // Is this branch offset short enough that a short branch can be used?
1978 //
1979 // NOTE: If the platform does not provide any short branch variants, then
1980 //       this method should return false for offset 0.
1981 bool Matcher::is_short_branch_offset(int rule, int offset) {
1982   // the short version of jmpConUCF2 contains multiple branches,
1983   // making the reach slightly less
1984   if (rule == jmpConUCF2_rule)
1985     return (-126 <= offset && offset <= 125);
1986   return (-128 <= offset && offset <= 127);
1987 }
1988 
1989 const bool Matcher::isSimpleConstant64(jlong value) {
1990   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1991   //return value == (int) value;  // Cf. storeImmL and immL32.
1992 
1993   // Probably always true, even if a temp register is required.
1994   return true;
1995 }
1996 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// not bytes, hence 'false'.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray, expressed as 8 * BytesPerLong.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;
2007 
2008 bool Matcher::narrow_oop_use_complex_address() {
2009   assert(UseCompressedOops, "only for compressed oops code");
2010   return (LogMinObjAlignmentInBytes <= 3);
2011 }
2012 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = true; // XXX

// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
2025 
2026 // No-op on amd64
2027 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
2028 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Set to true in this file.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
2032 
2033 // Are floats conerted to double when stored to stack during deoptimization?
2034 // On x64 it is stored without convertion so we can use normal access.
2035 bool Matcher::float_in_double() { return false; }
2036 
// Do ints take an entire long register or just half?  True here, i.e. the
// entire long register.
const bool Matcher::int_in_long = true;
2039 
2040 // Return whether or not this register is ever used as an argument.
2041 // This function is used on startup to build the trampoline stubs in
2042 // generateOptoStub.  Registers not mentioned will be killed by the VM
2043 // call in the trampoline, and arguments in those registers not be
2044 // available to the callee.
2045 bool Matcher::can_be_java_arg(int reg)
2046 {
2047   return
2048     reg ==  RDI_num || reg ==  RDI_H_num ||
2049     reg ==  RSI_num || reg ==  RSI_H_num ||
2050     reg ==  RDX_num || reg ==  RDX_H_num ||
2051     reg ==  RCX_num || reg ==  RCX_H_num ||
2052     reg ==   R8_num || reg ==   R8_H_num ||
2053     reg ==   R9_num || reg ==   R9_H_num ||
2054     reg ==  R12_num || reg ==  R12_H_num ||
2055     reg == XMM0_num || reg == XMM0_H_num ||
2056     reg == XMM1_num || reg == XMM1_H_num ||
2057     reg == XMM2_num || reg == XMM2_H_num ||
2058     reg == XMM3_num || reg == XMM3_H_num ||
2059     reg == XMM4_num || reg == XMM4_H_num ||
2060     reg == XMM5_num || reg == XMM5_H_num ||
2061     reg == XMM6_num || reg == XMM6_H_num ||
2062     reg == XMM7_num || reg == XMM7_H_num;
2063 }
2064 
2065 bool Matcher::is_spillable_arg(int reg)
2066 {
2067   return can_be_java_arg(reg);
2068 }
2069 
2070 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2071   // In 64 bit mode a code which use multiply when
2072   // devisor is constant is faster than hardware
2073   // DIV instruction (it uses MulHiL).
2074   return false;
2075 }
2076 
2077 // Register for DIVI projection of divmodI
2078 RegMask Matcher::divI_proj_mask() {
2079   return INT_RAX_REG_mask;
2080 }
2081 
2082 // Register for MODI projection of divmodI
2083 RegMask Matcher::modI_proj_mask() {
2084   return INT_RDX_REG_mask;
2085 }
2086 
2087 // Register for DIVL projection of divmodL
2088 RegMask Matcher::divL_proj_mask() {
2089   return LONG_RAX_REG_mask;
2090 }
2091 
2092 // Register for MODL projection of divmodL
2093 RegMask Matcher::modL_proj_mask() {
2094   return LONG_RDX_REG_mask;
2095 }
2096 
2097 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
2098   return PTR_RBP_REG_mask;
2099 }
2100 
2101 static Address build_address(int b, int i, int s, int d) {
2102   Register index = as_Register(i);
2103   Address::ScaleFactor scale = (Address::ScaleFactor)s;
2104   if (index == rsp) {
2105     index = noreg;
2106     scale = Address::no_scale;
2107   }
2108   Address addr(as_Register(b), index, scale, d);
2109   return addr;
2110 }
2111 
2112 %}
2113 
2114 //----------ENCODING BLOCK-----------------------------------------------------
2115 // This block specifies the encoding classes used by the compiler to
2116 // output byte streams.  Encoding classes are parameterized macros
2117 // used by Machine Instruction Nodes in order to generate the bit
2118 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
2121 // COND_INTER.  REG_INTER causes an operand to generate a function
2122 // which returns its register number when queried.  CONST_INTER causes
2123 // an operand to generate a function which returns the value of the
2124 // constant when queried.  MEMORY_INTER causes an operand to generate
2125 // four functions which return the Base Register, the Index Register,
2126 // the Scale Value, and the Offset Value of the operand when queried.
2127 // COND_INTER causes an operand to generate six functions which return
2128 // the encoding code (ie - encoding bits for the instruction)
2129 // associated with each basic boolean condition for a conditional
2130 // instruction.
2131 //
2132 // Instructions specify two basic values for encoding.  Again, a
2133 // function is available to check if the constant displacement is an
2134 // oop. They use the ins_encode keyword to specify their encoding
2135 // classes (which must be a sequence of enc_class names, and their
2136 // parameters, specified in the encoding block), and they use the
2137 // opcode keyword to specify, in order, their primary, secondary, and
2138 // tertiary opcode.  Only the opcode sections which a particular
2139 // instruction needs for encoding need to be specified.
2140 encode %{
2141   // Build emit functions for each basic byte or larger field in the
2142   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2143   // from C++ code in the enc_class source block.  Emit functions will
2144   // live in the main source block for now.  In future, we can
2145   // generalize this by adding a syntax that specifies the sizes of
2146   // fields in an order, so that the adlc can build the emit functions
2147   // automagically
2148 
2149   // Emit primary opcode
2150   enc_class OpcP
2151   %{
2152     emit_opcode(cbuf, $primary);
2153   %}
2154 
2155   // Emit secondary opcode
2156   enc_class OpcS
2157   %{
2158     emit_opcode(cbuf, $secondary);
2159   %}
2160 
2161   // Emit tertiary opcode
2162   enc_class OpcT
2163   %{
2164     emit_opcode(cbuf, $tertiary);
2165   %}
2166 
2167   // Emit opcode directly
2168   enc_class Opcode(immI d8)
2169   %{
2170     emit_opcode(cbuf, $d8$$constant);
2171   %}
2172 
2173   // Emit size prefix
2174   enc_class SizePrefix
2175   %{
2176     emit_opcode(cbuf, 0x66);
2177   %}
2178 
2179   enc_class reg(rRegI reg)
2180   %{
2181     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
2182   %}
2183 
2184   enc_class reg_reg(rRegI dst, rRegI src)
2185   %{
2186     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2187   %}
2188 
2189   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
2190   %{
2191     emit_opcode(cbuf, $opcode$$constant);
2192     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2193   %}
2194 
2195   enc_class cmpfp_fixup()
2196   %{
2197     // jnp,s exit
2198     emit_opcode(cbuf, 0x7B);
2199     emit_d8(cbuf, 0x0A);
2200 
2201     // pushfq
2202     emit_opcode(cbuf, 0x9C);
2203 
2204     // andq $0xffffff2b, (%rsp)
2205     emit_opcode(cbuf, Assembler::REX_W);
2206     emit_opcode(cbuf, 0x81);
2207     emit_opcode(cbuf, 0x24);
2208     emit_opcode(cbuf, 0x24);
2209     emit_d32(cbuf, 0xffffff2b);
2210 
2211     // popfq
2212     emit_opcode(cbuf, 0x9D);
2213 
2214     // nop (target for branch to avoid branch to branch)
2215     emit_opcode(cbuf, 0x90);
2216   %}
2217 
2218   enc_class cmpfp3(rRegI dst)
2219   %{
2220     int dstenc = $dst$$reg;
2221 
2222     // movl $dst, -1
2223     if (dstenc >= 8) {
2224       emit_opcode(cbuf, Assembler::REX_B);
2225     }
2226     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2227     emit_d32(cbuf, -1);
2228 
2229     // jp,s done
2230     emit_opcode(cbuf, 0x7A);
2231     emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
2232 
2233     // jb,s done
2234     emit_opcode(cbuf, 0x72);
2235     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2236 
2237     // setne $dst
2238     if (dstenc >= 4) {
2239       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2240     }
2241     emit_opcode(cbuf, 0x0F);
2242     emit_opcode(cbuf, 0x95);
2243     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2244 
2245     // movzbl $dst, $dst
2246     if (dstenc >= 4) {
2247       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2248     }
2249     emit_opcode(cbuf, 0x0F);
2250     emit_opcode(cbuf, 0xB6);
2251     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2252   %}
2253 
2254   enc_class cdql_enc(no_rax_rdx_RegI div)
2255   %{
2256     // Full implementation of Java idiv and irem; checks for
2257     // special case as described in JVM spec., p.243 & p.271.
2258     //
2259     //         normal case                           special case
2260     //
2261     // input : rax: dividend                         min_int
2262     //         reg: divisor                          -1
2263     //
2264     // output: rax: quotient  (= rax idiv reg)       min_int
2265     //         rdx: remainder (= rax irem reg)       0
2266     //
2267     //  Code sequnce:
2268     //
2269     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
2270     //    5:   75 07/08                jne    e <normal>
2271     //    7:   33 d2                   xor    %edx,%edx
2272     //  [div >= 8 -> offset + 1]
2273     //  [REX_B]
2274     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
2275     //    c:   74 03/04                je     11 <done>
2276     // 000000000000000e <normal>:
2277     //    e:   99                      cltd
2278     //  [div >= 8 -> offset + 1]
2279     //  [REX_B]
2280     //    f:   f7 f9                   idiv   $div
2281     // 0000000000000011 <done>:
2282 
2283     // cmp    $0x80000000,%eax
2284     emit_opcode(cbuf, 0x3d);
2285     emit_d8(cbuf, 0x00);
2286     emit_d8(cbuf, 0x00);
2287     emit_d8(cbuf, 0x00);
2288     emit_d8(cbuf, 0x80);
2289 
2290     // jne    e <normal>
2291     emit_opcode(cbuf, 0x75);
2292     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
2293 
2294     // xor    %edx,%edx
2295     emit_opcode(cbuf, 0x33);
2296     emit_d8(cbuf, 0xD2);
2297 
2298     // cmp    $0xffffffffffffffff,%ecx
2299     if ($div$$reg >= 8) {
2300       emit_opcode(cbuf, Assembler::REX_B);
2301     }
2302     emit_opcode(cbuf, 0x83);
2303     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2304     emit_d8(cbuf, 0xFF);
2305 
2306     // je     11 <done>
2307     emit_opcode(cbuf, 0x74);
2308     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
2309 
2310     // <normal>
2311     // cltd
2312     emit_opcode(cbuf, 0x99);
2313 
2314     // idivl (note: must be emitted by the user of this rule)
2315     // <done>
2316   %}
2317 
2318   enc_class cdqq_enc(no_rax_rdx_RegL div)
2319   %{
2320     // Full implementation of Java ldiv and lrem; checks for
2321     // special case as described in JVM spec., p.243 & p.271.
2322     //
2323     //         normal case                           special case
2324     //
2325     // input : rax: dividend                         min_long
2326     //         reg: divisor                          -1
2327     //
2328     // output: rax: quotient  (= rax idiv reg)       min_long
2329     //         rdx: remainder (= rax irem reg)       0
2330     //
2331     //  Code sequnce:
2332     //
2333     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
2334     //    7:   00 00 80
2335     //    a:   48 39 d0                cmp    %rdx,%rax
2336     //    d:   75 08                   jne    17 <normal>
2337     //    f:   33 d2                   xor    %edx,%edx
2338     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
2339     //   15:   74 05                   je     1c <done>
2340     // 0000000000000017 <normal>:
2341     //   17:   48 99                   cqto
2342     //   19:   48 f7 f9                idiv   $div
2343     // 000000000000001c <done>:
2344 
2345     // mov    $0x8000000000000000,%rdx
2346     emit_opcode(cbuf, Assembler::REX_W);
2347     emit_opcode(cbuf, 0xBA);
2348     emit_d8(cbuf, 0x00);
2349     emit_d8(cbuf, 0x00);
2350     emit_d8(cbuf, 0x00);
2351     emit_d8(cbuf, 0x00);
2352     emit_d8(cbuf, 0x00);
2353     emit_d8(cbuf, 0x00);
2354     emit_d8(cbuf, 0x00);
2355     emit_d8(cbuf, 0x80);
2356 
2357     // cmp    %rdx,%rax
2358     emit_opcode(cbuf, Assembler::REX_W);
2359     emit_opcode(cbuf, 0x39);
2360     emit_d8(cbuf, 0xD0);
2361 
2362     // jne    17 <normal>
2363     emit_opcode(cbuf, 0x75);
2364     emit_d8(cbuf, 0x08);
2365 
2366     // xor    %edx,%edx
2367     emit_opcode(cbuf, 0x33);
2368     emit_d8(cbuf, 0xD2);
2369 
2370     // cmp    $0xffffffffffffffff,$div
2371     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
2372     emit_opcode(cbuf, 0x83);
2373     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
2374     emit_d8(cbuf, 0xFF);
2375 
2376     // je     1e <done>
2377     emit_opcode(cbuf, 0x74);
2378     emit_d8(cbuf, 0x05);
2379 
2380     // <normal>
2381     // cqto
2382     emit_opcode(cbuf, Assembler::REX_W);
2383     emit_opcode(cbuf, 0x99);
2384 
2385     // idivq (note: must be emitted by the user of this rule)
2386     // <done>
2387   %}
2388 
2389   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2390   enc_class OpcSE(immI imm)
2391   %{
2392     // Emit primary opcode and set sign-extend bit
2393     // Check for 8-bit immediate, and set sign extend bit in opcode
2394     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2395       emit_opcode(cbuf, $primary | 0x02);
2396     } else {
2397       // 32-bit immediate
2398       emit_opcode(cbuf, $primary);
2399     }
2400   %}
2401 
2402   enc_class OpcSErm(rRegI dst, immI imm)
2403   %{
2404     // OpcSEr/m
2405     int dstenc = $dst$$reg;
2406     if (dstenc >= 8) {
2407       emit_opcode(cbuf, Assembler::REX_B);
2408       dstenc -= 8;
2409     }
2410     // Emit primary opcode and set sign-extend bit
2411     // Check for 8-bit immediate, and set sign extend bit in opcode
2412     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2413       emit_opcode(cbuf, $primary | 0x02);
2414     } else {
2415       // 32-bit immediate
2416       emit_opcode(cbuf, $primary);
2417     }
2418     // Emit r/m byte with secondary opcode, after primary opcode.
2419     emit_rm(cbuf, 0x3, $secondary, dstenc);
2420   %}
2421 
2422   enc_class OpcSErm_wide(rRegL dst, immI imm)
2423   %{
2424     // OpcSEr/m
2425     int dstenc = $dst$$reg;
2426     if (dstenc < 8) {
2427       emit_opcode(cbuf, Assembler::REX_W);
2428     } else {
2429       emit_opcode(cbuf, Assembler::REX_WB);
2430       dstenc -= 8;
2431     }
2432     // Emit primary opcode and set sign-extend bit
2433     // Check for 8-bit immediate, and set sign extend bit in opcode
2434     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2435       emit_opcode(cbuf, $primary | 0x02);
2436     } else {
2437       // 32-bit immediate
2438       emit_opcode(cbuf, $primary);
2439     }
2440     // Emit r/m byte with secondary opcode, after primary opcode.
2441     emit_rm(cbuf, 0x3, $secondary, dstenc);
2442   %}
2443 
2444   enc_class Con8or32(immI imm)
2445   %{
2446     // Check for 8-bit immediate, and set sign extend bit in opcode
2447     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2448       $$$emit8$imm$$constant;
2449     } else {
2450       // 32-bit immediate
2451       $$$emit32$imm$$constant;
2452     }
2453   %}
2454 
2455   enc_class Lbl(label labl)
2456   %{
2457     // JMP, CALL
2458     Label* l = $labl$$label;
2459     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2460   %}
2461 
2462   enc_class LblShort(label labl)
2463   %{
2464     // JMP, CALL
2465     Label* l = $labl$$label;
2466     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2467     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2468     emit_d8(cbuf, disp);
2469   %}
2470 
2471   enc_class opc2_reg(rRegI dst)
2472   %{
2473     // BSWAP
2474     emit_cc(cbuf, $secondary, $dst$$reg);
2475   %}
2476 
2477   enc_class opc3_reg(rRegI dst)
2478   %{
2479     // BSWAP
2480     emit_cc(cbuf, $tertiary, $dst$$reg);
2481   %}
2482 
2483   enc_class reg_opc(rRegI div)
2484   %{
2485     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2486     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2487   %}
2488 
2489   enc_class Jcc(cmpOp cop, label labl)
2490   %{
2491     // JCC
2492     Label* l = $labl$$label;
2493     $$$emit8$primary;
2494     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2495     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
2496   %}
2497 
2498   enc_class JccShort (cmpOp cop, label labl)
2499   %{
2500   // JCC
2501     Label *l = $labl$$label;
2502     emit_cc(cbuf, $primary, $cop$$cmpcode);
2503     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
2504     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
2505     emit_d8(cbuf, disp);
2506   %}
2507 
2508   enc_class enc_cmov(cmpOp cop)
2509   %{
2510     // CMOV
2511     $$$emit8$primary;
2512     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2513   %}
2514 
2515   enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
2516   %{
2517     // Invert sense of branch from sense of cmov
2518     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2519     emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
2520                   ? (UseXmmRegToRegMoveAll ? 3 : 4)
2521                   : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
2522     // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
2523     if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
2524     if ($dst$$reg < 8) {
2525       if ($src$$reg >= 8) {
2526         emit_opcode(cbuf, Assembler::REX_B);
2527       }
2528     } else {
2529       if ($src$$reg < 8) {
2530         emit_opcode(cbuf, Assembler::REX_R);
2531       } else {
2532         emit_opcode(cbuf, Assembler::REX_RB);
2533       }
2534     }
2535     emit_opcode(cbuf, 0x0F);
2536     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2537     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2538   %}
2539 
2540   enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
2541   %{
2542     // Invert sense of branch from sense of cmov
2543     emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
2544     emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
2545 
2546     //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
2547     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
2548     if ($dst$$reg < 8) {
2549       if ($src$$reg >= 8) {
2550         emit_opcode(cbuf, Assembler::REX_B);
2551       }
2552     } else {
2553       if ($src$$reg < 8) {
2554         emit_opcode(cbuf, Assembler::REX_R);
2555       } else {
2556         emit_opcode(cbuf, Assembler::REX_RB);
2557       }
2558     }
2559     emit_opcode(cbuf, 0x0F);
2560     emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
2561     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
2562   %}
2563 
2564   enc_class enc_PartialSubtypeCheck()
2565   %{
2566     Register Rrdi = as_Register(RDI_enc); // result register
2567     Register Rrax = as_Register(RAX_enc); // super class
2568     Register Rrcx = as_Register(RCX_enc); // killed
2569     Register Rrsi = as_Register(RSI_enc); // sub class
2570     Label miss;
2571     const bool set_cond_codes = true;
2572 
2573     MacroAssembler _masm(&cbuf);
2574     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2575                                      NULL, &miss,
2576                                      /*set_cond_codes:*/ true);
2577     if ($primary) {
2578       __ xorptr(Rrdi, Rrdi);
2579     }
2580     __ bind(miss);
2581   %}
2582 
2583   enc_class Java_To_Interpreter(method meth)
2584   %{
2585     // CALL Java_To_Interpreter
2586     // This is the instruction starting address for relocation info.
2587     cbuf.set_insts_mark();
2588     $$$emit8$primary;
2589     // CALL directly to the runtime
2590     emit_d32_reloc(cbuf,
2591                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2592                    runtime_call_Relocation::spec(),
2593                    RELOC_DISP32);
2594   %}
2595 
2596   enc_class preserve_SP %{
2597     debug_only(int off0 = cbuf.insts_size());
2598     MacroAssembler _masm(&cbuf);
2599     // RBP is preserved across all calls, even compiled calls.
2600     // Use it to preserve RSP in places where the callee might change the SP.
2601     __ movptr(rbp_mh_SP_save, rsp);
2602     debug_only(int off1 = cbuf.insts_size());
2603     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
2604   %}
2605 
  enc_class restore_SP %{
    // Counterpart of preserve_SP: copies the value held in rbp_mh_SP_save
    // back into RSP.
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}
2610 
2611   enc_class Java_Static_Call(method meth)
2612   %{
2613     // JAVA STATIC CALL
2614     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2615     // determine who we intended to call.
2616     cbuf.set_insts_mark();
2617     $$$emit8$primary;
2618 
2619     if (!_method) {
2620       emit_d32_reloc(cbuf,
2621                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2622                      runtime_call_Relocation::spec(),
2623                      RELOC_DISP32);
2624     } else if (_optimized_virtual) {
2625       emit_d32_reloc(cbuf,
2626                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2627                      opt_virtual_call_Relocation::spec(),
2628                      RELOC_DISP32);
2629     } else {
2630       emit_d32_reloc(cbuf,
2631                      (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2632                      static_call_Relocation::spec(),
2633                      RELOC_DISP32);
2634     }
2635     if (_method) {
2636       // Emit stub for static call
2637       emit_java_to_interp(cbuf);
2638     }
2639   %}
2640 
2641   enc_class Java_Dynamic_Call(method meth)
2642   %{
2643     // JAVA DYNAMIC CALL
2644     // !!!!!
2645     // Generate  "movq rax, -1", placeholder instruction to load oop-info
2646     // emit_call_dynamic_prologue( cbuf );
2647     cbuf.set_insts_mark();
2648 
2649     // movq rax, -1
2650     emit_opcode(cbuf, Assembler::REX_W);
2651     emit_opcode(cbuf, 0xB8 | RAX_enc);
2652     emit_d64_reloc(cbuf,
2653                    (int64_t) Universe::non_oop_word(),
2654                    oop_Relocation::spec_for_immediate(), RELOC_IMM64);
2655     address virtual_call_oop_addr = cbuf.insts_mark();
2656     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
2657     // who we intended to call.
2658     cbuf.set_insts_mark();
2659     $$$emit8$primary;
2660     emit_d32_reloc(cbuf,
2661                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2662                    virtual_call_Relocation::spec(virtual_call_oop_addr),
2663                    RELOC_DISP32);
2664   %}
2665 
2666   enc_class Java_Compiled_Call(method meth)
2667   %{
2668     // JAVA COMPILED CALL
2669     int disp = in_bytes(methodOopDesc:: from_compiled_offset());
2670 
2671     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2672     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2673 
2674     // callq *disp(%rax)
2675     cbuf.set_insts_mark();
2676     $$$emit8$primary;
2677     if (disp < 0x80) {
2678       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2679       emit_d8(cbuf, disp); // Displacement
2680     } else {
2681       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2682       emit_d32(cbuf, disp); // Displacement
2683     }
2684   %}
2685 
2686   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2687   %{
2688     // SAL, SAR, SHR
2689     int dstenc = $dst$$reg;
2690     if (dstenc >= 8) {
2691       emit_opcode(cbuf, Assembler::REX_B);
2692       dstenc -= 8;
2693     }
2694     $$$emit8$primary;
2695     emit_rm(cbuf, 0x3, $secondary, dstenc);
2696     $$$emit8$shift$$constant;
2697   %}
2698 
2699   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2700   %{
2701     // SAL, SAR, SHR
2702     int dstenc = $dst$$reg;
2703     if (dstenc < 8) {
2704       emit_opcode(cbuf, Assembler::REX_W);
2705     } else {
2706       emit_opcode(cbuf, Assembler::REX_WB);
2707       dstenc -= 8;
2708     }
2709     $$$emit8$primary;
2710     emit_rm(cbuf, 0x3, $secondary, dstenc);
2711     $$$emit8$shift$$constant;
2712   %}
2713 
2714   enc_class load_immI(rRegI dst, immI src)
2715   %{
2716     int dstenc = $dst$$reg;
2717     if (dstenc >= 8) {
2718       emit_opcode(cbuf, Assembler::REX_B);
2719       dstenc -= 8;
2720     }
2721     emit_opcode(cbuf, 0xB8 | dstenc);
2722     $$$emit32$src$$constant;
2723   %}
2724 
2725   enc_class load_immL(rRegL dst, immL src)
2726   %{
2727     int dstenc = $dst$$reg;
2728     if (dstenc < 8) {
2729       emit_opcode(cbuf, Assembler::REX_W);
2730     } else {
2731       emit_opcode(cbuf, Assembler::REX_WB);
2732       dstenc -= 8;
2733     }
2734     emit_opcode(cbuf, 0xB8 | dstenc);
2735     emit_d64(cbuf, $src$$constant);
2736   %}
2737 
2738   enc_class load_immUL32(rRegL dst, immUL32 src)
2739   %{
2740     // same as load_immI, but this time we care about zeroes in the high word
2741     int dstenc = $dst$$reg;
2742     if (dstenc >= 8) {
2743       emit_opcode(cbuf, Assembler::REX_B);
2744       dstenc -= 8;
2745     }
2746     emit_opcode(cbuf, 0xB8 | dstenc);
2747     $$$emit32$src$$constant;
2748   %}
2749 
2750   enc_class load_immL32(rRegL dst, immL32 src)
2751   %{
2752     int dstenc = $dst$$reg;
2753     if (dstenc < 8) {
2754       emit_opcode(cbuf, Assembler::REX_W);
2755     } else {
2756       emit_opcode(cbuf, Assembler::REX_WB);
2757       dstenc -= 8;
2758     }
2759     emit_opcode(cbuf, 0xC7);
2760     emit_rm(cbuf, 0x03, 0x00, dstenc);
2761     $$$emit32$src$$constant;
2762   %}
2763 
2764   enc_class load_immP31(rRegP dst, immP32 src)
2765   %{
2766     // same as load_immI, but this time we care about zeroes in the high word
2767     int dstenc = $dst$$reg;
2768     if (dstenc >= 8) {
2769       emit_opcode(cbuf, Assembler::REX_B);
2770       dstenc -= 8;
2771     }
2772     emit_opcode(cbuf, 0xB8 | dstenc);
2773     $$$emit32$src$$constant;
2774   %}
2775 
2776   enc_class load_immP(rRegP dst, immP src)
2777   %{
2778     int dstenc = $dst$$reg;
2779     if (dstenc < 8) {
2780       emit_opcode(cbuf, Assembler::REX_W);
2781     } else {
2782       emit_opcode(cbuf, Assembler::REX_WB);
2783       dstenc -= 8;
2784     }
2785     emit_opcode(cbuf, 0xB8 | dstenc);
2786     // This next line should be generated from ADLC
2787     if ($src->constant_is_oop()) {
2788       emit_d64_reloc(cbuf, $src$$constant, relocInfo::oop_type, RELOC_IMM64);
2789     } else {
2790       emit_d64(cbuf, $src$$constant);
2791     }
2792   %}
2793 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_copy, which receives the raw
  // register encodings of dst and src.
  enc_class enc_copy(rRegI dst, rRegI src)
  %{
    encode_copy(cbuf, $dst$$reg, $src$$reg);
  %}
2799 
  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  // All of the work is delegated to encode_CopyXD, which receives the raw
  // register encodings of dst and src.
  enc_class enc_CopyXD( RegD dst, RegD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}
2804 
2805   enc_class enc_copy_always(rRegI dst, rRegI src)
2806   %{
2807     int srcenc = $src$$reg;
2808     int dstenc = $dst$$reg;
2809 
2810     if (dstenc < 8) {
2811       if (srcenc >= 8) {
2812         emit_opcode(cbuf, Assembler::REX_B);
2813         srcenc -= 8;
2814       }
2815     } else {
2816       if (srcenc < 8) {
2817         emit_opcode(cbuf, Assembler::REX_R);
2818       } else {
2819         emit_opcode(cbuf, Assembler::REX_RB);
2820         srcenc -= 8;
2821       }
2822       dstenc -= 8;
2823     }
2824 
2825     emit_opcode(cbuf, 0x8B);
2826     emit_rm(cbuf, 0x3, dstenc, srcenc);
2827   %}
2828 
2829   enc_class enc_copy_wide(rRegL dst, rRegL src)
2830   %{
2831     int srcenc = $src$$reg;
2832     int dstenc = $dst$$reg;
2833 
2834     if (dstenc != srcenc) {
2835       if (dstenc < 8) {
2836         if (srcenc < 8) {
2837           emit_opcode(cbuf, Assembler::REX_W);
2838         } else {
2839           emit_opcode(cbuf, Assembler::REX_WB);
2840           srcenc -= 8;
2841         }
2842       } else {
2843         if (srcenc < 8) {
2844           emit_opcode(cbuf, Assembler::REX_WR);
2845         } else {
2846           emit_opcode(cbuf, Assembler::REX_WRB);
2847           srcenc -= 8;
2848         }
2849         dstenc -= 8;
2850       }
2851       emit_opcode(cbuf, 0x8B);
2852       emit_rm(cbuf, 0x3, dstenc, srcenc);
2853     }
2854   %}
2855 
  enc_class Con32(immI src)
  %{
    // Output immediate
    // Uses the ADLC emit32 form on the constant value of src; no opcode or
    // prefix bytes are produced here.
    $$$emit32$src$$constant;
  %}
2861 
2862   enc_class Con64(immL src)
2863   %{
2864     // Output immediate
2865     emit_d64($src$$constant);
2866   %}
2867 
2868   enc_class Con32F_as_bits(immF src)
2869   %{
2870     // Output Float immediate bits
2871     jfloat jf = $src$$constant;
2872     jint jf_as_bits = jint_cast(jf);
2873     emit_d32(cbuf, jf_as_bits);
2874   %}
2875 
  enc_class Con16(immI src)
  %{
    // Output immediate
    // Uses the ADLC emit16 form on the constant value of src, although the
    // operand itself is declared as immI.
    $$$emit16$src$$constant;
  %}
2881 
  // How is this different from Con32??? XXX
  // NOTE(review): the visible difference is only syntactic -- this class
  // calls emit_d32 directly, while Con32 uses the ADLC emit32 form.  Whether
  // the two generate different code cannot be told from here.
  enc_class Con_d32(immI src)
  %{
    emit_d32(cbuf,$src$$constant);
  %}
2887 
2888   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2889     // Output immediate memory reference
2890     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2891     emit_d32(cbuf, 0x00);
2892   %}
2893 
  enc_class lock_prefix()
  %{
    // Emits the LOCK prefix byte (0xF0), but only when os::is_MP() reports
    // true at code-emission time; otherwise nothing is emitted.
    if (os::is_MP()) {
      emit_opcode(cbuf, 0xF0); // lock
    }
  %}
2900 
2901   enc_class REX_mem(memory mem)
2902   %{
2903     if ($mem$$base >= 8) {
2904       if ($mem$$index < 8) {
2905         emit_opcode(cbuf, Assembler::REX_B);
2906       } else {
2907         emit_opcode(cbuf, Assembler::REX_XB);
2908       }
2909     } else {
2910       if ($mem$$index >= 8) {
2911         emit_opcode(cbuf, Assembler::REX_X);
2912       }
2913     }
2914   %}
2915 
2916   enc_class REX_mem_wide(memory mem)
2917   %{
2918     if ($mem$$base >= 8) {
2919       if ($mem$$index < 8) {
2920         emit_opcode(cbuf, Assembler::REX_WB);
2921       } else {
2922         emit_opcode(cbuf, Assembler::REX_WXB);
2923       }
2924     } else {
2925       if ($mem$$index < 8) {
2926         emit_opcode(cbuf, Assembler::REX_W);
2927       } else {
2928         emit_opcode(cbuf, Assembler::REX_WX);
2929       }
2930     }
2931   %}
2932 
2933   // for byte regs
2934   enc_class REX_breg(rRegI reg)
2935   %{
2936     if ($reg$$reg >= 4) {
2937       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2938     }
2939   %}
2940 
2941   // for byte regs
2942   enc_class REX_reg_breg(rRegI dst, rRegI src)
2943   %{
2944     if ($dst$$reg < 8) {
2945       if ($src$$reg >= 4) {
2946         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2947       }
2948     } else {
2949       if ($src$$reg < 8) {
2950         emit_opcode(cbuf, Assembler::REX_R);
2951       } else {
2952         emit_opcode(cbuf, Assembler::REX_RB);
2953       }
2954     }
2955   %}
2956 
2957   // for byte regs
2958   enc_class REX_breg_mem(rRegI reg, memory mem)
2959   %{
2960     if ($reg$$reg < 8) {
2961       if ($mem$$base < 8) {
2962         if ($mem$$index >= 8) {
2963           emit_opcode(cbuf, Assembler::REX_X);
2964         } else if ($reg$$reg >= 4) {
2965           emit_opcode(cbuf, Assembler::REX);
2966         }
2967       } else {
2968         if ($mem$$index < 8) {
2969           emit_opcode(cbuf, Assembler::REX_B);
2970         } else {
2971           emit_opcode(cbuf, Assembler::REX_XB);
2972         }
2973       }
2974     } else {
2975       if ($mem$$base < 8) {
2976         if ($mem$$index < 8) {
2977           emit_opcode(cbuf, Assembler::REX_R);
2978         } else {
2979           emit_opcode(cbuf, Assembler::REX_RX);
2980         }
2981       } else {
2982         if ($mem$$index < 8) {
2983           emit_opcode(cbuf, Assembler::REX_RB);
2984         } else {
2985           emit_opcode(cbuf, Assembler::REX_RXB);
2986         }
2987       }
2988     }
2989   %}
2990 
2991   enc_class REX_reg(rRegI reg)
2992   %{
2993     if ($reg$$reg >= 8) {
2994       emit_opcode(cbuf, Assembler::REX_B);
2995     }
2996   %}
2997 
2998   enc_class REX_reg_wide(rRegI reg)
2999   %{
3000     if ($reg$$reg < 8) {
3001       emit_opcode(cbuf, Assembler::REX_W);
3002     } else {
3003       emit_opcode(cbuf, Assembler::REX_WB);
3004     }
3005   %}
3006 
3007   enc_class REX_reg_reg(rRegI dst, rRegI src)
3008   %{
3009     if ($dst$$reg < 8) {
3010       if ($src$$reg >= 8) {
3011         emit_opcode(cbuf, Assembler::REX_B);
3012       }
3013     } else {
3014       if ($src$$reg < 8) {
3015         emit_opcode(cbuf, Assembler::REX_R);
3016       } else {
3017         emit_opcode(cbuf, Assembler::REX_RB);
3018       }
3019     }
3020   %}
3021 
3022   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
3023   %{
3024     if ($dst$$reg < 8) {
3025       if ($src$$reg < 8) {
3026         emit_opcode(cbuf, Assembler::REX_W);
3027       } else {
3028         emit_opcode(cbuf, Assembler::REX_WB);
3029       }
3030     } else {
3031       if ($src$$reg < 8) {
3032         emit_opcode(cbuf, Assembler::REX_WR);
3033       } else {
3034         emit_opcode(cbuf, Assembler::REX_WRB);
3035       }
3036     }
3037   %}
3038 
3039   enc_class REX_reg_mem(rRegI reg, memory mem)
3040   %{
3041     if ($reg$$reg < 8) {
3042       if ($mem$$base < 8) {
3043         if ($mem$$index >= 8) {
3044           emit_opcode(cbuf, Assembler::REX_X);
3045         }
3046       } else {
3047         if ($mem$$index < 8) {
3048           emit_opcode(cbuf, Assembler::REX_B);
3049         } else {
3050           emit_opcode(cbuf, Assembler::REX_XB);
3051         }
3052       }
3053     } else {
3054       if ($mem$$base < 8) {
3055         if ($mem$$index < 8) {
3056           emit_opcode(cbuf, Assembler::REX_R);
3057         } else {
3058           emit_opcode(cbuf, Assembler::REX_RX);
3059         }
3060       } else {
3061         if ($mem$$index < 8) {
3062           emit_opcode(cbuf, Assembler::REX_RB);
3063         } else {
3064           emit_opcode(cbuf, Assembler::REX_RXB);
3065         }
3066       }
3067     }
3068   %}
3069 
3070   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
3071   %{
3072     if ($reg$$reg < 8) {
3073       if ($mem$$base < 8) {
3074         if ($mem$$index < 8) {
3075           emit_opcode(cbuf, Assembler::REX_W);
3076         } else {
3077           emit_opcode(cbuf, Assembler::REX_WX);
3078         }
3079       } else {
3080         if ($mem$$index < 8) {
3081           emit_opcode(cbuf, Assembler::REX_WB);
3082         } else {
3083           emit_opcode(cbuf, Assembler::REX_WXB);
3084         }
3085       }
3086     } else {
3087       if ($mem$$base < 8) {
3088         if ($mem$$index < 8) {
3089           emit_opcode(cbuf, Assembler::REX_WR);
3090         } else {
3091           emit_opcode(cbuf, Assembler::REX_WRX);
3092         }
3093       } else {
3094         if ($mem$$index < 8) {
3095           emit_opcode(cbuf, Assembler::REX_WRB);
3096         } else {
3097           emit_opcode(cbuf, Assembler::REX_WRXB);
3098         }
3099       }
3100     }
3101   %}
3102 
3103   enc_class reg_mem(rRegI ereg, memory mem)
3104   %{
3105     // High registers handle in encode_RegMem
3106     int reg = $ereg$$reg;
3107     int base = $mem$$base;
3108     int index = $mem$$index;
3109     int scale = $mem$$scale;
3110     int disp = $mem$$disp;
3111     bool disp_is_oop = $mem->disp_is_oop();
3112 
3113     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_is_oop);
3114   %}
3115 
3116   enc_class RM_opc_mem(immI rm_opcode, memory mem)
3117   %{
3118     int rm_byte_opcode = $rm_opcode$$constant;
3119 
3120     // High registers handle in encode_RegMem
3121     int base = $mem$$base;
3122     int index = $mem$$index;
3123     int scale = $mem$$scale;
3124     int displace = $mem$$disp;
3125 
3126     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when
3127                                             // working with static
3128                                             // globals
3129     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
3130                   disp_is_oop);
3131   %}
3132 
3133   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
3134   %{
3135     int reg_encoding = $dst$$reg;
3136     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
3137     int index        = 0x04;            // 0x04 indicates no index
3138     int scale        = 0x00;            // 0x00 indicates no scale
3139     int displace     = $src1$$constant; // 0x00 indicates no displacement
3140     bool disp_is_oop = false;
3141     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
3142                   disp_is_oop);
3143   %}
3144 
3145   enc_class neg_reg(rRegI dst)
3146   %{
3147     int dstenc = $dst$$reg;
3148     if (dstenc >= 8) {
3149       emit_opcode(cbuf, Assembler::REX_B);
3150       dstenc -= 8;
3151     }
3152     // NEG $dst
3153     emit_opcode(cbuf, 0xF7);
3154     emit_rm(cbuf, 0x3, 0x03, dstenc);
3155   %}
3156 
3157   enc_class neg_reg_wide(rRegI dst)
3158   %{
3159     int dstenc = $dst$$reg;
3160     if (dstenc < 8) {
3161       emit_opcode(cbuf, Assembler::REX_W);
3162     } else {
3163       emit_opcode(cbuf, Assembler::REX_WB);
3164       dstenc -= 8;
3165     }
3166     // NEG $dst
3167     emit_opcode(cbuf, 0xF7);
3168     emit_rm(cbuf, 0x3, 0x03, dstenc);
3169   %}
3170 
3171   enc_class setLT_reg(rRegI dst)
3172   %{
3173     int dstenc = $dst$$reg;
3174     if (dstenc >= 8) {
3175       emit_opcode(cbuf, Assembler::REX_B);
3176       dstenc -= 8;
3177     } else if (dstenc >= 4) {
3178       emit_opcode(cbuf, Assembler::REX);
3179     }
3180     // SETLT $dst
3181     emit_opcode(cbuf, 0x0F);
3182     emit_opcode(cbuf, 0x9C);
3183     emit_rm(cbuf, 0x3, 0x0, dstenc);
3184   %}
3185 
3186   enc_class setNZ_reg(rRegI dst)
3187   %{
3188     int dstenc = $dst$$reg;
3189     if (dstenc >= 8) {
3190       emit_opcode(cbuf, Assembler::REX_B);
3191       dstenc -= 8;
3192     } else if (dstenc >= 4) {
3193       emit_opcode(cbuf, Assembler::REX);
3194     }
3195     // SETNZ $dst
3196     emit_opcode(cbuf, 0x0F);
3197     emit_opcode(cbuf, 0x95);
3198     emit_rm(cbuf, 0x3, 0x0, dstenc);
3199   %}
3200 
3201   enc_class enc_cmpLTP(no_rcx_RegI p, no_rcx_RegI q, no_rcx_RegI y,
3202                        rcx_RegI tmp)
3203   %{
3204     // cadd_cmpLT
3205 
3206     int tmpReg = $tmp$$reg;
3207 
3208     int penc = $p$$reg;
3209     int qenc = $q$$reg;
3210     int yenc = $y$$reg;
3211 
3212     // subl $p,$q
3213     if (penc < 8) {
3214       if (qenc >= 8) {
3215         emit_opcode(cbuf, Assembler::REX_B);
3216       }
3217     } else {
3218       if (qenc < 8) {
3219         emit_opcode(cbuf, Assembler::REX_R);
3220       } else {
3221         emit_opcode(cbuf, Assembler::REX_RB);
3222       }
3223     }
3224     emit_opcode(cbuf, 0x2B);
3225     emit_rm(cbuf, 0x3, penc & 7, qenc & 7);
3226 
3227     // sbbl $tmp, $tmp
3228     emit_opcode(cbuf, 0x1B);
3229     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
3230 
3231     // andl $tmp, $y
3232     if (yenc >= 8) {
3233       emit_opcode(cbuf, Assembler::REX_B);
3234     }
3235     emit_opcode(cbuf, 0x23);
3236     emit_rm(cbuf, 0x3, tmpReg, yenc & 7);
3237 
3238     // addl $p,$tmp
3239     if (penc >= 8) {
3240         emit_opcode(cbuf, Assembler::REX_R);
3241     }
3242     emit_opcode(cbuf, 0x03);
3243     emit_rm(cbuf, 0x3, penc & 7, tmpReg);
3244   %}
3245 
3246   // Compare the lonogs and set -1, 0, or 1 into dst
3247   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
3248   %{
3249     int src1enc = $src1$$reg;
3250     int src2enc = $src2$$reg;
3251     int dstenc = $dst$$reg;
3252 
3253     // cmpq $src1, $src2
3254     if (src1enc < 8) {
3255       if (src2enc < 8) {
3256         emit_opcode(cbuf, Assembler::REX_W);
3257       } else {
3258         emit_opcode(cbuf, Assembler::REX_WB);
3259       }
3260     } else {
3261       if (src2enc < 8) {
3262         emit_opcode(cbuf, Assembler::REX_WR);
3263       } else {
3264         emit_opcode(cbuf, Assembler::REX_WRB);
3265       }
3266     }
3267     emit_opcode(cbuf, 0x3B);
3268     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
3269 
3270     // movl $dst, -1
3271     if (dstenc >= 8) {
3272       emit_opcode(cbuf, Assembler::REX_B);
3273     }
3274     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
3275     emit_d32(cbuf, -1);
3276 
3277     // jl,s done
3278     emit_opcode(cbuf, 0x7C);
3279     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
3280 
3281     // setne $dst
3282     if (dstenc >= 4) {
3283       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
3284     }
3285     emit_opcode(cbuf, 0x0F);
3286     emit_opcode(cbuf, 0x95);
3287     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
3288 
3289     // movzbl $dst, $dst
3290     if (dstenc >= 4) {
3291       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
3292     }
3293     emit_opcode(cbuf, 0x0F);
3294     emit_opcode(cbuf, 0xB6);
3295     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
3296   %}
3297 
3298   enc_class Push_ResultXD(regD dst) %{
3299     int dstenc = $dst$$reg;
3300 
3301     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
3302 
3303     // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
3304     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
3305     if (dstenc >= 8) {
3306       emit_opcode(cbuf, Assembler::REX_R);
3307     }
3308     emit_opcode  (cbuf, 0x0F );
3309     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
3310     encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
3311 
3312     // add rsp,8
3313     emit_opcode(cbuf, Assembler::REX_W);
3314     emit_opcode(cbuf,0x83);
3315     emit_rm(cbuf,0x3, 0x0, RSP_enc);
3316     emit_d8(cbuf,0x08);
3317   %}
3318 
3319   enc_class Push_SrcXD(regD src) %{
3320     int srcenc = $src$$reg;
3321 
3322     // subq rsp,#8
3323     emit_opcode(cbuf, Assembler::REX_W);
3324     emit_opcode(cbuf, 0x83);
3325     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3326     emit_d8(cbuf, 0x8);
3327 
3328     // movsd [rsp],src
3329     emit_opcode(cbuf, 0xF2);
3330     if (srcenc >= 8) {
3331       emit_opcode(cbuf, Assembler::REX_R);
3332     }
3333     emit_opcode(cbuf, 0x0F);
3334     emit_opcode(cbuf, 0x11);
3335     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
3336 
3337     // fldd [rsp]
3338     emit_opcode(cbuf, 0x66);
3339     emit_opcode(cbuf, 0xDD);
3340     encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
3341   %}
3342 
3343 
  // Load form of movq: the MacroAssembler's movq is called with XMM register
  // dst as destination and the address of mem as source.
  enc_class movq_ld(regD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3348 
  // Store form of movq: the MacroAssembler's movq is called with the address
  // of mem as destination and XMM register src as source.
  enc_class movq_st(memory mem, regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3353 
3354   enc_class pshufd_8x8(regF dst, regF src) %{
3355     MacroAssembler _masm(&cbuf);
3356 
3357     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3358     __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3359     __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3360   %}
3361 
  // Despite the name, this emits a single pshuflw from src into dst with an
  // immediate of 0x00.
  enc_class pshufd_4x16(regF dst, regF src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3367 
  // Emits pshufd from src into dst; the shuffle selector is the 'mode'
  // argument supplied by the instruct that uses this class.
  enc_class pshufd(regD dst, regD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3373 
  // Emits pxor with XMM register dst as first operand and src as second.
  enc_class pxor(regD dst, regD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3379 
  // Moves general register src into XMM register dst using the
  // MacroAssembler's movdl.
  enc_class mov_i2x(regD dst, rRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3385 
3386   // obj: object to lock
3387   // box: box address (header location) -- killed
3388   // tmp: rax -- killed
3389   // scr: rbx -- killed
3390   //
3391   // What follows is a direct transliteration of fast_lock() and fast_unlock()
3392   // from i486.ad.  See that file for comments.
3393   // TODO: where possible switch from movq (r, 0) to movl(r,0) and
3394   // use the shorter encoding.  (Movl clears the high-order 32-bits).
3395 
3396 
  // Inline monitor-enter sequence.  Three variants are generated depending
  // on the EmitSync flag bits: bit 0 selects a minimal stub, bit 1 a simple
  // stack-lock sequence, and otherwise the full sequence with separate
  // handling of inflated monitors.  Everything is emitted through one
  // MacroAssembler.
  // NOTE(review): the result appears to be reported to the following code
  // through the condition flags -- confirm against the instruct that uses
  // this class.
  enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
  %{
    Register objReg = as_Register((int)$obj$$reg);
    Register boxReg = as_Register((int)$box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);
    MacroAssembler masm(&cbuf);

    // Verify uniqueness of register assignments -- necessary but not sufficient
    assert (objReg != boxReg && objReg != tmpReg &&
            objReg != scrReg && tmpReg != scrReg, "invariant") ;

    // Optional statistics: count every entry into this sequence.
    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // Minimal variant: store the unused-mark value into the box and
        // compare rsp against NULL_WORD.
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 
        masm.cmpptr(rsp, (int32_t)NULL_WORD) ; 
    } else
    if (EmitSync & 2) {
        // Simple variant: stack-locking only, no inflated-monitor path.
        Label DONE_LABEL;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }
        // tmpReg = (word at [obj+0]) | 1, saved into the box before
        // the compare-and-exchange of the box address into [obj+0].
        // QQQ was movl...
        masm.movptr(tmpReg, 0x1);
        masm.orptr(tmpReg, Address(objReg, 0));
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (os::is_MP()) {
          masm.lock();
        }
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);

        // Recursive locking
        // (tmpReg - rsp) masked with (7 - page size) is stored into the box.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);

        masm.bind(DONE_LABEL);
        masm.nop(); // avoid branch to branch
    } else {
        // Full variant.
        // NOTE(review): the label Egress is declared but not used in this
        // block.
        Label DONE_LABEL, IsInflated, Egress;

        // Load the word at [obj+0] and test bit 0x02 to separate the
        // inflated case from the others.
        masm.movptr(tmpReg, Address(objReg, 0)) ; 
        masm.testl (tmpReg, 0x02) ;         // inflated vs stack-locked|neutral|biased
        masm.jcc   (Assembler::notZero, IsInflated) ; 
         
        // it's stack-locked, biased or neutral
        // TODO: optimize markword triage order to reduce the number of
        // conditional branches in the most common cases.
        // Beware -- there's a subtle invariant that fetch of the markword
        // at [FETCH], below, will never observe a biased encoding (*101b).
        // If this invariant is not held we'll suffer exclusion (safety) failure.

        if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
          masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
        }

        // Set bit 0 in the fetched word, save it into the box, then try to
        // exchange the box address into [obj+0].
        // was q will it destroy high?
        masm.orl   (tmpReg, 1) ; 
        masm.movptr(Address(boxReg, 0), tmpReg) ;  
        if (os::is_MP()) { masm.lock(); } 
        masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jcc   (Assembler::equal, DONE_LABEL);

        // Recursive locking
        // (tmpReg - rsp) masked with (7 - page size) is stored into the box.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, 7 - os::vm_page_size());
        masm.movptr(Address(boxReg, 0), tmpReg);
        if (_counters != NULL) {
           masm.cond_inc32(Assembler::equal,
                           ExternalAddress((address) _counters->fast_path_entry_count_addr()));
        }
        masm.jmp   (DONE_LABEL) ;

        masm.bind  (IsInflated) ;
        // It's inflated

        // TODO: someday avoid the ST-before-CAS penalty by
        // relocating (deferring) the following ST.
        // We should also think about trying a CAS without having
        // fetched _owner.  If the CAS is successful we may
        // avoid an RTO->RTS upgrade on the $line.
        // Without cast to int32_t a movptr will destroy r10 which is typically obj
        masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; 

        // From here on boxReg holds the word loaded from [obj+0] (bit 0x02
        // set).  The owner field is addressed relative to that value with
        // an offset of owner_offset_in_bytes()-2.
        // NOTE(review): the -2 presumably cancels the 0x02 tag bit tested
        // above -- confirm.
        masm.mov    (boxReg, tmpReg) ; 
        masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
        masm.testptr(tmpReg, tmpReg) ;   
        masm.jcc    (Assembler::notZero, DONE_LABEL) ; 

        // It's inflated and appears unlocked
        // Try to install r15_thread as the owner.
        if (os::is_MP()) { masm.lock(); } 
        masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
        // Intentional fall-through into DONE_LABEL ...

        masm.bind  (DONE_LABEL) ;
        masm.nop   () ;                 // avoid jmp to jmp
    }
  %}
3505 
3506   // Fast_Unlock: emit the inline monitor-exit sequence through a MacroAssembler.
3507   // obj: object to unlock
3508   // box: box address (displaced header location), killed; constrained to RAX by rax_RegP
       // tmp: killed scratch; the original note said RBX and that it cannot be obj nor box.
       //      NOTE(review): tmp is declared as plain rRegP here, so the RBX pinning
       //      presumably comes from the instruct using this encoding -- confirm.
       // Result: the successor-check path sets ZF=1 for success and ZF=0 for failure;
       //      the other exits presumably follow the same convention through the flags
       //      left by their last flag-setting instruction -- confirm against the user.
       // EmitSync bits read here: 4 (emit only a cmpptr), 8 (simple variant),
       //      65536 (omit the successor check), 32768 (append a trailing nop).
3509   enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
3510   %{
3511 
3512     Register objReg = as_Register($obj$$reg);
3513     Register boxReg = as_Register($box$$reg);
3514     Register tmpReg = as_Register($tmp$$reg);
3515     MacroAssembler masm(&cbuf);
3516 
         // EmitSync & 4: emit nothing but a compare of rsp against 0.
         // NOTE(review): presumably this leaves ZF=0 so every unlock goes to the slow path -- confirm.
3517     if (EmitSync & 4) { 
3518        masm.cmpptr(rsp, 0) ; 
3519     } else
         // EmitSync & 8: simple variant, only the recursive and displaced-header cases.
3520     if (EmitSync & 8) {
3521        Label DONE_LABEL;
3522        if (UseBiasedLocking) {
3523          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3524        }
3525 
3526        // Check whether the displaced header is 0
3527        //(=> recursive unlock)
3528        masm.movptr(tmpReg, Address(boxReg, 0));
3529        masm.testptr(tmpReg, tmpReg);
3530        masm.jcc(Assembler::zero, DONE_LABEL);
3531 
3532        // If not recursive lock, reset the header to displaced header
3533        if (os::is_MP()) {
3534          masm.lock();
3535        }
3536        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3537        masm.bind(DONE_LABEL);
3538        masm.nop(); // avoid branch to branch
3539     } else {
            // Default variant: handles recursive, stack-locked and inflated cases.
3540        Label DONE_LABEL, Stacked, CheckSucc ;
3541 
3542        if (UseBiasedLocking && !UseOptoBiasInlining) {
3543          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3544        }
3545         
            // tmpReg = word at offset 0 of the object.  A zero displaced header in the
            // box means a recursive unlock (done, ZF=1).  Bit 0x02 clear in tmpReg
            // selects the Stacked path; otherwise fall into the inflated path.
3546        masm.movptr(tmpReg, Address(objReg, 0)) ; 
3547        masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; 
3548        masm.jcc   (Assembler::zero, DONE_LABEL) ; 
3549        masm.testl (tmpReg, 0x02) ; 
3550        masm.jcc   (Assembler::zero, Stacked) ; 
3551         
3552        // It's inflated
            // tmpReg still has the 0x02 bit set here, so the -2 in each field offset
            // presumably cancels that tag -- confirm.
            // boxReg = (owner ^ current thread) | recursions; a non-zero result
            // (different owner, or recursions pending) exits with ZF=0.
3553        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3554        masm.xorptr(boxReg, r15_thread) ; 
3555        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; 
3556        masm.jcc   (Assembler::notZero, DONE_LABEL) ; 
            // cxq | EntryList non-zero: take the CheckSucc path.  Otherwise clear the
            // owner field; the zero result of the orptr leaves ZF=1 for the exit.
3557        masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3558        masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3559        masm.jcc   (Assembler::notZero, CheckSucc) ; 
3560        masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; 
3561        masm.jmp   (DONE_LABEL) ; 
3562         
            // Successor check, emitted unless EmitSync bit 65536 is set.
3563        if ((EmitSync & 65536) == 0) { 
3564          Label LSuccess, LGoSlowPath ;
3565          masm.bind  (CheckSucc) ;
              // No successor recorded: go to the slow path.
3566          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3567          masm.jcc   (Assembler::zero, LGoSlowPath) ;
3568 
3569          // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
3570          // explicit ST;MEMBAR combination, but masm doesn't currently support
3571          // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
3572          // are all faster when the write buffer is populated.
              // Clear the owner field, fence (locked add of 0 to the top of stack on MP),
              // then re-read succ: still non-zero means success.
3573          masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3574          if (os::is_MP()) {
3575             masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
3576          }
3577          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
3578          masm.jcc   (Assembler::notZero, LSuccess) ;
3579 
              // succ went to zero: CAS the owner field from NULL (in RAX) back to this
              // thread.  A failed CAS (notEqual) is treated as success; a successful
              // CAS falls into the slow path.
3580          masm.movptr (boxReg, (int32_t)NULL_WORD) ;                   // box is really EAX
3581          if (os::is_MP()) { masm.lock(); }
3582          masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3583          masm.jcc   (Assembler::notEqual, LSuccess) ;
3584          // Intentional fall-through into slow-path
3585 
3586          masm.bind  (LGoSlowPath) ;
3587          masm.orl   (boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3588          masm.jmp   (DONE_LABEL) ;
3589 
3590          masm.bind  (LSuccess) ;
3591          masm.testl (boxReg, 0) ;                      // set ICC.ZF=1 to indicate success
3592          masm.jmp   (DONE_LABEL) ;
3593        }
3594 
            // Stack-locked: reload the displaced header from the box and CAS it into
            // the object word, comparing against RAX (the box address).
3595        masm.bind  (Stacked) ; 
3596        masm.movptr(tmpReg, Address (boxReg, 0)) ;      // re-fetch
3597        if (os::is_MP()) { masm.lock(); } 
3598        masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
3599 
            // With the successor check omitted, CheckSucc is bound at the same
            // address as DONE_LABEL.
3600        if (EmitSync & 65536) {
3601           masm.bind (CheckSucc) ;
3602        }
3603        masm.bind(DONE_LABEL);
3604        if (EmitSync & 32768) {
3605           masm.nop();                      // avoid branch to branch
3606        }
3607     }
3608   %}
3609 
3610 
3611   enc_class enc_rethrow()
3612   %{
         // Emits jmp rel32 (opcode 0xE9) to OptoRuntime::rethrow_stub(), with a
         // runtime-call relocation attached to the 32-bit displacement.
3613     cbuf.set_insts_mark();
3614     emit_opcode(cbuf, 0xE9); // jmp entry
         // The displacement is taken relative to insts_end() minus 4 more bytes;
         // insts_end() is read after the opcode byte was emitted, so it presumably
         // addresses the displacement field and the result is relative to the end
         // of the whole instruction -- confirm.
3615     emit_d32_reloc(cbuf,
3616                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
3617                    runtime_call_Relocation::spec(),
3618                    RELOC_DISP32);
3619   %}
3620 
3621   enc_class absF_encoding(regF dst)
3622   %{
         // Emits [REX_R] 0F 54 ModRM disp32 (ANDPS per the Intel opcode map) with
         // dst as the register operand and StubRoutines::x86::float_sign_mask()
         // as the memory operand.
         // NOTE(review): presumably the mask clears the sign bit, giving abs -- confirm.
3623     int dstenc = $dst$$reg;
3624     address signmask_address = (address) StubRoutines::x86::float_sign_mask();
3625 
3626     cbuf.set_insts_mark();
         // Encodings 8 and above need REX_R; only the low three bits go into ModRM.
3627     if (dstenc >= 8) {
3628       emit_opcode(cbuf, Assembler::REX_R);
3629       dstenc -= 8;
3630     }
3631     // XXX reg_mem doesn't support RIP-relative addressing yet
3632     emit_opcode(cbuf, 0x0F);
3633     emit_opcode(cbuf, 0x54);
3634     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // emit_d32_reloc is handed the absolute address; presumably it produces the
         // RIP-relative disp32 and its relocation -- confirm.
3635     emit_d32_reloc(cbuf, signmask_address);
3636   %}
3637 
3638   enc_class absD_encoding(regD dst)
3639   %{
         // Emits 66 [REX_R] 0F 54 ModRM disp32 (ANDPD per the Intel opcode map) with
         // dst as the register operand and StubRoutines::x86::double_sign_mask()
         // as the memory operand.
         // NOTE(review): presumably the mask clears the sign bit, giving abs -- confirm.
3640     int dstenc = $dst$$reg;
3641     address signmask_address = (address) StubRoutines::x86::double_sign_mask();
3642 
3643     cbuf.set_insts_mark();
         // The 0x66 prefix goes out first, ahead of the optional REX byte.
3644     emit_opcode(cbuf, 0x66);
         // Encodings 8 and above need REX_R; only the low three bits go into ModRM.
3645     if (dstenc >= 8) {
3646       emit_opcode(cbuf, Assembler::REX_R);
3647       dstenc -= 8;
3648     }
3649     // XXX reg_mem doesn't support RIP-relative addressing yet
3650     emit_opcode(cbuf, 0x0F);
3651     emit_opcode(cbuf, 0x54);
3652     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // emit_d32_reloc is handed the absolute address; presumably it produces the
         // RIP-relative disp32 and its relocation -- confirm.
3653     emit_d32_reloc(cbuf, signmask_address);
3654   %}
3655 
3656   enc_class negF_encoding(regF dst)
3657   %{
         // Emits [REX_R] 0F 57 ModRM disp32 (XORPS per the Intel opcode map) with
         // dst as the register operand and StubRoutines::x86::float_sign_flip()
         // as the memory operand.
         // NOTE(review): presumably the constant has only the sign bit set, so the
         // xor negates the float -- confirm.
3658     int dstenc = $dst$$reg;
3659     address signflip_address = (address) StubRoutines::x86::float_sign_flip();
3660 
3661     cbuf.set_insts_mark();
         // Encodings 8 and above need REX_R; only the low three bits go into ModRM.
3662     if (dstenc >= 8) {
3663       emit_opcode(cbuf, Assembler::REX_R);
3664       dstenc -= 8;
3665     }
3666     // XXX reg_mem doesn't support RIP-relative addressing yet
3667     emit_opcode(cbuf, 0x0F);
3668     emit_opcode(cbuf, 0x57);
3669     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // emit_d32_reloc is handed the absolute address; presumably it produces the
         // RIP-relative disp32 and its relocation -- confirm.
3670     emit_d32_reloc(cbuf, signflip_address);
3671   %}
3672 
3673   enc_class negD_encoding(regD dst)
3674   %{
         // Emits 66 [REX_R] 0F 57 ModRM disp32 (XORPD per the Intel opcode map) with
         // dst as the register operand and StubRoutines::x86::double_sign_flip()
         // as the memory operand.
         // NOTE(review): presumably the constant has only the sign bit set, so the
         // xor negates the double -- confirm.
3675     int dstenc = $dst$$reg;
3676     address signflip_address = (address) StubRoutines::x86::double_sign_flip();
3677 
3678     cbuf.set_insts_mark();
         // The 0x66 prefix goes out first, ahead of the optional REX byte.
3679     emit_opcode(cbuf, 0x66);
         // Encodings 8 and above need REX_R; only the low three bits go into ModRM.
3680     if (dstenc >= 8) {
3681       emit_opcode(cbuf, Assembler::REX_R);
3682       dstenc -= 8;
3683     }
3684     // XXX reg_mem doesn't support RIP-relative addressing yet
3685     emit_opcode(cbuf, 0x0F);
3686     emit_opcode(cbuf, 0x57);
3687     emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
         // emit_d32_reloc is handed the absolute address; presumably it produces the
         // RIP-relative disp32 and its relocation -- confirm.
3688     emit_d32_reloc(cbuf, signflip_address);
3689   %}
3690 
3691   enc_class f2i_fixup(rRegI dst, regF src)
3692   %{
         // Fix-up emitted after a float-to-int conversion whose result is in dst.
         // If dst differs from 0x80000000 the rest of the sequence is jumped over;
         // otherwise src is stored to a fresh 8-byte stack slot,
         // StubRoutines::x86::f2i_fixup() is called, and the slot is popped into dst.
         // NOTE(review): 0x80000000 is presumably what the preceding convert
         // instruction yields for NaN or out-of-range input, and the stub presumably
         // overwrites the stack slot with the corrected result -- confirm.
3693     int dstenc = $dst$$reg;
3694     int srcenc = $src$$reg;
3695 
3696     // cmpl $dst, #0x80000000
3697     if (dstenc >= 8) {
3698       emit_opcode(cbuf, Assembler::REX_B);
3699     }
3700     emit_opcode(cbuf, 0x81);
3701     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3702     emit_d32(cbuf, 0x80000000);
3703 
3704     // jne,s done
         // The rel8 is the byte length of everything up to done:
         // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for each
         // REX prefix emitted below (REX_R on movss when src >= 8, REX_B on popq
         // when dst >= 8).  Keep these constants in sync with the code that follows.
3705     emit_opcode(cbuf, 0x75);
3706     if (srcenc < 8 && dstenc < 8) {
3707       emit_d8(cbuf, 0xF);
3708     } else if (srcenc >= 8 && dstenc >= 8) {
3709       emit_d8(cbuf, 0x11);
3710     } else {
3711       emit_d8(cbuf, 0x10);
3712     }
3713 
3714     // subq rsp, #8
3715     emit_opcode(cbuf, Assembler::REX_W);
3716     emit_opcode(cbuf, 0x83);
3717     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3718     emit_d8(cbuf, 8);
3719 
3720     // movss [rsp], $src
3721     emit_opcode(cbuf, 0xF3);
3722     if (srcenc >= 8) {
3723       emit_opcode(cbuf, Assembler::REX_R);
3724     }
3725     emit_opcode(cbuf, 0x0F);
3726     emit_opcode(cbuf, 0x11);
3727     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3728 
3729     // call f2i_fixup
         // rel32 is computed from insts_end() after the 0xE8 byte, minus the 4
         // displacement bytes, and carries a runtime-call relocation.
3730     cbuf.set_insts_mark();
3731     emit_opcode(cbuf, 0xE8);
3732     emit_d32_reloc(cbuf,
3733                    (int)
3734                    (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
3735                    runtime_call_Relocation::spec(),
3736                    RELOC_DISP32);
3737 
3738     // popq $dst
         // The pop also releases the 8 bytes reserved by the subq above.
3739     if (dstenc >= 8) {
3740       emit_opcode(cbuf, Assembler::REX_B);
3741     }
3742     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3743 
3744     // done:
3745   %}
3746 
3747   enc_class f2l_fixup(rRegL dst, regF src)
3748   %{
         // Fix-up emitted after a float-to-long conversion whose result is in dst.
         // dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if they differ the rest of the
         // sequence is jumped over, otherwise src is stored to a fresh 8-byte stack
         // slot, StubRoutines::x86::f2l_fixup() is called, and the slot is popped
         // into dst.
         // NOTE(review): the constant is presumably what the preceding convert
         // instruction yields for NaN or out-of-range input, and the stub presumably
         // overwrites the stack slot with the corrected result -- confirm.
3749     int dstenc = $dst$$reg;
3750     int srcenc = $src$$reg;
3751     address const_address = (address) StubRoutines::x86::double_sign_flip();
3752 
3753     // cmpq $dst, [0x8000000000000000]
         // REX_W selects the 64-bit compare; REX_WR additionally extends the ModRM
         // reg field for dst encodings 8 and above.
3754     cbuf.set_insts_mark();
3755     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3756     emit_opcode(cbuf, 0x39);
3757     // XXX reg_mem doesn't support RIP-relative addressing yet
3758     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3759     emit_d32_reloc(cbuf, const_address);
3760 
3761 
3762     // jne,s done
         // The rel8 is the byte length of everything up to done:
         // subq (4) + movss (5) + call (5) + popq (1) = 0xF, plus one byte for each
         // REX prefix emitted below (REX_R on movss when src >= 8, REX_B on popq
         // when dst >= 8).  Keep these constants in sync with the code that follows.
3763     emit_opcode(cbuf, 0x75);
3764     if (srcenc < 8 && dstenc < 8) {
3765       emit_d8(cbuf, 0xF);
3766     } else if (srcenc >= 8 && dstenc >= 8) {
3767       emit_d8(cbuf, 0x11);
3768     } else {
3769       emit_d8(cbuf, 0x10);
3770     }
3771 
3772     // subq rsp, #8
3773     emit_opcode(cbuf, Assembler::REX_W);
3774     emit_opcode(cbuf, 0x83);
3775     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3776     emit_d8(cbuf, 8);
3777 
3778     // movss [rsp], $src
3779     emit_opcode(cbuf, 0xF3);
3780     if (srcenc >= 8) {
3781       emit_opcode(cbuf, Assembler::REX_R);
3782     }
3783     emit_opcode(cbuf, 0x0F);
3784     emit_opcode(cbuf, 0x11);
3785     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3786 
3787     // call f2l_fixup
         // rel32 is computed from insts_end() after the 0xE8 byte, minus the 4
         // displacement bytes, and carries a runtime-call relocation.
3788     cbuf.set_insts_mark();
3789     emit_opcode(cbuf, 0xE8);
3790     emit_d32_reloc(cbuf,
3791                    (int)
3792                    (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
3793                    runtime_call_Relocation::spec(),
3794                    RELOC_DISP32);
3795 
3796     // popq $dst
         // The pop also releases the 8 bytes reserved by the subq above.
3797     if (dstenc >= 8) {
3798       emit_opcode(cbuf, Assembler::REX_B);
3799     }
3800     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3801 
3802     // done:
3803   %}
3804 
3805   enc_class d2i_fixup(rRegI dst, regD src)
3806   %{
         // Fix-up emitted after a double-to-int conversion whose result is in dst.
         // If dst differs from 0x80000000 the rest of the sequence is jumped over;
         // otherwise src is stored to a fresh 8-byte stack slot,
         // StubRoutines::x86::d2i_fixup() is called, and the slot is popped into dst.
         // NOTE(review): 0x80000000 is presumably what the preceding convert
         // instruction yields for NaN or out-of-range input, and the stub presumably
         // overwrites the stack slot with the corrected result -- confirm.
3807     int dstenc = $dst$$reg;
3808     int srcenc = $src$$reg;
3809 
3810     // cmpl $dst, #0x80000000
3811     if (dstenc >= 8) {
3812       emit_opcode(cbuf, Assembler::REX_B);
3813     }
3814     emit_opcode(cbuf, 0x81);
3815     emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
3816     emit_d32(cbuf, 0x80000000);
3817 
3818     // jne,s done
         // The rel8 is the byte length of everything up to done:
         // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for each
         // REX prefix emitted below (REX_R on movsd when src >= 8, REX_B on popq
         // when dst >= 8).  Keep these constants in sync with the code that follows.
3819     emit_opcode(cbuf, 0x75);
3820     if (srcenc < 8 && dstenc < 8) {
3821       emit_d8(cbuf, 0xF);
3822     } else if (srcenc >= 8 && dstenc >= 8) {
3823       emit_d8(cbuf, 0x11);
3824     } else {
3825       emit_d8(cbuf, 0x10);
3826     }
3827 
3828     // subq rsp, #8
3829     emit_opcode(cbuf, Assembler::REX_W);
3830     emit_opcode(cbuf, 0x83);
3831     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3832     emit_d8(cbuf, 8);
3833 
3834     // movsd [rsp], $src
3835     emit_opcode(cbuf, 0xF2);
3836     if (srcenc >= 8) {
3837       emit_opcode(cbuf, Assembler::REX_R);
3838     }
3839     emit_opcode(cbuf, 0x0F);
3840     emit_opcode(cbuf, 0x11);
3841     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3842 
3843     // call d2i_fixup
         // rel32 is computed from insts_end() after the 0xE8 byte, minus the 4
         // displacement bytes, and carries a runtime-call relocation.
3844     cbuf.set_insts_mark();
3845     emit_opcode(cbuf, 0xE8);
3846     emit_d32_reloc(cbuf,
3847                    (int)
3848                    (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
3849                    runtime_call_Relocation::spec(),
3850                    RELOC_DISP32);
3851 
3852     // popq $dst
         // The pop also releases the 8 bytes reserved by the subq above.
3853     if (dstenc >= 8) {
3854       emit_opcode(cbuf, Assembler::REX_B);
3855     }
3856     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3857 
3858     // done:
3859   %}
3860 
3861   enc_class d2l_fixup(rRegL dst, regD src)
3862   %{
         // Fix-up emitted after a double-to-long conversion whose result is in dst.
         // dst is compared with the 64-bit constant stored at
         // StubRoutines::x86::double_sign_flip(); if they differ the rest of the
         // sequence is jumped over, otherwise src is stored to a fresh 8-byte stack
         // slot, StubRoutines::x86::d2l_fixup() is called, and the slot is popped
         // into dst.
         // NOTE(review): the constant is presumably what the preceding convert
         // instruction yields for NaN or out-of-range input, and the stub presumably
         // overwrites the stack slot with the corrected result -- confirm.
3863     int dstenc = $dst$$reg;
3864     int srcenc = $src$$reg;
3865     address const_address = (address) StubRoutines::x86::double_sign_flip();
3866 
3867     // cmpq $dst, [0x8000000000000000]
         // REX_W selects the 64-bit compare; REX_WR additionally extends the ModRM
         // reg field for dst encodings 8 and above.
3868     cbuf.set_insts_mark();
3869     emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
3870     emit_opcode(cbuf, 0x39);
3871     // XXX reg_mem doesn't support RIP-relative addressing yet
3872     emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
3873     emit_d32_reloc(cbuf, const_address);
3874 
3875 
3876     // jne,s done
         // The rel8 is the byte length of everything up to done:
         // subq (4) + movsd (5) + call (5) + popq (1) = 0xF, plus one byte for each
         // REX prefix emitted below (REX_R on movsd when src >= 8, REX_B on popq
         // when dst >= 8).  Keep these constants in sync with the code that follows.
3877     emit_opcode(cbuf, 0x75);
3878     if (srcenc < 8 && dstenc < 8) {
3879       emit_d8(cbuf, 0xF);
3880     } else if (srcenc >= 8 && dstenc >= 8) {
3881       emit_d8(cbuf, 0x11);
3882     } else {
3883       emit_d8(cbuf, 0x10);
3884     }
3885 
3886     // subq rsp, #8
3887     emit_opcode(cbuf, Assembler::REX_W);
3888     emit_opcode(cbuf, 0x83);
3889     emit_rm(cbuf, 0x3, 0x5, RSP_enc);
3890     emit_d8(cbuf, 8);
3891 
3892     // movsd [rsp], $src
3893     emit_opcode(cbuf, 0xF2);
3894     if (srcenc >= 8) {
3895       emit_opcode(cbuf, Assembler::REX_R);
3896     }
3897     emit_opcode(cbuf, 0x0F);
3898     emit_opcode(cbuf, 0x11);
3899     encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
3900 
3901     // call d2l_fixup
         // rel32 is computed from insts_end() after the 0xE8 byte, minus the 4
         // displacement bytes, and carries a runtime-call relocation.
3902     cbuf.set_insts_mark();
3903     emit_opcode(cbuf, 0xE8);
3904     emit_d32_reloc(cbuf,
3905                    (int)
3906                    (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
3907                    runtime_call_Relocation::spec(),
3908                    RELOC_DISP32);
3909 
3910     // popq $dst
         // The pop also releases the 8 bytes reserved by the subq above.
3911     if (dstenc >= 8) {
3912       emit_opcode(cbuf, Assembler::REX_B);
3913     }
3914     emit_opcode(cbuf, 0x58 | (dstenc & 7));
3915 
3916     // done:
3917   %}
3918 
3919   // Safepoint Poll.  This polls the safepoint page, and causes an
3920   // exception if it is not readable. Unfortunately, it kills
3921   // RFLAGS in the process.
       // Emitted bytes: 0x85, ModRM (00 rax 101), then a 32-bit displacement
       // derived from os::get_polling_page().
3922   enc_class enc_safepoint_poll
3923   %{
3924     // testl %rax, off(%rip) // Opcode + ModRM + Disp32 == 6 bytes
3925     // XXX reg_mem doesn't support RIP-relative addressing yet
3926     cbuf.set_insts_mark();
         // Attach a poll_type relocation to the start of the instruction (the mark
         // that was just set), before any byte of it is emitted.
3927     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); // XXX
3928     emit_opcode(cbuf, 0x85); // testl
3929     emit_rm(cbuf, 0x0, RAX_enc, 0x5); // 00 rax 101 == 0x5
3930     // cbuf.insts_mark() is beginning of instruction
3931     emit_d32_reloc(cbuf, os::get_polling_page());
3932 // (the poll_type relocation is recorded by the cbuf.relocate call above)
3933   %}
3934 %}
3935 
3936 
3937 
3938 //----------FRAME--------------------------------------------------------------
3939 // Definition of frame structure and management information.
3940 //
3941 //  S T A C K   L A Y O U T    Allocators stack-slot number
3942 //                             |   (to get allocators register number
3943 //  G  Owned by    |        |  v    add OptoReg::stack0())
3944 //  r   CALLER     |        |
3945 //  o     |        +--------+      pad to even-align allocators stack-slot
3946 //  w     V        |  pad0  |        numbers; owned by CALLER
3947 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3948 //  h     ^        |   in   |  5
3949 //        |        |  args  |  4   Holes in incoming args owned by SELF
3950 //  |     |        |        |  3
3951 //  |     |        +--------+
3952 //  V     |        | old out|      Empty on Intel, window on Sparc
3953 //        |    old |preserve|      Must be even aligned.
3954 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3955 //        |        |   in   |  3   area for Intel ret address
3956 //     Owned by    |preserve|      Empty on Sparc.
3957 //       SELF      +--------+
3958 //        |        |  pad2  |  2   pad to align old SP
3959 //        |        +--------+  1
3960 //        |        | locks  |  0
3961 //        |        +--------+----> OptoReg::stack0(), even aligned
3962 //        |        |  pad1  | 11   pad to align new SP
3963 //        |        +--------+
3964 //        |        |        | 10
3965 //        |        | spills |  9   spills
3966 //        V        |        |  8   (pad0 slot for callee)
3967 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3968 //        ^        |  out   |  7
3969 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3970 //     Owned by    +--------+
3971 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3972 //        |    new |preserve|      Must be even-aligned.
3973 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3974 //        |        |        |
3975 //
3976 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3977 //         known from SELF's arguments and the Java calling convention.
3978 //         Region 6-7 is determined per call site.
3979 // Note 2: If the calling convention leaves holes in the incoming argument
3980 //         area, those holes are owned by SELF.  Holes in the outgoing area
3981 //         are owned by the CALLEE.  Holes should not be necessary in the
3982 //         incoming area, as the Java calling convention is completely under
3983 //         the control of the AD file.  Doubles can be sorted and packed to
3984 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3985 //         varargs C calling conventions.
3986 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3987 //         even aligned with pad0 as needed.
3988 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3989 //         region 6-11 is even aligned; it may be padded out more so that
3990 //         the region from SP to FP meets the minimum stack alignment.
3991 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3992 //         alignment.  Region 11, pad1, may be dynamically extended so that
3993 //         SP meets the minimum alignment.
3994 
3995 frame
3996 %{
       // Frame description: stack direction, the registers and stack slots fixed by
       // the calling convention, and the calling_convention, c_calling_convention
       // and return_value bodies.
3997   // What direction does stack grow in (assumed to be same for C & Java)
3998   stack_direction(TOWARDS_LOW);
3999 
4000   // These registers define part of the calling convention
4001   // between compiled code and the interpreter.
4002   inline_cache_reg(RAX);                // Inline Cache Register
4003   interpreter_method_oop_reg(RBX);      // Method Oop Register when
4004                                         // calling interpreter
4005 
4006   // Optional: name the operand used by cisc-spilling to access
4007   // [stack_pointer + offset]
4008   cisc_spilling_operand_name(indOffset32);
4009 
4010   // Number of stack slots consumed by locking an object
4011   sync_stack_slots(2);
4012 
4013   // Compiled code's Frame Pointer
4014   frame_pointer(RSP);
4015 
4016   // Interpreter stores its frame pointer in a register which is
4017   // stored to the stack by I2CAdaptors.
4018   // I2CAdaptors convert from interpreted java to compiled java.
4019   interpreter_frame_pointer(RBP);
4020 
4021   // Stack alignment requirement
4022   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4023 
4024   // Number of stack slots between incoming argument block and the start of
4025   // a new frame.  The PROLOG must add this many slots to the stack.  The
4026   // EPILOG must remove this many slots.  amd64 needs two slots for
4027   // return address.
       // NOTE(review): the expression reserves 4 slots (6 with VerifyStackAtCalls),
       // while the text above accounts only for the 2 return-address slots --
       // confirm what the remaining slots hold and update the comment.
4028   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);
4029 
4030   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4031   // for calls to C.  Supports the var-args backing area for register parms.
4032   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4033 
4034   // The after-PROLOG location of the return address.  Location of
4035   // return address specifies a type (REG or STACK) and a number
4036   // representing the register number (i.e. - use a register name) or
4037   // stack slot.
4038   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4039   // Otherwise, it is above the locks and verification slot and alignment word
       // The slot count (2 for the return address, 2 more with VerifyStackAtCalls,
       // plus the compilation's fixed slots) is rounded up to a multiple of
       // WordsPerLong * 2 before the leading "- 2" is applied.
4040   return_addr(STACK - 2 +
4041               round_to(2 + 2 * VerifyStackAtCalls +
4042                        Compile::current()->fixed_slots(),
4043                        WordsPerLong * 2));
4044 
4045   // Body of function which returns an integer array locating
4046   // arguments either in registers or in stack slots.  Passed an array
4047   // of ideal registers called "sig" and a "length" count.  Stack-slot
4048   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4049   // arguments for a CALLEE.  Incoming stack arguments are
4050   // automatically biased by the preserve_stack_slots field above.
4051 
4052   calling_convention
4053   %{
4054     // No difference between ingoing/outgoing just pass false
4055     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4056   %}
4057 
4058   c_calling_convention
4059   %{
4060     // This is obviously always outgoing
         // The return value of c_calling_convention is deliberately discarded.
4061     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4062   %}
4063 
4064   // Location of compiled Java return values.  Same as C for now.
4065   return_value
4066   %{
         // Returns the (hi, lo) register pair for ideal_reg by table lookup.
         // NOTE(review): both tables carry an Op_RegN entry at index 2, yet this
         // assert requires ideal_reg >= Op_RegI, which by the table order appears to
         // exclude Op_RegN -- confirm which one is intended.
4067     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4068            "only return normal values");
4069 
         // Low halves, indexed by ideal_reg; the two leading zeros are placeholders
         // for the indices below Op_RegN.
4070     static const int lo[Op_RegL + 1] = {
4071       0,
4072       0,
4073       RAX_num,  // Op_RegN
4074       RAX_num,  // Op_RegI
4075       RAX_num,  // Op_RegP
4076       XMM0_num, // Op_RegF
4077       XMM0_num, // Op_RegD
4078       RAX_num   // Op_RegL
4079     };
         // High halves, same indexing; OptoReg::Bad is used for RegN, RegI and RegF,
         // presumably because those values occupy a single slot -- confirm.
4080     static const int hi[Op_RegL + 1] = {
4081       0,
4082       0,
4083       OptoReg::Bad, // Op_RegN
4084       OptoReg::Bad, // Op_RegI
4085       RAX_H_num,    // Op_RegP
4086       OptoReg::Bad, // Op_RegF
4087       XMM0_H_num,   // Op_RegD
4088       RAX_H_num     // Op_RegL
4089     };
         // Only hi[] is size-checked here; lo[] is declared with the same bound.
4090     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
4091     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4092   %}
4093 %}
4094 
4095 //----------ATTRIBUTES---------------------------------------------------------
4096 //----------Operand Attributes-------------------------------------------------
     // Declares op_cost with the value 0.  Operands in this file set their own
     // value with op_cost(N); presumably the value here is the default for
     // operands that omit it -- confirm against the ADLC documentation.
4097 op_attrib op_cost(0);        // Required cost attribute
4098 
4099 //----------Instruction Attributes---------------------------------------------
     // Each declaration names an instruction attribute and gives it a value,
     // presumably the default used when an instruction does not set it -- confirm.
4100 ins_attrib ins_cost(100);       // Required cost attribute
4101 ins_attrib ins_size(8);         // Required size attribute (in bits)
4102 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4103 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4104                                 // a non-matching short branch variant
4105                                 // of some long branch?
4106 ins_attrib ins_alignment(1);    // Required alignment attribute (must
4107                                 // be a power of 2) specifies the
4108                                 // alignment that some part of the
4109                                 // instruction (not necessarily the
4110                                 // start) requires.  If > 1, a
4111                                 // compute_padding() function must be
4112                                 // provided for the instruction
4113 
4114 //----------OPERANDS-----------------------------------------------------------
4115 // Operand definitions must precede instruction definitions for correct parsing
4116 // in the ADLC because operands constitute user defined types which are used in
4117 // instruction definitions.
4118 
4119 //----------Simple Operands----------------------------------------------------
4120 // Immediate Operands
4121 // Integer Immediate: matches any ConI node (no predicate); op_cost 10.
4122 operand immI()
4123 %{
4124   match(ConI);
4125 
4126   op_cost(10);
4127   format %{ %}
4128   interface(CONST_INTER);
4129 %}
4130 
4131 // Constant for test vs zero: ConI whose value is exactly 0; op_cost 0.
4132 operand immI0()
4133 %{
4134   predicate(n->get_int() == 0);
4135   match(ConI);
4136 
4137   op_cost(0);
4138   format %{ %}
4139   interface(CONST_INTER);
4140 %}
4141 
4142 // Constant for increment: ConI whose value is exactly 1; op_cost 0.
4143 operand immI1()
4144 %{
4145   predicate(n->get_int() == 1);
4146   match(ConI);
4147 
4148   op_cost(0);
4149   format %{ %}
4150   interface(CONST_INTER);
4151 %}
4152 
4153 // Constant for decrement: ConI whose value is exactly -1; op_cost 0.
4154 operand immI_M1()
4155 %{
4156   predicate(n->get_int() == -1);
4157   match(ConI);
4158 
4159   op_cost(0);
4160   format %{ %}
4161   interface(CONST_INTER);
4162 %}
4163 
4164 // Valid scale values for addressing modes: ConI in the range 0..3 inclusive
     // (presumably the log2 of the scale factors 1, 2, 4 and 8 -- confirm).
     // No op_cost is given here.
4165 operand immI2()
4166 %{
4167   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4168   match(ConI);
4169 
4170   format %{ %}
4171   interface(CONST_INTER);
4172 %}
4173 
4174 operand immI8()
4175 %{
       // ConI that fits a signed 8-bit immediate: -0x80 <= value < 0x80; op_cost 5.
4176   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4177   match(ConI);
4178 
4179   op_cost(5);
4180   format %{ %}
4181   interface(CONST_INTER);
4182 %}
4183 
4184 operand immI16()
4185 %{
       // ConI that fits a signed 16-bit immediate: -32768 <= value <= 32767; op_cost 10.
4186   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4187   match(ConI);
4188 
4189   op_cost(10);
4190   format %{ %}
4191   interface(CONST_INTER);
4192 %}
4193 
4194 // Constant for long shifts: ConI whose value is exactly 32; op_cost 0.
4195 operand immI_32()
4196 %{
4197   predicate( n->get_int() == 32 );
4198   match(ConI);
4199 
4200   op_cost(0);
4201   format %{ %}
4202   interface(CONST_INTER);
4203 %}
4204 
4205 // Constant for long shifts: ConI whose value is exactly 64; op_cost 0.
4206 operand immI_64()
4207 %{
4208   predicate( n->get_int() == 64 );
4209   match(ConI);
4210 
4211   op_cost(0);
4212   format %{ %}
4213   interface(CONST_INTER);
4214 %}
4215 
4216 // Pointer Immediate: matches any ConP node (no predicate); op_cost 10.
4217 operand immP()
4218 %{
4219   match(ConP);
4220 
4221   op_cost(10);
4222   format %{ %}
4223   interface(CONST_INTER);
4224 %}
4225 
4226 // NULL Pointer Immediate: ConP whose pointer value is 0; op_cost 5.
4227 operand immP0()
4228 %{
4229   predicate(n->get_ptr() == 0);
4230   match(ConP);
4231 
4232   op_cost(5);
4233   format %{ %}
4234   interface(CONST_INTER);
4235 %}
4236 
4237 // Narrow Pointer Immediate: matches any ConN node (no predicate); op_cost 10.
4238 operand immN() %{
4239   match(ConN);
4240 
4241   op_cost(10);
4242   format %{ %}
4243   interface(CONST_INTER);
4244 %}
4245 
4246 // Narrow NULL Pointer Immediate: ConN whose narrow constant is 0; op_cost 5.
4247 operand immN0() %{
4248   predicate(n->get_narrowcon() == 0);
4249   match(ConN);
4250 
4251   op_cost(5);
4252   format %{ %}
4253   interface(CONST_INTER);
4254 %}
4255 
4256 operand immP31()
4257 %{
       // ConP that is not an oop pointer and whose value has no bits set at or
       // above bit 31 (get_ptr() >> 31 == 0); op_cost 5.
4258   predicate(!n->as_Type()->type()->isa_oopptr()
4259             && (n->get_ptr() >> 31) == 0);
4260   match(ConP);
4261 
4262   op_cost(5);
4263   format %{ %}
4264   interface(CONST_INTER);
4265 %}
4266 
4267 
4268 // Long Immediate: matches any ConL node (no predicate); op_cost 20.
4269 operand immL()
4270 %{
4271   match(ConL);
4272 
4273   op_cost(20);
4274   format %{ %}
4275   interface(CONST_INTER);
4276 %}
4277 
4278 // Long Immediate 8-bit: ConL with -0x80 <= value < 0x80; op_cost 5.
4279 operand immL8()
4280 %{
4281   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4282   match(ConL);
4283 
4284   op_cost(5);
4285   format %{ %}
4286   interface(CONST_INTER);
4287 %}
4288 
4289 // Long Immediate 32-bit unsigned: ConL that is unchanged by a round trip
     // through unsigned int, i.e. 0 <= value <= 0xFFFFFFFF; op_cost 10.
4290 operand immUL32()
4291 %{
4292   predicate(n->get_long() == (unsigned int) (n->get_long()));
4293   match(ConL);
4294 
4295   op_cost(10);
4296   format %{ %}
4297   interface(CONST_INTER);
4298 %}
4299 
4300 // Long Immediate 32-bit signed: ConL that is unchanged by a round trip
     // through int, i.e. representable as a sign-extended 32-bit value; op_cost 15.
4301 operand immL32()
4302 %{
4303   predicate(n->get_long() == (int) (n->get_long()));
4304   match(ConL);
4305 
4306   op_cost(15);
4307   format %{ %}
4308   interface(CONST_INTER);
4309 %}
4310 
4311 // Long Immediate zero: ConL whose value is exactly 0; op_cost 10.
4312 operand immL0()
4313 %{
4314   predicate(n->get_long() == 0L);
4315   match(ConL);
4316 
4317   op_cost(10);
4318   format %{ %}
4319   interface(CONST_INTER);
4320 %}
4321 
4322 // Constant for increment: ConL whose value is exactly 1.  No op_cost given.
4323 operand immL1()
4324 %{
4325   predicate(n->get_long() == 1);
4326   match(ConL);
4327 
4328   format %{ %}
4329   interface(CONST_INTER);
4330 %}
4331 
4332 // Constant for decrement: ConL whose value is exactly -1.  No op_cost given.
4333 operand immL_M1()
4334 %{
4335   predicate(n->get_long() == -1);
4336   match(ConL);
4337 
4338   format %{ %}
4339   interface(CONST_INTER);
4340 %}
4341 
4342 // Long Immediate: the value 10 (ConL equal to 10).  No op_cost given.
4343 operand immL10()
4344 %{
4345   predicate(n->get_long() == 10);
4346   match(ConL);
4347 
4348   format %{ %}
4349   interface(CONST_INTER);
4350 %}
4351 
4352 // Long immediate from 0 to 127 inclusive (0 <= value < 0x80); op_cost 10.
4353 // Used for a shorter form of long mul by 10.
4354 operand immL_127()
4355 %{
4356   predicate(0 <= n->get_long() && n->get_long() < 0x80);
4357   match(ConL);
4358 
4359   op_cost(10);
4360   format %{ %}
4361   interface(CONST_INTER);
4362 %}
4363 
4364 // Long Immediate: low 32-bit mask (ConL equal to 0xFFFFFFFF); op_cost 20.
4365 operand immL_32bits()
4366 %{
4367   predicate(n->get_long() == 0xFFFFFFFFL);
4368   match(ConL);
4369   op_cost(20);
4370 
4371   format %{ %}
4372   interface(CONST_INTER);
4373 %}
4374 
4375 // Float Immediate zero: ConF for which jint_cast of the value is 0; op_cost 5.
     // NOTE(review): jint_cast presumably reinterprets the raw bits, in which case
     // only +0.0f matches and -0.0f does not -- confirm.
4376 operand immF0()
4377 %{
4378   predicate(jint_cast(n->getf()) == 0);
4379   match(ConF);
4380 
4381   op_cost(5);
4382   format %{ %}
4383   interface(CONST_INTER);
4384 %}
4385 
4386 // Float Immediate: matches any ConF node (no predicate); op_cost 15.
4387 operand immF()
4388 %{
4389   match(ConF);
4390 
4391   op_cost(15);
4392   format %{ %}
4393   interface(CONST_INTER);
4394 %}
4395 
4396 // Double Immediate zero: ConD for which jlong_cast of the value is 0; op_cost 5.
     // NOTE(review): jlong_cast presumably reinterprets the raw bits, in which case
     // only +0.0 matches and -0.0 does not -- confirm.
4397 operand immD0()
4398 %{
4399   predicate(jlong_cast(n->getd()) == 0);
4400   match(ConD);
4401 
4402   op_cost(5);
4403   format %{ %}
4404   interface(CONST_INTER);
4405 %}
4406 
4407 // Double Immediate: matches any ConD node (no predicate); op_cost 15.
4408 operand immD()
4409 %{
4410   match(ConD);
4411 
4412   op_cost(15);
4413   format %{ %}
4414   interface(CONST_INTER);
4415 %}
4416 
4417 // Immediates for special shifts (sign extend)
4418 
4419 // Constant 16: ConI whose value is exactly 16.  Listed under the immediates
     // for special shifts (sign extend), so presumably used as a shift count --
     // confirm against the instructions that take it.  No op_cost given.
4420 operand immI_16()
4421 %{
4422   predicate(n->get_int() == 16);
4423   match(ConI);
4424 
4425   format %{ %}
4426   interface(CONST_INTER);
4427 %}
4428 
4429 operand immI_24()
4430 %{
       // ConI whose value is exactly 24.  Listed under the immediates for special
       // shifts (sign extend), so presumably used as a shift count -- confirm.
4431   predicate(n->get_int() == 24);
4432   match(ConI);
4433 
4434   format %{ %}
4435   interface(CONST_INTER);
4436 %}
4437 
4438 // Constant for byte-wide masking: ConI whose value is exactly 255 (0xFF).
4439 operand immI_255()
4440 %{
4441   predicate(n->get_int() == 255);
4442   match(ConI);
4443 
4444   format %{ %}
4445   interface(CONST_INTER);
4446 %}
4447 
4448 // Constant for short-wide masking: ConI whose value is exactly 65535 (0xFFFF).
4449 operand immI_65535()
4450 %{
4451   predicate(n->get_int() == 65535);
4452   match(ConI);
4453 
4454   format %{ %}
4455   interface(CONST_INTER);
4456 %}
4457 
4458 // Constant for byte-wide masking: ConL whose value is exactly 255 (0xFF).
4459 operand immL_255()
4460 %{
4461   predicate(n->get_long() == 255);
4462   match(ConL);
4463 
4464   format %{ %}
4465   interface(CONST_INTER);
4466 %}
4467 
4468 // Constant for short-wide masking: ConL whose value is exactly 65535 (0xFFFF).
4469 operand immL_65535()
4470 %{
4471   predicate(n->get_long() == 65535);
4472   match(ConL);
4473 
4474   format %{ %}
4475   interface(CONST_INTER);
4476 %}
4477 
4478 // Register Operands
4479 // Integer register: allocated in class int_reg; also accepts the fixed-register integer operands listed below
4480 operand rRegI()
4481 %{
4482   constraint(ALLOC_IN_RC(int_reg));
4483   match(RegI);
4484 
4485   match(rax_RegI);
4486   match(rbx_RegI);
4487   match(rcx_RegI);
4488   match(rdx_RegI);
4489   match(rdi_RegI);
4490 
4491   format %{ %}
4492   interface(REG_INTER);
4493 %}
4494 
4495 // Special register: integer operand allocated in class int_rax_reg, printed as "RAX"
4496 operand rax_RegI()
4497 %{
4498   constraint(ALLOC_IN_RC(int_rax_reg));
4499   match(RegI);
4500   match(rRegI);
4501 
4502   format %{ "RAX" %}
4503   interface(REG_INTER);
4504 %}
4505 
4506 // Special register: integer operand allocated in class int_rbx_reg, printed as "RBX"
4507 operand rbx_RegI()
4508 %{
4509   constraint(ALLOC_IN_RC(int_rbx_reg));
4510   match(RegI);
4511   match(rRegI);
4512 
4513   format %{ "RBX" %}
4514   interface(REG_INTER);
4515 %}
4516 
4517 operand rcx_RegI()  // Integer operand allocated in class int_rcx_reg, printed as "RCX"
4518 %{
4519   constraint(ALLOC_IN_RC(int_rcx_reg));
4520   match(RegI);
4521   match(rRegI);
4522 
4523   format %{ "RCX" %}
4524   interface(REG_INTER);
4525 %}
4526 
4527 operand rdx_RegI()  // Integer operand allocated in class int_rdx_reg, printed as "RDX"
4528 %{
4529   constraint(ALLOC_IN_RC(int_rdx_reg));
4530   match(RegI);
4531   match(rRegI);
4532 
4533   format %{ "RDX" %}
4534   interface(REG_INTER);
4535 %}
4536 
4537 operand rdi_RegI()  // Integer operand allocated in class int_rdi_reg, printed as "RDI"
4538 %{
4539   constraint(ALLOC_IN_RC(int_rdi_reg));
4540   match(RegI);
4541   match(rRegI);
4542 
4543   format %{ "RDI" %}
4544   interface(REG_INTER);
4545 %}
4546 
4547 operand no_rcx_RegI()  // Integer operand allocated in class int_no_rcx_reg; accepts the rax/rbx/rdx/rdi operands but not rcx_RegI
4548 %{
4549   constraint(ALLOC_IN_RC(int_no_rcx_reg));
4550   match(RegI);
4551   match(rax_RegI);
4552   match(rbx_RegI);
4553   match(rdx_RegI);
4554   match(rdi_RegI);
4555 
4556   format %{ %}
4557   interface(REG_INTER);
4558 %}
4559 
4560 operand no_rax_rdx_RegI()  // Integer operand allocated in class int_no_rax_rdx_reg; accepts the rbx/rcx/rdi operands only
4561 %{
4562   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
4563   match(RegI);
4564   match(rbx_RegI);
4565   match(rcx_RegI);
4566   match(rdi_RegI);
4567 
4568   format %{ %}
4569   interface(REG_INTER);
4570 %}
4571 
4572 // Pointer register: allocated in class any_reg; accepts every pointer operand listed below, including rRegP and r15_RegP
4573 operand any_RegP()
4574 %{
4575   constraint(ALLOC_IN_RC(any_reg));
4576   match(RegP);
4577   match(rax_RegP);
4578   match(rbx_RegP);
4579   match(rdi_RegP);
4580   match(rsi_RegP);
4581   match(rbp_RegP);
4582   match(r15_RegP);
4583   match(rRegP);
4584 
4585   format %{ %}
4586   interface(REG_INTER);
4587 %}
4588 
4589 operand rRegP()  // General pointer register operand, allocated in class ptr_reg
4590 %{
4591   constraint(ALLOC_IN_RC(ptr_reg));
4592   match(RegP);
4593   match(rax_RegP);
4594   match(rbx_RegP);
4595   match(rdi_RegP);
4596   match(rsi_RegP);
4597   match(rbp_RegP);
4598   match(r15_RegP);  // See Q&A below about r15_RegP.
4599 
4600   format %{ %}
4601   interface(REG_INTER);
4602 %}
4603 
4604 operand rRegN() %{  // Narrow (RegN) register operand; note it is allocated in the integer class int_reg
4605   constraint(ALLOC_IN_RC(int_reg));
4606   match(RegN);
4607 
4608   format %{ %}
4609   interface(REG_INTER);
4610 %}
4611 
4612 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
4613 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
4614 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
4615 // The output of an instruction is controlled by the allocator, which respects
4616 // register class masks, not match rules.  Unless an instruction mentions
4617 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
4618 // by the allocator as an input.
4619 
4620 operand no_rax_RegP()  // Pointer operand allocated in class ptr_no_rax_reg; accepts the rbx/rsi/rdi pointer operands
4621 %{
4622   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
4623   match(RegP);
4624   match(rbx_RegP);
4625   match(rsi_RegP);
4626   match(rdi_RegP);
4627 
4628   format %{ %}
4629   interface(REG_INTER);
4630 %}
4631 
4632 operand no_rbp_RegP()  // Pointer operand allocated in class ptr_no_rbp_reg; accepts rbx/rsi/rdi (NOTE(review): rax_RegP is not listed -- confirm intended)
4633 %{
4634   constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
4635   match(RegP);
4636   match(rbx_RegP);
4637   match(rsi_RegP);
4638   match(rdi_RegP);
4639 
4640   format %{ %}
4641   interface(REG_INTER);
4642 %}
4643 
4644 operand no_rax_rbx_RegP()  // Pointer operand allocated in class ptr_no_rax_rbx_reg; accepts the rsi/rdi pointer operands
4645 %{
4646   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
4647   match(RegP);
4648   match(rsi_RegP);
4649   match(rdi_RegP);
4650 
4651   format %{ %}
4652   interface(REG_INTER);
4653 %}
4654 
4655 // Special Registers
4656 // Return a pointer value: pointer operand allocated in class ptr_rax_reg
4657 operand rax_RegP()
4658 %{
4659   constraint(ALLOC_IN_RC(ptr_rax_reg));
4660   match(RegP);
4661   match(rRegP);
4662 
4663   format %{ %}
4664   interface(REG_INTER);
4665 %}
4666 
4667 // Special Registers
4668 // Return a compressed pointer value: narrow operand allocated in the integer class int_rax_reg
4669 operand rax_RegN()
4670 %{
4671   constraint(ALLOC_IN_RC(int_rax_reg));
4672   match(RegN);
4673   match(rRegN);
4674 
4675   format %{ %}
4676   interface(REG_INTER);
4677 %}
4678 
4679 // Used in AtomicAdd: pointer operand allocated in class ptr_rbx_reg
4680 operand rbx_RegP()
4681 %{
4682   constraint(ALLOC_IN_RC(ptr_rbx_reg));
4683   match(RegP);
4684   match(rRegP);
4685 
4686   format %{ %}
4687   interface(REG_INTER);
4688 %}
4689 
4690 operand rsi_RegP()  // Pointer operand allocated in class ptr_rsi_reg
4691 %{
4692   constraint(ALLOC_IN_RC(ptr_rsi_reg));
4693   match(RegP);
4694   match(rRegP);
4695 
4696   format %{ %}
4697   interface(REG_INTER);
4698 %}
4699 
4700 // Used in rep stosq: pointer operand allocated in class ptr_rdi_reg
4701 operand rdi_RegP()
4702 %{
4703   constraint(ALLOC_IN_RC(ptr_rdi_reg));
4704   match(RegP);
4705   match(rRegP);
4706 
4707   format %{ %}
4708   interface(REG_INTER);
4709 %}
4710 
4711 operand rbp_RegP()  // Pointer operand allocated in class ptr_rbp_reg
4712 %{
4713   constraint(ALLOC_IN_RC(ptr_rbp_reg));
4714   match(RegP);
4715   match(rRegP);
4716 
4717   format %{ %}
4718   interface(REG_INTER);
4719 %}
4720 
4721 operand r15_RegP()  // Pointer operand allocated in class ptr_r15_reg (the read-only TLS register per the Q&A above)
4722 %{
4723   constraint(ALLOC_IN_RC(ptr_r15_reg));
4724   match(RegP);
4725   match(rRegP);
4726 
4727   format %{ %}
4728   interface(REG_INTER);
4729 %}
4730 
4731 operand rRegL()  // General long register operand, allocated in class long_reg; also accepts rax_RegL and rdx_RegL
4732 %{
4733   constraint(ALLOC_IN_RC(long_reg));
4734   match(RegL);
4735   match(rax_RegL);
4736   match(rdx_RegL);
4737 
4738   format %{ %}
4739   interface(REG_INTER);
4740 %}
4741 
4742 // Special register: long operand allocated in class long_no_rax_rdx_reg
4743 operand no_rax_rdx_RegL()
4744 %{
4745   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4746   match(RegL);
4747   match(rRegL);
4748 
4749   format %{ %}
4750   interface(REG_INTER);
4751 %}
4752 
4753 operand no_rax_RegL()  // NOTE(review): allocated in long_no_rax_rdx_reg (same class as no_rax_rdx_RegL) yet also matches rdx_RegL -- confirm intended
4754 %{
4755   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
4756   match(RegL);
4757   match(rRegL);
4758   match(rdx_RegL);
4759 
4760   format %{ %}
4761   interface(REG_INTER);
4762 %}
4763 
4764 operand no_rcx_RegL()  // Long operand allocated in class long_no_rcx_reg
4765 %{
4766   constraint(ALLOC_IN_RC(long_no_rcx_reg));
4767   match(RegL);
4768   match(rRegL);
4769 
4770   format %{ %}
4771   interface(REG_INTER);
4772 %}
4773 
4774 operand rax_RegL()  // Long operand allocated in class long_rax_reg, printed as "RAX"
4775 %{
4776   constraint(ALLOC_IN_RC(long_rax_reg));
4777   match(RegL);
4778   match(rRegL);
4779 
4780   format %{ "RAX" %}
4781   interface(REG_INTER);
4782 %}
4783 
4784 operand rcx_RegL()  // Long operand allocated in class long_rcx_reg
4785 %{
4786   constraint(ALLOC_IN_RC(long_rcx_reg));
4787   match(RegL);
4788   match(rRegL);
4789 
4790   format %{ %}
4791   interface(REG_INTER);
4792 %}
4793 
4794 operand rdx_RegL()  // Long operand allocated in class long_rdx_reg
4795 %{
4796   constraint(ALLOC_IN_RC(long_rdx_reg));
4797   match(RegL);
4798   match(rRegL);
4799 
4800   format %{ %}
4801   interface(REG_INTER);
4802 %}
4803 
4804 // Flags register, used as output of compare instructions; allocated in class int_flags, printed as "RFLAGS"
4805 operand rFlagsReg()
4806 %{
4807   constraint(ALLOC_IN_RC(int_flags));
4808   match(RegFlags);
4809 
4810   format %{ "RFLAGS" %}
4811   interface(REG_INTER);
4812 %}
4813 
4814 // Flags register, used as output of FLOATING POINT compare instructions (the "U" presumably also covers unsigned compares -- confirm)
4815 operand rFlagsRegU()
4816 %{
4817   constraint(ALLOC_IN_RC(int_flags));
4818   match(RegFlags);
4819 
4820   format %{ "RFLAGS_U" %}
4821   interface(REG_INTER);
4822 %}
4823 
4824 operand rFlagsRegUCF() %{  // Flags operand in class int_flags, printed as "RFLAGS_U_CF"; its predicate is the constant false
4825   constraint(ALLOC_IN_RC(int_flags));
4826   match(RegFlags);
4827   predicate(false);
4828 
4829   format %{ "RFLAGS_U_CF" %}
4830   interface(REG_INTER);
4831 %}
4832 
4833 // Float register operand: matches RegF, allocated in class float_reg
4834 operand regF()
4835 %{
4836   constraint(ALLOC_IN_RC(float_reg));
4837   match(RegF);
4838 
4839   format %{ %}
4840   interface(REG_INTER);
4841 %}
4842 
4843 // Double register operand: matches RegD, allocated in class double_reg
4844 operand regD() 
4845 %{
4846   constraint(ALLOC_IN_RC(double_reg));
4847   match(RegD);
4848 
4849   format %{ %}
4850   interface(REG_INTER);
4851 %}
4852 
4853 
4854 //----------Memory Operands----------------------------------------------------
4855 // Direct Memory Operand
4856 // operand direct(immP addr)
4857 // %{
4858 //   match(addr);
4859 
4860 //   format %{ "[$addr]" %}
4861 //   interface(MEMORY_INTER) %{
4862 //     base(0xFFFFFFFF);
4863 //     index(0x4);
4864 //     scale(0x0);
4865 //     disp($addr);
4866 //   %}
4867 // %}
4868 
4869 // Indirect Memory Operand: address is the pointer register itself, [reg]
4870 operand indirect(any_RegP reg)
4871 %{
4872   constraint(ALLOC_IN_RC(ptr_reg));
4873   match(reg);
4874 
4875   format %{ "[$reg]" %}
4876   interface(MEMORY_INTER) %{
4877     base($reg);
4878     index(0x4);  // No Index
4879     scale(0x0);  // No Scale
4880     disp(0x0);   // No Displacement
4881   %}
4882 %}
4883 
4884 // Indirect Memory Plus Short Offset Operand: [reg + off], with off an immL8 constant
4885 operand indOffset8(any_RegP reg, immL8 off)
4886 %{
4887   constraint(ALLOC_IN_RC(ptr_reg));
4888   match(AddP reg off);
4889 
4890   format %{ "[$reg + $off (8-bit)]" %}
4891   interface(MEMORY_INTER) %{
4892     base($reg);
4893     index(0x4);  // No Index
4894     scale(0x0);  // No Scale
4895     disp($off);
4896   %}
4897 %}
4898 
4899 // Indirect Memory Plus Long Offset Operand: [reg + off], with off an immL32 constant
4900 operand indOffset32(any_RegP reg, immL32 off)
4901 %{
4902   constraint(ALLOC_IN_RC(ptr_reg));
4903   match(AddP reg off);
4904 
4905   format %{ "[$reg + $off (32-bit)]" %}
4906   interface(MEMORY_INTER) %{
4907     base($reg);
4908     index(0x4);  // No Index
4909     scale(0x0);  // No Scale
4910     disp($off);
4911   %}
4912 %}
4913 
4914 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off], unscaled long index
4915 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
4916 %{
4917   constraint(ALLOC_IN_RC(ptr_reg));
4918   match(AddP (AddP reg lreg) off);
4919 
4920   op_cost(10);
4921   format %{"[$reg + $off + $lreg]" %}
4922   interface(MEMORY_INTER) %{
4923     base($reg);
4924     index($lreg);
4925     scale(0x0);  // No Scale
4926     disp($off);
4927   %}
4928 %}
4929 
4930 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset and no scale
4931 operand indIndex(any_RegP reg, rRegL lreg)
4932 %{
4933   constraint(ALLOC_IN_RC(ptr_reg));
4934   match(AddP reg lreg);
4935 
4936   op_cost(10);
4937   format %{"[$reg + $lreg]" %}
4938   interface(MEMORY_INTER) %{
4939     base($reg);
4940     index($lreg);
4941     scale(0x0);  // No Scale
4942     disp(0x0);   // No Displacement
4943   %}
4944 %}
4945 
4946 // Indirect Memory Plus Scaled Index Register Operand: [reg + (lreg << scale)], scale an immI2 constant
4947 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
4948 %{
4949   constraint(ALLOC_IN_RC(ptr_reg));
4950   match(AddP reg (LShiftL lreg scale));
4951 
4952   op_cost(10);
4953   format %{"[$reg + $lreg << $scale]" %}
4954   interface(MEMORY_INTER) %{
4955     base($reg);
4956     index($lreg);
4957     scale($scale);
4958     disp(0x0);   // No Displacement
4959   %}
4960 %}
4961 
4962 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
4963 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
4964 %{
4965   constraint(ALLOC_IN_RC(ptr_reg));
4966   match(AddP (AddP reg (LShiftL lreg scale)) off);
4967 
4968   op_cost(10);
4969   format %{"[$reg + $off + $lreg << $scale]" %}
4970   interface(MEMORY_INTER) %{
4971     base($reg);
4972     index($lreg);
4973     scale($scale);
4974     disp($off);
4975   %}
4976 %}
4977 
4978 // Indirect Memory Plus Scaled Positive Int Index Plus Offset Operand: folds ConvI2L when the converted value's long type has _lo >= 0
4979 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
4980 %{
4981   constraint(ALLOC_IN_RC(ptr_reg));
4982   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // presumably walks outer AddP -> inner AddP -> LShiftL -> ConvI2L -- confirm input indices
4983   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
4984 
4985   op_cost(10);
4986   format %{"[$reg + $off + $idx << $scale]" %}
4987   interface(MEMORY_INTER) %{
4988     base($reg);
4989     index($idx);
4990     scale($scale);
4991     disp($off);
4992   %}
4993 %}
4994 
4995 // Indirect Narrow Oop Plus Offset Operand: [R12 + (reg << 3) + off], used only with compressed oops and shift == times_8
4996 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
4997 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
4998 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
4999   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
5000   constraint(ALLOC_IN_RC(ptr_reg));
5001   match(AddP (DecodeN reg) off);
5002 
5003   op_cost(10);
5004   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5005   interface(MEMORY_INTER) %{
5006     base(0xc); // R12
5007     index($reg);
5008     scale(0x3);
5009     disp($off);
5010   %}
5011 %}
5012 
5013 // Indirect Memory Operand through a narrow oop: matches DecodeN directly, only when narrow_oop_shift() == 0
5014 operand indirectNarrow(rRegN reg)
5015 %{
5016   predicate(Universe::narrow_oop_shift() == 0);
5017   constraint(ALLOC_IN_RC(ptr_reg));
5018   match(DecodeN reg);
5019 
5020   format %{ "[$reg]" %}
5021   interface(MEMORY_INTER) %{
5022     base($reg);
5023     index(0x4);  // No Index
5024     scale(0x0);  // No Scale
5025     disp(0x0);   // No Displacement
5026   %}
5027 %}
5028 
5029 // Indirect Memory Plus Short Offset Operand, narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5030 operand indOffset8Narrow(rRegN reg, immL8 off)
5031 %{
5032   predicate(Universe::narrow_oop_shift() == 0);
5033   constraint(ALLOC_IN_RC(ptr_reg));
5034   match(AddP (DecodeN reg) off);
5035 
5036   format %{ "[$reg + $off (8-bit)]" %}
5037   interface(MEMORY_INTER) %{
5038     base($reg);
5039     index(0x4);  // No Index
5040     scale(0x0);  // No Scale
5041     disp($off);
5042   %}
5043 %}
5044 
5045 // Indirect Memory Plus Long Offset Operand, narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5046 operand indOffset32Narrow(rRegN reg, immL32 off)
5047 %{
5048   predicate(Universe::narrow_oop_shift() == 0);
5049   constraint(ALLOC_IN_RC(ptr_reg));
5050   match(AddP (DecodeN reg) off);
5051 
5052   format %{ "[$reg + $off (32-bit)]" %}
5053   interface(MEMORY_INTER) %{
5054     base($reg);
5055     index(0x4);  // No Index
5056     scale(0x0);  // No Scale
5057     disp($off);
5058   %}
5059 %}
5060 
5061 // Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5062 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5063 %{
5064   predicate(Universe::narrow_oop_shift() == 0);
5065   constraint(ALLOC_IN_RC(ptr_reg));
5066   match(AddP (AddP (DecodeN reg) lreg) off);
5067 
5068   op_cost(10);
5069   format %{"[$reg + $off + $lreg]" %}
5070   interface(MEMORY_INTER) %{
5071     base($reg);
5072     index($lreg);
5073     scale(0x0);  // No Scale
5074     disp($off);
5075   %}
5076 %}
5077 
5078 // Indirect Memory Plus Index Register Operand (no offset), narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5079 operand indIndexNarrow(rRegN reg, rRegL lreg)
5080 %{
5081   predicate(Universe::narrow_oop_shift() == 0);
5082   constraint(ALLOC_IN_RC(ptr_reg));
5083   match(AddP (DecodeN reg) lreg);
5084 
5085   op_cost(10);
5086   format %{"[$reg + $lreg]" %}
5087   interface(MEMORY_INTER) %{
5088     base($reg);
5089     index($lreg);
5090     scale(0x0);  // No Scale
5091     disp(0x0);   // No Displacement
5092   %}
5093 %}
5094 
5095 // Indirect Memory Plus Scaled Index Register Operand, narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5096 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5097 %{
5098   predicate(Universe::narrow_oop_shift() == 0);
5099   constraint(ALLOC_IN_RC(ptr_reg));
5100   match(AddP (DecodeN reg) (LShiftL lreg scale));
5101 
5102   op_cost(10);
5103   format %{"[$reg + $lreg << $scale]" %}
5104   interface(MEMORY_INTER) %{
5105     base($reg);
5106     index($lreg);
5107     scale($scale);
5108     disp(0x0);   // No Displacement
5109   %}
5110 %}
5111 
5112 // Indirect Memory Plus Scaled Index Register Plus Offset Operand, narrow-oop base (DecodeN); only when narrow_oop_shift() == 0
5113 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5114 %{
5115   predicate(Universe::narrow_oop_shift() == 0);
5116   constraint(ALLOC_IN_RC(ptr_reg));
5117   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5118 
5119   op_cost(10);
5120   format %{"[$reg + $off + $lreg << $scale]" %}
5121   interface(MEMORY_INTER) %{
5122     base($reg);
5123     index($lreg);
5124     scale($scale);
5125     disp($off);
5126   %}
5127 %}
5128 
5129 // Indirect Memory Plus Scaled Positive Int Index Plus Offset Operand, narrow-oop base (DecodeN); needs shift == 0 and index _lo >= 0
5130 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5131 %{
5132   constraint(ALLOC_IN_RC(ptr_reg));
5133   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // same node walk as indPosIndexScaleOffset -- confirm input indices
5134   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5135 
5136   op_cost(10);
5137   format %{"[$reg + $off + $idx << $scale]" %}
5138   interface(MEMORY_INTER) %{
5139     base($reg);
5140     index($idx);
5141     scale($scale);
5142     disp($off);
5143   %}
5144 %}
5145 
5146 
5147 //----------Special Memory Operands--------------------------------------------
5148 // Stack Slot Operand - This operand is used for loading and storing temporary
5149 //                      values on the stack where a match requires a value to
5150 //                      flow through memory.
5151 operand stackSlotP(sRegP reg)  // Pointer stack slot: RSP-based memory operand whose displacement is the slot's stack offset
5152 %{
5153   constraint(ALLOC_IN_RC(stack_slots));
5154   // No match rule because this operand is only generated in matching
5155 
5156   format %{ "[$reg]" %}
5157   interface(MEMORY_INTER) %{
5158     base(0x4);   // RSP
5159     index(0x4);  // No Index
5160     scale(0x0);  // No Scale
5161     disp($reg);  // Stack Offset
5162   %}
5163 %}
5164 
5165 operand stackSlotI(sRegI reg)  // Int stack slot: RSP-based memory operand whose displacement is the slot's stack offset
5166 %{
5167   constraint(ALLOC_IN_RC(stack_slots));
5168   // No match rule because this operand is only generated in matching
5169 
5170   format %{ "[$reg]" %}
5171   interface(MEMORY_INTER) %{
5172     base(0x4);   // RSP
5173     index(0x4);  // No Index
5174     scale(0x0);  // No Scale
5175     disp($reg);  // Stack Offset
5176   %}
5177 %}
5178 
5179 operand stackSlotF(sRegF reg)  // Float stack slot: RSP-based memory operand whose displacement is the slot's stack offset
5180 %{
5181   constraint(ALLOC_IN_RC(stack_slots));
5182   // No match rule because this operand is only generated in matching
5183 
5184   format %{ "[$reg]" %}
5185   interface(MEMORY_INTER) %{
5186     base(0x4);   // RSP
5187     index(0x4);  // No Index
5188     scale(0x0);  // No Scale
5189     disp($reg);  // Stack Offset
5190   %}
5191 %}
5192 
5193 operand stackSlotD(sRegD reg)  // Double stack slot: RSP-based memory operand whose displacement is the slot's stack offset
5194 %{
5195   constraint(ALLOC_IN_RC(stack_slots));
5196   // No match rule because this operand is only generated in matching
5197 
5198   format %{ "[$reg]" %}
5199   interface(MEMORY_INTER) %{
5200     base(0x4);   // RSP
5201     index(0x4);  // No Index
5202     scale(0x0);  // No Scale
5203     disp($reg);  // Stack Offset
5204   %}
5205 %}
5206 operand stackSlotL(sRegL reg)  // Long stack slot: RSP-based memory operand whose displacement is the slot's stack offset
5207 %{
5208   constraint(ALLOC_IN_RC(stack_slots));
5209   // No match rule because this operand is only generated in matching
5210 
5211   format %{ "[$reg]" %}
5212   interface(MEMORY_INTER) %{
5213     base(0x4);   // RSP
5214     index(0x4);  // No Index
5215     scale(0x0);  // No Scale
5216     disp($reg);  // Stack Offset
5217   %}
5218 %}
5219 
5220 //----------Conditional Branch Operands----------------------------------------
5221 // Comparison Op  - This is the operation of the comparison, and is limited to
5222 //                  the following set of codes:
5223 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5224 //
5225 // Other attributes of the comparison, such as unsignedness, are specified
5226 // by the comparison instruction that sets a condition code flags register.
5227 // That result is represented by a flags operand whose subtype is appropriate
5228 // to the unsignedness (etc.) of the comparison.
5229 //
5230 // Later, the instruction which matches both the Comparison Op (a Bool) and
5231 // the flags (produced by the Cmp) specifies the coding of the comparison op
5232 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5233 
5234 // Comparison Code: matches any Bool and maps each test to a condition encoding and the signed mnemonics (l, ge, le, g)
5235 operand cmpOp()
5236 %{
5237   match(Bool);
5238 
5239   format %{ "" %}
5240   interface(COND_INTER) %{
5241     equal(0x4, "e");
5242     not_equal(0x5, "ne");
5243     less(0xC, "l");
5244     greater_equal(0xD, "ge");
5245     less_equal(0xE, "le");
5246     greater(0xF, "g");
5247   %}
5248 %}
5249 
5250 // Comparison Code, unsigned compare (mnemonics b, nb, be, nbe).  Used by FP also, with
5251 // C2 (unordered) turned into GT or LT already.  The other bits
5252 // C0 and C3 are turned into Carry & Zero flags.
5253 operand cmpOpU()
5254 %{
5255   match(Bool);
5256 
5257   format %{ "" %}
5258   interface(COND_INTER) %{
5259     equal(0x4, "e");
5260     not_equal(0x5, "ne");
5261     less(0x2, "b");
5262     greater_equal(0x3, "nb");
5263     less_equal(0x6, "be");
5264     greater(0x7, "nbe");
5265   %}
5266 %}
5267 
5268 
5269 // Floating comparisons that don't require any fixup for the unordered case: only Bool tests lt, ge, le and gt match
5270 operand cmpOpUCF() %{
5271   match(Bool);
5272   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5273             n->as_Bool()->_test._test == BoolTest::ge ||
5274             n->as_Bool()->_test._test == BoolTest::le ||
5275             n->as_Bool()->_test._test == BoolTest::gt);
5276   format %{ "" %}
5277   interface(COND_INTER) %{
5278     equal(0x4, "e");
5279     not_equal(0x5, "ne");
5280     less(0x2, "b");
5281     greater_equal(0x3, "nb");
5282     less_equal(0x6, "be");
5283     greater(0x7, "nbe");
5284   %}
5285 %}
5286 
5287 
5288 // Floating comparisons that can be fixed up with extra conditional jumps: only Bool tests ne and eq match
5289 operand cmpOpUCF2() %{
5290   match(Bool);
5291   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5292             n->as_Bool()->_test._test == BoolTest::eq);
5293   format %{ "" %}
5294   interface(COND_INTER) %{
5295     equal(0x4, "e");
5296     not_equal(0x5, "ne");
5297     less(0x2, "b");
5298     greater_equal(0x3, "nb");
5299     less_equal(0x6, "be");
5300     greater(0x7, "nbe");
5301   %}
5302 %}
5303 
5304 
5305 //----------OPERAND CLASSES----------------------------------------------------
5306 // Operand Classes are groups of operands that are used to simplify
5307 // instruction definitions by not requiring the AD writer to specify separate
5308 // instructions for every form of operand when the instruction accepts
5309 // multiple operand types with the same basic encoding and format.  The classic
5310 // case of this is memory operands.
5311 
5312 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
5313                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
5314                indCompressedOopOffset,
5315                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
5316                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
5317                indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);  // groups the 17 addressing-mode operands above; stack slots are not included
5318 
5319 //----------PIPELINE-----------------------------------------------------------
5320 // Rules which define the behavior of the target architecture's pipeline.
5321 pipeline %{
5322 
5323 //----------ATTRIBUTES---------------------------------------------------------
5324 attributes %{
5325   variable_size_instructions;        // Variable size instructions
5326   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5327   instruction_unit_size = 1;         // An instruction unit is 1 byte long
5328   instruction_fetch_unit_size = 16;  // The processor fetches one line
5329   instruction_fetch_units = 1;       // of 16 bytes
5330 
5331   // List of nop instructions
5332   nops( MachNop );
5333 %}
5334 
5335 //----------RESOURCES----------------------------------------------------------
5336 // Resources are the functional units available to the machine
5337 
5338 // Generic P2/P3 pipeline
5339 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5340 // 3 instructions decoded per cycle.
5341 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below -- confirm), 1 branch, 1 FPU,
5342 // 3 ALU op, only ALU0 handles mul instructions.
5343 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5344            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
5345            BR, FPU,
5346            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
5347 
5348 //----------PIPELINE DESCRIPTION-----------------------------------------------
5349 // Pipeline Description specifies the stages in the machine's pipeline
5350 
5351 // Generic P2/P3 pipeline: six stages, S0 through S5
5352 pipe_desc(S0, S1, S2, S3, S4, S5);
5353 
5354 //----------PIPELINE CLASSES---------------------------------------------------
5355 // Pipeline Classes describe the stages in which input and output are
5356 // referenced by the hardware pipeline.
5357 
5358 // Naming convention: ialu or fpu
5359 // Then: _reg
5360 // Then: _reg if there is a 2nd register
5361 // Then: _long if it's a pair of instructions implementing a long
5362 // Then: _fat if it requires the big decoder
5363 //   Or: _mem if it requires the big decoder and a memory unit.
5364 
5365 // Integer ALU reg operation: dst is read in S3 and written in S4
5366 pipe_class ialu_reg(rRegI dst)
5367 %{
5368     single_instruction;
5369     dst    : S4(write);
5370     dst    : S3(read);
5371     DECODE : S0;        // any decoder
5372     ALU    : S3;        // any alu
5373 %}
5374 
5375 // Long ALU reg operation, counted as 2 instructions
5376 pipe_class ialu_reg_long(rRegL dst)
5377 %{
5378     instruction_count(2);
5379     dst    : S4(write);
5380     dst    : S3(read);
5381     DECODE : S0(2);     // any 2 decoders
5382     ALU    : S3(2);     // any 2 alus
5383 %}
5384 
5385 // Integer ALU reg operation using big decoder (D0)
5386 pipe_class ialu_reg_fat(rRegI dst)
5387 %{
5388     single_instruction;
5389     dst    : S4(write);
5390     dst    : S3(read);
5391     D0     : S0;        // big decoder only
5392     ALU    : S3;        // any alu
5393 %}
5394 
5395 // Long ALU reg operation using big decoder (D0), counted as 2 instructions
5396 pipe_class ialu_reg_long_fat(rRegL dst)
5397 %{
5398     instruction_count(2);
5399     dst    : S4(write);
5400     dst    : S3(read);
5401     D0     : S0(2);     // big decoder only; twice
5402     ALU    : S3(2);     // any 2 alus
5403 %}
5404 
5405 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
5406 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
5407 %{
5408     single_instruction;
5409     dst    : S4(write);
5410     src    : S3(read);
5411     DECODE : S0;        // any decoder
5412     ALU    : S3;        // any alu
5413 %}
5414 
5415 // Long ALU reg-reg operation, counted as 2 instructions
5416 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
5417 %{
5418     instruction_count(2);
5419     dst    : S4(write);
5420     src    : S3(read);
5421     DECODE : S0(2);     // any 2 decoders
5422     ALU    : S3(2);     // any 2 alus
5423 %}
5424 
5425 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand and no MEM unit is reserved -- confirm)
5426 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
5427 %{
5428     single_instruction;
5429     dst    : S4(write);
5430     src    : S3(read);
5431     D0     : S0;        // big decoder only
5432     ALU    : S3;        // any alu
5433 %}
5434 
5435 // Long ALU reg-reg operation using big decoder (D0), counted as 2 instructions
5436 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
5437 %{
5438     instruction_count(2);
5439     dst    : S4(write);
5440     src    : S3(read);
5441     D0     : S0(2);     // big decoder only; twice
5442     ALU    : S3(2);     // any 2 alus
5443 %}
5444 
5445 // Integer ALU reg-mem operation: memory is read in S3, the ALU runs in S4, dst is written in S5
5446 pipe_class ialu_reg_mem(rRegI dst, memory mem)
5447 %{
5448     single_instruction;
5449     dst    : S5(write);
5450     mem    : S3(read);
5451     D0     : S0;        // big decoder only
5452     ALU    : S4;        // any alu
5453     MEM    : S3;        // any mem
5454 %}
5455 
5456 // Integer mem operation (prefetch): reads memory in S3, no register result and no ALU use
5457 pipe_class ialu_mem(memory mem)
5458 %{
5459     single_instruction;
5460     mem    : S3(read);
5461     D0     : S0;        // big decoder only
5462     MEM    : S3;        // any mem
5463 %}
5464 
5465 // Integer Store to Memory from a register: src is read in S5
5466 pipe_class ialu_mem_reg(memory mem, rRegI src)
5467 %{
5468     single_instruction;
5469     mem    : S3(read);
5470     src    : S5(read);
5471     D0     : S0;        // big decoder only
5472     ALU    : S4;        // any alu
5473     MEM    : S3;        // any mem
5474 %}
5475 
5476 // // Long Store to Memory
5477 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
5478 // %{
5479 //     instruction_count(2);
5480 //     mem    : S3(read);
5481 //     src    : S5(read);
5482 //     D0     : S0(2);          // big decoder only; twice
5483 //     ALU    : S4(2);     // any 2 alus
5484 //     MEM    : S3(2);  // Both mems
5485 // %}
5486 
5487 // Integer Store to Memory with no register source (immediate form, per the class name)
5488 pipe_class ialu_mem_imm(memory mem)
5489 %{
5490     single_instruction;
5491     mem    : S3(read);
5492     D0     : S0;        // big decoder only
5493     ALU    : S4;        // any alu
5494     MEM    : S3;        // any mem
5495 %}
5496 
5497 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
5498 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
5499 %{
5500     single_instruction;
5501     dst    : S4(write);
5502     src    : S3(read);
5503     D0     : S0;        // Big decoder only
5504     ALU0   : S3;        // only alu0
5505 %}
5506 
5507 // Integer ALU0 reg-mem operation: restricted to ALU0 and the big decoder, plus one memory unit
5508 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
5509 %{
5510     single_instruction;
5511     dst    : S5(write);
5512     mem    : S3(read);
5513     D0     : S0;        // big decoder only
5514     ALU0   : S4;        // ALU0 only
5515     MEM    : S3;        // any mem
5516 %}
5517 
5518 // Integer ALU reg-reg operation whose result is the flags operand cr (written in S4)
5519 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
5520 %{
5521     single_instruction;
5522     cr     : S4(write);
5523     src1   : S3(read);
5524     src2   : S3(read);
5525     DECODE : S0;        // any decoder
5526     ALU    : S3;        // any alu
5527 %}
5528 
5529 // Integer ALU reg-imm operation whose result is the flags operand cr (written in S4)
5530 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
5531 %{
5532     single_instruction;
5533     cr     : S4(write);
5534     src1   : S3(read);
5535     DECODE : S0;        // any decoder
5536     ALU    : S3;        // any alu
5537 %}
5538 
5539 // Integer ALU reg-mem operation whose result is the flags operand cr (written in S4)
5540 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
5541 %{
5542     single_instruction;
5543     cr     : S4(write);
5544     src1   : S3(read);
5545     src2   : S3(read);
5546     D0     : S0;        // big decoder only
5547     ALU    : S4;        // any alu
5548     MEM    : S3;        // any mem
5549 %}
5550 
5551 // Four-instruction sequence reading p, q and y (NOTE(review): header said "Conditional move reg-reg"; presumably a compare-less-than idiom -- confirm)
5552 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
5553 %{
5554     instruction_count(4);
5555     y      : S4(read);
5556     q      : S3(read);
5557     p      : S3(read);
5558     DECODE : S0(4);     // 4 decoder slots
5559 %}
5560 
5561 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4; no ALU resource is reserved
5562 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
5563 %{
5564     single_instruction;
5565     dst    : S4(write);
5566     src    : S3(read);
5567     cr     : S3(read);
5568     DECODE : S0;        // any decoder
5569 %}
5570 
5571 // Conditional move reg-mem: reads the memory source and the flags in S3, writes dst in S4
5572 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
5573 %{
5574     single_instruction;
5575     dst    : S4(write);
5576     src    : S3(read);
5577     cr     : S3(read);
5578     DECODE : S0;        // any decoder
5579     MEM    : S3;        // any mem
5580 %}
5581 
5582 // Conditional move reg-reg long (NOTE(review): declared single_instruction yet reserves 2 decoders -- confirm)
5583 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
5584 %{
5585     single_instruction;
5586     dst    : S4(write);
5587     src    : S3(read);
5588     cr     : S3(read);
5589     DECODE : S0(2);     // any 2 decoders
5590 %}
5591 
5592 // XXX
5593 // // Conditional move double reg-reg
5594 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
5595 // %{
5596 //     single_instruction;
5597 //     dst    : S4(write);
5598 //     src    : S3(read);
5599 //     cr     : S3(read);
5600 //     DECODE : S0;     // any decoder
5601 // %}
5602 
5603 // Float reg operation: single operand dst, declared as read in S3 only; counted as 2 instructions
5604 pipe_class fpu_reg(regD dst)
5605 %{
5606     instruction_count(2);
5607     dst    : S3(read);
5608     DECODE : S0(2);     // any 2 decoders
5609     FPU    : S3;
5610 %}
5611 
5612 // Float reg-reg operation, counted as 2 instructions
5613 pipe_class fpu_reg_reg(regD dst, regD src)
5614 %{
5615     instruction_count(2);
5616     dst    : S4(write);
5617     src    : S3(read);
5618     DECODE : S0(2);     // any 2 decoders
5619     FPU    : S3;
5620 %}
5621 
5622 // Float reg-reg-reg operation, counted as 3 instructions
5623 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
5624 %{
5625     instruction_count(3);
5626     dst    : S4(write);
5627     src1   : S3(read);
5628     src2   : S3(read);
5629     DECODE : S0(3);     // any 3 decoders
5630     FPU    : S3(2);
5631 %}
5632 
5633 // Float reg-reg-reg-reg operation: four instructions, three source registers
5634 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
5635 %{
5636     instruction_count(4);
5637     dst    : S4(write);
5638     src1   : S3(read);
5639     src2   : S3(read);
5640     src3   : S3(read);
5641     DECODE : S0(4);     // any 4 decoders
5642     FPU    : S3(2);
5643 %}
5644 
5645 // Float reg-mem-reg-reg operation: four instructions, one memory source
5646 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
5647 %{
5648     instruction_count(4);
5649     dst    : S4(write);
5650     src1   : S3(read);
5651     src2   : S3(read);
5652     src3   : S3(read);
5653     DECODE : S1(3);     // any 3 decoders
5654     D0     : S0;        // Big decoder only
5655     FPU    : S3(2);
5656     MEM    : S3;        // memory resource, same stage in which src1 is read
5657 %}
5658 
5659 // Float reg-mem operation: two instructions (memory access, then FPU POP)
5660 pipe_class fpu_reg_mem(regD dst, memory mem)
5661 %{
5662     instruction_count(2);
5663     dst    : S5(write);    // written one stage after the FPU resource is used
5664     mem    : S3(read);
5665     D0     : S0;        // big decoder only
5666     DECODE : S1;        // any decoder for FPU POP
5667     FPU    : S4;
5668     MEM    : S3;        // any mem
5669 %}
5670 
5671 // Float reg-reg-mem operation: three instructions, one register and one memory source
5672 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
5673 %{
5674     instruction_count(3);
5675     dst    : S5(write);
5676     src1   : S3(read);
5677     mem    : S3(read);
5678     D0     : S0;        // big decoder only
5679     DECODE : S1(2);     // any 2 decoders, including the one for FPU POP
5680     FPU    : S4;
5681     MEM    : S3;        // any mem
5682 %}
5683 
5684 // Float mem-reg operation: two instructions; no register result is declared
5685 pipe_class fpu_mem_reg(memory mem, regD src)
5686 %{
5687     instruction_count(2);
5688     src    : S5(read);
5689     mem    : S3(read);     // the memory operand is declared as a read
5690     DECODE : S0;        // any decoder for FPU PUSH
5691     D0     : S1;        // big decoder only
5692     FPU    : S4;
5693     MEM    : S3;        // any mem
5694 %}
5695 
5696 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
5697 %{
5698     instruction_count(3);  // float mem-reg-reg operation: three instructions
5699     src1   : S3(read);
5700     src2   : S3(read);
5701     mem    : S3(read);
5702     DECODE : S0(2);     // any 2 decoders, including the one for FPU PUSH
5703     D0     : S1;        // big decoder only
5704     FPU    : S4;
5705     MEM    : S3;        // any mem
5706 %}
5707 
5708 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
5709 %{
5710     instruction_count(3);  // float mem-reg-mem operation: three instructions
5711     src1   : S3(read);
5712     src2   : S3(read);
5713     mem    : S4(read);     // mem is read one stage after the two sources
5714     DECODE : S0;        // any decoder for FPU PUSH
5715     D0     : S0(2);     // big decoder only
5716     FPU    : S4;
5717     MEM    : S3(2);     // any mem
5718 %}
5719 
5720 pipe_class fpu_mem_mem(memory dst, memory src1)
5721 %{
5722     instruction_count(2);  // float mem-mem operation: two instructions
5723     src1   : S3(read);
5724     dst    : S4(read);     // dst is a memory operand and is declared as a read
5725     D0     : S0(2);     // big decoder only
5726     MEM    : S3(2);     // any mem; no FPU resource is reserved by this class
5727 %}
5728 
5729 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
5730 %{
5731     instruction_count(3);  // float mem-mem-mem operation: three instructions
5732     src1   : S3(read);
5733     src2   : S3(read);
5734     dst    : S4(read);     // dst is a memory operand and is declared as a read
5735     D0     : S0(3);     // big decoder only
5736     FPU    : S4;
5737     MEM    : S3(3);     // any mem
5738 %}
5739 
5740 pipe_class fpu_mem_reg_con(memory mem, regD src1)
5741 %{
5742     instruction_count(3);  // float mem-reg operation with a constant: three instructions
5743     src1   : S4(read);
5744     mem    : S4(read);
5745     DECODE : S0;        // any decoder for FPU PUSH
5746     D0     : S0(2);     // big decoder only
5747     FPU    : S4;
5748     MEM    : S3(2);     // any mem
5749 %}
5750 
5751 // Float load constant: a memory load followed by an FPU POP
5752 pipe_class fpu_reg_con(regD dst)
5753 %{
5754     instruction_count(2);
5755     dst    : S5(write);    // written one stage after the FPU resource is used
5756     D0     : S0;        // big decoder only for the load
5757     DECODE : S1;        // any decoder for FPU POP
5758     FPU    : S4;
5759     MEM    : S3;        // any mem
5760 %}
5761 
5762 // Float reg-reg operation with a constant load: three instructions
5763 pipe_class fpu_reg_reg_con(regD dst, regD src)
5764 %{
5765     instruction_count(3);
5766     dst    : S5(write);
5767     src    : S3(read);
5768     D0     : S0;        // big decoder only for the load
5769     DECODE : S1(2);     // any 2 decoders, including the one for FPU POP
5770     FPU    : S4;
5771     MEM    : S3;        // any mem
5772 %}
5773 
5774 // Unconditional branch: one instruction that only reserves the branch resource
5775 pipe_class pipe_jmp(label labl)
5776 %{
5777     single_instruction;
5778     BR   : S3;             // no operand reads or writes are declared
5779 %}
5780 
5781 // Conditional branch: one instruction that consumes the flags
5782 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
5783 %{
5784     single_instruction;
5785     cr    : S1(read);      // flags are read in S1, before the branch resource is used
5786     BR    : S3;
5787 %}
5788 
5789 // Allocation idiom (compare-and-exchange), modeled as one serializing instruction
5790 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
5791 %{
5792     instruction_count(1); force_serialization;
5793     fixed_latency(6);      // latency is declared as a constant 6
5794     heap_ptr : S3(read);
5795     DECODE   : S0(3);
5796     D0       : S2;
5797     MEM      : S3;
5798     ALU      : S3(2);
5799     dst      : S5(write);  // result written in S5, the stage that also uses BR
5800     BR       : S5;
5801 %}
5802 
5803 // Generic big/slow expanded idiom; used by many of the XMM loads in this file
5804 pipe_class pipe_slow()
5805 %{
5806     instruction_count(10); multiple_bundles; force_serialization;
5807     fixed_latency(100);    // large constant latency, in line with the "big/slow" intent
5808     D0  : S0(2);
5809     MEM : S3(2);
5810 %}
5811 
5812 // The real do-nothing guy: takes no operands and reserves no resources
5813 pipe_class empty()
5814 %{
5815     instruction_count(0);  // contributes zero instructions
5816 %}
5817 
5818 // Define the class for the Nop node: Nops are scheduled with the 'empty' class
5819 define
5820 %{
5821    MachNop = empty;
5822 %}
5823 
5824 %}
5825 
5826 //----------INSTRUCTIONS-------------------------------------------------------
5827 //
5828 // match      -- States which machine-independent subtree may be replaced
5829 //               by this instruction.
5830 // ins_cost   -- The estimated cost of this instruction is used by instruction
5831 //               selection to identify a minimum cost tree of machine
5832 //               instructions that matches a tree of machine-independent
5833 //               instructions.
5834 // format     -- A string providing the disassembly for this instruction.
5835 //               The value of an instruction's operand may be inserted
5836 //               by referring to it with a '$' prefix.
5837 // opcode     -- Three instruction opcodes may be provided.  These are referred
5838 //               to within an encode class as $primary, $secondary, and $tertiary
5839 //               respectively.  The primary opcode is commonly used to
5840 //               indicate the type of machine instruction, while secondary
5841 //               and tertiary are often used for prefix options or addressing
5842 //               modes.
5843 // ins_encode -- A list of encode classes with parameters. The encode class
5844 //               name must have been defined in an 'enc_class' specification
5845 //               in the encode section of the architecture description.
5846 
5847 
5848 //----------Load/Store/Move Instructions---------------------------------------
5849 //----------Load Instructions--------------------------------------------------
5850 
5851 // Load Byte (8 bit signed)
5852 instruct loadB(rRegI dst, memory mem)
5853 %{
5854   match(Set dst (LoadB mem));
5855 
5856   ins_cost(125);
5857   format %{ "movsbl  $dst, $mem\t# byte" %}
5858 
5859   ins_encode %{
5860     __ movsbl($dst$$Register, $mem$$Address);  // sign-extending byte load into a 32-bit register
5861   %}
5862 
5863   ins_pipe(ialu_reg_mem);
5864 %}
5865 
5866 // Load Byte (8 bit signed) into Long Register
5867 instruct loadB2L(rRegL dst, memory mem)
5868 %{
5869   match(Set dst (ConvI2L (LoadB mem)));  // load and int->long conversion matched as one unit
5870 
5871   ins_cost(125);
5872   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5873 
5874   ins_encode %{
5875     __ movsbq($dst$$Register, $mem$$Address);  // sign-extends the byte straight to 64 bits
5876   %}
5877 
5878   ins_pipe(ialu_reg_mem);
5879 %}
5880 
5881 // Load Unsigned Byte (8 bit UNsigned)
5882 instruct loadUB(rRegI dst, memory mem)
5883 %{
5884   match(Set dst (LoadUB mem));
5885 
5886   ins_cost(125);
5887   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5888 
5889   ins_encode %{
5890     __ movzbl($dst$$Register, $mem$$Address);  // zero-extending byte load into a 32-bit register
5891   %}
5892 
5893   ins_pipe(ialu_reg_mem);
5894 %}
5895 
5896 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5897 instruct loadUB2L(rRegL dst, memory mem)
5898 %{
5899   match(Set dst (ConvI2L (LoadUB mem)));  // load and int->long conversion matched as one unit
5900 
5901   ins_cost(125);
5902   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5903 
5904   ins_encode %{
5905     __ movzbq($dst$$Register, $mem$$Address);  // zero-extends the byte straight to 64 bits
5906   %}
5907 
5908   ins_pipe(ialu_reg_mem);
5909 %}
5910 
5911 // Load Unsigned Byte (8 bit UNsigned) with a 8-bit mask into Long Register
5912 instruct loadUB2L_immI8(rRegL dst, memory mem, immI8 mask, rFlagsReg cr) %{
5913   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5914   effect(KILL cr);  // the andl below modifies the flags
5915 
5916   format %{ "movzbq  $dst, $mem\t# ubyte & 8-bit mask -> long\n\t"
5917             "andl    $dst, $mask" %}
5918   ins_encode %{
5919     Register Rdst = $dst$$Register;
5920     __ movzbq(Rdst, $mem$$Address);  // value is now in 0..255
5921     __ andl(Rdst, $mask$$constant);  // masking a 0..255 value cannot produce a negative int
5922   %}
5923   ins_pipe(ialu_reg_mem);
5924 %}
5925 
5926 // Load Short (16 bit signed)
5927 instruct loadS(rRegI dst, memory mem)
5928 %{
5929   match(Set dst (LoadS mem));
5930 
5931   ins_cost(125);
5932   format %{ "movswl $dst, $mem\t# short" %}
5933 
5934   ins_encode %{
5935     __ movswl($dst$$Register, $mem$$Address);  // sign-extending 16-bit load into a 32-bit register
5936   %}
5937 
5938   ins_pipe(ialu_reg_mem);
5939 %}
5940 
5941 // Load Short (16 bit signed) to Byte (8 bit signed)
5942 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5943   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));  // (x << 24) >> 24 keeps only the sign-extended low byte
5944 
5945   ins_cost(125);
5946   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5947   ins_encode %{
5948     __ movsbl($dst$$Register, $mem$$Address);  // low byte sits at $mem itself (little-endian), so a byte load suffices
5949   %}
5950   ins_pipe(ialu_reg_mem);
5951 %}
5952 
5953 // Load Short (16 bit signed) into Long Register
5954 instruct loadS2L(rRegL dst, memory mem)
5955 %{
5956   match(Set dst (ConvI2L (LoadS mem)));  // load and int->long conversion matched as one unit
5957 
5958   ins_cost(125);
5959   format %{ "movswq $dst, $mem\t# short -> long" %}
5960 
5961   ins_encode %{
5962     __ movswq($dst$$Register, $mem$$Address);  // sign-extends the 16-bit value straight to 64 bits
5963   %}
5964 
5965   ins_pipe(ialu_reg_mem);
5966 %}
5967 
5968 // Load Unsigned Short/Char (16 bit UNsigned)
5969 instruct loadUS(rRegI dst, memory mem)
5970 %{
5971   match(Set dst (LoadUS mem));
5972 
5973   ins_cost(125);
5974   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5975 
5976   ins_encode %{
5977     __ movzwl($dst$$Register, $mem$$Address);  // zero-extending 16-bit load into a 32-bit register
5978   %}
5979 
5980   ins_pipe(ialu_reg_mem);
5981 %}
5982 
5983 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5984 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5985   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));  // (x << 24) >> 24 keeps only the sign-extended low byte
5986 
5987   ins_cost(125);
5988   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5989   ins_encode %{
5990     __ movsbl($dst$$Register, $mem$$Address);  // upper byte is discarded by the shifts, so only the low byte is loaded
5991   %}
5992   ins_pipe(ialu_reg_mem);
5993 %}
5994 
5995 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5996 instruct loadUS2L(rRegL dst, memory mem)
5997 %{
5998   match(Set dst (ConvI2L (LoadUS mem)));  // load and int->long conversion matched as one unit
5999 
6000   ins_cost(125);
6001   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
6002 
6003   ins_encode %{
6004     __ movzwq($dst$$Register, $mem$$Address);  // zero-extends the 16-bit value straight to 64 bits
6005   %}
6006 
6007   ins_pipe(ialu_reg_mem);
6008 %}
6009 
6010 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6011 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6012   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6013 
6014   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
6015   ins_encode %{
6016     __ movzbq($dst$$Register, $mem$$Address);  // the 0xFF mask is folded into a zero-extending byte load; no flags touched
6017   %}
6018   ins_pipe(ialu_reg_mem);
6019 %}
6020 
6021 // Load Unsigned Short/Char (16 bit UNsigned) with mask into Long Register
6022 instruct loadUS2L_immI16(rRegL dst, memory mem, immI16 mask, rFlagsReg cr) %{
6023   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6024   effect(KILL cr);  // the andl below modifies the flags
6025 
6026   format %{ "movzwq  $dst, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6027             "andl    $dst, $mask" %}
6028   ins_encode %{
6029     Register Rdst = $dst$$Register;
6030     __ movzwq(Rdst, $mem$$Address);  // value is now in 0..65535
6031     __ andl(Rdst, $mask$$constant);  // masking a 0..65535 value cannot produce a negative int
6032   %}
6033   ins_pipe(ialu_reg_mem);
6034 %}
6035 
6036 // Load Integer
6037 instruct loadI(rRegI dst, memory mem)
6038 %{
6039   match(Set dst (LoadI mem));
6040 
6041   ins_cost(125);
6042   format %{ "movl    $dst, $mem\t# int" %}
6043 
6044   ins_encode %{
6045     __ movl($dst$$Register, $mem$$Address);  // plain 32-bit load
6046   %}
6047 
6048   ins_pipe(ialu_reg_mem);
6049 %}
6050 
6051 // Load Integer (32 bit signed) to Byte (8 bit signed)
6052 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6053   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));  // (x << 24) >> 24 keeps only the sign-extended low byte
6054 
6055   ins_cost(125);
6056   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
6057   ins_encode %{
6058     __ movsbl($dst$$Register, $mem$$Address);  // low byte sits at $mem itself (little-endian), so a byte load suffices
6059   %}
6060   ins_pipe(ialu_reg_mem);
6061 %}
6062 
6063 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6064 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6065   match(Set dst (AndI (LoadI mem) mask));  // int & 0xFF
6066 
6067   ins_cost(125);
6068   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
6069   ins_encode %{
6070     __ movzbl($dst$$Register, $mem$$Address);  // the mask is folded into a zero-extending byte load
6071   %}
6072   ins_pipe(ialu_reg_mem);
6073 %}
6074 
6075 // Load Integer (32 bit signed) to Short (16 bit signed)
6076 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6077   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));  // (x << 16) >> 16 keeps only the sign-extended low 16 bits
6078 
6079   ins_cost(125);
6080   format %{ "movswl  $dst, $mem\t# int -> short" %}
6081   ins_encode %{
6082     __ movswl($dst$$Register, $mem$$Address);  // low half sits at $mem itself (little-endian), so a 16-bit load suffices
6083   %}
6084   ins_pipe(ialu_reg_mem);
6085 %}
6086 
6087 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6088 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6089   match(Set dst (AndI (LoadI mem) mask));  // int & 0xFFFF
6090 
6091   ins_cost(125);
6092   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
6093   ins_encode %{
6094     __ movzwl($dst$$Register, $mem$$Address);  // the mask is folded into a zero-extending 16-bit load
6095   %}
6096   ins_pipe(ialu_reg_mem);
6097 %}
6098 
6099 // Load Integer into Long Register
6100 instruct loadI2L(rRegL dst, memory mem)
6101 %{
6102   match(Set dst (ConvI2L (LoadI mem)));  // load and int->long conversion matched as one unit
6103 
6104   ins_cost(125);
6105   format %{ "movslq  $dst, $mem\t# int -> long" %}
6106 
6107   ins_encode %{
6108     __ movslq($dst$$Register, $mem$$Address);  // sign-extends the 32-bit value to 64 bits
6109   %}
6110 
6111   ins_pipe(ialu_reg_mem);
6112 %}
6113 
6114 // Load Integer with mask 0xFF into Long Register
6115 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6116   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6117 
6118   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
6119   ins_encode %{
6120     __ movzbq($dst$$Register, $mem$$Address);  // mask and widening folded into a zero-extending byte load
6121   %}
6122   ins_pipe(ialu_reg_mem);
6123 %}
6124 
6125 // Load Integer with mask 0xFFFF into Long Register
6126 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
6127   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6128 
6129   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
6130   ins_encode %{
6131     __ movzwq($dst$$Register, $mem$$Address);  // mask and widening folded into a zero-extending 16-bit load
6132   %}
6133   ins_pipe(ialu_reg_mem);
6134 %}
6135 
6136 // Load Integer with a 32-bit mask into Long Register (result is the sign-extended int, as ConvI2L requires)
6137 instruct loadI2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6138   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6139   effect(KILL cr);  // the andl below modifies the flags
6140   format %{ "movl    $dst, $mem\t# int & 32-bit mask -> long\n\t"
6141             "andl    $dst, $mask\n\t"
6142             "movslq  $dst, $dst\t# only if mask < 0" %}
6143   ins_encode %{
6144     __ movl($dst$$Register, $mem$$Address);    // 32-bit load: upper 32 bits of dst become zero
6145     __ andl($dst$$Register, $mask$$constant);  // 32-bit and: upper 32 bits stay zero
6146     if ($mask$$constant < 0) { __ movslq($dst$$Register, $dst$$Register); }  // a negative mask may leave bit 31 set; sign-extend so the long equals the int
6147   %}
6148   ins_pipe(ialu_reg_mem);
6149 %}
6150 
6151 // Load Unsigned Integer into Long Register
6152 instruct loadUI2L(rRegL dst, memory mem)
6153 %{
6154   match(Set dst (LoadUI2L mem));
6155 
6156   ins_cost(125);
6157   format %{ "movl    $dst, $mem\t# uint -> long" %}
6158 
6159   ins_encode %{
6160     __ movl($dst$$Register, $mem$$Address);  // a 32-bit load zeroes the upper half, giving the unsigned widening
6161   %}
6162 
6163   ins_pipe(ialu_reg_mem);
6164 %}
6165 
6166 // Load Long
6167 instruct loadL(rRegL dst, memory mem)
6168 %{
6169   match(Set dst (LoadL mem));
6170 
6171   ins_cost(125);
6172   format %{ "movq    $dst, $mem\t# long" %}
6173 
6174   ins_encode %{
6175     __ movq($dst$$Register, $mem$$Address);  // full 64-bit load
6176   %}
6177 
6178   ins_pipe(ialu_reg_mem); // XXX
6179 %}
6180 
6181 // Load Range (a 32-bit value, loaded like an int)
6182 instruct loadRange(rRegI dst, memory mem)
6183 %{
6184   match(Set dst (LoadRange mem));
6185 
6186   ins_cost(125); // XXX
6187   format %{ "movl    $dst, $mem\t# range" %}
6188   opcode(0x8B);  // MOV r, r/m
6189   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
6190   ins_pipe(ialu_reg_mem);
6191 %}
6192 
6193 // Load Pointer (64-bit)
6194 instruct loadP(rRegP dst, memory mem)
6195 %{
6196   match(Set dst (LoadP mem));
6197 
6198   ins_cost(125); // XXX
6199   format %{ "movq    $dst, $mem\t# ptr" %}
6200   opcode(0x8B);  // MOV r, r/m; the _wide REX encoding below is used for the 64-bit form
6201   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6202   ins_pipe(ialu_reg_mem); // XXX
6203 %}
6204 
6205 // Load Compressed Pointer
6206 instruct loadN(rRegN dst, memory mem)
6207 %{
6208    match(Set dst (LoadN mem));
6209 
6210    ins_cost(125); // XXX
6211    format %{ "movl    $dst, $mem\t# compressed ptr" %}
6212    ins_encode %{
6213      __ movl($dst$$Register, $mem$$Address);  // loaded as 32 bits; no decoding is done here
6214    %}
6215    ins_pipe(ialu_reg_mem); // XXX
6216 %}
6217 
6218 
6219 // Load Klass Pointer (64-bit, same encoding as loadP)
6220 instruct loadKlass(rRegP dst, memory mem)
6221 %{
6222   match(Set dst (LoadKlass mem));
6223 
6224   ins_cost(125); // XXX
6225   format %{ "movq    $dst, $mem\t# class" %}
6226   opcode(0x8B);  // MOV r, r/m
6227   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6228   ins_pipe(ialu_reg_mem); // XXX
6229 %}
6230 
6231 // Load narrow Klass Pointer
6232 instruct loadNKlass(rRegN dst, memory mem)
6233 %{
6234   match(Set dst (LoadNKlass mem));
6235 
6236   ins_cost(125); // XXX
6237   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
6238   ins_encode %{
6239     __ movl($dst$$Register, $mem$$Address);  // loaded as 32 bits; no decoding is done here
6240   %}
6241   ins_pipe(ialu_reg_mem); // XXX
6242 %}
6243 
6244 // Load Float into an XMM register
6245 instruct loadF(regF dst, memory mem)
6246 %{
6247   match(Set dst (LoadF mem));
6248 
6249   ins_cost(145); // XXX
6250   format %{ "movss   $dst, $mem\t# float" %}
6251   opcode(0xF3, 0x0F, 0x10);  // MOVSS xmm, m32; the F3 prefix is emitted before the REX byte
6252   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6253   ins_pipe(pipe_slow); // XXX
6254 %}
6255 
6256 // Load Double (variant that does not clear the upper half of the XMM register)
6257 instruct loadD_partial(regD dst, memory mem)
6258 %{
6259   predicate(!UseXmmLoadAndClearUpper);  // mutually exclusive with loadD below
6260   match(Set dst (LoadD mem));
6261 
6262   ins_cost(145); // XXX
6263   format %{ "movlpd  $dst, $mem\t# double" %}
6264   opcode(0x66, 0x0F, 0x12);  // MOVLPD xmm, m64: writes only the low 64 bits
6265   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6266   ins_pipe(pipe_slow); // XXX
6267 %}
6268 
6269 instruct loadD(regD dst, memory mem)
6270 %{
6271   predicate(UseXmmLoadAndClearUpper);  // Load Double, variant that also clears the upper half of the XMM register
6272   match(Set dst (LoadD mem));
6273 
6274   ins_cost(145); // XXX
6275   format %{ "movsd   $dst, $mem\t# double" %}
6276   opcode(0xF2, 0x0F, 0x10);  // MOVSD xmm, m64
6277   ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
6278   ins_pipe(pipe_slow); // XXX
6279 %}
6280 
6281 // Load Aligned Packed Byte to XMM register (8 bytes, one 64-bit MOVQ)
6282 instruct loadA8B(regD dst, memory mem) %{
6283   match(Set dst (Load8B mem));
6284   ins_cost(125);
6285   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6286   ins_encode( movq_ld(dst, mem));  // same encoding as the other packed 64-bit loads below
6287   ins_pipe( pipe_slow );
6288 %}
6289 
6290 // Load Aligned Packed Short to XMM register (4 shorts, one 64-bit MOVQ)
6291 instruct loadA4S(regD dst, memory mem) %{
6292   match(Set dst (Load4S mem));
6293   ins_cost(125);
6294   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6295   ins_encode( movq_ld(dst, mem));  // same encoding as loadA8B
6296   ins_pipe( pipe_slow );
6297 %}
6298 
6299 // Load Aligned Packed Char to XMM register (4 chars, one 64-bit MOVQ)
6300 instruct loadA4C(regD dst, memory mem) %{
6301   match(Set dst (Load4C mem));
6302   ins_cost(125);
6303   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6304   ins_encode( movq_ld(dst, mem));  // same encoding as loadA8B
6305   ins_pipe( pipe_slow );
6306 %}
6307 
6308 // Load Aligned Packed Integer to XMM register (2 ints, one 64-bit MOVQ)
6309 instruct load2IU(regD dst, memory mem) %{
6310   match(Set dst (Load2I mem));
6311   ins_cost(125);
6312   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6313   ins_encode( movq_ld(dst, mem));  // same encoding as loadA8B
6314   ins_pipe( pipe_slow );
6315 %}
6316 
6317 // Load Aligned Packed Single to XMM (2 floats, one 64-bit MOVQ)
6318 instruct loadA2F(regD dst, memory mem) %{
6319   match(Set dst (Load2F mem));
6320   ins_cost(145);  // costed higher than the packed integer loads above (125)
6321   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6322   ins_encode( movq_ld(dst, mem));
6323   ins_pipe( pipe_slow );
6324 %}
6325 
6326 // Load Effective Address (base + 8-bit offset)
6327 instruct leaP8(rRegP dst, indOffset8 mem)
6328 %{
6329   match(Set dst mem);  // matches the address expression itself; nothing is loaded from memory
6330 
6331   ins_cost(110); // XXX
6332   format %{ "leaq    $dst, $mem\t# ptr 8" %}
6333   opcode(0x8D);  // LEA r, m
6334   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6335   ins_pipe(ialu_reg_reg_fat);
6336 %}
6337 
6338 instruct leaP32(rRegP dst, indOffset32 mem)
6339 %{
6340   match(Set dst mem);  // effective address of base + 32-bit offset; nothing is loaded from memory
6341 
6342   ins_cost(110);
6343   format %{ "leaq    $dst, $mem\t# ptr 32" %}
6344   opcode(0x8D);  // LEA r, m
6345   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6346   ins_pipe(ialu_reg_reg_fat);
6347 %}
6348 
6349 // instruct leaPIdx(rRegP dst, indIndex mem)
6350 // %{
6351 //   match(Set dst mem);
6352 
6353 //   ins_cost(110);
6354 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
6355 //   opcode(0x8D);
6356 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6357 //   ins_pipe(ialu_reg_reg_fat);
6358 // %}
6359 
6360 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
6361 %{
6362   match(Set dst mem);  // effective address of an index+offset operand; nothing is loaded from memory
6363 
6364   ins_cost(110);
6365   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
6366   opcode(0x8D);  // LEA r, m
6367   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6368   ins_pipe(ialu_reg_reg_fat);
6369 %}
6370 
6371 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
6372 %{
6373   match(Set dst mem);  // effective address of a scaled-index operand; nothing is loaded from memory
6374 
6375   ins_cost(110);
6376   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
6377   opcode(0x8D);  // LEA r, m
6378   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6379   ins_pipe(ialu_reg_reg_fat);
6380 %}
6381 
6382 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
6383 %{
6384   match(Set dst mem);  // effective address of a scaled-index+offset operand; nothing is loaded from memory
6385 
6386   ins_cost(110);
6387   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
6388   opcode(0x8D);  // LEA r, m
6389   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6390   ins_pipe(ialu_reg_reg_fat);
6391 %}
6392 
6393 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
6394 %{
6395   match(Set dst mem);  // effective address of an indPosIndexScaleOffset operand; nothing is loaded from memory
6396 
6397   ins_cost(110);
6398   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
6399   opcode(0x8D);  // LEA r, m
6400   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6401   ins_pipe(ialu_reg_reg_fat);
6402 %}
6403 
6404 // Load Effective Address which uses Narrow (32-bits) oop
6405 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
6406 %{
6407   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));  // only with compressed oops and a non-zero shift
6408   match(Set dst mem);
6409 
6410   ins_cost(110);
6411   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
6412   opcode(0x8D);  // LEA r, m
6413   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6414   ins_pipe(ialu_reg_reg_fat);
6415 %}
6416 
6417 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
6418 %{
6419   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaP8: only selectable when the narrow-oop shift is zero
6420   match(Set dst mem);
6421 
6422   ins_cost(110); // XXX
6423   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
6424   opcode(0x8D);  // LEA r, m
6425   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6426   ins_pipe(ialu_reg_reg_fat);
6427 %}
6428 
6429 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
6430 %{
6431   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaP32: only selectable when the narrow-oop shift is zero
6432   match(Set dst mem);
6433 
6434   ins_cost(110);
6435   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
6436   opcode(0x8D);  // LEA r, m
6437   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6438   ins_pipe(ialu_reg_reg_fat);
6439 %}
6440 
6441 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
6442 %{
6443   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaPIdxOff: only selectable when the narrow-oop shift is zero
6444   match(Set dst mem);
6445 
6446   ins_cost(110);
6447   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
6448   opcode(0x8D);  // LEA r, m
6449   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6450   ins_pipe(ialu_reg_reg_fat);
6451 %}
6452 
6453 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
6454 %{
6455   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaPIdxScale: only selectable when the narrow-oop shift is zero
6456   match(Set dst mem);
6457 
6458   ins_cost(110);
6459   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
6460   opcode(0x8D);  // LEA r, m
6461   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6462   ins_pipe(ialu_reg_reg_fat);
6463 %}
6464 
6465 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
6466 %{
6467   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaPIdxScaleOff: only selectable when the narrow-oop shift is zero
6468   match(Set dst mem);
6469 
6470   ins_cost(110);
6471   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
6472   opcode(0x8D);  // LEA r, m
6473   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6474   ins_pipe(ialu_reg_reg_fat);
6475 %}
6476 
6477 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
6478 %{
6479   predicate(Universe::narrow_oop_shift() == 0);  // Narrow variant of leaPPosIdxScaleOff: only selectable when the narrow-oop shift is zero
6480   match(Set dst mem);
6481 
6482   ins_cost(110);
6483   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
6484   opcode(0x8D);  // LEA r, m
6485   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
6486   ins_pipe(ialu_reg_reg_fat);
6487 %}
6488 
6489 instruct loadConI(rRegI dst, immI src)
6490 %{
6491   match(Set dst src);  // materialize an arbitrary int constant; no flags are killed
6492 
6493   format %{ "movl    $dst, $src\t# int" %}
6494   ins_encode(load_immI(dst, src));
6495   ins_pipe(ialu_reg_fat); // XXX
6496 %}
6497 
6498 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
6499 %{
6500   match(Set dst src);  // int constant zero
6501   effect(KILL cr);     // xor modifies the flags
6502 
6503   ins_cost(50);        // cheaper than the generic loadConI, so zero prefers this rule
6504   format %{ "xorl    $dst, $dst\t# int" %}
6505   opcode(0x33); /* + rd */
6506   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
6507   ins_pipe(ialu_reg);
6508 %}
6509 
6510 instruct loadConL(rRegL dst, immL src)
6511 %{
6512   match(Set dst src);  // materialize an arbitrary long constant
6513 
6514   ins_cost(150);       // most expensive long-constant rule; the narrower variants below cost 50-70
6515   format %{ "movq    $dst, $src\t# long" %}
6516   ins_encode(load_immL(dst, src));
6517   ins_pipe(ialu_reg);
6518 %}
6519 
6520 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
6521 %{
6522   match(Set dst src);  // long constant zero
6523   effect(KILL cr);     // xor modifies the flags
6524 
6525   ins_cost(50);
6526   format %{ "xorl    $dst, $dst\t# long" %}
6527   opcode(0x33); /* + rd */
6528   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // 32-bit xor is enough: it also zeroes the upper 32 bits
6529   ins_pipe(ialu_reg); // XXX
6530 %}
6531 
6532 instruct loadConUL32(rRegL dst, immUL32 src)
6533 %{
6534   match(Set dst src);  // long constant that fits in unsigned 32 bits
6535 
6536   ins_cost(60);        // cheaper than the generic loadConL (150)
6537   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
6538   ins_encode(load_immUL32(dst, src));
6539   ins_pipe(ialu_reg);
6540 %}
6541 
6542 instruct loadConL32(rRegL dst, immL32 src)
6543 %{
6544   match(Set dst src);  // long constant that fits in signed 32 bits
6545 
6546   ins_cost(70);        // between loadConUL32 (60) and the generic loadConL (150)
6547   format %{ "movq    $dst, $src\t# long (32-bit)" %}
6548   ins_encode(load_immL32(dst, src));
6549   ins_pipe(ialu_reg);
6550 %}
6551 
6552 instruct loadConP(rRegP dst, immP con) %{
6553   match(Set dst con);  // materialize an arbitrary pointer constant; no explicit ins_cost is given
6554 
6555   format %{ "movq    $dst, $con\t# ptr" %}
6556   ins_encode(load_immP(dst, con));
6557   ins_pipe(ialu_reg_fat); // XXX
6558 %}
6559 
6560 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
6561 %{
6562   match(Set dst src);  // NULL pointer constant
6563   effect(KILL cr);     // xor modifies the flags
6564 
6565   ins_cost(50);
6566   format %{ "xorl    $dst, $dst\t# ptr" %}
6567   opcode(0x33); /* + rd */
6568   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));  // 32-bit xor is enough: it also zeroes the upper 32 bits
6569   ins_pipe(ialu_reg);
6570 %}
6571 
6572 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
6573 %{
6574   match(Set dst src);  // pointer constant that is a positive 32-bit value
6575   effect(KILL cr);     // NOTE(review): the format shows a plain movl, which leaves flags alone; confirm against load_immP31 whether this KILL is needed
6576 
6577   ins_cost(60);
6578   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
6579   ins_encode(load_immP31(dst, src));
6580   ins_pipe(ialu_reg);
6581 %}
6582 
6583 instruct loadConF(regF dst, immF con) %{
6584   match(Set dst con);  // arbitrary float constant
6585   ins_cost(125);
6586   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
6587   ins_encode %{
6588     __ movflt($dst$$XMMRegister, $constantaddress($con));  // the value is read from the constant table, not encoded inline
6589   %}
6590   ins_pipe(pipe_slow);
6591 %}
6592 
6593 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
6594   match(Set dst src);  // compressed NULL constant
6595   effect(KILL cr);     // xor modifies the flags
6596   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
6597   ins_encode %{
6598     __ xorq($dst$$Register, $dst$$Register);  // dst ^ dst == 0; the format above now shows the operands actually emitted
6599   %}
6600   ins_pipe(ialu_reg);
6601 %}
6602 
6603 instruct loadConN(rRegN dst, immN src) %{
6604   match(Set dst src);  // non-NULL compressed pointer constant
6605 
6606   ins_cost(125);
6607   format %{ "movl    $dst, $src\t# compressed ptr" %}
6608   ins_encode %{
6609     address con = (address)$src$$constant;
6610     if (con != NULL) {
6611       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
6612     } else {
6613       ShouldNotReachHere();  // a NULL constant is not expected to reach this rule
6614     }
6615   %}
6616   ins_pipe(ialu_reg_fat); // XXX
6617 %}
6618 
6619 instruct loadConF0(regF dst, immF0 src)
6620 %{
6621   match(Set dst src);  // float constant 0.0
6622   ins_cost(100);       // cheaper than the constant-table load in loadConF (125)
6623 
6624   format %{ "xorps   $dst, $dst\t# float 0.0" %}
6625   opcode(0x0F, 0x57);  // XORPS xmm, xmm; no rFlagsReg operand is declared or killed
6626   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
6627   ins_pipe(pipe_slow);
6628 %}
6629 
6630 // Use the same format since predicate() can not be used here.
6631 instruct loadConD(regD dst, immD con) %{
6632   match(Set dst con);  // arbitrary double constant
6633   ins_cost(125);
6634   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
6635   ins_encode %{
6636     __ movdbl($dst$$XMMRegister, $constantaddress($con));  // movdbl picks the instruction; the format always prints movsd
6637   %}
6638   ins_pipe(pipe_slow);
6639 %}
6640 
6641 instruct loadConD0(regD dst, immD0 src)
6642 %{
6643   match(Set dst src);  // double constant 0.0
6644   ins_cost(100);       // cheaper than the constant-table load in loadConD (125)
6645 
6646   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
6647   opcode(0x66, 0x0F, 0x57);  // XORPD xmm, xmm; the 66 prefix is emitted before the REX byte
6648   ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
6649   ins_pipe(pipe_slow);
6650 %}
6651 
6652 instruct loadSSI(rRegI dst, stackSlotI src)
6653 %{
6654   match(Set dst src);  // int stack slot -> register
6655 
6656   ins_cost(125);
6657   format %{ "movl    $dst, $src\t# int stk" %}
6658   opcode(0x8B);  // MOV r, r/m
6659   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
6660   ins_pipe(ialu_reg_mem);
6661 %}
6662 
6663 instruct loadSSL(rRegL dst, stackSlotL src)
6664 %{
6665   match(Set dst src);  // long stack slot -> register
6666 
6667   ins_cost(125);
6668   format %{ "movq    $dst, $src\t# long stk" %}
6669   opcode(0x8B);  // MOV r, r/m; the _wide REX encoding below is used for the 64-bit form
6670   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6671   ins_pipe(ialu_reg_mem);
6672 %}
6673 
6674 instruct loadSSP(rRegP dst, stackSlotP src)
6675 %{
6676   match(Set dst src);  // pointer stack slot -> register
6677 
6678   ins_cost(125);
6679   format %{ "movq    $dst, $src\t# ptr stk" %}
6680   opcode(0x8B);  // MOV r, r/m; the _wide REX encoding below is used for the 64-bit form
6681   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
6682   ins_pipe(ialu_reg_mem);
6683 %}
6684 
6685 instruct loadSSF(regF dst, stackSlotF src)
6686 %{
6687   match(Set dst src);  // float stack slot -> XMM register
6688 
6689   ins_cost(125);
6690   format %{ "movss   $dst, $src\t# float stk" %}
6691   opcode(0xF3, 0x0F, 0x10);  // MOVSS xmm, m32 (same opcode bytes as loadF)
6692   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
6693   ins_pipe(pipe_slow); // XXX
6694 %}
6695 
6696 // Use the same format since predicate() can not be used here.
6697 instruct loadSSD(regD dst, stackSlotD src)
6698 %{
6699   match(Set dst src);  // double stack slot -> XMM register
6700 
6701   ins_cost(125);
6702   format %{ "movsd   $dst, $src\t# double stk" %}
6703   ins_encode  %{
6704     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // slot is addressed as rsp + displacement; movdbl picks the instruction
6705   %}
6706   ins_pipe(pipe_slow); // XXX
6707 %}
6708 
6709 // Prefetch instructions.
6710 // Must be safe to execute with invalid address (cannot fault).
6711 
// PrefetchRead variant selected when ReadPrefetchInstr==3.
// Emits opcode bytes 0F 0D with the ModRM opcode-extension field set to 0.
6712 instruct prefetchr( memory mem ) %{
6713   predicate(ReadPrefetchInstr==3);
6714   match(PrefetchRead mem);
6715   ins_cost(125);
6716
6717   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
6718   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
6719   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6720   ins_pipe(ialu_mem);
6721 %}
6722 
// PrefetchRead variant selected when ReadPrefetchInstr==0.
// Emits opcode bytes 0F 18 with opcode extension /0 (PREFETCHNTA per format).
6723 instruct prefetchrNTA( memory mem ) %{
6724   predicate(ReadPrefetchInstr==0);
6725   match(PrefetchRead mem);
6726   ins_cost(125);
6727
6728   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
6729   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6730   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6731   ins_pipe(ialu_mem);
6732 %}
6733 
// PrefetchRead variant selected when ReadPrefetchInstr==1.
// Emits opcode bytes 0F 18 with opcode extension /1 (PREFETCHT0 per format).
6734 instruct prefetchrT0( memory mem ) %{
6735   predicate(ReadPrefetchInstr==1);
6736   match(PrefetchRead mem);
6737   ins_cost(125);
6738
6739   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
6740   opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
6741   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6742   ins_pipe(ialu_mem);
6743 %}
6744 
// PrefetchRead variant selected when ReadPrefetchInstr==2.
// Emits opcode bytes 0F 18 with opcode extension /3 (PREFETCHT2 per format).
6745 instruct prefetchrT2( memory mem ) %{
6746   predicate(ReadPrefetchInstr==2);
6747   match(PrefetchRead mem);
6748   ins_cost(125);
6749
6750   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
6751   opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
6752   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6753   ins_pipe(ialu_mem);
6754 %}
6755 
// PrefetchWrite variant selected when AllocatePrefetchInstr==3.
// Emits opcode bytes 0F 0D with opcode extension /1 (PREFETCHW per format).
6756 instruct prefetchw( memory mem ) %{
6757   predicate(AllocatePrefetchInstr==3);
6758   match(PrefetchWrite mem);
6759   ins_cost(125);
6760
6761   format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
6762   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
6763   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6764   ins_pipe(ialu_mem);
6765 %}
6766 
// PrefetchWrite variant selected when AllocatePrefetchInstr==0.
// Same encoding as prefetchrNTA (0F 18 /0) but matched on PrefetchWrite.
6767 instruct prefetchwNTA( memory mem ) %{
6768   predicate(AllocatePrefetchInstr==0);
6769   match(PrefetchWrite mem);
6770   ins_cost(125);
6771
6772   format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
6773   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
6774   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
6775   ins_pipe(ialu_mem);
6776 %}
6777 
// PrefetchWrite variant selected when AllocatePrefetchInstr==1.
// Same encoding as prefetchrT0 (0F 18 /1) but matched on PrefetchWrite.
6778 instruct prefetchwT0( memory mem ) %{
6779   predicate(AllocatePrefetchInstr==1);
6780   match(PrefetchWrite mem);
6781   ins_cost(125);
6782
6783   format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
6784   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
6785   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
6786   ins_pipe(ialu_mem);
6787 %}
6788 
// PrefetchWrite variant selected when AllocatePrefetchInstr==2.
// Same encoding as prefetchrT2 (0F 18 /3) but matched on PrefetchWrite.
6789 instruct prefetchwT2( memory mem ) %{
6790   predicate(AllocatePrefetchInstr==2);
6791   match(PrefetchWrite mem);
6792   ins_cost(125);
6793
6794   format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
6795   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
6796   ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
6797   ins_pipe(ialu_mem);
6798 %}
6799 
6800 //----------Store Instructions-------------------------------------------------
6801 
6802 // Store Byte
// Stores the low byte of an int register to memory (movb, opcode 0x88).
// NOTE(review): uses REX_breg_mem rather than REX_reg_mem; the enc class is
// defined elsewhere, presumably to handle byte-register encodings -- confirm.
6803 instruct storeB(memory mem, rRegI src)
6804 %{
6805   match(Set mem (StoreB mem src));
6806
6807   ins_cost(125); // XXX
6808   format %{ "movb    $mem, $src\t# byte" %}
6809   opcode(0x88);
6810   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
6811   ins_pipe(ialu_mem_reg);
6812 %}
6813 
6814 // Store Char/Short
// Stores the low 16 bits of an int register to memory (movw per format).
// Uses the same opcode as storeI (0x89) preceded by the SizePrefix enc class.
6815 instruct storeC(memory mem, rRegI src)
6816 %{
6817   match(Set mem (StoreC mem src));
6818
6819   ins_cost(125); // XXX
6820   format %{ "movw    $mem, $src\t# char/short" %}
6821   opcode(0x89);
6822   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6823   ins_pipe(ialu_mem_reg);
6824 %}
6825 
6826 // Store Integer
// Stores an int register to memory (movl per format, opcode 0x89).
6827 instruct storeI(memory mem, rRegI src)
6828 %{
6829   match(Set mem (StoreI mem src));
6830
6831   ins_cost(125); // XXX
6832   format %{ "movl    $mem, $src\t# int" %}
6833   opcode(0x89);
6834   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
6835   ins_pipe(ialu_mem_reg);
6836 %}
6837 
6838 // Store Long
// Stores a long register to memory (movq per format). Same opcode as storeI
// (0x89) but encoded through the REX_reg_mem_wide enc class.
6839 instruct storeL(memory mem, rRegL src)
6840 %{
6841   match(Set mem (StoreL mem src));
6842
6843   ins_cost(125); // XXX
6844   format %{ "movq    $mem, $src\t# long" %}
6845   opcode(0x89);
6846   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6847   ins_pipe(ialu_mem_reg); // XXX
6848 %}
6849 
6850 // Store Pointer
// Stores a pointer register to memory (movq per format). The source operand
// class is any_RegP, unlike the rRegP class used by the stack-slot moves.
6851 instruct storeP(memory mem, any_RegP src)
6852 %{
6853   match(Set mem (StoreP mem src));
6854
6855   ins_cost(125); // XXX
6856   format %{ "movq    $mem, $src\t# ptr" %}
6857   opcode(0x89);
6858   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6859   ins_pipe(ialu_mem_reg);
6860 %}
6861 
// Store a NULL pointer by storing r12 with a 64-bit movq. Only selected when
// compressed oops are in use and the narrow-oop base is NULL; the format
// string records the assumption that R12 (heap base) is then zero.
6862 instruct storeImmP0(memory mem, immP0 zero)
6863 %{
6864   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6865   match(Set mem (StoreP mem zero));
6866
6867   ins_cost(125); // XXX
6868   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6869   ins_encode %{
6870     __ movq($mem$$Address, r12);
6871   %}
6872   ins_pipe(ialu_mem_reg);
6873 %}
6874 
6875 // Store NULL Pointer, mark word, or other simple pointer constant.
// The constant is restricted to the immP31 operand class and is emitted as a
// 32-bit immediate (Con32) after opcode C7 /0 with the wide REX enc class.
// Cost 150 makes this more expensive than the R12-based storeImmP0 (125).
6876 instruct storeImmP(memory mem, immP31 src)
6877 %{
6878   match(Set mem (StoreP mem src));
6879
6880   ins_cost(150); // XXX
6881   format %{ "movq    $mem, $src\t# ptr" %}
6882   opcode(0xC7); /* C7 /0 */
6883   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6884   ins_pipe(ialu_mem_imm);
6885 %}
6886 
6887 // Store Compressed Pointer
// Stores a narrow-oop register (rRegN) to memory with a 32-bit movl.
6888 instruct storeN(memory mem, rRegN src)
6889 %{
6890   match(Set mem (StoreN mem src));
6891
6892   ins_cost(125); // XXX
6893   format %{ "movl    $mem, $src\t# compressed ptr" %}
6894   ins_encode %{
6895     __ movl($mem$$Address, $src$$Register);
6896   %}
6897   ins_pipe(ialu_mem_reg);
6898 %}
6899 
// Store a NULL compressed pointer (immN0) by storing r12 with a 32-bit movl.
// Only selected when the narrow-oop base is NULL; the format string records
// the assumption that R12 (heap base) is then zero. Unlike the other R12
// zero-store rules, the predicate does not test UseCompressedOops.
6900 instruct storeImmN0(memory mem, immN0 zero)
6901 %{
6902   predicate(Universe::narrow_oop_base() == NULL);
6903   match(Set mem (StoreN mem zero));
6904
6905   ins_cost(125); // XXX
6906   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6907   ins_encode %{
6908     __ movl($mem$$Address, r12);
6909   %}
6910   ins_pipe(ialu_mem_reg);
6911 %}
6912 
// Store a compressed-pointer constant (immN) to memory.
// A NULL constant is written as a plain 32-bit zero; any other constant is
// handed to set_narrow_oop as a jobject.
6913 instruct storeImmN(memory mem, immN src)
6914 %{
6915   match(Set mem (StoreN mem src));
6916
6917   ins_cost(150); // XXX
6918   format %{ "movl    $mem, $src\t# compressed ptr" %}
6919   ins_encode %{
6920     address con = (address)$src$$constant;
6921     if (con == NULL) {
6922       __ movl($mem$$Address, (int32_t)0);
6923     } else {
6924       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6925     }
6926   %}
6927   ins_pipe(ialu_mem_imm);
6928 %}
6929 
6930 // Store Integer Immediate
// Zero case: stores r12 with a 32-bit movl instead of an immediate. Only
// selected when compressed oops are in use and the narrow-oop base is NULL
// (format string: R12_heapbase==0).
6931 instruct storeImmI0(memory mem, immI0 zero)
6932 %{
6933   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6934   match(Set mem (StoreI mem zero));
6935
6936   ins_cost(125); // XXX
6937   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6938   ins_encode %{
6939     __ movl($mem$$Address, r12);
6940   %}
6941   ins_pipe(ialu_mem_reg);
6942 %}
6943 
// Store an arbitrary int immediate to memory: opcode C7 /0 followed by the
// 32-bit constant (Con32). Cost 150, above the R12-based storeImmI0 (125).
6944 instruct storeImmI(memory mem, immI src)
6945 %{
6946   match(Set mem (StoreI mem src));
6947
6948   ins_cost(150);
6949   format %{ "movl    $mem, $src\t# int" %}
6950   opcode(0xC7); /* C7 /0 */
6951   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6952   ins_pipe(ialu_mem_imm);
6953 %}
6954 
6955 // Store Long Immediate
// Zero case: stores r12 with a 64-bit movq instead of an immediate. Only
// selected when compressed oops are in use and the narrow-oop base is NULL
// (format string: R12_heapbase==0).
6956 instruct storeImmL0(memory mem, immL0 zero)
6957 %{
6958   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6959   match(Set mem (StoreL mem zero));
6960
6961   ins_cost(125); // XXX
6962   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6963   ins_encode %{
6964     __ movq($mem$$Address, r12);
6965   %}
6966   ins_pipe(ialu_mem_reg);
6967 %}
6968 
// Store a long immediate restricted to the immL32 operand class: opcode
// C7 /0 with the wide REX enc class, followed by a 32-bit constant (Con32).
6969 instruct storeImmL(memory mem, immL32 src)
6970 %{
6971   match(Set mem (StoreL mem src));
6972
6973   ins_cost(150);
6974   format %{ "movq    $mem, $src\t# long" %}
6975   opcode(0xC7); /* C7 /0 */
6976   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6977   ins_pipe(ialu_mem_imm);
6978 %}
6979 
6980 // Store Short/Char Immediate
// Zero case: stores r12 with a 16-bit movw instead of an immediate. Only
// selected when compressed oops are in use and the narrow-oop base is NULL
// (format string: R12_heapbase==0).
6981 instruct storeImmC0(memory mem, immI0 zero)
6982 %{
6983   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6984   match(Set mem (StoreC mem zero));
6985
6986   ins_cost(125); // XXX
6987   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6988   ins_encode %{
6989     __ movw($mem$$Address, r12);
6990   %}
6991   ins_pipe(ialu_mem_reg);
6992 %}
6993 
// Store a 16-bit immediate (immI16) for StoreC. Only available when the
// UseStoreImmI16 flag is set. Encoded as SizePrefix + C7 /0 + Con16.
6994 instruct storeImmI16(memory mem, immI16 src)
6995 %{
6996   predicate(UseStoreImmI16);
6997   match(Set mem (StoreC mem src));
6998
6999   ins_cost(150);
7000   format %{ "movw    $mem, $src\t# short/char" %}
7001   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7002   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
7003   ins_pipe(ialu_mem_imm);
7004 %}
7005 
7006 // Store Byte Immediate
// Zero case: stores the low byte of r12 with movb instead of an immediate.
// Only selected when compressed oops are in use and the narrow-oop base is
// NULL (format string: R12_heapbase==0).
// The format annotation says "byte" (it previously said "short/char", copied
// from storeImmC0) so debug listings agree with storeB and storeImmB.
7007 instruct storeImmB0(memory mem, immI0 zero)
7008 %{
7009   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7010   match(Set mem (StoreB mem zero));
7011
7012   ins_cost(125); // XXX
7013   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
7014   ins_encode %{
7015     __ movb($mem$$Address, r12);
7016   %}
7017   ins_pipe(ialu_mem_reg);
7018 %}
7019 
// Store a byte immediate (immI8) to memory: opcode C6 /0 followed by the
// constant emitted through the Con8or32 enc class.
7020 instruct storeImmB(memory mem, immI8 src)
7021 %{
7022   match(Set mem (StoreB mem src));
7023
7024   ins_cost(150); // XXX
7025   format %{ "movb    $mem, $src\t# byte" %}
7026   opcode(0xC6); /* C6 /0 */
7027   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7028   ins_pipe(ialu_mem_imm);
7029 %}
7030 
7031 // Store Aligned Packed Byte XMM register to memory
// Matches Store8B; the value lives in a regD operand and is written with the
// movq_st enc class (MOVQ per format).
7032 instruct storeA8B(memory mem, regD src) %{
7033   match(Set mem (Store8B mem src));
7034   ins_cost(145);
7035   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7036   ins_encode( movq_st(mem, src));
7037   ins_pipe( pipe_slow );
7038 %}
7039 
7040 // Store Aligned Packed Char/Short XMM register to memory
// Matches Store4C; encoded identically to storeA8B via movq_st.
7041 instruct storeA4C(memory mem, regD src) %{
7042   match(Set mem (Store4C mem src));
7043   ins_cost(145);
7044   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7045   ins_encode( movq_st(mem, src));
7046   ins_pipe( pipe_slow );
7047 %}
7048 
7049 // Store Aligned Packed Integer XMM register to memory
// Matches Store2I; encoded identically to storeA8B via movq_st.
7050 instruct storeA2I(memory mem, regD src) %{
7051   match(Set mem (Store2I mem src));
7052   ins_cost(145);
7053   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7054   ins_encode( movq_st(mem, src));
7055   ins_pipe( pipe_slow );
7056 %}
7057 
7058 // Store CMS card-mark Immediate
// Register form: writes the zero card-mark byte by storing the low byte of
// r12. Only selected when compressed oops are in use and the narrow-oop base
// is NULL (format string: R12_heapbase==0).
7059 instruct storeImmCM0_reg(memory mem, immI0 zero)
7060 %{
7061   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7062   match(Set mem (StoreCM mem zero));
7063
7064   ins_cost(125); // XXX
7065   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
7066   ins_encode %{
7067     __ movb($mem$$Address, r12);
7068   %}
7069   ins_pipe(ialu_mem_reg);
7070 %}
7071 
// Immediate form of the zero card-mark store (StoreCM with immI0): opcode
// C6 /0 plus the constant via Con8or32. It has no predicate, so it is always
// available; cost 150 vs. 125 for the R12-based storeImmCM0_reg.
7072 instruct storeImmCM0(memory mem, immI0 src)
7073 %{
7074   match(Set mem (StoreCM mem src));
7075
7076   ins_cost(150); // XXX
7077   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
7078   opcode(0xC6); /* C6 /0 */
7079   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
7080   ins_pipe(ialu_mem_imm);
7081 %}
7082 
7083 // Store Aligned Packed Single Float XMM register to memory
// Matches Store2F; encoded identically to the other packed stores via movq_st.
7084 instruct storeA2F(memory mem, regD src) %{
7085   match(Set mem (Store2F mem src));
7086   ins_cost(145);
7087   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7088   ins_encode( movq_st(mem, src));
7089   ins_pipe( pipe_slow );
7090 %}
7091 
7092 // Store Float
// Stores a float register to memory (movss per format, opcode bytes F3 0F 11).
7093 instruct storeF(memory mem, regF src)
7094 %{
7095   match(Set mem (StoreF mem src));
7096
7097   ins_cost(95); // XXX
7098   format %{ "movss   $mem, $src\t# float" %}
7099   opcode(0xF3, 0x0F, 0x11);
7100   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7101   ins_pipe(pipe_slow); // XXX
7102 %}
7103 
7104 // Store immediate Float value (it is faster than store from XMM register)
// Zero case (immF0): stores r12 with a 32-bit movl. Only selected when
// compressed oops are in use and the narrow-oop base is NULL (format string:
// R12_heapbase==0). Cost 25 ranks it below storeF_imm (50) and storeF (95).
7105 instruct storeF0(memory mem, immF0 zero)
7106 %{
7107   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7108   match(Set mem (StoreF mem zero));
7109
7110   ins_cost(25); // XXX
7111   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
7112   ins_encode %{
7113     __ movl($mem$$Address, r12);
7114   %}
7115   ins_pipe(ialu_mem_reg);
7116 %}
7117 
// Store a float constant as an integer immediate: opcode C7 /0 followed by
// the constant emitted through Con32F_as_bits (movl per format).
7118 instruct storeF_imm(memory mem, immF src)
7119 %{
7120   match(Set mem (StoreF mem src));
7121
7122   ins_cost(50);
7123   format %{ "movl    $mem, $src\t# float" %}
7124   opcode(0xC7); /* C7 /0 */
7125   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7126   ins_pipe(ialu_mem_imm);
7127 %}
7128 
7129 // Store Double
// Stores a double register to memory (movsd per format, opcode bytes F2 0F 11).
7130 instruct storeD(memory mem, regD src)
7131 %{
7132   match(Set mem (StoreD mem src));
7133
7134   ins_cost(95); // XXX
7135   format %{ "movsd   $mem, $src\t# double" %}
7136   opcode(0xF2, 0x0F, 0x11);
7137   ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
7138   ins_pipe(pipe_slow); // XXX
7139 %}
7140 
7141 // Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form: opcode C7 /0 with the wide REX enc class and the constant
// emitted through Con32F_as_bits. Its predicate is the exact complement of
// storeD0's, so this rule applies only when the R12-based form does not.
7142 instruct storeD0_imm(memory mem, immD0 src)
7143 %{
7144   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
7145   match(Set mem (StoreD mem src));
7146
7147   ins_cost(50);
7148   format %{ "movq    $mem, $src\t# double 0." %}
7149   opcode(0xC7); /* C7 /0 */
7150   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
7151   ins_pipe(ialu_mem_imm);
7152 %}
7153 
// Store double 0.0 by storing r12 with a 64-bit movq. Only selected when
// compressed oops are in use and the narrow-oop base is NULL (format string:
// R12_heapbase==0); storeD0_imm covers the complementary case.
7154 instruct storeD0(memory mem, immD0 zero)
7155 %{
7156   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
7157   match(Set mem (StoreD mem zero));
7158
7159   ins_cost(25); // XXX
7160   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
7161   ins_encode %{
7162     __ movq($mem$$Address, r12);
7163   %}
7164   ins_pipe(ialu_mem_reg);
7165 %}
7166 
// Store an int register into a stack slot (stackSlotI): movl, opcode 0x89.
7167 instruct storeSSI(stackSlotI dst, rRegI src)
7168 %{
7169   match(Set dst src);
7170
7171   ins_cost(100);
7172   format %{ "movl    $dst, $src\t# int stk" %}
7173   opcode(0x89);
7174   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
7175   ins_pipe( ialu_mem_reg );
7176 %}
7177 
// Store a long register into a stack slot (stackSlotL): movq per format,
// opcode 0x89 with the wide REX enc class.
7178 instruct storeSSL(stackSlotL dst, rRegL src)
7179 %{
7180   match(Set dst src);
7181
7182   ins_cost(100);
7183   format %{ "movq    $dst, $src\t# long stk" %}
7184   opcode(0x89);
7185   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7186   ins_pipe(ialu_mem_reg);
7187 %}
7188 
// Store a pointer register into a stack slot (stackSlotP): movq per format,
// encoded exactly like storeSSL.
7189 instruct storeSSP(stackSlotP dst, rRegP src)
7190 %{
7191   match(Set dst src);
7192
7193   ins_cost(100);
7194   format %{ "movq    $dst, $src\t# ptr stk" %}
7195   opcode(0x89);
7196   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
7197   ins_pipe(ialu_mem_reg);
7198 %}
7199 
// Store a float register into a stack slot (stackSlotF): movss per format,
// opcode bytes F3 0F 11 (same as storeF).
7200 instruct storeSSF(stackSlotF dst, regF src)
7201 %{
7202   match(Set dst src);
7203
7204   ins_cost(95); // XXX
7205   format %{ "movss   $dst, $src\t# float stk" %}
7206   opcode(0xF3, 0x0F, 0x11);
7207   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7208   ins_pipe(pipe_slow); // XXX
7209 %}
7210 
// Store a double register into a stack slot (stackSlotD): movsd per format,
// opcode bytes F2 0F 11 (same as storeD).
7211 instruct storeSSD(stackSlotD dst, regD src)
7212 %{
7213   match(Set dst src);
7214
7215   ins_cost(95); // XXX
7216   format %{ "movsd   $dst, $src\t# double stk" %}
7217   opcode(0xF2, 0x0F, 0x11);
7218   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
7219   ins_pipe(pipe_slow); // XXX
7220 %}
7221 
7222 //----------BSWAP Instructions-------------------------------------------------
// Reverse the bytes of an int in place (ReverseBytesI with dst as both input
// and output): bswapl per format, opcode 0F C8 combined with the register
// by the opc2_reg enc class.
7223 instruct bytes_reverse_int(rRegI dst) %{
7224   match(Set dst (ReverseBytesI dst));
7225
7226   format %{ "bswapl  $dst" %}
7227   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
7228   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
7229   ins_pipe( ialu_reg );
7230 %}
7231 
// Reverse the bytes of a long in place (ReverseBytesL): bswapq per format.
// Same opcode as bytes_reverse_int but encoded with REX_reg_wide.
7232 instruct bytes_reverse_long(rRegL dst) %{
7233   match(Set dst (ReverseBytesL dst));
7234
7235   format %{ "bswapq  $dst" %}
7236
7237   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
7238   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
7239   ins_pipe( ialu_reg);
7240 %}
7241 
// Reverse the bytes of an unsigned 16-bit value in place (ReverseBytesUS).
// Swaps all four bytes of the 32-bit register, then shifts right logically
// by 16 so the swapped halfword ends up in the low bits, zero-extended.
7242 instruct bytes_reverse_unsigned_short(rRegI dst) %{
7243   match(Set dst (ReverseBytesUS dst));
7244
7245   format %{ "bswapl  $dst\n\t" 
7246             "shrl    $dst,16\n\t" %}
7247   ins_encode %{
7248     __ bswapl($dst$$Register);
7249     __ shrl($dst$$Register, 16); 
7250   %}
7251   ins_pipe( ialu_reg );
7252 %}
7253 
// Reverse the bytes of a signed 16-bit value in place (ReverseBytesS).
// Swaps all four bytes of the 32-bit register, then shifts right
// arithmetically by 16 so the swapped halfword ends up in the low bits,
// sign-extended. The format string prints "sarl" to match the emitted
// instruction and the "shrl" spelling used by the unsigned variant.
7254 instruct bytes_reverse_short(rRegI dst) %{
7255   match(Set dst (ReverseBytesS dst));
7256
7257   format %{ "bswapl  $dst\n\t" 
7258             "sarl    $dst,16\n\t" %}
7259   ins_encode %{
7260     __ bswapl($dst$$Register);
7261     __ sarl($dst$$Register, 16); 
7262   %}
7263   ins_pipe( ialu_reg );
7264 %}
7265 
7266 //---------- Zeros Count Instructions ------------------------------------------
7267 
// Count leading zeros of an int with a single lzcntl. Only selected when
// UseCountLeadingZerosInstruction is set; the flags register is killed.
7268 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7269   predicate(UseCountLeadingZerosInstruction);
7270   match(Set dst (CountLeadingZerosI src));
7271   effect(KILL cr);
7272
7273   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
7274   ins_encode %{
7275     __ lzcntl($dst$$Register, $src$$Register);
7276   %}
7277   ins_pipe(ialu_reg);
7278 %}
7279 
// Count leading zeros of an int without lzcnt (selected when
// UseCountLeadingZerosInstruction is off). bsrl yields the index of the
// highest set bit; when the notZero branch is not taken (zero input) the
// index is forced to -1. The result is then (BitsPerInt - 1) - index,
// computed as negl followed by addl, which gives 32 for a zero input.
7280 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
7281   predicate(!UseCountLeadingZerosInstruction);
7282   match(Set dst (CountLeadingZerosI src));
7283   effect(KILL cr);
7284
7285   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
7286             "jnz     skip\n\t"
7287             "movl    $dst, -1\n"
7288       "skip:\n\t"
7289             "negl    $dst\n\t"
7290             "addl    $dst, 31" %}
7291   ins_encode %{
7292     Register Rdst = $dst$$Register;
7293     Register Rsrc = $src$$Register;
7294     Label skip;
7295     __ bsrl(Rdst, Rsrc);
7296     __ jccb(Assembler::notZero, skip);
7297     __ movl(Rdst, -1);
7298     __ bind(skip);
7299     __ negl(Rdst);
7300     __ addl(Rdst, BitsPerInt - 1);
7301   %}
7302   ins_pipe(ialu_reg);
7303 %}
7304 
// Count leading zeros of a long with a single lzcntq; the result is an int
// register. Only selected when UseCountLeadingZerosInstruction is set.
7305 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7306   predicate(UseCountLeadingZerosInstruction);
7307   match(Set dst (CountLeadingZerosL src));
7308   effect(KILL cr);
7309
7310   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
7311   ins_encode %{
7312     __ lzcntq($dst$$Register, $src$$Register);
7313   %}
7314   ins_pipe(ialu_reg);
7315 %}
7316 
// Count leading zeros of a long without lzcnt (selected when
// UseCountLeadingZerosInstruction is off). Same scheme as the int variant:
// bsrq finds the highest set bit, a zero input forces the index to -1, and
// the result is (BitsPerLong - 1) - index, i.e. 64 for a zero input.
7317 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
7318   predicate(!UseCountLeadingZerosInstruction);
7319   match(Set dst (CountLeadingZerosL src));
7320   effect(KILL cr);
7321
7322   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
7323             "jnz     skip\n\t"
7324             "movl    $dst, -1\n"
7325       "skip:\n\t"
7326             "negl    $dst\n\t"
7327             "addl    $dst, 63" %}
7328   ins_encode %{
7329     Register Rdst = $dst$$Register;
7330     Register Rsrc = $src$$Register;
7331     Label skip;
7332     __ bsrq(Rdst, Rsrc);
7333     __ jccb(Assembler::notZero, skip);
7334     __ movl(Rdst, -1);
7335     __ bind(skip);
7336     __ negl(Rdst);
7337     __ addl(Rdst, BitsPerLong - 1);
7338   %}
7339   ins_pipe(ialu_reg);
7340 %}
7341 
// Count trailing zeros of an int: bsfl yields the index of the lowest set
// bit; when the notZero branch is not taken (zero input) the result is set
// to BitsPerInt (32 per the format string). Kills the flags register.
7342 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
7343   match(Set dst (CountTrailingZerosI src));
7344   effect(KILL cr);
7345
7346   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
7347             "jnz     done\n\t"
7348             "movl    $dst, 32\n"
7349       "done:" %}
7350   ins_encode %{
7351     Register Rdst = $dst$$Register;
7352     Label done;
7353     __ bsfl(Rdst, $src$$Register);
7354     __ jccb(Assembler::notZero, done);
7355     __ movl(Rdst, BitsPerInt);
7356     __ bind(done);
7357   %}
7358   ins_pipe(ialu_reg);
7359 %}
7360 
// Count trailing zeros of a long: bsfq yields the index of the lowest set
// bit; when the notZero branch is not taken (zero input) the result is set
// to BitsPerLong (64 per the format string). The result is an int register.
7361 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
7362   match(Set dst (CountTrailingZerosL src));
7363   effect(KILL cr);
7364
7365   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
7366             "jnz     done\n\t"
7367             "movl    $dst, 64\n"
7368       "done:" %}
7369   ins_encode %{
7370     Register Rdst = $dst$$Register;
7371     Label done;
7372     __ bsfq(Rdst, $src$$Register);
7373     __ jccb(Assembler::notZero, done);
7374     __ movl(Rdst, BitsPerLong);
7375     __ bind(done);
7376   %}
7377   ins_pipe(ialu_reg);
7378 %}
7379 
7380 
7381 //---------- Population Count Instructions -------------------------------------
7382 
// Population count of an int register. Only selected when
// UsePopCountInstruction is set. Emits popcntl (the format string prints
// the unsuffixed "popcnt").
7383 instruct popCountI(rRegI dst, rRegI src) %{
7384   predicate(UsePopCountInstruction);
7385   match(Set dst (PopCountI src));
7386
7387   format %{ "popcnt  $dst, $src" %}
7388   ins_encode %{
7389     __ popcntl($dst$$Register, $src$$Register);
7390   %}
7391   ins_pipe(ialu_reg);
7392 %}
7393 
// Population count of an int loaded from memory: folds the LoadI into the
// popcntl memory operand. Only selected when UsePopCountInstruction is set.
7394 instruct popCountI_mem(rRegI dst, memory mem) %{
7395   predicate(UsePopCountInstruction);
7396   match(Set dst (PopCountI (LoadI mem)));
7397
7398   format %{ "popcnt  $dst, $mem" %}
7399   ins_encode %{
7400     __ popcntl($dst$$Register, $mem$$Address);
7401   %}
7402   ins_pipe(ialu_reg);
7403 %}
7404 
7405 // Note: Long.bitCount(long) returns an int.
// Population count of a long register via popcntq; the destination is an
// int register. Only selected when UsePopCountInstruction is set.
7406 instruct popCountL(rRegI dst, rRegL src) %{
7407   predicate(UsePopCountInstruction);
7408   match(Set dst (PopCountL src));
7409
7410   format %{ "popcnt  $dst, $src" %}
7411   ins_encode %{
7412     __ popcntq($dst$$Register, $src$$Register);
7413   %}
7414   ins_pipe(ialu_reg);
7415 %}
7416 
7417 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: folds the LoadL into the
// popcntq memory operand. Only selected when UsePopCountInstruction is set.
7418 instruct popCountL_mem(rRegI dst, memory mem) %{
7419   predicate(UsePopCountInstruction);
7420   match(Set dst (PopCountL (LoadL mem)));
7421
7422   format %{ "popcnt  $dst, $mem" %}
7423   ins_encode %{
7424     __ popcntq($dst$$Register, $mem$$Address);
7425   %}
7426   ins_pipe(ialu_reg);
7427 %}
7428 
7429 
7430 //----------MemBar Instructions-----------------------------------------------
7431 // Memory barrier flavors
7432 
// Acquire barrier: matches MemBarAcquire and emits no code (size 0, empty
// ins_encode, zero cost).
7433 instruct membar_acquire()
7434 %{
7435   match(MemBarAcquire);
7436   ins_cost(0);
7437
7438   size(0);
7439   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7440   ins_encode();
7441   ins_pipe(empty);
7442 %}
7443 
// Acquire barrier for the case where Matcher::prior_fast_lock(n) holds.
// Also emits no code; the format string explains that the preceding FastLock
// already contains a CMPXCHG.
7444 instruct membar_acquire_lock()
7445 %{
7446   match(MemBarAcquire);
7447   predicate(Matcher::prior_fast_lock(n));
7448   ins_cost(0);
7449
7450   size(0);
7451   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7452   ins_encode();
7453   ins_pipe(empty);
7454 %}
7455 
// Release barrier: matches MemBarRelease and emits no code (size 0, empty
// ins_encode, zero cost).
7456 instruct membar_release()
7457 %{
7458   match(MemBarRelease);
7459   ins_cost(0);
7460
7461   size(0);
7462   format %{ "MEMBAR-release ! (empty encoding)" %}
7463   ins_encode();
7464   ins_pipe(empty);
7465 %}
7466 
// Release barrier for the case where Matcher::post_fast_unlock(n) holds.
// Also emits no code; the format string explains that a FastUnlock follows.
7467 instruct membar_release_lock()
7468 %{
7469   match(MemBarRelease);
7470   predicate(Matcher::post_fast_unlock(n));
7471   ins_cost(0);
7472
7473   size(0);
7474   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7475   ins_encode();
7476   ins_pipe(empty);
7477 %}
7478 
// Volatile (StoreLoad) barrier: emits membar(Assembler::StoreLoad) and kills
// the flags register. The format template prints a locked addl on the top of
// the stack when os::is_MP() is true and an empty encoding otherwise.
// NOTE(review): what membar() actually emits is defined in the assembler,
// not here -- the format text is assumed to mirror it.
7479 instruct membar_volatile(rFlagsReg cr) %{
7480   match(MemBarVolatile);
7481   effect(KILL cr);
7482   ins_cost(400);
7483
7484   format %{ 
7485     $$template
7486     if (os::is_MP()) {
7487       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
7488     } else {
7489       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7490     }
7491   %}
7492   ins_encode %{
7493     __ membar(Assembler::StoreLoad);
7494   %}
7495   ins_pipe(pipe_slow);
7496 %}
7497 
// Volatile barrier that can be elided: selected when
// Matcher::post_store_load_barrier(n) holds, and emits no code. Zero cost
// makes it preferred over membar_volatile (cost 400) whenever it applies.
7498 instruct unnecessary_membar_volatile()
7499 %{
7500   match(MemBarVolatile);
7501   predicate(Matcher::post_store_load_barrier(n));
7502   ins_cost(0);
7503
7504   size(0);
7505   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7506   ins_encode();
7507   ins_pipe(empty);
7508 %}
7509 
7510 //----------Move Instructions--------------------------------------------------
7511 
// Reinterpret a long as a pointer (CastX2P): a 64-bit register copy emitted
// through the enc_copy_wide enc class (movq per format).
7512 instruct castX2P(rRegP dst, rRegL src)
7513 %{
7514   match(Set dst (CastX2P src));
7515
7516   format %{ "movq    $dst, $src\t# long->ptr" %}
7517   ins_encode(enc_copy_wide(dst, src));
7518   ins_pipe(ialu_reg_reg); // XXX
7519 %}
7520 
// Reinterpret a pointer as a long (CastP2X): a 64-bit register copy emitted
// through the enc_copy_wide enc class (movq per format).
7521 instruct castP2X(rRegL dst, rRegP src)
7522 %{
7523   match(Set dst (CastP2X src));
7524
7525   format %{ "movq    $dst, $src\t# ptr -> long" %}
7526   ins_encode(enc_copy_wide(dst, src));
7527   ins_pipe(ialu_reg_reg); // XXX
7528 %}
7529 
7530 
7531 // Convert oop pointer into compressed form
// General case, selected when the node's pointer type is not NotNull.
// Copies src into dst when they are different registers, then encodes dst
// in place. Kills the flags register.
7532 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
7533   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7534   match(Set dst (EncodeP src));
7535   effect(KILL cr);
7536   format %{ "encode_heap_oop $dst,$src" %}
7537   ins_encode %{
7538     Register s = $src$$Register;
7539     Register d = $dst$$Register;
7540     if (s != d) {
7541       __ movq(d, s);
7542     }
7543     __ encode_heap_oop(d);
7544   %}
7545   ins_pipe(ialu_reg_long);
7546 %}
7547 
// Compress an oop whose type is known to be NotNull: delegates to the
// two-register encode_heap_oop_not_null. Kills the flags register.
7548 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
7549   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
7550   match(Set dst (EncodeP src));
7551   effect(KILL cr);
7552   format %{ "encode_heap_oop_not_null $dst,$src" %}
7553   ins_encode %{
7554     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
7555   %}
7556   ins_pipe(ialu_reg_long);
7557 %}
7558 
// Decompress a narrow oop (DecodeN) in the general case, i.e. when the
// node's oop type is neither NotNull nor Constant. Copies src into dst when
// they are different registers, then decodes dst in place. Kills flags.
7559 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
7560   predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
7561             n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
7562   match(Set dst (DecodeN src));
7563   effect(KILL cr);
7564   format %{ "decode_heap_oop $dst,$src" %}
7565   ins_encode %{
7566     Register s = $src$$Register;
7567     Register d = $dst$$Register;
7568     if (s != d) {
7569       __ movq(d, s);
7570     }
7571     __ decode_heap_oop(d);
7572   %}
7573   ins_pipe(ialu_reg_long);
7574 %}
7575 
// Decompress a narrow oop whose type is NotNull or Constant (the complement
// of decodeHeapOop's predicate). Uses the two-register form of
// decode_heap_oop_not_null when src and dst differ, and the in-place
// one-register form when they are the same register. Kills flags.
7576 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
7577   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
7578             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
7579   match(Set dst (DecodeN src));
7580   effect(KILL cr);
7581   format %{ "decode_heap_oop_not_null $dst,$src" %}
7582   ins_encode %{
7583     Register s = $src$$Register;
7584     Register d = $dst$$Register;
7585     if (s != d) {
7586       __ decode_heap_oop_not_null(d, s);
7587     } else {
7588       __ decode_heap_oop_not_null(d);
7589     }
7590   %}
7591   ins_pipe(ialu_reg_long);
7592 %}
7593 
7594 
7595 //----------Conditional Move---------------------------------------------------
7596 // Jump
7597 // dummy instruction for generating temp registers
// Table jump on (switch_val << shift). predicate(false) means the matcher
// never selects this rule. The encoding loads the constant-table address
// into the TEMP register dest and jumps through [dest + switch_val * scale].
7598 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
7599   match(Jump (LShiftL switch_val shift));
7600   ins_cost(350);
7601   predicate(false);
7602   effect(TEMP dest);
7603
7604   format %{ "leaq    $dest, [$constantaddress]\n\t"
7605             "jmp     [$dest + $switch_val << $shift]\n\t" %}
7606   ins_encode %{
7607     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7608     // to do that and the compiler is using that register as one it can allocate.
7609     // So we build it all by hand.
7610     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
7611     // ArrayAddress dispatch(table, index);
7612     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
7613     __ lea($dest$$Register, $constantaddress);
7614     __ jmp(dispatch);
7615   %}
7616   ins_pipe(pipe_jmp);
7617   ins_pc_relative(1);
7618 %}
7619 
// Table jump on ((switch_val << shift) + offset). Loads the constant-table
// address into the TEMP register dest, then jumps through
// [dest + switch_val * scale + offset]; offset is an immL32 narrowed to int.
7620 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
7621   match(Jump (AddL (LShiftL switch_val shift) offset));
7622   ins_cost(350);
7623   effect(TEMP dest);
7624
7625   format %{ "leaq    $dest, [$constantaddress]\n\t"
7626             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
7627   ins_encode %{
7628     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7629     // to do that and the compiler is using that register as one it can allocate.
7630     // So we build it all by hand.
7631     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7632     // ArrayAddress dispatch(table, index);
7633     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
7634     __ lea($dest$$Register, $constantaddress);
7635     __ jmp(dispatch);
7636   %}
7637   ins_pipe(pipe_jmp);
7638   ins_pc_relative(1);
7639 %}
7640 
// Table jump on an unscaled switch value. Loads the constant-table address
// into the TEMP register dest, then jumps through [dest + switch_val]
// (scale factor times_1).
7641 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
7642   match(Jump switch_val);
7643   ins_cost(350);
7644   effect(TEMP dest);
7645
7646   format %{ "leaq    $dest, [$constantaddress]\n\t"
7647             "jmp     [$dest + $switch_val]\n\t" %}
7648   ins_encode %{
7649     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
7650     // to do that and the compiler is using that register as one it can allocate.
7651     // So we build it all by hand.
7652     // Address index(noreg, switch_reg, Address::times_1);
7653     // ArrayAddress dispatch(table, index);
7654     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
7655     __ lea($dest$$Register, $constantaddress);
7656     __ jmp(dispatch);
7657   %}
7658   ins_pipe(pipe_jmp);
7659   ins_pc_relative(1);
7660 %}
7661 
7662 // Conditional move
// Signed-condition int cmov, register source. dst is both an input and the
// result (Binary dst src). Base opcode 0F 40 is passed through enc_cmov(cop).
// NOTE(review): enc_cmov is defined elsewhere; presumably it merges the
// condition code into the opcode -- confirm.
7663 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
7664 %{
7665   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7666
7667   ins_cost(200); // XXX
7668   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7669   opcode(0x0F, 0x40);
7670   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7671   ins_pipe(pipe_cmov_reg);
7672 %}
7673 
// Unsigned-condition int cmov, register source (cmpOpU / rFlagsRegU).
// Encoded identically to cmovI_reg.
7674 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
7675   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7676
7677   ins_cost(200); // XXX
7678   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
7679   opcode(0x0F, 0x40);
7680   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
7681   ins_pipe(pipe_cmov_reg);
7682 %}
7683 
// Int cmov for the cmpOpUCF / rFlagsRegUCF operand classes; it has no
// encoding of its own and simply expands to cmovI_regU.
7684 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
7685   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7686   ins_cost(200);
7687   expand %{
7688     cmovI_regU(cop, cr, dst, src);
7689   %}
7690 %}
7691 
7692 // Conditional move
// Signed-condition int cmov with the source loaded from memory: the LoadI
// is folded into the cmov's memory operand. Cost 250 vs. 200 for the
// register form.
7693 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
7694   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7695
7696   ins_cost(250); // XXX
7697   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
7698   opcode(0x0F, 0x40);
7699   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
7700   ins_pipe(pipe_cmov_mem);
7701 %}
7702 
// Conditional move: int, memory source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same match tree shape and encoding list as cmovI_mem.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
7714 
// Conditional move: int, memory source, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7722 
// Conditional move: compressed pointer (rRegN), register source, signed
// condition.  Uses the same 32-bit cmovl form and encoding list as the int
// variant (REX_reg_reg, not the _wide class).
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7734 
// Conditional move: compressed pointer (rRegN), register source, unsigned
// condition (cmpOpU/rFlagsRegU).  Encoding list is identical to cmovN_reg.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
7746 
// Conditional move: compressed pointer, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
7754 
// Conditional move: pointer (rRegP), register source, signed condition.
// The format string shows the 64-bit cmovq form, and the encoding starts with
// the REX_reg_reg_wide class instead of REX_reg_reg.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7766 
// Conditional move: pointer (rRegP), register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Encoding list is identical to cmovP_reg.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7778 
// Conditional move: pointer, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7786 
7787 // DISABLED: Requires the ADLC to emit a bottom_type call that
7788 // correctly meets the two pointer arguments; one is an incoming
7789 // register but the other is a memory operand.  ALSO appears to
7790 // be buggy with implicit null checks.
7791 //
7792 //// Conditional move
7793 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7794 //%{
7795 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7796 //  ins_cost(250);
7797 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7798 //  opcode(0x0F,0x40);
7799 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7800 //  ins_pipe( pipe_cmov_mem );
7801 //%}
7802 //
7803 //// Conditional move
7804 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7805 //%{
7806 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7807 //  ins_cost(250);
7808 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7809 //  opcode(0x0F,0x40);
7810 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7811 //  ins_pipe( pipe_cmov_mem );
7812 //%}
7813 
// Conditional move: long (rRegL), register source, signed condition.
// The format string shows cmovq; the encoding uses the REX_reg_reg_wide class.
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7824 
// Conditional move: long, memory source (LoadL folded into the match), signed
// condition.
// NOTE(review): costed at 200, the same as the register form cmovL_reg,
// whereas cmovI_mem uses 250 against cmovI_reg's 200 -- confirm this is
// intentional.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);  // XXX
%}
7835 
// Conditional move: long, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Encoding list is identical to cmovL_reg.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7846 
// Conditional move: long, register source, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7854 
// Conditional move: long, memory source (LoadL folded into the match),
// unsigned condition (cmpOpU/rFlagsRegU).
// NOTE(review): costed at 200 like the register form, whereas cmovI_memU
// uses 250 -- confirm this is intentional.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem); // XXX
%}
7865 
// Conditional move: long, memory source, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
7873 
// Conditional move: float (regF), register source, signed condition.
// Per the format string this is not a cmov instruction but a branch on the
// negated condition around a movss.  The actual bytes come from
// enc_cmovf_branch (defined elsewhere).  Scheduled on pipe_slow.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7885 
7886 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7887 // %{
7888 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7889 
7890 //   ins_cost(200); // XXX
7891 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7892 //             "movss     $dst, $src\n"
7893 //     "skip:" %}
7894 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7895 //   ins_pipe(pipe_slow);
7896 // %}
7897 
// Conditional move: float, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same branch-around-movss shape and the same
// enc_cmovf_branch encoding class as cmovF_reg.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovf_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7909 
// Conditional move: float, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
7917 
// Conditional move: double (regD), register source, signed condition.
// Per the format string this is a branch on the negated condition around a
// movsd.  The actual bytes come from enc_cmovd_branch (defined elsewhere).
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7929 
// Conditional move: double, register source, unsigned condition
// (cmpOpU/rFlagsRegU).  Same branch-around-movsd shape and the same
// enc_cmovd_branch encoding class as cmovD_reg.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode(enc_cmovd_branch(cop, dst, src));
  ins_pipe(pipe_slow);
%}
7941 
// Conditional move: double, cmpOpUCF/rFlagsRegUCF operands.
// Has no encoding of its own; it expands into cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
7949 
7950 //----------Arithmetic Instructions--------------------------------------------
7951 //----------Addition Instructions----------------------------------------------
7952 
// Int add, register += register.  dst is both an input and the result of the
// AddI; the flags register is declared killed.  No ins_cost is given, so the
// ADLC default cost applies.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7963 
// Int add, register += immediate (immI).  The flags register is declared
// killed.  The immediate is emitted by Con8or32 (defined elsewhere;
// presumably chooses an 8- or 32-bit constant, with OpcSErm adjusting the
// opcode to match -- confirm against the enc_class definitions).
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
7974 
// Int add, register += memory.  The LoadI of src is folded into the match so
// the add reads its second operand from memory.  Flags are declared killed.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
7986 
// Int add, memory += register.  Matches the load/add/store sequence on the
// same memory operand (StoreI dst (AddI (LoadI dst) src)) as one instruction.
// Note the encoding classes take (src, dst): the register is passed first and
// the memory operand second.  Flags are declared killed.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7998 
// Int add, memory += immediate.  Matches the load/add/store sequence on the
// same memory operand.  The opcode extension (/0) is passed literally to
// RM_opc_mem as 0x00.  Flags are declared killed.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8010 
// Int increment of a register; only eligible when the UseIncDec predicate
// holds.  Matches AddI of dst with an immI1 operand (presumably the constant
// 1) and, per the format string, emits incl.  src is not referenced by the
// encoding.  Flags are declared killed.
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8022 
// Int increment of a memory location; only eligible when UseIncDec holds.
// Matches load/add/store on the same memory operand with an immI1 addend and,
// per the format string, emits incl on memory.  Flags are declared killed.
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
8035 
// XXX why does that use AddI
// Int decrement of a register; only eligible when UseIncDec holds.
// The rule matches an AddI whose second input is an immI_M1 operand
// (presumably the constant -1), i.e. the decrement is recognised as "add -1".
// Per the format string it emits decl.  Flags are declared killed.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8048 
// XXX why does that use AddI
// Int decrement of a memory location; only eligible when UseIncDec holds.
// Matches load/add/store on the same memory operand with an immI_M1 addend
// (presumably -1) and, per the format string, emits decl on memory.  The /1
// opcode extension is passed literally to RM_opc_mem.  Flags are declared
// killed.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
8062 
// Int add of register and immediate into a separate destination, done with
// lea.  Unlike the addI_* rules, dst is not an input of the match, and there
// is no rFlagsReg operand or KILL effect.  The encoding begins with a literal
// 0x67 byte, shown as "addr32" in the format string.  Costed at 110.
instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
%{
  match(Set dst (AddI src0 src1));

  ins_cost(110);
  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
8073 
// Long add, register += register.  Same shape as addI_rReg but with rRegL
// operands, an addq format, and the REX_reg_reg_wide encoding class.  Flags
// are declared killed.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8084 
// Long add, register += immediate.  The immediate operand class is immL32
// (presumably a long constant representable in 32 bits -- confirm against the
// operand definition).  Flags are declared killed.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
8095 
// Long add, register += memory.  The LoadL of src is folded into the match.
// Flags are declared killed.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8107 
// Long add, memory += register.  Matches load/add/store on the same memory
// operand as one instruction.  The encoding classes take (src, dst): register
// first, memory second.  Flags are declared killed.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8119 
// Long add, memory += immediate (immL32).  Matches load/add/store on the same
// memory operand.  The /0 opcode extension is passed literally to RM_opc_mem.
// Flags are declared killed.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8132 
// Long increment of a register; only eligible when the UseIncDec predicate
// holds.  Matches AddL of dst with an immL1 operand (presumably the long
// constant 1) and, per the format string, emits incq.  src is not referenced
// by the encoding.  Flags are declared killed.
//
// dst is a long register (rRegL): the matched node is an AddL and the
// encoding uses the 64-bit REX_reg_wide class, consistent with decL_rReg.
// It was previously declared rRegI, which does not agree with the AddL being
// matched.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8144 
// Long increment of a memory location; only eligible when UseIncDec holds.
// Matches load/add/store on the same memory operand with an immL1 addend and,
// per the format string, emits incq on memory.  Flags are declared killed.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
8157 
// XXX why does that use AddL
// Long decrement of a register; only eligible when UseIncDec holds.
// The rule matches an AddL whose second input is an immL_M1 operand
// (presumably the constant -1), i.e. the decrement is recognised as "add -1".
// Per the format string it emits decq.  Flags are declared killed.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8170 
// XXX why does that use AddL
// Long decrement of a memory location; only eligible when UseIncDec holds.
// Matches load/add/store on the same memory operand with an immL_M1 addend
// (presumably -1) and, per the format string, emits decq on memory.  The /1
// opcode extension is passed literally to RM_opc_mem.  Flags are declared
// killed.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
8184 
// Long add of register and immediate (immL32) into a separate destination,
// done with leaq.  dst is not an input of the match and there is no rFlagsReg
// operand or KILL effect.  Costed at 110.
instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
%{
  match(Set dst (AddL src0 src1));

  ins_cost(110);
  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
8195 
// Pointer add, pointer register += long register.  Matches AddP with dst as
// both input and result.  Same opcode and encoding list as addL_rReg.  Flags
// are declared killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8206 
// Pointer add, pointer register += immediate (immL32).  Same opcode and
// encoding list as addL_rReg_imm.  Flags are declared killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe( ialu_reg );
%}
8217 
8218 // XXX addP mem ops ????
8219 
// Pointer add of register and immediate (immL32) into a separate destination,
// done with leaq.  dst is not an input of the match and there is no rFlagsReg
// operand or KILL effect.  Costed at 110.
instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
%{
  match(Set dst (AddP src0 src1));

  ins_cost(110);
  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
  ins_pipe(ialu_reg_reg);
%}
8230 
// CheckCastPP on a pointer register.  Emits no machine code: size is declared
// as 0 and the encoding is empty.  dst is both the input and the result, so
// the value stays in the same register.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8240 
// CastPP on a pointer register.  Emits no machine code: size is declared as 0
// and the encoding is empty.  dst is both the input and the result.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
8250 
// CastII on an int register.  Emits no machine code: size is declared as 0
// and the encoding is empty.  Unlike checkCastPP and castPP, this rule also
// sets an explicit ins_cost of 0.
instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
8261 
// LoadP-locked same as a regular LoadP when used with compare-swap
// Matches LoadPLocked and, per the format string, emits a plain 64-bit movq
// from memory; no lock prefix appears in the encoding list.
instruct loadPLocked(rRegP dst, memory mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# ptr locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
8273 
// LoadL-locked - same as a regular LoadL when used with compare-swap
// Matches LoadLLocked and, per the format string, emits a plain 64-bit movq
// from memory; no lock prefix appears in the encoding list.
instruct loadLLocked(rRegL dst, memory mem)
%{
  match(Set dst (LoadLLocked mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# long locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
8285 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
//
// The result of the match is the flags register itself (Set cr ...), and
// oldval is constrained to the rax_RegP operand class.  The encoding starts
// with lock_prefix followed by the 64-bit cmpxchg bytes (0x0F 0xB1).
// NOTE(review): unlike storeIConditional and storeLConditional, this rule
// declares no effect(KILL oldval), although cmpxchg can write rax when the
// compare fails -- confirm this omission is intentional.

instruct storePConditional(memory heap_top_ptr,
                           rax_RegP oldval, rRegP newval,
                           rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 
  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, heap_top_ptr),
             OpcP, OpcS,
             reg_mem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}
8305 
// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match is the flags register (Set cr ...).  oldval is
// constrained to rax_RegI and is declared killed.  The encoding is
// lock_prefix followed by the 32-bit cmpxchg bytes (REX_reg_mem, not _wide).
instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem),
             OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8321 
// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the match is the flags register (Set cr ...).  oldval is
// constrained to rax_RegL and is declared killed.  The encoding is
// lock_prefix followed by the 64-bit cmpxchg bytes (REX_reg_mem_wide).
instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem),
             OpcP, OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
8337 
8338 
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result in res.
// Per the format string the emitted sequence is: locked cmpxchgq on mem_ptr,
// then sete on res, then movzbl res, res to widen the byte result.  oldval is
// constrained to rax_RegP; both the flags and oldval are declared killed.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8362 
// Long compare-and-swap producing an int result in res.
// Per the format string the emitted sequence is: locked cmpxchgq on mem_ptr,
// then sete on res, then movzbl res, res.  oldval is constrained to rax_RegL;
// both the flags and oldval are declared killed.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8385 
// Int compare-and-swap producing an int result in res.
// Per the format string the emitted sequence is: locked cmpxchgl on mem_ptr
// (32-bit form, REX_reg_mem rather than _wide), then sete on res, then
// movzbl res, res.  oldval is constrained to rax_RegI; both the flags and
// oldval are declared killed.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8408 
8409 
// Compressed-pointer compare-and-swap producing an int result in res.
// Uses the same 32-bit cmpxchgl sequence and encoding list as
// compareAndSwapI, with rax_RegN/rRegN operands.  Both the flags and oldval
// are declared killed.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
             REX_reg_breg(res, res), // movzbl
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe( pipe_cmpxchg );
%}
8431 
8432 //----------Subtraction Instructions-------------------------------------------
8433 
8434 // Integer Subtraction Instructions
// Int subtract, register -= register.  dst is both an input and the result of
// the SubI; the flags register is declared killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8445 
// Int subtract, register -= immediate (immI).  Same encoding classes as
// addI_rReg_imm, with opcode extension /5 instead of /0.  Flags are declared
// killed.
instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
8456 
// Int subtract, register -= memory.  The LoadI of src is folded into the
// match.  Flags are declared killed.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8468 
// Int subtract, memory -= register.  Matches load/subtract/store on the same
// memory operand as one instruction.  The encoding classes take (src, dst):
// register first, memory second.  Flags are declared killed.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8480 
// Int subtract, memory -= immediate.  Matches load/subtract/store on the same
// memory operand.  The /5 opcode extension is passed literally to RM_opc_mem.
// Flags are declared killed.
instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "subl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /5 id */
  ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8492 
// Long subtract, register -= register.  Same shape as subI_rReg with rRegL
// operands, a subq format, and the REX_reg_reg_wide encoding class.  Flags
// are declared killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8503 
// Long subtract, register -= immediate (immL32).  Same encoding classes as
// addL_rReg_imm, with opcode extension /5 instead of /0.  Flags are declared
// killed.
//
// dst is a long register (rRegL): the matched node is a SubL and the encoding
// uses the 64-bit OpcSErm_wide class, consistent with the other subL_* rules.
// It was previously declared rRegI, which does not agree with the SubL being
// matched.
instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81, 0x05);  /* Opcode 81 /5 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
8514 
// Long subtract, register -= memory.  The LoadL of src is folded into the
// match.  Flags are declared killed.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x2B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
8526 
// Long subtract, memory -= register.  Matches load/subtract/store on the same
// memory operand as one instruction.  The encoding classes take (src, dst):
// register first, memory second.  Flags are declared killed.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
8538 
// Long subtract, memory -= immediate (immL32).  Matches load/subtract/store
// on the same memory operand.  The /5 opcode extension is passed literally to
// RM_opc_mem.  Flags are declared killed.
instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "subq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /5 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
8551 
// Subtract from a pointer
// XXX hmpf???
// Matches a pointer plus a negated int, AddP dst (SubI zero src), and per the
// format string emits a single subq.  zero is an immI0 operand that is only
// part of the match; the encoding does not reference it.
// NOTE(review): src is an int register (rRegI) but the encoding is the 64-bit
// REX_reg_reg_wide form -- confirm the upper half of src is well-defined when
// this rule fires.
instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8564 
// Int negate of a register.  Matches "zero minus dst" (SubI zero dst) where
// zero is an immI0 operand, and per the format string emits negl.  zero is
// not referenced by the encoding.  Flags are declared killed.
instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8575 
// Int negate of a memory location.  Matches load / "zero minus value" / store
// on the same memory operand and per the format string emits negl on memory.
// The opcode extension is passed to RM_opc_mem via the `secondary` keyword
// (the 0x03 given in opcode()) rather than as a literal.  No ins_cost is
// given.  Flags are declared killed.
// NOTE(review): uses ins_pipe(ialu_reg) although the destination is memory;
// other memory-destination rules in this file use ialu_mem_imm or
// ialu_mem_reg -- confirm.
instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_reg);
%}
8586 
// Long negate of a register.  Matches "zero minus dst" (SubL zero dst) where
// zero is an immL0 operand, and per the format string emits negq.  Flags are
// declared killed.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (SubL zero dst));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8597 
// Long negate of a memory location.  Matches load / "zero minus value" /
// store on the same memory operand and per the format string emits negq on
// memory.  The opcode extension is passed to RM_opc_mem via `secondary`.
// Flags are declared killed.
// NOTE(review): uses ins_pipe(ialu_reg) although the destination is memory;
// other memory-destination rules in this file use ialu_mem_imm or
// ialu_mem_reg -- confirm.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  opcode(0xF7, 0x03);  // Opcode F7 /3
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_reg);
%}
8608 
8609 
8610 //----------Multiplication/Division Instructions-------------------------------
8611 // Integer Multiplication Instructions
8612 // Multiply Register
8613 
// Int multiply, register *= register.  Two-operand imull using the two-byte
// opcode 0x0F 0xAF (emitted via OpcP, OpcS).  Costed at 300.  Flags are
// declared killed.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  opcode(0x0F, 0xAF);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg_alu0);
%}
8625 
// Int multiply of a register by an immediate into a separate destination
// (three-operand imull, per the format string).  dst is not an input of the
// match.  Costed at 300.  Flags are declared killed.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  opcode(0x69); /* 69 /r id */
  ins_encode(REX_reg_reg(dst, src),
             OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
  ins_pipe(ialu_reg_reg_alu0);
%}
8638 
// Int multiply, register *= memory.  The LoadI of src is folded into the
// match.  Costed at 350, above the register form (300).  Flags are declared
// killed.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  opcode(0x0F, 0xAF);
  ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem_alu0);
%}
8650 
// Int multiply of a memory value by an immediate into a register
// (three-operand imull with a memory source, per the format string).  The
// LoadI of src is folded into the match.  Costed at 300, the same as the
// register-source form mulI_rReg_imm.  Flags are declared killed.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  opcode(0x69); /* 69 /r id */
  ins_encode(REX_reg_mem(dst, src),
             OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
  ins_pipe(ialu_reg_mem_alu0);
%}
8663 
8664 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8665 %{
       // Two-operand long multiply, register by register: dst = dst * src.
       // Same opcode bytes as the int form, but with the wide REX encoding.
8666   match(Set dst (MulL dst src));
8667   effect(KILL cr);
8668 
8669   ins_cost(300);
8670   format %{ "imulq   $dst, $src\t# long" %}
8671   opcode(0x0F, 0xAF);
8672   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8673   ins_pipe(ialu_reg_reg_alu0);
8674 %}
8675 
8676 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8677 %{
       // Three-operand long multiply by a constant: dst = src * imm.
       // The constant is restricted to the immL32 operand class
       // (presumably longs representable as a sign-extended 32-bit
       // immediate -- operand defined elsewhere).
8678   match(Set dst (MulL src imm));
8679   effect(KILL cr);
8680 
8681   ins_cost(300);
8682   format %{ "imulq   $dst, $src, $imm\t# long" %}
8683   opcode(0x69); /* 69 /r id */
8684   ins_encode(REX_reg_reg_wide(dst, src),
8685              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8686   ins_pipe(ialu_reg_reg_alu0);
8687 %}
8688 
8689 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8690 %{
       // Long multiply with the second factor loaded from memory:
       // dst = dst * LoadL(src).
8691   match(Set dst (MulL dst (LoadL src)));
8692   effect(KILL cr);
8693 
8694   ins_cost(350);
8695   format %{ "imulq   $dst, $src\t# long" %}
8696   opcode(0x0F, 0xAF);
8697   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8698   ins_pipe(ialu_reg_mem_alu0);
8699 %}
8700 
8701 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8702 %{
       // Three-operand long multiply of a memory operand by a constant:
       // dst = LoadL(src) * imm, with imm limited to the immL32 operand class.
8703   match(Set dst (MulL (LoadL src) imm));
8704   effect(KILL cr);
8705 
8706   ins_cost(300);
8707   format %{ "imulq   $dst, $src, $imm\t# long" %}
8708   opcode(0x69); /* 69 /r id */
8709   ins_encode(REX_reg_mem_wide(dst, src),
8710              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8711   ins_pipe(ialu_reg_mem_alu0);
8712 %}
8713 
8714 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8715 %{
       // High half of a long multiply (MulHiL).  Uses the one-operand imulq
       // form: one factor is pinned to rax (used, then killed), the other is
       // any long register except rax, and the result is taken from rdx.
8716   match(Set dst (MulHiL src rax));
8717   effect(USE_KILL rax, KILL cr);
8718 
8719   ins_cost(300);
8720   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8721   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8722   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8723   ins_pipe(ialu_reg_reg_alu0);
8724 %}
8725 
8726 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8727                    rFlagsReg cr)
8728 %{
       // Signed int divide: dividend and quotient are pinned to rax, the
       // divisor is any int register other than rax/rdx, and rdx plus the
       // flags are declared killed.
       // Per the format text below, the emitted sequence skips idivl when
       // rax == 0x80000000 and $div == -1 (rdx is zeroed instead).
       // NOTE(review): that guard is produced by cdql_enc, which is defined
       // elsewhere -- confirm it matches the printed sequence.
8729   match(Set rax (DivI rax div));
8730   effect(KILL rdx, KILL cr);
8731 
8732   ins_cost(30*100+10*100); // XXX
8733   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8734             "jne,s   normal\n\t"
8735             "xorl    rdx, rdx\n\t"
8736             "cmpl    $div, -1\n\t"
8737             "je,s    done\n"
8738     "normal: cdql\n\t"
8739             "idivl   $div\n"
8740     "done:"        %}
8741   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8742   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8743   ins_pipe(ialu_reg_reg_alu0);
8744 %}
8745 
8746 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8747                    rFlagsReg cr)
8748 %{
       // Signed long divide: dividend and quotient are pinned to rax, the
       // divisor is any long register other than rax/rdx, and rdx plus the
       // flags are declared killed.
       // Per the format text below, the emitted sequence skips idivq when
       // rax == 0x8000000000000000 and $div == -1 (rdx is zeroed instead).
       // NOTE(review): that guard is produced by cdqq_enc, which is defined
       // elsewhere -- confirm it matches the printed sequence.
8749   match(Set rax (DivL rax div));
8750   effect(KILL rdx, KILL cr);
8751 
8752   ins_cost(30*100+10*100); // XXX
8753   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8754             "cmpq    rax, rdx\n\t"
8755             "jne,s   normal\n\t"
8756             "xorl    rdx, rdx\n\t"
8757             "cmpq    $div, -1\n\t"
8758             "je,s    done\n"
8759     "normal: cdqq\n\t"
8760             "idivq   $div\n"
8761     "done:"        %}
8762   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8763   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8764   ins_pipe(ialu_reg_reg_alu0);
8765 %}
8766 
8767 // Integer DIVMOD with Register, both quotient and mod results
     // Matches a DivModI node directly (no "Set"): both rax and rdx are
     // operands of the instruction, so only the flags are declared killed.
     // Emits the same guarded cdql/idivl sequence as divI_rReg.
8768 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8769                              rFlagsReg cr)
8770 %{
8771   match(DivModI rax div);
8772   effect(KILL cr);
8773 
8774   ins_cost(30*100+10*100); // XXX
8775   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8776             "jne,s   normal\n\t"
8777             "xorl    rdx, rdx\n\t"
8778             "cmpl    $div, -1\n\t"
8779             "je,s    done\n"
8780     "normal: cdql\n\t"
8781             "idivl   $div\n"
8782     "done:"        %}
8783   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8784   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8785   ins_pipe(pipe_slow);
8786 %}
8787 
8788 // Long DIVMOD with Register, both quotient and mod results
     // Matches a DivModL node directly (no "Set"): both rax and rdx are
     // operands of the instruction, so only the flags are declared killed.
     // Emits the same guarded cdqq/idivq sequence as divL_rReg.
8789 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8790                              rFlagsReg cr)
8791 %{
8792   match(DivModL rax div);
8793   effect(KILL cr);
8794 
8795   ins_cost(30*100+10*100); // XXX
8796   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8797             "cmpq    rax, rdx\n\t"
8798             "jne,s   normal\n\t"
8799             "xorl    rdx, rdx\n\t"
8800             "cmpq    $div, -1\n\t"
8801             "je,s    done\n"
8802     "normal: cdqq\n\t"
8803             "idivq   $div\n"
8804     "done:"        %}
8805   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8806   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8807   ins_pipe(pipe_slow);
8808 %}
8809 
8810 //----------- DivL-By-Constant-Expansions--------------------------------------
8811 // DivI cases are handled by the compiler
8812 
8813 // Magic constant, reciprocal of 10
     // Expand-only helper (no match rule): defines $dst with the 64-bit
     // constant 0x6666666666666667 used by the divL_10 expansion.
8814 instruct loadConL_0x6666666666666667(rRegL dst)
8815 %{
8816   effect(DEF dst);
8817 
       // The printed constant must agree with the value actually loaded below
       // (16 hex digits).
8818   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8819   ins_encode(load_immL(dst, 0x6666666666666667));
8820   ins_pipe(ialu_reg);
8821 %}
8822 
8823 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8824 %{
       // Expand-only helper (no match rule): one-operand imulq of rax by $src.
       // The result operand is pinned to rdx; rax is used and then killed,
       // and the flags are killed.
8825   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8826 
8827   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8828   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8829   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8830   ins_pipe(ialu_reg_reg_alu0);
8831 %}
8832 
8833 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8834 %{
       // Expand-only helper (no match rule): arithmetic right shift of $dst
       // by the fixed count 63 (0x3F), in place.  Flags are killed.
8835   effect(USE_DEF dst, KILL cr);
8836 
8837   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8838   opcode(0xC1, 0x7); /* C1 /7 ib */
8839   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8840   ins_pipe(ialu_reg);
8841 %}
8842 
8843 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8844 %{
       // Expand-only helper (no match rule): arithmetic right shift of $dst
       // by the fixed count 2, in place.  Flags are killed.
8845   effect(USE_DEF dst, KILL cr);
8846 
8847   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8848   opcode(0xC1, 0x7); /* C1 /7 ib */
8849   ins_encode(reg_opc_imm_wide(dst, 0x2));
8850   ins_pipe(ialu_reg);
8851 %}
8852 
8853 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8854 %{
       // Long division by the immL10 constant, expanded into a multiply-high
       // by 0x6666666666666667 followed by shifts and a subtract:
       //   dst = (hi64(src * magic) >> 2) - (src >> 63)
       // NOTE(review): sarL_rReg_63 is declared USE_DEF on its operand, so the
       // third step overwrites src in place -- confirm src is not expected to
       // stay live after this instruction.
8855   match(Set dst (DivL src div));
8856 
8857   ins_cost((5+8)*100);
8858   expand %{
8859     rax_RegL rax;                     // Killed temp
8860     rFlagsReg cr;                     // Killed
8861     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8862     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8863     sarL_rReg_63(src, cr);            // sarq  src, 63
8864     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8865     subL_rReg(dst, src, cr);          // subq  rdx, src
8866   %}
8867 %}
8868 
8869 //-----------------------------------------------------------------------------
8870 
8871 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8872                    rFlagsReg cr)
8873 %{
       // Signed int remainder: dividend pinned to rax, remainder produced in
       // rdx; rax and the flags are declared killed.
       // Prints the same guarded cdql/idivl sequence as divI_rReg; in the
       // rax == 0x80000000, $div == -1 case rdx is zeroed and idivl skipped.
8874   match(Set rdx (ModI rax div));
8875   effect(KILL rax, KILL cr);
8876 
8877   ins_cost(300); // XXX
8878   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8879             "jne,s   normal\n\t"
8880             "xorl    rdx, rdx\n\t"
8881             "cmpl    $div, -1\n\t"
8882             "je,s    done\n"
8883     "normal: cdql\n\t"
8884             "idivl   $div\n"
8885     "done:"        %}
8886   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8887   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8888   ins_pipe(ialu_reg_reg_alu0);
8889 %}
8890 
8891 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8892                    rFlagsReg cr)
8893 %{
       // Signed long remainder: dividend pinned to rax, remainder produced in
       // rdx; rax and the flags are declared killed.
       // Prints the same guarded cdqq/idivq sequence as divL_rReg; in the
       // min-long, $div == -1 case rdx is zeroed and idivq skipped.
8894   match(Set rdx (ModL rax div));
8895   effect(KILL rax, KILL cr);
8896 
8897   ins_cost(300); // XXX
8898   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8899             "cmpq    rax, rdx\n\t"
8900             "jne,s   normal\n\t"
8901             "xorl    rdx, rdx\n\t"
8902             "cmpq    $div, -1\n\t"
8903             "je,s    done\n"
8904     "normal: cdqq\n\t"
8905             "idivq   $div\n"
8906     "done:"        %}
8907   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8908   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8909   ins_pipe(ialu_reg_reg_alu0);
8910 %}
8911 
8912 // Integer Shift Instructions
8913 // Shift Left by one
     // Int register form: dst = dst << shift, with shift restricted to the
     // immI1 operand class.  Uses the D1 /4 encoding, which carries no
     // immediate byte.  Flags are killed.
8914 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8915 %{
8916   match(Set dst (LShiftI dst shift));
8917   effect(KILL cr);
8918 
8919   format %{ "sall    $dst, $shift" %}
8920   opcode(0xD1, 0x4); /* D1 /4 */
8921   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8922   ins_pipe(ialu_reg);
8923 %}
8924 
8925 // Shift Left by one
     // Int memory form: read-modify-write of $dst, matching a StoreI back to
     // the same address of (LoadI dst) << shift.  Flags are killed.
8926 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8927 %{
8928   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8929   effect(KILL cr);
8930 
8931   format %{ "sall    $dst, $shift\t" %}
8932   opcode(0xD1, 0x4); /* D1 /4 */
8933   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8934   ins_pipe(ialu_mem_imm);
8935 %}
8936 
8937 // Shift Left by 8-bit immediate
     // Int register form: dst = dst << shift for an immI8 constant, using the
     // C1 /4 ib encoding.  Flags are killed.
8938 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8939 %{
8940   match(Set dst (LShiftI dst shift));
8941   effect(KILL cr);
8942 
8943   format %{ "sall    $dst, $shift" %}
8944   opcode(0xC1, 0x4); /* C1 /4 ib */
8945   ins_encode(reg_opc_imm(dst, shift));
8946   ins_pipe(ialu_reg);
8947 %}
8948 
8949 // Shift Left by 8-bit immediate
     // Int memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreI back to the same address.  Flags are killed.
8950 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8951 %{
8952   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8953   effect(KILL cr);
8954 
8955   format %{ "sall    $dst, $shift" %}
8956   opcode(0xC1, 0x4); /* C1 /4 ib */
8957   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8958   ins_pipe(ialu_mem_imm);
8959 %}
8960 
8961 // Shift Left by variable
     // Int register form with the shift count pinned to rcx (rcx_RegI);
     // the D3 /4 encoding names only $dst.  Flags are killed.
8962 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8963 %{
8964   match(Set dst (LShiftI dst shift));
8965   effect(KILL cr);
8966 
8967   format %{ "sall    $dst, $shift" %}
8968   opcode(0xD3, 0x4); /* D3 /4 */
8969   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8970   ins_pipe(ialu_reg_reg);
8971 %}
8972 
8973 // Shift Left by variable
     // Int memory form: read-modify-write of $dst with the shift count pinned
     // to rcx.  Flags are killed.
8974 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8975 %{
8976   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8977   effect(KILL cr);
8978 
8979   format %{ "sall    $dst, $shift" %}
8980   opcode(0xD3, 0x4); /* D3 /4 */
8981   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8982   ins_pipe(ialu_mem_reg);
8983 %}
8984 
8985 // Arithmetic shift right by one
     // Int register form: dst = dst >> shift (RShiftI), shift restricted to
     // the immI1 operand class; D1 /7 encoding.  Flags are killed.
8986 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8987 %{
8988   match(Set dst (RShiftI dst shift));
8989   effect(KILL cr);
8990 
8991   format %{ "sarl    $dst, $shift" %}
8992   opcode(0xD1, 0x7); /* D1 /7 */
8993   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8994   ins_pipe(ialu_reg);
8995 %}
8996 
8997 // Arithmetic shift right by one
     // Int memory form: read-modify-write of $dst, matching a StoreI back to
     // the same address of (LoadI dst) >> shift.  Flags are killed.
8998 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8999 %{
9000   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9001   effect(KILL cr);
9002 
9003   format %{ "sarl    $dst, $shift" %}
9004   opcode(0xD1, 0x7); /* D1 /7 */
9005   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9006   ins_pipe(ialu_mem_imm);
9007 %}
9008 
9009 // Arithmetic Shift Right by 8-bit immediate
     // Int register form: dst = dst >> shift (RShiftI) for an immI8 constant,
     // using the C1 /7 ib encoding.  Flags are killed.
9010 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9011 %{
9012   match(Set dst (RShiftI dst shift));
9013   effect(KILL cr);
9014 
9015   format %{ "sarl    $dst, $shift" %}
9016   opcode(0xC1, 0x7); /* C1 /7 ib */
9017   ins_encode(reg_opc_imm(dst, shift));
       // Register-only operation: use the register pipeline class, as
       // salI_rReg_imm and shrI_rReg_imm do, rather than the memory one.
9018   ins_pipe(ialu_reg);
9019 %}
9020 
9021 // Arithmetic Shift Right by 8-bit immediate
     // Int memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreI back to the same address.  Flags are killed.
9022 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9023 %{
9024   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9025   effect(KILL cr);
9026 
9027   format %{ "sarl    $dst, $shift" %}
9028   opcode(0xC1, 0x7); /* C1 /7 ib */
9029   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9030   ins_pipe(ialu_mem_imm);
9031 %}
9032 
9033 // Arithmetic Shift Right by variable
     // Int register form with the shift count pinned to rcx (rcx_RegI);
     // the D3 /7 encoding names only $dst.  Flags are killed.
9034 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9035 %{
9036   match(Set dst (RShiftI dst shift));
9037   effect(KILL cr);
9038 
9039   format %{ "sarl    $dst, $shift" %}
9040   opcode(0xD3, 0x7); /* D3 /7 */
9041   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9042   ins_pipe(ialu_reg_reg);
9043 %}
9044 
9045 // Arithmetic Shift Right by variable
     // Int memory form: read-modify-write of $dst with the shift count pinned
     // to rcx.  Flags are killed.
9046 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9047 %{
9048   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9049   effect(KILL cr);
9050 
9051   format %{ "sarl    $dst, $shift" %}
9052   opcode(0xD3, 0x7); /* D3 /7 */
9053   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9054   ins_pipe(ialu_mem_reg);
9055 %}
9056 
9057 // Logical shift right by one
     // Int register form: dst = dst >>> shift (URShiftI), shift restricted to
     // the immI1 operand class; D1 /5 encoding.  Flags are killed.
9058 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
9059 %{
9060   match(Set dst (URShiftI dst shift));
9061   effect(KILL cr);
9062 
9063   format %{ "shrl    $dst, $shift" %}
9064   opcode(0xD1, 0x5); /* D1 /5 */
9065   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9066   ins_pipe(ialu_reg);
9067 %}
9068 
9069 // Logical shift right by one
     // Int memory form: read-modify-write of $dst, matching a StoreI back to
     // the same address of (LoadI dst) >>> shift.  Flags are killed.
9070 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9071 %{
9072   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9073   effect(KILL cr);
9074 
9075   format %{ "shrl    $dst, $shift" %}
9076   opcode(0xD1, 0x5); /* D1 /5 */
9077   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9078   ins_pipe(ialu_mem_imm);
9079 %}
9080 
9081 // Logical Shift Right by 8-bit immediate
     // Int register form: dst = dst >>> shift (URShiftI) for an immI8
     // constant, using the C1 /5 ib encoding.  Flags are killed.
9082 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
9083 %{
9084   match(Set dst (URShiftI dst shift));
9085   effect(KILL cr);
9086 
9087   format %{ "shrl    $dst, $shift" %}
9088   opcode(0xC1, 0x5); /* C1 /5 ib */
9089   ins_encode(reg_opc_imm(dst, shift));
9090   ins_pipe(ialu_reg);
9091 %}
9092 
9093 // Logical Shift Right by 8-bit immediate
     // Int memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreI back to the same address.  Flags are killed.
9094 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9095 %{
9096   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9097   effect(KILL cr);
9098 
9099   format %{ "shrl    $dst, $shift" %}
9100   opcode(0xC1, 0x5); /* C1 /5 ib */
9101   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
9102   ins_pipe(ialu_mem_imm);
9103 %}
9104 
9105 // Logical Shift Right by variable
     // Int register form with the shift count pinned to rcx (rcx_RegI);
     // the D3 /5 encoding names only $dst.  Flags are killed.
9106 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
9107 %{
9108   match(Set dst (URShiftI dst shift));
9109   effect(KILL cr);
9110 
9111   format %{ "shrl    $dst, $shift" %}
9112   opcode(0xD3, 0x5); /* D3 /5 */
9113   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9114   ins_pipe(ialu_reg_reg);
9115 %}
9116 
9117 // Logical Shift Right by variable
     // Int memory form: read-modify-write of $dst with the shift count pinned
     // to rcx.  Flags are killed.
9118 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9119 %{
9120   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
9121   effect(KILL cr);
9122 
9123   format %{ "shrl    $dst, $shift" %}
9124   opcode(0xD3, 0x5); /* D3 /5 */
9125   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
9126   ins_pipe(ialu_mem_reg);
9127 %}
9128 
9129 // Long Shift Instructions
9130 // Shift Left by one
     // Long register form: dst = dst << shift (LShiftL); the shift count is
     // an int constant in the immI1 operand class.  D1 /4 with the wide REX
     // encoding.  Flags are killed.
9131 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9132 %{
9133   match(Set dst (LShiftL dst shift));
9134   effect(KILL cr);
9135 
9136   format %{ "salq    $dst, $shift" %}
9137   opcode(0xD1, 0x4); /* D1 /4 */
9138   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9139   ins_pipe(ialu_reg);
9140 %}
9141 
9142 // Shift Left by one
     // Long memory form: read-modify-write of $dst, matching a StoreL back to
     // the same address of (LoadL dst) << shift.  Flags are killed.
9143 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9144 %{
9145   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9146   effect(KILL cr);
9147 
9148   format %{ "salq    $dst, $shift" %}
9149   opcode(0xD1, 0x4); /* D1 /4 */
9150   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9151   ins_pipe(ialu_mem_imm);
9152 %}
9153 
9154 // Shift Left by 8-bit immediate
     // Long register form: dst = dst << shift for an immI8 constant, using
     // the C1 /4 ib encoding via reg_opc_imm_wide.  Flags are killed.
9155 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9156 %{
9157   match(Set dst (LShiftL dst shift));
9158   effect(KILL cr);
9159 
9160   format %{ "salq    $dst, $shift" %}
9161   opcode(0xC1, 0x4); /* C1 /4 ib */
9162   ins_encode(reg_opc_imm_wide(dst, shift));
9163   ins_pipe(ialu_reg);
9164 %}
9165 
9166 // Shift Left by 8-bit immediate
     // Long memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreL back to the same address.  Flags are killed.
9167 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9168 %{
9169   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9170   effect(KILL cr);
9171 
9172   format %{ "salq    $dst, $shift" %}
9173   opcode(0xC1, 0x4); /* C1 /4 ib */
9174   ins_encode(REX_mem_wide(dst), OpcP,
9175              RM_opc_mem(secondary, dst), Con8or32(shift));
9176   ins_pipe(ialu_mem_imm);
9177 %}
9178 
9179 // Shift Left by variable
     // Long register form with the (int) shift count pinned to rcx;
     // the D3 /4 encoding names only $dst.  Flags are killed.
9180 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9181 %{
9182   match(Set dst (LShiftL dst shift));
9183   effect(KILL cr);
9184 
9185   format %{ "salq    $dst, $shift" %}
9186   opcode(0xD3, 0x4); /* D3 /4 */
9187   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9188   ins_pipe(ialu_reg_reg);
9189 %}
9190 
9191 // Shift Left by variable
     // Long memory form: read-modify-write of $dst with the shift count
     // pinned to rcx.  Flags are killed.
9192 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9193 %{
9194   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
9195   effect(KILL cr);
9196 
9197   format %{ "salq    $dst, $shift" %}
9198   opcode(0xD3, 0x4); /* D3 /4 */
9199   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9200   ins_pipe(ialu_mem_reg);
9201 %}
9202 
9203 // Arithmetic shift right by one
     // Long register form: dst = dst >> shift (RShiftL), shift restricted to
     // the immI1 operand class; D1 /7 with the wide REX encoding.
     // Flags are killed.
9204 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9205 %{
9206   match(Set dst (RShiftL dst shift));
9207   effect(KILL cr);
9208 
9209   format %{ "sarq    $dst, $shift" %}
9210   opcode(0xD1, 0x7); /* D1 /7 */
9211   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9212   ins_pipe(ialu_reg);
9213 %}
9214 
9215 // Arithmetic shift right by one
     // Long memory form: read-modify-write of $dst, matching a StoreL back to
     // the same address of (LoadL dst) >> shift.  Flags are killed.
9216 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9217 %{
9218   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9219   effect(KILL cr);
9220 
9221   format %{ "sarq    $dst, $shift" %}
9222   opcode(0xD1, 0x7); /* D1 /7 */
9223   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9224   ins_pipe(ialu_mem_imm);
9225 %}
9226 
9227 // Arithmetic Shift Right by 8-bit immediate
     // Long register form: dst = dst >> shift (RShiftL) for an immI8
     // constant, using the C1 /7 ib encoding via reg_opc_imm_wide.
     // Flags are killed.
9228 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9229 %{
9230   match(Set dst (RShiftL dst shift));
9231   effect(KILL cr);
9232 
9233   format %{ "sarq    $dst, $shift" %}
9234   opcode(0xC1, 0x7); /* C1 /7 ib */
9235   ins_encode(reg_opc_imm_wide(dst, shift));
       // Register-only operation: use the register pipeline class, as
       // salL_rReg_imm and shrL_rReg_imm do, rather than the memory one.
9236   ins_pipe(ialu_reg);
9237 %}
9238 
9239 // Arithmetic Shift Right by 8-bit immediate
     // Long memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreL back to the same address.  Flags are killed.
9240 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9241 %{
9242   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9243   effect(KILL cr);
9244 
9245   format %{ "sarq    $dst, $shift" %}
9246   opcode(0xC1, 0x7); /* C1 /7 ib */
9247   ins_encode(REX_mem_wide(dst), OpcP,
9248              RM_opc_mem(secondary, dst), Con8or32(shift));
9249   ins_pipe(ialu_mem_imm);
9250 %}
9251 
9252 // Arithmetic Shift Right by variable
     // Long register form with the (int) shift count pinned to rcx;
     // the D3 /7 encoding names only $dst.  Flags are killed.
9253 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9254 %{
9255   match(Set dst (RShiftL dst shift));
9256   effect(KILL cr);
9257 
9258   format %{ "sarq    $dst, $shift" %}
9259   opcode(0xD3, 0x7); /* D3 /7 */
9260   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9261   ins_pipe(ialu_reg_reg);
9262 %}
9263 
9264 // Arithmetic Shift Right by variable
     // Long memory form: read-modify-write of $dst with the shift count
     // pinned to rcx.  Flags are killed.
9265 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9266 %{
9267   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
9268   effect(KILL cr);
9269 
9270   format %{ "sarq    $dst, $shift" %}
9271   opcode(0xD3, 0x7); /* D3 /7 */
9272   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9273   ins_pipe(ialu_mem_reg);
9274 %}
9275 
9276 // Logical shift right by one
     // Long register form: dst = dst >>> shift (URShiftL), shift restricted
     // to the immI1 operand class; D1 /5 with the wide REX encoding.
     // Flags are killed.
9277 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
9278 %{
9279   match(Set dst (URShiftL dst shift));
9280   effect(KILL cr);
9281 
9282   format %{ "shrq    $dst, $shift" %}
9283   opcode(0xD1, 0x5); /* D1 /5 */
9284   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
9285   ins_pipe(ialu_reg);
9286 %}
9287 
9288 // Logical shift right by one
     // Long memory form: read-modify-write of $dst, matching a StoreL back to
     // the same address of (LoadL dst) >>> shift.  Flags are killed.
9289 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
9290 %{
9291   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9292   effect(KILL cr);
9293 
9294   format %{ "shrq    $dst, $shift" %}
9295   opcode(0xD1, 0x5); /* D1 /5 */
9296   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9297   ins_pipe(ialu_mem_imm);
9298 %}
9299 
9300 // Logical Shift Right by 8-bit immediate
     // Long register form: dst = dst >>> shift (URShiftL) for an immI8
     // constant, using the C1 /5 ib encoding via reg_opc_imm_wide.
     // Flags are killed.
9301 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
9302 %{
9303   match(Set dst (URShiftL dst shift));
9304   effect(KILL cr);
9305 
9306   format %{ "shrq    $dst, $shift" %}
9307   opcode(0xC1, 0x5); /* C1 /5 ib */
9308   ins_encode(reg_opc_imm_wide(dst, shift));
9309   ins_pipe(ialu_reg);
9310 %}
9311 
9312 
9313 // Logical Shift Right by 8-bit immediate
     // Long memory form: read-modify-write of $dst by an immI8 constant,
     // matching a StoreL back to the same address.  Flags are killed.
9314 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
9315 %{
9316   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9317   effect(KILL cr);
9318 
9319   format %{ "shrq    $dst, $shift" %}
9320   opcode(0xC1, 0x5); /* C1 /5 ib */
9321   ins_encode(REX_mem_wide(dst), OpcP,
9322              RM_opc_mem(secondary, dst), Con8or32(shift));
9323   ins_pipe(ialu_mem_imm);
9324 %}
9325 
9326 // Logical Shift Right by variable
     // Long register form with the (int) shift count pinned to rcx;
     // the D3 /5 encoding names only $dst.  Flags are killed.
9327 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
9328 %{
9329   match(Set dst (URShiftL dst shift));
9330   effect(KILL cr);
9331 
9332   format %{ "shrq    $dst, $shift" %}
9333   opcode(0xD3, 0x5); /* D3 /5 */
9334   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9335   ins_pipe(ialu_reg_reg);
9336 %}
9337 
9338 // Logical Shift Right by variable
     // Long memory form: read-modify-write of $dst with the shift count
     // pinned to rcx.  Flags are killed.
9339 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
9340 %{
9341   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
9342   effect(KILL cr);
9343 
9344   format %{ "shrq    $dst, $shift" %}
9345   opcode(0xD3, 0x5); /* D3 /5 */
9346   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
9347   ins_pipe(ialu_mem_reg);
9348 %}
9349 
9350 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9351 // This idiom is used by the compiler for the i2b bytecode.
     // The shift pair is replaced by a single sign-extending byte move
     // (movsbl).  No flags operand is declared for this instruction.
9352 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
9353 %{
9354   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9355 
9356   format %{ "movsbl  $dst, $src\t# i2b" %}
9357   opcode(0x0F, 0xBE);
9358   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9359   ins_pipe(ialu_reg_reg);
9360 %}
9361 
9362 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9363 // This idiom is used by the compiler for the i2s bytecode.
     // The shift pair is replaced by a single sign-extending word move
     // (movswl).  No flags operand is declared for this instruction.
9364 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
9365 %{
9366   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9367 
9368   format %{ "movswl  $dst, $src\t# i2s" %}
9369   opcode(0x0F, 0xBF);
9370   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9371   ins_pipe(ialu_reg_reg);
9372 %}
9373 
9374 // ROL/ROR instructions
9375 
9376 // ROL expand
9377 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate left of $dst using
       // the D1 /0 encoding, which takes no count operand.  Flags are killed.
9378   effect(KILL cr, USE_DEF dst);
9379 
9380   format %{ "roll    $dst" %}
9381   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9382   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9383   ins_pipe(ialu_reg);
9384 %}
9385 
9386 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate left of $dst by an
       // immI8 constant (C1 /0 ib).  Flags are killed.
9387   effect(USE_DEF dst, USE shift, KILL cr);
9388 
9389   format %{ "roll    $dst, $shift" %}
9390   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9391   ins_encode( reg_opc_imm(dst, shift) );
9392   ins_pipe(ialu_reg);
9393 %}
9394 
9395 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9396 %{
       // Expand-only helper (no match rule): int rotate left of $dst by the
       // count in rcx (D3 /0).  dst is drawn from a register class that
       // excludes rcx.  Flags are killed.
9397   effect(USE_DEF dst, USE shift, KILL cr);
9398 
9399   format %{ "roll    $dst, $shift" %}
9400   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9401   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9402   ins_pipe(ialu_reg_reg);
9403 %}
9404 // end of ROL expand
9405 
9406 // Rotate Left by one
     // Recognizes (dst << lshift) | (dst >>> rshift) with lshift in immI1 and
     // rshift in immI_M1, and expands to rolI_rReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1 -- operand class is
     // defined elsewhere; confirm.
9407 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9408 %{
9409   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9410 
9411   expand %{
9412     rolI_rReg_imm1(dst, cr);
9413   %}
9414 %}
9415 
9416 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) for two immI8 constants.
     // The predicate reads both shift constants from the OrI's inputs and
     // only accepts the pattern when their sum is 0 modulo 32 (& 0x1f).
     // Expands to rolI_rReg_imm8 using the left-shift count.
9417 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9418 %{
9419   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9420   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9421 
9422   expand %{
9423     rolI_rReg_imm8(dst, lshift, cr);
9424   %}
9425 %}
9426 
9427 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (zero - shift)) where zero is an
     // immI0 constant and shift lives in rcx; expands to rolI_rReg_CL.
9428 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9429 %{
9430   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9431 
9432   expand %{
9433     rolI_rReg_CL(dst, shift, cr);
9434   %}
9435 %}
9436 
9437 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (c32 - shift)) where c32 is an
     // immI_32 constant and shift lives in rcx; expands to rolI_rReg_CL.
9438 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9439 %{
9440   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9441 
9442   expand %{
9443     rolI_rReg_CL(dst, shift, cr);
9444   %}
9445 %}
9446 
9447 // ROR expand
9448 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9449 %{
       // Expand-only helper (no match rule): int rotate right of $dst using
       // the D1 /1 encoding, which takes no count operand.  Flags are killed.
9450   effect(USE_DEF dst, KILL cr);
9451 
9452   format %{ "rorl    $dst" %}
9453   opcode(0xD1, 0x1); /* D1 /1 */
9454   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9455   ins_pipe(ialu_reg);
9456 %}
9457 
9458 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9459 %{
       // Expand-only helper (no match rule): int rotate right of $dst by an
       // immI8 constant (C1 /1 ib).  Flags are killed.
9460   effect(USE_DEF dst, USE shift, KILL cr);
9461 
9462   format %{ "rorl    $dst, $shift" %}
9463   opcode(0xC1, 0x1); /* C1 /1 ib */
9464   ins_encode(reg_opc_imm(dst, shift));
9465   ins_pipe(ialu_reg);
9466 %}
9467 
9468 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9469 %{
       // Expand-only helper (no match rule): int rotate right of $dst by the
       // count in rcx (D3 /1).  dst is drawn from a register class that
       // excludes rcx.  Flags are killed.
9470   effect(USE_DEF dst, USE shift, KILL cr);
9471 
9472   format %{ "rorl    $dst, $shift" %}
9473   opcode(0xD3, 0x1); /* D3 /1 */
9474   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9475   ins_pipe(ialu_reg_reg);
9476 %}
9477 // end of ROR expand
9478 
9479 // Rotate Right by one
     // Recognizes (dst >>> rshift) | (dst << lshift) with rshift in immI1 and
     // lshift in immI_M1, and expands to rorI_rReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1 -- operand class is
     // defined elsewhere; confirm.
9480 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9481 %{
9482   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9483 
9484   expand %{
9485     rorI_rReg_imm1(dst, cr);
9486   %}
9487 %}
9488 
9489 // Rotate Right by 8-bit immediate
     // Recognizes (dst >>> rshift) | (dst << lshift) for two immI8 constants.
     // The predicate reads both shift constants from the OrI's inputs and
     // only accepts the pattern when their sum is 0 modulo 32 (& 0x1f).
     // Expands to rorI_rReg_imm8 using the right-shift count.
9490 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9491 %{
9492   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9493   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9494 
9495   expand %{
9496     rorI_rReg_imm8(dst, rshift, cr);
9497   %}
9498 %}
9499 
9500 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (zero - shift)) where zero is an
     // immI0 constant and shift lives in rcx; expands to rorI_rReg_CL.
9501 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9502 %{
9503   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9504 
9505   expand %{
9506     rorI_rReg_CL(dst, shift, cr);
9507   %}
9508 %}
9509 
9510 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (c32 - shift)) where c32 is an
     // immI_32 constant and shift lives in rcx; expands to rorI_rReg_CL.
9511 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9512 %{
9513   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9514 
9515   expand %{
9516     rorI_rReg_CL(dst, shift, cr);
9517   %}
9518 %}
9519 
9520 // for long rotate
9521 // ROL expand
9522 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate left of $dst using
       // the D1 /0 encoding with wide REX; takes no count operand.
       // Flags are killed.
9523   effect(USE_DEF dst, KILL cr);
9524 
9525   format %{ "rolq    $dst" %}
9526   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9527   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9528   ins_pipe(ialu_reg);
9529 %}
9530 
9531 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate left of $dst by an
       // immI8 constant (C1 /0 ib, wide form).  Flags are killed.
9532   effect(USE_DEF dst, USE shift, KILL cr);
9533 
9534   format %{ "rolq    $dst, $shift" %}
9535   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9536   ins_encode( reg_opc_imm_wide(dst, shift) );
9537   ins_pipe(ialu_reg);
9538 %}
9539 
9540 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9541 %{
       // Expand-only helper (no match rule): long rotate left of $dst by the
       // count in rcx (D3 /0, wide REX).  dst is drawn from a register class
       // that excludes rcx.  Flags are killed.
9542   effect(USE_DEF dst, USE shift, KILL cr);
9543 
9544   format %{ "rolq    $dst, $shift" %}
9545   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9546   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9547   ins_pipe(ialu_reg_reg);
9548 %}
9549 // end of ROL expand
9550 
9551 // Rotate Left by one
     // Recognizes (dst << lshift) | (dst >>> rshift) on longs with lshift in
     // immI1 and rshift in immI_M1, and expands to rolL_rReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1 -- operand class is
     // defined elsewhere; confirm.
9552 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9553 %{
9554   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9555 
9556   expand %{
9557     rolL_rReg_imm1(dst, cr);
9558   %}
9559 %}
9560 
9561 // Rotate Left by 8-bit immediate
     // Recognizes (dst << lshift) | (dst >>> rshift) on longs for two immI8
     // constants.  The predicate reads both shift constants from the OrL's
     // inputs and only accepts the pattern when their sum is 0 modulo 64
     // (& 0x3f).  Expands to rolL_rReg_imm8 using the left-shift count.
9562 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9563 %{
9564   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9565   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9566 
9567   expand %{
9568     rolL_rReg_imm8(dst, lshift, cr);
9569   %}
9570 %}
9571 
9572 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (zero - shift)) on longs, where
     // zero is an immI0 constant and the int shift count lives in rcx;
     // expands to rolL_rReg_CL.
9573 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9574 %{
9575   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9576 
9577   expand %{
9578     rolL_rReg_CL(dst, shift, cr);
9579   %}
9580 %}
9581 
9582 // Rotate Left by variable
     // Recognizes (dst << shift) | (dst >>> (c64 - shift)) on longs, where
     // c64 is an immI_64 constant and the int shift count lives in rcx;
     // expands to rolL_rReg_CL.
9583 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9584 %{
9585   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9586 
9587   expand %{
9588     rolL_rReg_CL(dst, shift, cr);
9589   %}
9590 %}
9591 
9592 // ROR expand
9593 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9594 %{
       // Expand-only helper (no match rule): long rotate right of $dst using
       // the D1 /1 encoding with wide REX; takes no count operand.
       // Flags are killed.
9595   effect(USE_DEF dst, KILL cr);
9596 
9597   format %{ "rorq    $dst" %}
9598   opcode(0xD1, 0x1); /* D1 /1 */
9599   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9600   ins_pipe(ialu_reg);
9601 %}
9602 
9603 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9604 %{
       // Expand-only helper (no match rule): long rotate right of $dst by an
       // immI8 constant (C1 /1 ib, wide form).  Flags are killed.
9605   effect(USE_DEF dst, USE shift, KILL cr);
9606 
9607   format %{ "rorq    $dst, $shift" %}
9608   opcode(0xC1, 0x1); /* C1 /1 ib */
9609   ins_encode(reg_opc_imm_wide(dst, shift));
9610   ins_pipe(ialu_reg);
9611 %}
9612 
9613 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9614 %{
       // Expand-only helper (no match rule): long rotate right of $dst by the
       // count in rcx (D3 /1, wide REX).  dst is drawn from a register class
       // that excludes rcx.  Flags are killed.
9615   effect(USE_DEF dst, USE shift, KILL cr);
9616 
9617   format %{ "rorq    $dst, $shift" %}
9618   opcode(0xD3, 0x1); /* D3 /1 */
9619   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9620   ins_pipe(ialu_reg_reg);
9621 %}
9622 // end of ROR expand
9623 
9624 // Rotate Right by one
     // Recognizes (dst >>> rshift) | (dst << lshift) on longs with rshift in
     // immI1 and lshift in immI_M1, and expands to rorL_rReg_imm1.
     // NOTE(review): immI_M1 is presumably the constant -1 -- operand class is
     // defined elsewhere; confirm.
9625 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9626 %{
9627   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9628 
9629   expand %{
9630     rorL_rReg_imm1(dst, cr);
9631   %}
9632 %}
9633 
9634 // Rotate Right by 8-bit immediate
     // Recognizes (dst >>> rshift) | (dst << lshift) on longs for two immI8
     // constants.  The predicate reads both shift constants from the OrL's
     // inputs and only accepts the pattern when their sum is 0 modulo 64
     // (& 0x3f).  Expands to rorL_rReg_imm8 using the right-shift count.
9635 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9636 %{
9637   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9638   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9639 
9640   expand %{
9641     rorL_rReg_imm8(dst, rshift, cr);
9642   %}
9643 %}
9644 
9645 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (zero - shift)) on longs, where
     // zero is an immI0 constant and the int shift count lives in rcx;
     // expands to rorL_rReg_CL.
9646 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9647 %{
9648   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9649 
9650   expand %{
9651     rorL_rReg_CL(dst, shift, cr);
9652   %}
9653 %}
9654 
9655 // Rotate Right by variable
     // Recognizes (dst >>> shift) | (dst << (64 - shift)) on longs, with 64
     // matched as the immI_64 constant, and expands it to rorL_rReg_CL.
9656 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9657 %{
9658   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9659 
9660   expand %{
9661     rorL_rReg_CL(dst, shift, cr);
9662   %}
9663 %}
9664 
9665 // Logical Instructions
9666 
9667 // Integer Logical Instructions
9668 
9669 // And Instructions
9670 // And Register with Register
     // Two-address int AND: dst = dst & src, both operands in rRegI registers.
     // The flags register is declared killed.
9671 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9672 %{
9673   match(Set dst (AndI dst src));
9674   effect(KILL cr);
9675 
9676   format %{ "andl    $dst, $src\t# int" %}
9677   opcode(0x23);
9678   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9679   ins_pipe(ialu_reg_reg);
9680 %}
9681 
9682 // And Register with Immediate 255
     // dst & 0xFF is emitted as a zero-extending byte move of dst onto itself
     // (movzbl, opcode bytes 0F B6) instead of an AND. Unlike andI_rReg_imm,
     // no rFlagsReg operand is taken and no KILL effect is declared.
     // The REX_reg_breg prefix encoder is used here, presumably because the
     // source is read as a byte register -- confirm in the enc_class.
9683 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9684 %{
9685   match(Set dst (AndI dst src));
9686 
9687   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9688   opcode(0x0F, 0xB6);
9689   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9690   ins_pipe(ialu_reg);
9691 %}
9692 
9693 // And Register with Immediate 255 and promote to long
     // Folds ConvI2L(src & 0xFF) into a single movzbl from the int register src
     // into the long register dst; src is not modified (three-operand match).
9694 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9695 %{
9696   match(Set dst (ConvI2L (AndI src mask)));
9697 
9698   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9699   opcode(0x0F, 0xB6);
9700   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9701   ins_pipe(ialu_reg);
9702 %}
9703 
9704 // And Register with Immediate 65535
     // dst & 0xFFFF is emitted as a zero-extending 16-bit move of dst onto
     // itself (movzwl, opcode bytes 0F B7). No flags effect is declared.
9705 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9706 %{
9707   match(Set dst (AndI dst src));
9708 
9709   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9710   opcode(0x0F, 0xB7);
9711   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9712   ins_pipe(ialu_reg);
9713 %}
9714 
9715 // And Register with Immediate 65535 and promote to long
     // Folds ConvI2L(src & 0xFFFF) into a single movzwl from the int register
     // src into the long register dst; src is not modified.
9716 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9717 %{
9718   match(Set dst (ConvI2L (AndI src mask)));
9719 
9720   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9721   opcode(0x0F, 0xB7);
9722   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9723   ins_pipe(ialu_reg);
9724 %}
9725 
9726 // And Register with Immediate
     // Generic int AND with any immI constant: dst = dst & src. Flags are killed.
     // The OpcSErm / Con8or32 encoder pair presumably selects between the
     // sign-extended 8-bit and the 32-bit immediate form -- confirm in the
     // enc_class definitions.
9727 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9728 %{
9729   match(Set dst (AndI dst src));
9730   effect(KILL cr);
9731 
9732   format %{ "andl    $dst, $src\t# int" %}
9733   opcode(0x81, 0x04); /* Opcode 81 /4 */
9734   ins_encode(OpcSErm(dst, src), Con8or32(src));
9735   ins_pipe(ialu_reg);
9736 %}
9737 
9738 // And Register with Memory
     // Folds the LoadI into the AND: dst = dst & [src]. Flags are killed.
     // Costed at 125.
9739 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9740 %{
9741   match(Set dst (AndI dst (LoadI src)));
9742   effect(KILL cr);
9743 
9744   ins_cost(125);
9745   format %{ "andl    $dst, $src\t# int" %}
9746   opcode(0x23);
9747   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9748   ins_pipe(ialu_reg_mem);
9749 %}
9750 
9751 // And Memory with Register
     // Read-modify-write on memory: [dst] = [dst] & src. The match requires the
     // StoreI and the LoadI to use the same memory operand dst.
     // Note the encoder argument order (src, dst): register first, memory second.
9752 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9753 %{
9754   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9755   effect(KILL cr);
9756 
9757   ins_cost(150);
9758   format %{ "andl    $dst, $src\t# int" %}
9759   opcode(0x21); /* Opcode 21 /r */
9760   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9761   ins_pipe(ialu_mem_reg);
9762 %}
9763 
9764 // And Memory with Immediate
     // Read-modify-write on memory with a constant: [dst] = [dst] & src.
     // The store and the load must name the same memory operand dst.
9765 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9766 %{
9767   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9768   effect(KILL cr);
9769 
9770   ins_cost(125);
9771   format %{ "andl    $dst, $src\t# int" %}
9772   opcode(0x81, 0x4); /* Opcode 81 /4 id */
     // The secondary opcode (0x4, the /4 digit) is passed to RM_opc_mem together
     // with the memory operand; OpcSE / Con8or32 presumably pick the 8-bit or
     // 32-bit immediate form -- confirm in the enc_class definitions.
9773   ins_encode(REX_mem(dst), OpcSE(src),
9774              RM_opc_mem(secondary, dst), Con8or32(src));
9775   ins_pipe(ialu_mem_imm);
9776 %}
9777 
9778 // Or Instructions
9779 // Or Register with Register
     // Two-address int OR: dst = dst | src, both operands in rRegI registers.
     // The flags register is declared killed.
9780 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9781 %{
9782   match(Set dst (OrI dst src));
9783   effect(KILL cr);
9784 
9785   format %{ "orl     $dst, $src\t# int" %}
9786   opcode(0x0B);
9787   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9788   ins_pipe(ialu_reg_reg);
9789 %}
9790 
9791 // Or Register with Immediate
     // Int OR with any immI constant: dst = dst | src. Flags are killed.
     // Same encoder pair as andI_rReg_imm, with secondary opcode 0x01 (/1).
9792 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9793 %{
9794   match(Set dst (OrI dst src));
9795   effect(KILL cr);
9796 
9797   format %{ "orl     $dst, $src\t# int" %}
9798   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9799   ins_encode(OpcSErm(dst, src), Con8or32(src));
9800   ins_pipe(ialu_reg);
9801 %}
9802 
9803 // Or Register with Memory
     // Folds the LoadI into the OR: dst = dst | [src]. Flags are killed.
9804 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9805 %{
9806   match(Set dst (OrI dst (LoadI src)));
9807   effect(KILL cr);
9808 
9809   ins_cost(125);
9810   format %{ "orl     $dst, $src\t# int" %}
9811   opcode(0x0B);
9812   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9813   ins_pipe(ialu_reg_mem);
9814 %}
9815 
9816 // Or Memory with Register
     // Read-modify-write on memory: [dst] = [dst] | src. The StoreI and the LoadI
     // must use the same memory operand. Encoder arguments are (src, dst):
     // register first, memory second.
9817 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9818 %{
9819   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9820   effect(KILL cr);
9821 
9822   ins_cost(150);
9823   format %{ "orl     $dst, $src\t# int" %}
9824   opcode(0x09); /* Opcode 09 /r */
9825   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9826   ins_pipe(ialu_mem_reg);
9827 %}
9828 
9829 // Or Memory with Immediate
     // Read-modify-write on memory with a constant: [dst] = [dst] | src.
     // Same encoder sequence as andI_mem_imm, with secondary opcode 0x1 (/1).
9830 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9831 %{
9832   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9833   effect(KILL cr);
9834 
9835   ins_cost(125);
9836   format %{ "orl     $dst, $src\t# int" %}
9837   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9838   ins_encode(REX_mem(dst), OpcSE(src),
9839              RM_opc_mem(secondary, dst), Con8or32(src));
9840   ins_pipe(ialu_mem_imm);
9841 %}
9842 
9843 // Xor Instructions
9844 // Xor Register with Register
     // Two-address int XOR: dst = dst ^ src, both operands in rRegI registers.
     // The flags register is declared killed.
9845 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9846 %{
9847   match(Set dst (XorI dst src));
9848   effect(KILL cr);
9849 
9850   format %{ "xorl    $dst, $src\t# int" %}
9851   opcode(0x33);
9852   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9853   ins_pipe(ialu_reg_reg);
9854 %}
9855 
9856 // Xor Register with Immediate -1
     // dst ^ -1 is a bitwise complement, so it is emitted as a single notl on
     // dst. No rFlagsReg operand is taken and no KILL effect is declared.
     // The format now prints the mnemonic the encoder actually emits (notl),
     // in line with the long variant, which prints notq.
9857 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9858   match(Set dst (XorI dst imm));
9859 
9860   format %{ "notl    $dst" %}
9861   ins_encode %{
9862      __ notl($dst$$Register);
9863   %}
9864   ins_pipe(ialu_reg);
9865 %}
9866 
9867 // Xor Register with Immediate
     // Int XOR with any immI constant: dst = dst ^ src. Flags are killed.
     // Same encoder pair as andI_rReg_imm, with secondary opcode 0x06 (/6).
9868 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9869 %{
9870   match(Set dst (XorI dst src));
9871   effect(KILL cr);
9872 
9873   format %{ "xorl    $dst, $src\t# int" %}
9874   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9875   ins_encode(OpcSErm(dst, src), Con8or32(src));
9876   ins_pipe(ialu_reg);
9877 %}
9878 
9879 // Xor Register with Memory
     // Folds the LoadI into the XOR: dst = dst ^ [src]. Flags are killed.
9880 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9881 %{
9882   match(Set dst (XorI dst (LoadI src)));
9883   effect(KILL cr);
9884 
9885   ins_cost(125);
9886   format %{ "xorl    $dst, $src\t# int" %}
9887   opcode(0x33);
9888   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9889   ins_pipe(ialu_reg_mem);
9890 %}
9891 
9892 // Xor Memory with Register
     // Read-modify-write on memory: [dst] = [dst] ^ src. The StoreI and the LoadI
     // must use the same memory operand. Encoder arguments are (src, dst):
     // register first, memory second.
9893 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9894 %{
9895   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9896   effect(KILL cr);
9897 
9898   ins_cost(150);
9899   format %{ "xorl    $dst, $src\t# int" %}
9900   opcode(0x31); /* Opcode 31 /r */
9901   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9902   ins_pipe(ialu_mem_reg);
9903 %}
9904 
9905 // Xor Memory with Immediate
     // Read-modify-write on memory with a constant: [dst] = [dst] ^ src.
     // Same encoder sequence as andI_mem_imm, with secondary opcode 0x6 (/6).
9906 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9907 %{
9908   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9909   effect(KILL cr);
9910 
9911   ins_cost(125);
9912   format %{ "xorl    $dst, $src\t# int" %}
9913   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9914   ins_encode(REX_mem(dst), OpcSE(src),
9915              RM_opc_mem(secondary, dst), Con8or32(src));
9916   ins_pipe(ialu_mem_imm);
9917 %}
9918 
9919 
9920 // Long Logical Instructions
9921 
9922 // And Instructions
9923 // And Register with Register
     // Two-address long AND: dst = dst & src. Same primary opcode (0x23) as the
     // int form, but behind the wide REX encoder (REX_reg_reg_wide).
9924 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9925 %{
9926   match(Set dst (AndL dst src));
9927   effect(KILL cr);
9928 
9929   format %{ "andq    $dst, $src\t# long" %}
9930   opcode(0x23);
9931   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9932   ins_pipe(ialu_reg_reg);
9933 %}
9934 
9935 // And Register with Immediate 255
     // Long dst & 0xFF is emitted as a zero-extending byte move of dst onto
     // itself (movzbq, opcode bytes 0F B6 behind the wide REX encoder).
     // No flags effect is declared.
9936 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9937 %{
9938   match(Set dst (AndL dst src));
9939 
9940   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9941   opcode(0x0F, 0xB6);
9942   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9943   ins_pipe(ialu_reg);
9944 %}
9945 
9946 // And Register with Immediate 65535
     // Long dst & 0xFFFF is emitted as a zero-extending 16-bit move of dst onto
     // itself (movzwq, opcode bytes 0F B7 behind the wide REX encoder).
     // No flags effect is declared.
9947 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9948 %{
9949   match(Set dst (AndL dst src));
9950 
9951   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9952   opcode(0x0F, 0xB7);
9953   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9954   ins_pipe(ialu_reg);
9955 %}
9956 
9957 // And Register with Immediate
     // Long AND with a constant: dst = dst & src. The constant operand is of
     // class immL32, not a general long immediate. Flags are killed.
9958 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9959 %{
9960   match(Set dst (AndL dst src));
9961   effect(KILL cr);
9962 
9963   format %{ "andq    $dst, $src\t# long" %}
9964   opcode(0x81, 0x04); /* Opcode 81 /4 */
9965   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9966   ins_pipe(ialu_reg);
9967 %}
9968 
9969 // And Register with Memory
     // Folds the LoadL into the AND: dst = dst & [src]. Flags are killed.
9970 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9971 %{
9972   match(Set dst (AndL dst (LoadL src)));
9973   effect(KILL cr);
9974 
9975   ins_cost(125);
9976   format %{ "andq    $dst, $src\t# long" %}
9977   opcode(0x23);
9978   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9979   ins_pipe(ialu_reg_mem);
9980 %}
9981 
9982 // And Memory with Register
     // Read-modify-write on memory: [dst] = [dst] & src (long). The StoreL and the
     // LoadL must use the same memory operand. Encoder arguments are (src, dst):
     // register first, memory second.
9983 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9984 %{
9985   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9986   effect(KILL cr);
9987 
9988   ins_cost(150);
9989   format %{ "andq    $dst, $src\t# long" %}
9990   opcode(0x21); /* Opcode 21 /r */
9991   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9992   ins_pipe(ialu_mem_reg);
9993 %}
9994 
9995 // And Memory with Immediate
     // Read-modify-write on memory with an immL32 constant: [dst] = [dst] & src.
     // Same encoder sequence as the int form, but with the wide REX_mem encoder.
9996 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9997 %{
9998   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9999   effect(KILL cr);
10000 
10001   ins_cost(125);
10002   format %{ "andq    $dst, $src\t# long" %}
10003   opcode(0x81, 0x4); /* Opcode 81 /4 id */
10004   ins_encode(REX_mem_wide(dst), OpcSE(src),
10005              RM_opc_mem(secondary, dst), Con8or32(src));
10006   ins_pipe(ialu_mem_imm);
10007 %}
10008 
10009 // Or Instructions
10010 // Or Register with Register
      // Two-address long OR: dst = dst | src, both operands in rRegL registers.
      // The flags register is declared killed.
10011 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10012 %{
10013   match(Set dst (OrL dst src));
10014   effect(KILL cr);
10015 
10016   format %{ "orq     $dst, $src\t# long" %}
10017   opcode(0x0B);
10018   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10019   ins_pipe(ialu_reg_reg);
10020 %}
10021 
10022 // Use any_RegP to match R15 (TLS register) without spilling.
      // Long OR whose second input is a pointer reinterpreted as an integer
      // (CastP2X): dst = dst | (long)src. The emitted bytes are the same as for
      // orL_rReg; only the operand class of src differs.
10023 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10024   match(Set dst (OrL dst (CastP2X src)));
10025   effect(KILL cr);
10026 
10027   format %{ "orq     $dst, $src\t# long" %}
10028   opcode(0x0B);
10029   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10030   ins_pipe(ialu_reg_reg);
10031 %}
10032 
10033 
10034 // Or Register with Immediate
      // Long OR with an immL32 constant: dst = dst | src. Flags are killed.
10035 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10036 %{
10037   match(Set dst (OrL dst src));
10038   effect(KILL cr);
10039 
10040   format %{ "orq     $dst, $src\t# long" %}
10041   opcode(0x81, 0x01); /* Opcode 81 /1 id */
10042   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10043   ins_pipe(ialu_reg);
10044 %}
10045 
10046 // Or Register with Memory
      // Folds the LoadL into the OR: dst = dst | [src]. Flags are killed.
10047 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10048 %{
10049   match(Set dst (OrL dst (LoadL src)));
10050   effect(KILL cr);
10051 
10052   ins_cost(125);
10053   format %{ "orq     $dst, $src\t# long" %}
10054   opcode(0x0B);
10055   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10056   ins_pipe(ialu_reg_mem);
10057 %}
10058 
10059 // Or Memory with Register
      // Read-modify-write on memory: [dst] = [dst] | src (long). The StoreL and
      // the LoadL must use the same memory operand. Encoder arguments are
      // (src, dst): register first, memory second.
10060 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10061 %{
10062   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10063   effect(KILL cr);
10064 
10065   ins_cost(150);
10066   format %{ "orq     $dst, $src\t# long" %}
10067   opcode(0x09); /* Opcode 09 /r */
10068   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10069   ins_pipe(ialu_mem_reg);
10070 %}
10071 
10072 // Or Memory with Immediate
      // Read-modify-write on memory with an immL32 constant: [dst] = [dst] | src.
10073 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10074 %{
10075   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10076   effect(KILL cr);
10077 
10078   ins_cost(125);
10079   format %{ "orq     $dst, $src\t# long" %}
10080   opcode(0x81, 0x1); /* Opcode 81 /1 id */
10081   ins_encode(REX_mem_wide(dst), OpcSE(src),
10082              RM_opc_mem(secondary, dst), Con8or32(src));
10083   ins_pipe(ialu_mem_imm);
10084 %}
10085 
10086 // Xor Instructions
10087 // Xor Register with Register
      // Two-address long XOR: dst = dst ^ src, both operands in rRegL registers.
      // The flags register is declared killed.
10088 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10089 %{
10090   match(Set dst (XorL dst src));
10091   effect(KILL cr);
10092 
10093   format %{ "xorq    $dst, $src\t# long" %}
10094   opcode(0x33);
10095   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
10096   ins_pipe(ialu_reg_reg);
10097 %}
10098 
10099 // Xor Register with Immediate -1
      // Long dst ^ -1 is a bitwise complement, so it is emitted as a single notq
      // on dst through the MacroAssembler. No rFlagsReg operand is taken and no
      // KILL effect is declared.
10100 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10101   match(Set dst (XorL dst imm));  
10102 
10103   format %{ "notq   $dst" %}  
10104   ins_encode %{
10105      __ notq($dst$$Register);
10106   %}
10107   ins_pipe(ialu_reg);
10108 %}
10109 
10110 // Xor Register with Immediate
      // Long XOR with an immL32 constant: dst = dst ^ src. Flags are killed.
10111 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10112 %{
10113   match(Set dst (XorL dst src));
10114   effect(KILL cr);
10115 
10116   format %{ "xorq    $dst, $src\t# long" %}
10117   opcode(0x81, 0x06); /* Opcode 81 /6 id */
10118   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
10119   ins_pipe(ialu_reg);
10120 %}
10121 
10122 // Xor Register with Memory
      // Folds the LoadL into the XOR: dst = dst ^ [src]. Flags are killed.
10123 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10124 %{
10125   match(Set dst (XorL dst (LoadL src)));
10126   effect(KILL cr);
10127 
10128   ins_cost(125);
10129   format %{ "xorq    $dst, $src\t# long" %}
10130   opcode(0x33);
10131   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
10132   ins_pipe(ialu_reg_mem);
10133 %}
10134 
10135 // Xor Memory with Register
      // Read-modify-write on memory: [dst] = [dst] ^ src (long). The StoreL and
      // the LoadL must use the same memory operand. Encoder arguments are
      // (src, dst): register first, memory second.
10136 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10137 %{
10138   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10139   effect(KILL cr);
10140 
10141   ins_cost(150);
10142   format %{ "xorq    $dst, $src\t# long" %}
10143   opcode(0x31); /* Opcode 31 /r */
10144   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
10145   ins_pipe(ialu_mem_reg);
10146 %}
10147 
10148 // Xor Memory with Immediate
      // Read-modify-write on memory with an immL32 constant: [dst] = [dst] ^ src.
10149 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10150 %{
10151   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10152   effect(KILL cr);
10153 
10154   ins_cost(125);
10155   format %{ "xorq    $dst, $src\t# long" %}
10156   opcode(0x81, 0x6); /* Opcode 81 /6 id */
10157   ins_encode(REX_mem_wide(dst), OpcSE(src),
10158              RM_opc_mem(secondary, dst), Con8or32(src));
10159   ins_pipe(ialu_mem_imm);
10160 %}
10161 
10162 // Convert Int to Boolean
      // Conv2B on an int: three instructions, as listed in the format.
      // testl src,src sets the flags, setnz writes the low byte of dst, and
      // movzbl widens that byte to a full int. Flags are killed by the testl.
10163 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10164 %{
10165   match(Set dst (Conv2B src));
10166   effect(KILL cr);
10167 
10168   format %{ "testl   $src, $src\t# ci2b\n\t"
10169             "setnz   $dst\n\t"
10170             "movzbl  $dst, $dst" %}
      // No opcode() clause: each opcode byte is given inline to the encoders.
10171   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
10172              setNZ_reg(dst),
10173              REX_reg_breg(dst, dst), // movzbl
10174              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10175   ins_pipe(pipe_slow); // XXX
10176 %}
10177 
10178 // Convert Pointer to Boolean
      // Conv2B on a pointer: same sequence as convI2B, except that the test uses
      // the wide REX encoder (testq) because src is an rRegP.
10179 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10180 %{
10181   match(Set dst (Conv2B src));
10182   effect(KILL cr);
10183 
10184   format %{ "testq   $src, $src\t# cp2b\n\t"
10185             "setnz   $dst\n\t"
10186             "movzbl  $dst, $dst" %}
10187   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
10188              setNZ_reg(dst),
10189              REX_reg_breg(dst, dst), // movzbl
10190              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
10191   ins_pipe(pipe_slow); // XXX
10192 %}
10193 
10194 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10195 %{
      // General CmpLTMask of two int registers into a third.
      // Per the format: compare p with q, set the low byte of dst on "less than",
      // zero-extend it to 0/1, then negate to obtain 0 or -1 (all ones).
10196   match(Set dst (CmpLTMask p q));
10197   effect(KILL cr);
10198 
10199   ins_cost(400); // XXX
10200   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10201             "setlt   $dst\n\t"
10202             "movzbl  $dst, $dst\n\t"
10203             "negl    $dst" %}
10204   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
10205              setLT_reg(dst),
10206              REX_reg_breg(dst, dst), // movzbl
10207              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10208              neg_reg(dst));
10209   ins_pipe(pipe_slow);
10210 %}
10211 
10212 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10213 %{
      // Special case of CmpLTMask against the constant zero, computed in place:
      // an arithmetic right shift of dst by 31 (0x1F) replicates the sign bit
      // across the register. Much cheaper (cost 100) than the general form.
10214   match(Set dst (CmpLTMask dst zero));
10215   effect(KILL cr);
10216 
10217   ins_cost(100); // XXX
10218   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10219   opcode(0xC1, 0x7);  /* C1 /7 ib */
10220   ins_encode(reg_opc_imm(dst, 0x1F));
10221   ins_pipe(ialu_reg);
10222 %}
10223 
10224 
10225 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y,
10226                          rRegI tmp,
10227                          rFlagsReg cr)
10228 %{
      // Fused form of p = ((p < q) ? y : 0) + (p - q), written in the ideal graph
      // as AddI(AndI(CmpLTMask p q, y), SubI p q). p is overwritten with the result.
      // tmp is a scratch register (TEMP) and the flags are killed.
      // Per the format, the borrow from the subtract is turned into a 0/-1 mask
      // with sbbl tmp,tmp; the enc_cmpLTP encoder is expected to emit that sequence.
10229   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10230   effect(TEMP tmp, KILL cr);
10231 
10232   ins_cost(400); // XXX
10233   format %{ "subl    $p, $q\t# cadd_cmpLTMask1\n\t"
10234             "sbbl    $tmp, $tmp\n\t"
10235             "andl    $tmp, $y\n\t"
10236             "addl    $p, $tmp" %}
10237   ins_encode(enc_cmpLTP(p, q, y, tmp));
10238   ins_pipe(pipe_cmplt);
10239 %}
10240 
10241 /* If I enable this, I encourage spilling in the inner loop of compress.
10242 instruct cadd_cmpLTMask_mem( rRegI p, rRegI q, memory y, rRegI tmp, rFlagsReg cr )
10243 %{
10244   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
10245   effect( TEMP tmp, KILL cr );
10246   ins_cost(400);
10247 
10248   format %{ "SUB    $p,$q\n\t"
10249             "SBB    RCX,RCX\n\t"
10250             "AND    RCX,$y\n\t"
10251             "ADD    $p,RCX" %}
10252   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
10253 %}
10254 */
10255 
10256 //---------- FP Instructions------------------------------------------------
10257 
10258 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10259 %{
      // Float compare of two registers producing flags in an rFlagsRegU.
      // After the ucomiss, the cmpfp_fixup encoder is appended. Per the format it
      // is skipped when parity is clear and otherwise rewrites the pushed flags
      // image at [rsp] with mask 0xffffff2b (the NaN case).
10260   match(Set cr (CmpF src1 src2));
10261 
10262   ins_cost(145);
10263   format %{ "ucomiss $src1, $src2\n\t"
10264             "jnp,s   exit\n\t"
10265             "pushfq\t# saw NaN, set CF\n\t"
10266             "andq    [rsp], #0xffffff2b\n\t"
10267             "popfq\n"
10268     "exit:   nop\t# avoid branch to branch" %}
10269   opcode(0x0F, 0x2E);
10270   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10271              cmpfp_fixup);
10272   ins_pipe(pipe_slow);
10273 %}
10274 
10275 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
      // Float compare of two registers producing flags in an rFlagsRegUCF.
      // Only the bare ucomiss is emitted; there is no NaN fixup sequence.
10276   match(Set cr (CmpF src1 src2));
10277 
      // Cost 100, the same as every other *CF compare in this file
      // (cmpF_cc_memCF, cmpF_cc_immCF, cmpD_cc_reg_CF, ...). The previous value
      // of 145 was the cost of the longer fixup variant cmpF_cc_reg.
10278   ins_cost(100);
10279   format %{ "ucomiss $src1, $src2" %}
10280   ins_encode %{
10281     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10282   %}
10283   ins_pipe(pipe_slow);
10284 %}
10285 
10286 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10287 %{
      // Float compare of a register with a value loaded from memory (the LoadF is
      // folded in), producing flags in an rFlagsRegU. The cmpfp_fixup encoder is
      // appended after the ucomiss, as in the register-register form.
10288   match(Set cr (CmpF src1 (LoadF src2)));
10289 
10290   ins_cost(145);
10291   format %{ "ucomiss $src1, $src2\n\t"
10292             "jnp,s   exit\n\t"
10293             "pushfq\t# saw NaN, set CF\n\t"
10294             "andq    [rsp], #0xffffff2b\n\t"
10295             "popfq\n"
10296     "exit:   nop\t# avoid branch to branch" %}
10297   opcode(0x0F, 0x2E);
10298   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10299              cmpfp_fixup);
10300   ins_pipe(pipe_slow);
10301 %}
10302 
10303 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
      // Float compare of a register with memory producing flags in an
      // rFlagsRegUCF: the bare ucomiss (0F 2E) only, no fixup encoder.
10304   match(Set cr (CmpF src1 (LoadF src2)));
10305 
10306   ins_cost(100);
10307   format %{ "ucomiss $src1, $src2" %}
10308   opcode(0x0F, 0x2E);
10309   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
10310   ins_pipe(pipe_slow);
10311 %}
10312 
10313 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
      // Float compare of a register against a constant-table value, producing
      // flags in an rFlagsRegU. When ucomiss reports an unordered result (parity
      // set), the flags are round-tripped through the stack and masked so that
      // PF, AF, ZF and SF are cleared while CF is kept (mask low byte 0x2b).
10314   match(Set cr (CmpF src con));
10315 
10316   ins_cost(145);
10317   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10318             "jnp,s   exit\n\t"
10319             "pushfq\t# saw NaN, set CF\n\t"
10320             "andq    [rsp], #0xffffff2b\n\t"
10321             "popfq\n"
10322     "exit:   nop\t# avoid branch to branch" %}
10323   ins_encode %{
10324     Label L_exit;
10325     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10326     __ jcc(Assembler::noParity, L_exit);
10327     __ pushf();
      // Mask the flags word that pushf just stored at [rsp]. The operand must be
      // the memory at rsp, not the rsp register itself: and-ing the register
      // would corrupt the stack pointer and leave the saved flags unchanged.
10328     __ andq(Address(rsp, 0), 0xffffff2b);
10329     __ popf();
10330     __ bind(L_exit);
10331     __ nop();
10332   %}
10333   ins_pipe(pipe_slow);
10334 %}
10335 
10336 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
      // Float compare of a register against a constant-table value producing
      // flags in an rFlagsRegUCF: a single ucomiss, no fixup sequence.
10337   match(Set cr (CmpF src con));
10338   ins_cost(100);
10339   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10340   ins_encode %{
10341     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10342   %}
10343   ins_pipe(pipe_slow);
10344 %}
10345 
10346 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10347 %{
      // Double compare of two registers producing flags in an rFlagsRegU.
      // Same structure as cmpF_cc_reg; the encoding differs by the leading 0x66
      // byte, which is emitted (OpcP) before the REX prefix.
10348   match(Set cr (CmpD src1 src2));
10349 
10350   ins_cost(145);
10351   format %{ "ucomisd $src1, $src2\n\t"
10352             "jnp,s   exit\n\t"
10353             "pushfq\t# saw NaN, set CF\n\t"
10354             "andq    [rsp], #0xffffff2b\n\t"
10355             "popfq\n"
10356     "exit:   nop\t# avoid branch to branch" %}
10357   opcode(0x66, 0x0F, 0x2E);
10358   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10359              cmpfp_fixup);
10360   ins_pipe(pipe_slow);
10361 %}
10362 
10363 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
      // Double compare of two registers producing flags in an rFlagsRegUCF.
      // Only the bare ucomisd is emitted; there is no NaN fixup sequence.
10364   match(Set cr (CmpD src1 src2));
10365 
10366   ins_cost(100);
      // The format previously ended in a stray " test", which showed up in the
      // printed disassembly; it now prints just the instruction.
10367   format %{ "ucomisd $src1, $src2" %}
10368   ins_encode %{
10369     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10370   %}
10371   ins_pipe(pipe_slow);
10372 %}
10373 
10374 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10375 %{
      // Double compare of a register with a value loaded from memory (the LoadD
      // is folded in), producing flags in an rFlagsRegU. The cmpfp_fixup encoder
      // is appended after the ucomisd.
10376   match(Set cr (CmpD src1 (LoadD src2)));
10377 
10378   ins_cost(145);
10379   format %{ "ucomisd $src1, $src2\n\t"
10380             "jnp,s   exit\n\t"
10381             "pushfq\t# saw NaN, set CF\n\t"
10382             "andq    [rsp], #0xffffff2b\n\t"
10383             "popfq\n"
10384     "exit:   nop\t# avoid branch to branch" %}
10385   opcode(0x66, 0x0F, 0x2E);
10386   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10387              cmpfp_fixup);
10388   ins_pipe(pipe_slow);
10389 %}
10390 
10391 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
      // Double compare of a register with memory producing flags in an
      // rFlagsRegUCF: the bare ucomisd (66 0F 2E) only, no fixup encoder.
10392   match(Set cr (CmpD src1 (LoadD src2)));
10393 
10394   ins_cost(100);
10395   format %{ "ucomisd $src1, $src2" %}
10396   opcode(0x66, 0x0F, 0x2E);
10397   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
10398   ins_pipe(pipe_slow);
10399 %}
10400 
10401 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
      // Double compare of a register against a constant-table value, producing
      // flags in an rFlagsRegU. When ucomisd reports an unordered result (parity
      // set), the flags are round-tripped through the stack and masked so that
      // PF, AF, ZF and SF are cleared while CF is kept (mask low byte 0x2b).
10402   match(Set cr (CmpD src con));
10403 
10404   ins_cost(145);
10405   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10406             "jnp,s   exit\n\t"
10407             "pushfq\t# saw NaN, set CF\n\t"
10408             "andq    [rsp], #0xffffff2b\n\t"
10409             "popfq\n"
10410     "exit:   nop\t# avoid branch to branch" %}
10411   ins_encode %{
10412     Label L_exit;
10413     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10414     __ jcc(Assembler::noParity, L_exit);
10415     __ pushf();
      // Mask the flags word that pushf just stored at [rsp]. The operand must be
      // the memory at rsp, not the rsp register itself: and-ing the register
      // would corrupt the stack pointer and leave the saved flags unchanged.
10416     __ andq(Address(rsp, 0), 0xffffff2b);
10417     __ popf();
10418     __ bind(L_exit);
10419     __ nop();
10420   %}
10421   ins_pipe(pipe_slow);
10422 %}
10423 
10424 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
      // Double compare of a register against a constant-table value producing
      // flags in an rFlagsRegUCF: a single ucomisd, no fixup sequence.
10425   match(Set cr (CmpD src con));
10426   ins_cost(100);
10427   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10428   ins_encode %{
10429     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10430   %}
10431   ins_pipe(pipe_slow);
10432 %}
10433 
10434 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of two registers into an int register.
      // Per the format: dst is preset to -1 and kept when the compare is
      // unordered (jp) or below (jb); otherwise setne/movzbl produce 0 or 1.
      // The tail after the ucomiss is emitted by the cmpfp3 encoder.
10435 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10436 %{
10437   match(Set dst (CmpF3 src1 src2));
10438   effect(KILL cr);
10439 
10440   ins_cost(275);
10441   format %{ "ucomiss $src1, $src2\n\t"
10442             "movl    $dst, #-1\n\t"
10443             "jp,s    done\n\t"
10444             "jb,s    done\n\t"
10445             "setne   $dst\n\t"
10446             "movzbl  $dst, $dst\n"
10447     "done:" %}
10448 
10449   opcode(0x0F, 0x2E);
10450   ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
10451              cmpfp3(dst));
10452   ins_pipe(pipe_slow);
10453 %}
10454 
10455 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of a register with a value loaded from
      // memory; the LoadF is folded into the ucomiss. The -1/0/1 tail is the same
      // cmpfp3 encoder as in the register-register form.
10456 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10457 %{
10458   match(Set dst (CmpF3 src1 (LoadF src2)));
10459   effect(KILL cr);
10460 
10461   ins_cost(275);
10462   format %{ "ucomiss $src1, $src2\n\t"
10463             "movl    $dst, #-1\n\t"
10464             "jp,s    done\n\t"
10465             "jb,s    done\n\t"
10466             "setne   $dst\n\t"
10467             "movzbl  $dst, $dst\n"
10468     "done:" %}
10469 
10470   opcode(0x0F, 0x2E);
10471   ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
10472              cmpfp3(dst));
10473   ins_pipe(pipe_slow);
10474 %}
10475 
10476 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of a register against a constant-table
      // value. The result register receives -1 for "unordered or below",
      // 0 for "equal" and 1 otherwise. Flags are killed.
10477 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10478   match(Set dst (CmpF3 src con));
10479   effect(KILL cr);
10480 
10481   ins_cost(275);
10482   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10483             "movl    $dst, #-1\n\t"
10484             "jp,s    done\n\t"
10485             "jb,s    done\n\t"
10486             "setne   $dst\n\t"
10487             "movzbl  $dst, $dst\n"
10488     "done:" %}
10489   ins_encode %{
10490     Register result = $dst$$Register;
10491     Label finished;
10492     __ ucomiss($src$$XMMRegister, $constantaddress($con));
      // Preset the answer for the two early exits; the branches below still
      // test the flags produced by the ucomiss.
10493     __ movl(result, -1);
10494     __ jcc(Assembler::parity, finished);   // unordered: keep -1
10495     __ jcc(Assembler::below, finished);    // src < con: keep -1
      // Equal or above: low byte becomes 0 or 1, then widen to a full int.
10496     __ setb(Assembler::notEqual, result);
10497     __ movzbl(result, result);
10498     __ bind(finished);
10499   %}
10500   ins_pipe(pipe_slow);
10501 %}
10502 
10503 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of two registers into an int register.
      // Same tail as the float form (cmpfp3 encoder); the compare itself is
      // ucomisd, with the 0x66 byte emitted before the REX prefix.
10504 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10505 %{
10506   match(Set dst (CmpD3 src1 src2));
10507   effect(KILL cr);
10508 
10509   ins_cost(275);
10510   format %{ "ucomisd $src1, $src2\n\t"
10511             "movl    $dst, #-1\n\t"
10512             "jp,s    done\n\t"
10513             "jb,s    done\n\t"
10514             "setne   $dst\n\t"
10515             "movzbl  $dst, $dst\n"
10516     "done:" %}
10517 
10518   opcode(0x66, 0x0F, 0x2E);
10519   ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
10520              cmpfp3(dst));
10521   ins_pipe(pipe_slow);
10522 %}
10523 
10524 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of a register with a value loaded from
      // memory; the LoadD is folded into the ucomisd. The -1/0/1 tail is the
      // cmpfp3 encoder.
10525 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10526 %{
10527   match(Set dst (CmpD3 src1 (LoadD src2)));
10528   effect(KILL cr);
10529 
10530   ins_cost(275);
10531   format %{ "ucomisd $src1, $src2\n\t"
10532             "movl    $dst, #-1\n\t"
10533             "jp,s    done\n\t"
10534             "jb,s    done\n\t"
10535             "setne   $dst\n\t"
10536             "movzbl  $dst, $dst\n"
10537     "done:" %}
10538 
10539   opcode(0x66, 0x0F, 0x2E);
10540   ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
10541              cmpfp3(dst));
10542   ins_pipe(pipe_slow);
10543 %}
10544 
10545 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of a register against a constant-table
      // value. The result register receives -1 for "unordered or below",
      // 0 for "equal" and 1 otherwise. Flags are killed.
10546 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10547   match(Set dst (CmpD3 src con));
10548   effect(KILL cr);
10549 
10550   ins_cost(275);
10551   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10552             "movl    $dst, #-1\n\t"
10553             "jp,s    done\n\t"
10554             "jb,s    done\n\t"
10555             "setne   $dst\n\t"
10556             "movzbl  $dst, $dst\n"
10557     "done:" %}
10558   ins_encode %{
10559     Label finished;
10560     Register result = $dst$$Register;
10561     __ ucomisd($src$$XMMRegister, $constantaddress($con));
      // Preset the answer for the two early exits; the branches below still
      // test the flags produced by the ucomisd.
10562     __ movl(result, -1);
10563     __ jcc(Assembler::parity, finished);   // unordered: keep -1
10564     __ jcc(Assembler::below, finished);    // src < con: keep -1
      // Equal or above: low byte becomes 0 or 1, then widen to a full int.
10565     __ setb(Assembler::notEqual, result);
10566     __ movzbl(result, result);
10567     __ bind(finished);
10568   %}
10569   ins_pipe(pipe_slow);
10570 %}
10571 
10572 instruct addF_reg(regF dst, regF src)
10573 %{
      // Scalar float add, two-address: dst = dst + src. No flags operand.
      // Encoding order matters: the 0xF3 byte (OpcP) is emitted before the REX
      // prefix, followed by 0x0F 0x58 and the operand bytes.
10574   match(Set dst (AddF dst src));
10575 
10576   format %{ "addss   $dst, $src" %}
10577   ins_cost(150); // XXX
10578   opcode(0xF3, 0x0F, 0x58);
10579   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10580   ins_pipe(pipe_slow);
10581 %}
10582 
10583 instruct addF_mem(regF dst, memory src)
10584 %{
      // Scalar float add with the LoadF folded in: dst = dst + [src].
      // Same opcode bytes as addF_reg, with the memory-operand encoders.
10585   match(Set dst (AddF dst (LoadF src)));
10586 
10587   format %{ "addss   $dst, $src" %}
10588   ins_cost(150); // XXX
10589   opcode(0xF3, 0x0F, 0x58);
10590   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10591   ins_pipe(pipe_slow);
10592 %}
10593 
10594 instruct addF_imm(regF dst, immF con) %{
      // Scalar float add of a constant: dst = dst + con. The constant is not an
      // instruction immediate; it is read through $constantaddress($con), i.e.
      // from the constant table, as the format text states.
10595   match(Set dst (AddF dst con));
10596   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10597   ins_cost(150); // XXX
10598   ins_encode %{
10599     __ addss($dst$$XMMRegister, $constantaddress($con));
10600   %}
10601   ins_pipe(pipe_slow);
10602 %}
10603 
10604 instruct addD_reg(regD dst, regD src)
10605 %{
      // Scalar double add, two-address: dst = dst + src. No flags operand.
      // Same shape as addF_reg, with 0xF2 as the leading byte instead of 0xF3.
10606   match(Set dst (AddD dst src));
10607 
10608   format %{ "addsd   $dst, $src" %}
10609   ins_cost(150); // XXX
10610   opcode(0xF2, 0x0F, 0x58);
10611   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10612   ins_pipe(pipe_slow);
10613 %}
10614 
10615 instruct addD_mem(regD dst, memory src)
10616 %{
      // Scalar double add with the LoadD folded in: dst = dst + [src].
10617   match(Set dst (AddD dst (LoadD src)));
10618 
10619   format %{ "addsd   $dst, $src" %}
10620   ins_cost(150); // XXX
10621   opcode(0xF2, 0x0F, 0x58);
10622   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10623   ins_pipe(pipe_slow);
10624 %}
10625 
10626 instruct addD_imm(regD dst, immD con) %{
      // Scalar double add of a constant: dst = dst + con, with the constant read
      // from the constant table through $constantaddress($con).
10627   match(Set dst (AddD dst con));
10628   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10629   ins_cost(150); // XXX
10630   ins_encode %{
10631     __ addsd($dst$$XMMRegister, $constantaddress($con));
10632   %}
10633   ins_pipe(pipe_slow);
10634 %}
10635 
// Float subtract, register form: matches SubF computing dst = dst - src with
// both operands in regF. Opcode bytes F3 0F 5C, printed as subss.
10636 instruct subF_reg(regF dst, regF src)
10637 %{
10638   match(Set dst (SubF dst src));
10639 
10640   format %{ "subss   $dst, $src" %}
10641   ins_cost(150); // XXX
10642   opcode(0xF3, 0x0F, 0x5C);
10643   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10644   ins_pipe(pipe_slow);
10645 %}
10646 
// Float subtract, memory form: matches SubF whose subtrahend is a LoadF, folded
// into the instruction as a memory operand (same F3 0F 5C opcode bytes).
10647 instruct subF_mem(regF dst, memory src)
10648 %{
10649   match(Set dst (SubF dst (LoadF src)));
10650 
10651   format %{ "subss   $dst, $src" %}
10652   ins_cost(150); // XXX
10653   opcode(0xF3, 0x0F, 0x5C);
10654   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10655   ins_pipe(pipe_slow);
10656 %}
10657 
// Float subtract, constant form: matches SubF with an immF subtrahend, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10658 instruct subF_imm(regF dst, immF con) %{
10659   match(Set dst (SubF dst con));
10660   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10661   ins_cost(150); // XXX
10662   ins_encode %{
10663     __ subss($dst$$XMMRegister, $constantaddress($con));
10664   %}
10665   ins_pipe(pipe_slow);
10666 %}
10667 
// Double subtract, register form: matches SubD computing dst = dst - src with
// both operands in regD. Opcode bytes F2 0F 5C, printed as subsd.
10668 instruct subD_reg(regD dst, regD src)
10669 %{
10670   match(Set dst (SubD dst src));
10671 
10672   format %{ "subsd   $dst, $src" %}
10673   ins_cost(150); // XXX
10674   opcode(0xF2, 0x0F, 0x5C);
10675   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10676   ins_pipe(pipe_slow);
10677 %}
10678 
// Double subtract, memory form: matches SubD whose subtrahend is a LoadD, folded
// into the instruction as a memory operand (same F2 0F 5C opcode bytes).
10679 instruct subD_mem(regD dst, memory src)
10680 %{
10681   match(Set dst (SubD dst (LoadD src)));
10682 
10683   format %{ "subsd   $dst, $src" %}
10684   ins_cost(150); // XXX
10685   opcode(0xF2, 0x0F, 0x5C);
10686   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10687   ins_pipe(pipe_slow);
10688 %}
10689 
// Double subtract, constant form: matches SubD with an immD subtrahend, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10690 instruct subD_imm(regD dst, immD con) %{
10691   match(Set dst (SubD dst con));
10692   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10693   ins_cost(150); // XXX
10694   ins_encode %{
10695     __ subsd($dst$$XMMRegister, $constantaddress($con));
10696   %}
10697   ins_pipe(pipe_slow);
10698 %}
10699 
// Float multiply, register form: matches MulF with dst as left input and result,
// src in a second regF. Opcode bytes F3 0F 59, printed as mulss.
10700 instruct mulF_reg(regF dst, regF src)
10701 %{
10702   match(Set dst (MulF dst src));
10703 
10704   format %{ "mulss   $dst, $src" %}
10705   ins_cost(150); // XXX
10706   opcode(0xF3, 0x0F, 0x59);
10707   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10708   ins_pipe(pipe_slow);
10709 %}
10710 
// Float multiply, memory form: matches MulF whose right input is a LoadF, folded
// into the instruction as a memory operand (same F3 0F 59 opcode bytes).
10711 instruct mulF_mem(regF dst, memory src)
10712 %{
10713   match(Set dst (MulF dst (LoadF src)));
10714 
10715   format %{ "mulss   $dst, $src" %}
10716   ins_cost(150); // XXX
10717   opcode(0xF3, 0x0F, 0x59);
10718   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10719   ins_pipe(pipe_slow);
10720 %}
10721 
// Float multiply, constant form: matches MulF with an immF right input, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10722 instruct mulF_imm(regF dst, immF con) %{
10723   match(Set dst (MulF dst con));
10724   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10725   ins_cost(150); // XXX
10726   ins_encode %{
10727     __ mulss($dst$$XMMRegister, $constantaddress($con));
10728   %}
10729   ins_pipe(pipe_slow);
10730 %}
10731 
// Double multiply, register form: matches MulD with dst as left input and result,
// src in a second regD. Opcode bytes F2 0F 59, printed as mulsd.
10732 instruct mulD_reg(regD dst, regD src)
10733 %{
10734   match(Set dst (MulD dst src));
10735 
10736   format %{ "mulsd   $dst, $src" %}
10737   ins_cost(150); // XXX
10738   opcode(0xF2, 0x0F, 0x59);
10739   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10740   ins_pipe(pipe_slow);
10741 %}
10742 
// Double multiply, memory form: matches MulD whose right input is a LoadD, folded
// into the instruction as a memory operand (same F2 0F 59 opcode bytes).
10743 instruct mulD_mem(regD dst, memory src)
10744 %{
10745   match(Set dst (MulD dst (LoadD src)));
10746 
10747   format %{ "mulsd   $dst, $src" %}
10748   ins_cost(150); // XXX
10749   opcode(0xF2, 0x0F, 0x59);
10750   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10751   ins_pipe(pipe_slow);
10752 %}
10753 
// Double multiply, constant form: matches MulD with an immD right input, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10754 instruct mulD_imm(regD dst, immD con) %{
10755   match(Set dst (MulD dst con));
10756   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10757   ins_cost(150); // XXX
10758   ins_encode %{
10759     __ mulsd($dst$$XMMRegister, $constantaddress($con));
10760   %}
10761   ins_pipe(pipe_slow);
10762 %}
10763 
// Float divide, register form: matches DivF computing dst = dst / src with both
// operands in regF. Opcode bytes F3 0F 5E, printed as divss.
10764 instruct divF_reg(regF dst, regF src)
10765 %{
10766   match(Set dst (DivF dst src));
10767 
10768   format %{ "divss   $dst, $src" %}
10769   ins_cost(150); // XXX
10770   opcode(0xF3, 0x0F, 0x5E);
10771   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10772   ins_pipe(pipe_slow);
10773 %}
10774 
// Float divide, memory form: matches DivF whose divisor is a LoadF, folded into
// the instruction as a memory operand (same F3 0F 5E opcode bytes).
10775 instruct divF_mem(regF dst, memory src)
10776 %{
10777   match(Set dst (DivF dst (LoadF src)));
10778 
10779   format %{ "divss   $dst, $src" %}
10780   ins_cost(150); // XXX
10781   opcode(0xF3, 0x0F, 0x5E);
10782   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10783   ins_pipe(pipe_slow);
10784 %}
10785 
// Float divide, constant form: matches DivF with an immF divisor, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10786 instruct divF_imm(regF dst, immF con) %{
10787   match(Set dst (DivF dst con));
10788   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10789   ins_cost(150); // XXX
10790   ins_encode %{
10791     __ divss($dst$$XMMRegister, $constantaddress($con));
10792   %}
10793   ins_pipe(pipe_slow);
10794 %}
10795 
// Double divide, register form: matches DivD computing dst = dst / src with both
// operands in regD. Opcode bytes F2 0F 5E, printed as divsd.
10796 instruct divD_reg(regD dst, regD src)
10797 %{
10798   match(Set dst (DivD dst src));
10799 
10800   format %{ "divsd   $dst, $src" %}
10801   ins_cost(150); // XXX
10802   opcode(0xF2, 0x0F, 0x5E);
10803   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10804   ins_pipe(pipe_slow);
10805 %}
10806 
// Double divide, memory form: matches DivD whose divisor is a LoadD, folded into
// the instruction as a memory operand (same F2 0F 5E opcode bytes).
10807 instruct divD_mem(regD dst, memory src)
10808 %{
10809   match(Set dst (DivD dst (LoadD src)));
10810 
10811   format %{ "divsd   $dst, $src" %}
10812   ins_cost(150); // XXX
10813   opcode(0xF2, 0x0F, 0x5E);
10814   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10815   ins_pipe(pipe_slow);
10816 %}
10817 
// Double divide, constant form: matches DivD with an immD divisor, addressed
// through $constantaddress($con) (described by the format as a constant-table load).
10818 instruct divD_imm(regD dst, immD con) %{
10819   match(Set dst (DivD dst con));
10820   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10821   ins_cost(150); // XXX
10822   ins_encode %{
10823     __ divsd($dst$$XMMRegister, $constantaddress($con));
10824   %}
10825   ins_pipe(pipe_slow);
10826 %}
10827 
// Float square root, register form. The match rule covers the whole
// ConvD2F(SqrtD(ConvF2D src)) chain, so a float widened to double, rooted and
// narrowed back becomes one instruction: opcode bytes F3 0F 51, printed as sqrtss.
10828 instruct sqrtF_reg(regF dst, regF src)
10829 %{
10830   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10831 
10832   format %{ "sqrtss  $dst, $src" %}
10833   ins_cost(150); // XXX
10834   opcode(0xF3, 0x0F, 0x51);
10835   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10836   ins_pipe(pipe_slow);
10837 %}
10838 
// Float square root, memory form: same ConvD2F(SqrtD(ConvF2D ...)) chain as the
// register form, but the innermost input is a LoadF folded in as a memory operand.
10839 instruct sqrtF_mem(regF dst, memory src)
10840 %{
10841   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
10842 
10843   format %{ "sqrtss  $dst, $src" %}
10844   ins_cost(150); // XXX
10845   opcode(0xF3, 0x0F, 0x51);
10846   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10847   ins_pipe(pipe_slow);
10848 %}
10849 
// Float square root, constant form: same ConvD2F(SqrtD(ConvF2D ...)) chain with
// an immF input, addressed through $constantaddress($con).
10850 instruct sqrtF_imm(regF dst, immF con) %{
10851   match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
10852   format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
10853   ins_cost(150); // XXX
10854   ins_encode %{
10855     __ sqrtss($dst$$XMMRegister, $constantaddress($con));
10856   %}
10857   ins_pipe(pipe_slow);
10858 %}
10859 
// Double square root, register form: matches SqrtD with src and dst as separate
// regD operands. Opcode bytes F2 0F 51, printed as sqrtsd.
10860 instruct sqrtD_reg(regD dst, regD src)
10861 %{
10862   match(Set dst (SqrtD src));
10863 
10864   format %{ "sqrtsd  $dst, $src" %}
10865   ins_cost(150); // XXX
10866   opcode(0xF2, 0x0F, 0x51);
10867   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
10868   ins_pipe(pipe_slow);
10869 %}
10870 
// Double square root, memory form: matches SqrtD of a LoadD, folding the load
// into the instruction as a memory operand (same F2 0F 51 opcode bytes).
10871 instruct sqrtD_mem(regD dst, memory src)
10872 %{
10873   match(Set dst (SqrtD (LoadD src)));
10874 
10875   format %{ "sqrtsd  $dst, $src" %}
10876   ins_cost(150); // XXX
10877   opcode(0xF2, 0x0F, 0x51);
10878   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
10879   ins_pipe(pipe_slow);
10880 %}
10881 
// Double square root, constant form: matches SqrtD of an immD, addressed through
// $constantaddress($con) (described by the format as a constant-table load).
10882 instruct sqrtD_imm(regD dst, immD con) %{
10883   match(Set dst (SqrtD con));
10884   format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
10885   ins_cost(150); // XXX
10886   ins_encode %{
10887     __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
10888   %}
10889   ins_pipe(pipe_slow);
10890 %}
10891 
// Float absolute value, in place on dst. The format describes an andps with a
// 0x7fffffff mask (sign bit cleared); the bytes come from absF_encoding, which
// is defined outside this section.
10892 instruct absF_reg(regF dst)
10893 %{
10894   match(Set dst (AbsF dst));
10895 
10896   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
10897   ins_encode(absF_encoding(dst));
10898   ins_pipe(pipe_slow);
10899 %}
10900 
// Double absolute value, in place on dst. The format describes an andpd with a
// 0x7fffffffffffffff mask (sign bit cleared); the bytes come from absD_encoding,
// which is defined outside this section.
10901 instruct absD_reg(regD dst)
10902 %{
10903   match(Set dst (AbsD dst));
10904 
10905   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
10906             "# abs double by sign masking" %}
10907   ins_encode(absD_encoding(dst));
10908   ins_pipe(pipe_slow);
10909 %}
10910 
// Float negate, in place on dst. The format describes an xorps with a 0x80000000
// mask (sign bit flipped); the bytes come from negF_encoding, which is defined
// outside this section.
10911 instruct negF_reg(regF dst)
10912 %{
10913   match(Set dst (NegF dst));
10914 
10915   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
10916   ins_encode(negF_encoding(dst));
10917   ins_pipe(pipe_slow);
10918 %}
10919 
// Double negate, in place on dst. The format describes an xorpd with a
// 0x8000000000000000 mask (sign bit flipped); the bytes come from negD_encoding,
// which is defined outside this section.
10920 instruct negD_reg(regD dst)
10921 %{
10922   match(Set dst (NegD dst));
10923 
10924   format %{ "xorpd   $dst, [0x8000000000000000]\t"
10925             "# neg double by sign flipping" %}
10926   ins_encode(negD_encoding(dst));
10927   ins_pipe(pipe_slow);
10928 %}
10929 
10930 // -----------Trig and Transcendental Instructions-----------------------------
// Double cosine, in place on dst. The two opcode bytes D9 FF (x87 fcos per the
// Intel opcode map) are emitted between Push_SrcXD and Push_ResultXD.
// NOTE(review): those two encodings are defined elsewhere; presumably they move
// the value from the XMM register to the x87 stack and back -- confirm.
10931 instruct cosD_reg(regD dst) %{
10932   match(Set dst (CosD dst));
10933 
10934   format %{ "dcos   $dst\n\t" %}
10935   opcode(0xD9, 0xFF);
10936   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10937   ins_pipe( pipe_slow );
10938 %}
10939 
// Double sine, in place on dst. The two opcode bytes D9 FE (x87 fsin per the
// Intel opcode map) are emitted between Push_SrcXD and Push_ResultXD.
// NOTE(review): those two encodings are defined elsewhere; presumably they move
// the value from the XMM register to the x87 stack and back -- confirm.
10940 instruct sinD_reg(regD dst) %{
10941   match(Set dst (SinD dst));
10942 
10943   format %{ "dsin   $dst\n\t" %}
10944   opcode(0xD9, 0xFE);
10945   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10946   ins_pipe( pipe_slow );
10947 %}
10948 
// Double tangent, in place on dst. Emits fptan (D9 F2) followed by fstp st
// (DD D8) between Push_SrcXD and Push_ResultXD, with the opcode bytes given
// inline instead of through an opcode() clause.
// NOTE(review): the fstp presumably discards the extra value fptan leaves on
// the x87 stack so that the tangent is on top for Push_ResultXD -- confirm.
10949 instruct tanD_reg(regD dst) %{
10950   match(Set dst (TanD dst));
10951 
10952   format %{ "dtan   $dst\n\t" %}
10953   ins_encode( Push_SrcXD(dst),
10954               Opcode(0xD9), Opcode(0xF2),   //fptan
10955               Opcode(0xDD), Opcode(0xD8),   //fstp st
10956               Push_ResultXD(dst) );
10957   ins_pipe( pipe_slow );
10958 %}
10959 
// Base-10 logarithm of a double, in place on dst. The encoding order matters:
// fldlg2 is emitted first, then Push_SrcXD(dst), then fyl2x, then
// Push_ResultXD(dst) stores the result back into dst.
10960 instruct log10D_reg(regD dst) %{
10961   // The source and result Double operands in XMM registers
10962   match(Set dst (Log10D dst));
10963   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10964   // fyl2x        ; compute log_10(2) * log_2(x)
10965   format %{ "fldlg2\t\t\t#Log10\n\t"
10966             "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
10967          %}
10968    ins_encode(Opcode(0xD9), Opcode(0xEC),   // fldlg2
10969               Push_SrcXD(dst),
10970               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10971               Push_ResultXD(dst));
10972 
10973   ins_pipe( pipe_slow );
10974 %}
10975 
// Natural logarithm of a double, in place on dst. Same shape as the base-10
// variant but with fldln2 (D9 ED) as the first instruction: fldln2, then
// Push_SrcXD(dst), then fyl2x, then Push_ResultXD(dst).
10976 instruct logD_reg(regD dst) %{
10977   // The source and result Double operands in XMM registers
10978   match(Set dst (LogD dst));
10979   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10980   // fyl2x        ; compute log_e(2) * log_2(x)
10981   format %{ "fldln2\t\t\t#Log_e\n\t"
10982             "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
10983          %}
10984   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10985               Push_SrcXD(dst),
10986               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10987               Push_ResultXD(dst));
10988   ins_pipe( pipe_slow );
10989 %}
10990 
10991 
10992 
10993 //----------Arithmetic Conversion Instructions---------------------------------
10994 
// RoundFloat is matched as a no-op: zero cost, an empty ins_encode() (no bytes
// emitted), and the empty pipeline class.
10995 instruct roundFloat_nop(regF dst)
10996 %{
10997   match(Set dst (RoundFloat dst));
10998 
10999   ins_cost(0);
11000   ins_encode();
11001   ins_pipe(empty);
11002 %}
11003 
// RoundDouble is matched as a no-op: zero cost, an empty ins_encode() (no bytes
// emitted), and the empty pipeline class.
11004 instruct roundDouble_nop(regD dst)
11005 %{
11006   match(Set dst (RoundDouble dst));
11007 
11008   ins_cost(0);
11009   ins_encode();
11010   ins_pipe(empty);
11011 %}
11012 
// Float-to-double conversion, register form: ConvF2D from a regF into a regD.
// Opcode bytes F3 0F 5A, printed as cvtss2sd.
11013 instruct convF2D_reg_reg(regD dst, regF src)
11014 %{
11015   match(Set dst (ConvF2D src));
11016 
11017   format %{ "cvtss2sd $dst, $src" %}
11018   opcode(0xF3, 0x0F, 0x5A);
11019   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11020   ins_pipe(pipe_slow); // XXX
11021 %}
11022 
// Float-to-double conversion, memory form: ConvF2D of a LoadF, with the load
// folded in as a memory operand (same F3 0F 5A opcode bytes).
11023 instruct convF2D_reg_mem(regD dst, memory src)
11024 %{
11025   match(Set dst (ConvF2D (LoadF src)));
11026 
11027   format %{ "cvtss2sd $dst, $src" %}
11028   opcode(0xF3, 0x0F, 0x5A);
11029   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11030   ins_pipe(pipe_slow); // XXX
11031 %}
11032 
// Double-to-float conversion, register form: ConvD2F from a regD into a regF.
// Opcode bytes F2 0F 5A, printed as cvtsd2ss.
11033 instruct convD2F_reg_reg(regF dst, regD src)
11034 %{
11035   match(Set dst (ConvD2F src));
11036 
11037   format %{ "cvtsd2ss $dst, $src" %}
11038   opcode(0xF2, 0x0F, 0x5A);
11039   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11040   ins_pipe(pipe_slow); // XXX
11041 %}
11042 
// Double-to-float conversion, memory form: ConvD2F of a LoadD, with the load
// folded in as a memory operand (same F2 0F 5A opcode bytes).
11043 instruct convD2F_reg_mem(regF dst, memory src)
11044 %{
11045   match(Set dst (ConvD2F (LoadD src)));
11046 
11047   format %{ "cvtsd2ss $dst, $src" %}
11048   opcode(0xF2, 0x0F, 0x5A);
11049   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11050   ins_pipe(pipe_slow); // XXX
11051 %}
11052 
11053 // XXX do mem variants
// Float-to-int conversion (ConvF2I). Emits the F3 0F 2C conversion followed by
// the f2i_fixup encoding. Per the format, the fixup path runs only when the raw
// result equals 0x80000000: src is stored to a fresh stack slot, f2i_fixup is
// called, and the corrected value is popped into dst. The compare clobbers the
// flags, hence KILL cr.
11054 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11055 %{
11056   match(Set dst (ConvF2I src));
11057   effect(KILL cr);
11058 
11059   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
11060             "cmpl    $dst, #0x80000000\n\t"
11061             "jne,s   done\n\t"
11062             "subq    rsp, #8\n\t"
11063             "movss   [rsp], $src\n\t"
11064             "call    f2i_fixup\n\t"
11065             "popq    $dst\n"
11066     "done:   "%}
11067   opcode(0xF3, 0x0F, 0x2C);
11068   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11069              f2i_fixup(dst, src));
11070   ins_pipe(pipe_slow);
11071 %}
11072 
// Float-to-long conversion (ConvF2L). Same shape as the float-to-int rule but
// with the REX_reg_reg_wide prefix encoding and the f2l_fixup encoding. Per the
// format, the fixup path runs only when the raw result equals
// 0x8000000000000000. Flags are clobbered, hence KILL cr.
11073 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11074 %{
11075   match(Set dst (ConvF2L src));
11076   effect(KILL cr);
11077 
11078   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
11079             "cmpq    $dst, [0x8000000000000000]\n\t"
11080             "jne,s   done\n\t"
11081             "subq    rsp, #8\n\t"
11082             "movss   [rsp], $src\n\t"
11083             "call    f2l_fixup\n\t"
11084             "popq    $dst\n"
11085     "done:   "%}
11086   opcode(0xF3, 0x0F, 0x2C);
11087   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11088              f2l_fixup(dst, src));
11089   ins_pipe(pipe_slow);
11090 %}
11091 
// Double-to-int conversion (ConvD2I). Emits the F2 0F 2C conversion followed by
// the d2i_fixup encoding. Per the format, the fixup path runs only when the raw
// result equals 0x80000000: src is stored to a fresh stack slot, d2i_fixup is
// called, and the corrected value is popped into dst. Flags are clobbered,
// hence KILL cr.
11092 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11093 %{
11094   match(Set dst (ConvD2I src));
11095   effect(KILL cr);
11096 
11097   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
11098             "cmpl    $dst, #0x80000000\n\t"
11099             "jne,s   done\n\t"
11100             "subq    rsp, #8\n\t"
11101             "movsd   [rsp], $src\n\t"
11102             "call    d2i_fixup\n\t"
11103             "popq    $dst\n"
11104     "done:   "%}
11105   opcode(0xF2, 0x0F, 0x2C);
11106   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
11107              d2i_fixup(dst, src));
11108   ins_pipe(pipe_slow);
11109 %}
11110 
// Double-to-long conversion (ConvD2L). Same shape as the double-to-int rule but
// with the REX_reg_reg_wide prefix encoding and the d2l_fixup encoding. Per the
// format, the fixup path runs only when the raw result equals
// 0x8000000000000000. Flags are clobbered, hence KILL cr.
11111 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11112 %{
11113   match(Set dst (ConvD2L src));
11114   effect(KILL cr);
11115 
11116   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
11117             "cmpq    $dst, [0x8000000000000000]\n\t"
11118             "jne,s   done\n\t"
11119             "subq    rsp, #8\n\t"
11120             "movsd   [rsp], $src\n\t"
11121             "call    d2l_fixup\n\t"
11122             "popq    $dst\n"
11123     "done:   "%}
11124   opcode(0xF2, 0x0F, 0x2C);
11125   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
11126              d2l_fixup(dst, src));
11127   ins_pipe(pipe_slow);
11128 %}
11129 
// Int-to-float conversion, register form, selected only when UseXmmI2F is off
// (the rule with predicate(UseXmmI2F) covers the other case).
// Opcode bytes F3 0F 2A, printed as cvtsi2ssl.
11130 instruct convI2F_reg_reg(regF dst, rRegI src)
11131 %{
11132   predicate(!UseXmmI2F);
11133   match(Set dst (ConvI2F src));
11134 
11135   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11136   opcode(0xF3, 0x0F, 0x2A);
11137   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11138   ins_pipe(pipe_slow); // XXX
11139 %}
11140 
// Int-to-float conversion, memory form: ConvI2F of a LoadI, with the load folded
// in as a memory operand. Unlike the register form it carries no predicate.
11141 instruct convI2F_reg_mem(regF dst, memory src)
11142 %{
11143   match(Set dst (ConvI2F (LoadI src)));
11144 
11145   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11146   opcode(0xF3, 0x0F, 0x2A);
11147   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11148   ins_pipe(pipe_slow); // XXX
11149 %}
11150 
// Int-to-double conversion, register form, selected only when UseXmmI2D is off
// (the rule with predicate(UseXmmI2D) covers the other case).
// Opcode bytes F2 0F 2A, printed as cvtsi2sdl.
11151 instruct convI2D_reg_reg(regD dst, rRegI src)
11152 %{
11153   predicate(!UseXmmI2D);
11154   match(Set dst (ConvI2D src));
11155 
11156   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11157   opcode(0xF2, 0x0F, 0x2A);
11158   ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
11159   ins_pipe(pipe_slow); // XXX
11160 %}
11161 
// Int-to-double conversion, memory form: ConvI2D of a LoadI, with the load folded
// in as a memory operand. Unlike the register form it carries no predicate.
11162 instruct convI2D_reg_mem(regD dst, memory src)
11163 %{
11164   match(Set dst (ConvI2D (LoadI src)));
11165 
11166   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11167   opcode(0xF2, 0x0F, 0x2A);
11168   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11169   ins_pipe(pipe_slow); // XXX
11170 %}
11171 
// Int-to-float conversion used when UseXmmI2F is on: two assembler calls, movdl
// to copy the int register into dst, then cvtdq2ps converting dst in place.
11172 instruct convXI2F_reg(regF dst, rRegI src)
11173 %{
11174   predicate(UseXmmI2F);
11175   match(Set dst (ConvI2F src));
11176 
11177   format %{ "movdl $dst, $src\n\t"
11178             "cvtdq2psl $dst, $dst\t# i2f" %}
11179   ins_encode %{
11180     __ movdl($dst$$XMMRegister, $src$$Register);
11181     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11182   %}
11183   ins_pipe(pipe_slow); // XXX
11184 %}
11185 
// Int-to-double conversion used when UseXmmI2D is on: two assembler calls, movdl
// to copy the int register into dst, then cvtdq2pd converting dst in place.
11186 instruct convXI2D_reg(regD dst, rRegI src)
11187 %{
11188   predicate(UseXmmI2D);
11189   match(Set dst (ConvI2D src));
11190 
11191   format %{ "movdl $dst, $src\n\t"
11192             "cvtdq2pdl $dst, $dst\t# i2d" %}
11193   ins_encode %{
11194     __ movdl($dst$$XMMRegister, $src$$Register);
11195     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11196   %}
11197   ins_pipe(pipe_slow); // XXX
11198 %}
11199 
// Long-to-float conversion, register form. Same F3 0F 2A opcode bytes as the
// int-to-float rule, but with the REX_reg_reg_wide prefix encoding; printed as
// cvtsi2ssq.
11200 instruct convL2F_reg_reg(regF dst, rRegL src)
11201 %{
11202   match(Set dst (ConvL2F src));
11203 
11204   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11205   opcode(0xF3, 0x0F, 0x2A);
11206   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11207   ins_pipe(pipe_slow); // XXX
11208 %}
11209 
// Long-to-float conversion, memory form: ConvL2F of a LoadL, with the load folded
// in as a memory operand and the REX_reg_mem_wide prefix encoding.
11210 instruct convL2F_reg_mem(regF dst, memory src)
11211 %{
11212   match(Set dst (ConvL2F (LoadL src)));
11213 
11214   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11215   opcode(0xF3, 0x0F, 0x2A);
11216   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11217   ins_pipe(pipe_slow); // XXX
11218 %}
11219 
// Long-to-double conversion, register form. Same F2 0F 2A opcode bytes as the
// int-to-double rule, but with the REX_reg_reg_wide prefix encoding; printed as
// cvtsi2sdq.
11220 instruct convL2D_reg_reg(regD dst, rRegL src)
11221 %{
11222   match(Set dst (ConvL2D src));
11223 
11224   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11225   opcode(0xF2, 0x0F, 0x2A);
11226   ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
11227   ins_pipe(pipe_slow); // XXX
11228 %}
11229 
// Long-to-double conversion, memory form: ConvL2D of a LoadL, with the load folded
// in as a memory operand and the REX_reg_mem_wide prefix encoding.
11230 instruct convL2D_reg_mem(regD dst, memory src)
11231 %{
11232   match(Set dst (ConvL2D (LoadL src)));
11233 
11234   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11235   opcode(0xF2, 0x0F, 0x2A);
11236   ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
11237   ins_pipe(pipe_slow); // XXX
11238 %}
11239 
// Int-to-long conversion (ConvI2L) between general registers, emitted through the
// assembler's movslq.
11240 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11241 %{
11242   match(Set dst (ConvI2L src));
11243 
11244   ins_cost(125);
11245   format %{ "movslq  $dst, $src\t# i2l" %}
11246   ins_encode %{
11247     __ movslq($dst$$Register, $src$$Register);
11248   %}
11249   ins_pipe(ialu_reg_reg);
11250 %}
11251 
11252 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11253 // %{
11254 //   match(Set dst (ConvI2L src));
11255 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11256 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11257 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11258 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11259 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11260 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11261 
11262 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11263 //   ins_encode(enc_copy(dst, src));
11264 // //   opcode(0x63); // needs REX.W
11265 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11266 //   ins_pipe(ialu_reg_reg);
11267 // %}
11268 
11269 // Zero-extend convert int to long
// Matches the combined pattern AndL(ConvI2L src, mask) and replaces it with a
// single register copy (enc_copy, printed as movl).
// NOTE(review): immL_32bits is defined elsewhere; presumably it restricts mask
// to the low-32-bit mask so that a 32-bit move is equivalent -- confirm.
11270 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11271 %{
11272   match(Set dst (AndL (ConvI2L src) mask));
11273 
11274   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11275   ins_encode(enc_copy(dst, src));
11276   ins_pipe(ialu_reg_reg);
11277 %}
11278 
11279 // Zero-extend convert int to long
// Memory form: matches AndL(ConvI2L(LoadI src), mask) and replaces the load,
// conversion and mask with one load using opcode 8B (printed as movl).
// NOTE(review): immL_32bits is defined elsewhere; presumably it restricts mask
// to the low-32-bit mask -- confirm.
11280 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11281 %{
11282   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11283 
11284   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
11285   opcode(0x8B);
11286   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11287   ins_pipe(ialu_reg_mem);
11288 %}
11289 
// Zero-extension of a long: matches AndL(src, mask) with an immL_32bits mask and
// emits a register copy through enc_copy_always (printed as movl).
// NOTE(review): enc_copy_always is defined elsewhere; presumably it emits the
// move even when dst and src are the same register, unlike enc_copy -- confirm.
11290 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11291 %{
11292   match(Set dst (AndL src mask));
11293 
11294   format %{ "movl    $dst, $src\t# zero-extend long" %}
11295   ins_encode(enc_copy_always(dst, src));
11296   ins_pipe(ialu_reg_reg);
11297 %}
11298 
// Long-to-int conversion (ConvL2I) between general registers, emitted as a
// register copy through enc_copy_always (printed as movl).
11299 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11300 %{
11301   match(Set dst (ConvL2I src));
11302 
11303   format %{ "movl    $dst, $src\t# l2i" %}
11304   ins_encode(enc_copy_always(dst, src));
11305   ins_pipe(ialu_reg_reg);
11306 %}
11307 
11308 
// MoveF2I from a float stack slot into an int register: a plain load with opcode
// 8B (printed as movl).
11309 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11310   match(Set dst (MoveF2I src));
11311   effect(DEF dst, USE src);
11312 
11313   ins_cost(125);
11314   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11315   opcode(0x8B);
11316   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
11317   ins_pipe(ialu_reg_mem);
11318 %}
11319 
// MoveI2F from an int stack slot into a float register: a load with opcode bytes
// F3 0F 10 (printed as movss).
11320 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11321   match(Set dst (MoveI2F src));
11322   effect(DEF dst, USE src);
11323 
11324   ins_cost(125);
11325   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11326   opcode(0xF3, 0x0F, 0x10);
11327   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11328   ins_pipe(pipe_slow);
11329 %}
11330 
// MoveD2L from a double stack slot into a long register: a load with opcode 8B
// and the REX_reg_mem_wide prefix encoding (printed as movq).
11331 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11332   match(Set dst (MoveD2L src));
11333   effect(DEF dst, USE src);
11334 
11335   ins_cost(125);
11336   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11337   opcode(0x8B);
11338   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
11339   ins_pipe(ialu_reg_mem);
11340 %}
11341 
// MoveL2D from a long stack slot into a double register, selected only when
// UseXmmLoadAndClearUpper is off. Uses opcode bytes 66 0F 12 (printed as movlpd).
// Note the format tag reads "MoveL2D_stack_reg", without the "_partial" suffix.
11342 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11343   predicate(!UseXmmLoadAndClearUpper);
11344   match(Set dst (MoveL2D src));
11345   effect(DEF dst, USE src);
11346 
11347   ins_cost(125);
11348   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11349   opcode(0x66, 0x0F, 0x12);
11350   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11351   ins_pipe(pipe_slow);
11352 %}
11353 
// MoveL2D from a long stack slot into a double register, selected only when
// UseXmmLoadAndClearUpper is on. Uses opcode bytes F2 0F 10 (printed as movsd).
11354 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11355   predicate(UseXmmLoadAndClearUpper);
11356   match(Set dst (MoveL2D src));
11357   effect(DEF dst, USE src);
11358 
11359   ins_cost(125);
11360   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11361   opcode(0xF2, 0x0F, 0x10);
11362   ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
11363   ins_pipe(pipe_slow);
11364 %}
11365 
11366 
// MoveF2I from a float register into an int stack slot: a store with opcode bytes
// F3 0F 11 (printed as movss). The encodings take (src, dst) because the
// register operand comes first and the memory operand second.
11367 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11368   match(Set dst (MoveF2I src));
11369   effect(DEF dst, USE src);
11370 
11371   ins_cost(95); // XXX
11372   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11373   opcode(0xF3, 0x0F, 0x11);
11374   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11375   ins_pipe(pipe_slow);
11376 %}
11377 
// MoveI2F from an int register into a float stack slot: a store with opcode 89
// (printed as movl). The encodings take (src, dst) because the register operand
// comes first and the memory operand second.
11378 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11379   match(Set dst (MoveI2F src));
11380   effect(DEF dst, USE src);
11381 
11382   ins_cost(100);
11383   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11384   opcode(0x89);
11385   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
11386   ins_pipe( ialu_mem_reg );
11387 %}
11388 
// MoveD2L from a double register into a long stack slot: a store with opcode
// bytes F2 0F 11 (printed as movsd). The encodings take (src, dst) because the
// register operand comes first and the memory operand second.
// The format tag now names this rule (MoveD2L_reg_stack); it previously carried
// the name of the opposite-direction rule, which mislabelled debug listings.
11389 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11390   match(Set dst (MoveD2L src));
11391   effect(DEF dst, USE src);
11392 
11393   ins_cost(95); // XXX
11394   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11395   opcode(0xF2, 0x0F, 0x11);
11396   ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
11397   ins_pipe(pipe_slow);
11398 %}
11399 
// MoveL2D from a long register into a double stack slot: a store with opcode 89
// and the REX_reg_mem_wide prefix encoding (printed as movq). The encodings take
// (src, dst) because the register operand comes first and the memory operand
// second.
11400 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11401   match(Set dst (MoveL2D src));
11402   effect(DEF dst, USE src);
11403 
11404   ins_cost(100);
11405   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11406   opcode(0x89);
11407   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
11408   ins_pipe(ialu_mem_reg);
11409 %}
11410 
// MoveF2I directly from a float register into an int register via the assembler's
// movdl. Cost 85 is lower than the stack-slot variants of the same move.
11411 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11412   match(Set dst (MoveF2I src));
11413   effect(DEF dst, USE src);
11414   ins_cost(85);
11415   format %{ "movd    $dst,$src\t# MoveF2I" %}
11416   ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
11417   ins_pipe( pipe_slow );
11418 %}
11419 
// MoveD2L directly from a double register into a long register via the
// assembler's movdq. Cost 85 is lower than the stack-slot variants of the same move.
11420 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11421   match(Set dst (MoveD2L src));
11422   effect(DEF dst, USE src);
11423   ins_cost(85);
11424   format %{ "movd    $dst,$src\t# MoveD2L" %}
11425   ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
11426   ins_pipe( pipe_slow );
11427 %}
11428 
11429 // The next instructions have long latency and use Int unit. Set high cost.
// MoveI2F directly from an int register into a float register via the assembler's
// movdl. Cost 300 is deliberately higher than the stack-slot variants (100-125).
11430 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11431   match(Set dst (MoveI2F src));
11432   effect(DEF dst, USE src);
11433   ins_cost(300);
11434   format %{ "movd    $dst,$src\t# MoveI2F" %}
11435   ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
11436   ins_pipe( pipe_slow );
11437 %}
11438 
// MoveL2D directly from a long register into a double register via the
// assembler's movdq. Cost 300 is deliberately higher than the stack-slot
// variants (100-125).
11439 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11440   match(Set dst (MoveL2D src));
11441   effect(DEF dst, USE src);
11442   ins_cost(300);
11443   format %{ "movd    $dst,$src\t# MoveL2D" %}
11444   ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 // Replicate scalar to packed byte (1 byte) values in xmm
// Source already in a regD. The format lists a three-step sequence (MOVDQA,
// PUNPCKLBW, PSHUFLW 0x00), all produced by the single pshufd_8x8 encoding.
11449 instruct Repl8B_reg(regD dst, regD src) %{
11450   match(Set dst (Replicate8B src));
11451   format %{ "MOVDQA  $dst,$src\n\t"
11452             "PUNPCKLBW $dst,$dst\n\t"
11453             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11454   ins_encode( pshufd_8x8(dst, src));
11455   ins_pipe( pipe_slow );
11456 %}
11457 
11458 // Replicate scalar to packed byte (1 byte) values in xmm
// Source in a general int register: mov_i2x first copies it into dst, then
// pshufd_8x8 replicates within dst (dst is both source and destination).
11459 instruct Repl8B_rRegI(regD dst, rRegI src) %{
11460   match(Set dst (Replicate8B src));
11461   format %{ "MOVD    $dst,$src\n\t"
11462             "PUNPCKLBW $dst,$dst\n\t"
11463             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11464   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
11465   ins_pipe( pipe_slow );
11466 %}
11467 
11468 // Replicate scalar zero to packed byte (1 byte) values in xmm
// A zero constant needs no shuffle: dst is cleared by xor-ing it with itself
// (pxor encoding).
11469 instruct Repl8B_immI0(regD dst, immI0 zero) %{
11470   match(Set dst (Replicate8B zero));
11471   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11472   ins_encode( pxor(dst, dst));
11473   ins_pipe( fpu_reg_reg );
11474 %}
11475 
11476 // Replicate scalar to packed short (2 byte) values in xmm
// Source already in a regD; one shuffle (printed as PSHUFLW with control 0x00)
// produced by the pshufd_4x16 encoding.
11477 instruct Repl4S_reg(regD dst, regD src) %{
11478   match(Set dst (Replicate4S src));
11479   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11480   ins_encode( pshufd_4x16(dst, src));
11481   ins_pipe( fpu_reg_reg );
11482 %}
11483 
11484 // Replicate scalar to packed short (2 byte) values in xmm
// Source in a general int register: mov_i2x first copies it into dst, then
// pshufd_4x16 replicates within dst.
11485 instruct Repl4S_rRegI(regD dst, rRegI src) %{
11486   match(Set dst (Replicate4S src));
11487   format %{ "MOVD    $dst,$src\n\t"
11488             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11489   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11490   ins_pipe( fpu_reg_reg );
11491 %}
11492 
11493 // Replicate scalar zero to packed short (2 byte) values in xmm
// A zero constant needs no shuffle: dst is cleared by xor-ing it with itself
// (pxor encoding).
11494 instruct Repl4S_immI0(regD dst, immI0 zero) %{
11495   match(Set dst (Replicate4S zero));
11496   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11497   ins_encode( pxor(dst, dst));
11498   ins_pipe( fpu_reg_reg );
11499 %}
11500 
11501 // Replicate scalar to packed char (2 byte) values in xmm
// Source already in a regD; uses the same pshufd_4x16 encoding as the packed
// short rule (printed as PSHUFLW with control 0x00).
11502 instruct Repl4C_reg(regD dst, regD src) %{
11503   match(Set dst (Replicate4C src));
11504   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11505   ins_encode( pshufd_4x16(dst, src));
11506   ins_pipe( fpu_reg_reg );
11507 %}
11508 
11509 // Replicate scalar to packed char (2 byte) values in xmm
// Source in a general int register: mov_i2x first copies it into dst, then
// pshufd_4x16 replicates within dst.
11510 instruct Repl4C_rRegI(regD dst, rRegI src) %{
11511   match(Set dst (Replicate4C src));
11512   format %{ "MOVD    $dst,$src\n\t"
11513             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11514   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
11515   ins_pipe( fpu_reg_reg );
11516 %}
11517 
11518 // Replicate scalar zero to packed char (2 byte) values in xmm
// A zero constant needs no shuffle: dst is cleared by xor-ing it with itself
// (pxor encoding).
11519 instruct Repl4C_immI0(regD dst, immI0 zero) %{
11520   match(Set dst (Replicate4C zero));
11521   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11522   ins_encode( pxor(dst, dst));
11523   ins_pipe( fpu_reg_reg );
11524 %}
11525 
11526 // Replicate scalar to packed integer (4 byte) values in xmm
// Source already in a regD; one pshufd encoding with shuffle control 0x00.
11527 instruct Repl2I_reg(regD dst, regD src) %{
11528   match(Set dst (Replicate2I src));
11529   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11530   ins_encode( pshufd(dst, src, 0x00));
11531   ins_pipe( fpu_reg_reg );
11532 %}
11533 
11534 // Replicate scalar to packed integer (4 byte) values in xmm
// Source in a general int register: mov_i2x first copies it into dst, then
// pshufd with control 0x00 replicates within dst.
11535 instruct Repl2I_rRegI(regD dst, rRegI src) %{
11536   match(Set dst (Replicate2I src));
11537   format %{ "MOVD   $dst,$src\n\t"
11538             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11539   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
11540   ins_pipe( fpu_reg_reg );
11541 %}
11542 
11543 // Replicate scalar zero to packed integer (4 byte) values in xmm
// A zero constant needs no shuffle: dst is cleared by xor-ing it with itself
// (pxor encoding).
11544 instruct Repl2I_immI0(regD dst, immI0 zero) %{
11545   match(Set dst (Replicate2I zero));
11546   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11547   ins_encode( pxor(dst, dst));
11548   ins_pipe( fpu_reg_reg );
11549 %}
11550 
11551 // Replicate scalar to packed single precision floating point values in xmm
// Source already in a regD; one pshufd encoding with shuffle control 0xe0
// (the integer rules use 0x00).
11552 instruct Repl2F_reg(regD dst, regD src) %{
11553   match(Set dst (Replicate2F src));
11554   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11555   ins_encode( pshufd(dst, src, 0xe0));
11556   ins_pipe( fpu_reg_reg );
11557 %}
11558 
11559 // Replicate scalar to packed single precision floating point values in xmm
// Same encoding as the regD-source rule (pshufd with control 0xe0), but the
// source operand is a regF.
11560 instruct Repl2F_regF(regD dst, regF src) %{
11561   match(Set dst (Replicate2F src));
11562   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11563   ins_encode( pshufd(dst, src, 0xe0));
11564   ins_pipe( fpu_reg_reg );
11565 %}
11566 
11567 // Replicate scalar zero to packed single precision floating point values in xmm
// A float zero constant (immF0) needs no shuffle: dst is cleared by xor-ing it
// with itself (pxor encoding).
11568 instruct Repl2F_immF0(regD dst, immF0 zero) %{
11569   match(Set dst (Replicate2F zero));
11570   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11571   ins_encode( pxor(dst, dst));
11572   ins_pipe( fpu_reg_reg );
11573 %}
11574 
11575 
11576 // =======================================================================
11577 // fast clearing of an array
      // Matches ClearArray(cnt, base).  The operands use the rcx_RegL, rdi_RegP
      // and rax_RegI classes; cnt and base are USE_KILL, while zero (rax) and
      // the flags are KILLed.  The encoding is "xorl eax,eax" followed by the
      // bytes F3 48 AB (rep REX_W stos).
11578 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
11579                   rFlagsReg cr)
11580 %{
11581   match(Set dummy (ClearArray cnt base));
11582   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11583 
11584   format %{ "xorl    rax, rax\t# ClearArray:\n\t"
11585             "rep stosq\t# Store rax to *rdi++ while rcx--" %}
11586   ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
11587              Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
11588   ins_pipe(pipe_slow);
11589 %}
11590 
11591 instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rbx_RegI cnt2,
11592                         rax_RegI result, regD tmp1, regD tmp2, rFlagsReg cr)
11593 %{
        // Matches StrComp over the pairs (str1, cnt1) and (str2, cnt2).  The code
        // itself is generated by the string_compare() assembler routine called
        // from ins_encode.  All four inputs are USE_KILL, tmp1/tmp2 are TEMP
        // XMM registers and the flags are killed.
11594   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11595   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11596 
11597   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11598   ins_encode %{
11599     __ string_compare($str1$$Register, $str2$$Register,
11600                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11601                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11602   %}
11603   ins_pipe( pipe_slow );
11604 %}
11605 
11606 instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11607                         rbx_RegI result, regD tmp1, rcx_RegI tmp2, rFlagsReg cr)
11608 %{
        // Matches StrIndexOf over (str1, cnt1) and (str2, cnt2); only selected
        // when the UseSSE42Intrinsics flag is set.  Code is generated by the
        // string_indexof() assembler routine.  Inputs are USE_KILL, tmp1 is a
        // TEMP XMM register, tmp2 (rcx_RegI) and the flags are KILLed.
11609   predicate(UseSSE42Intrinsics);
11610   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11611   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
11612 
11613   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
11614   ins_encode %{
11615     __ string_indexof($str1$$Register, $str2$$Register,
11616                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11617                       $tmp1$$XMMRegister, $tmp2$$Register);
11618   %}
11619   ins_pipe( pipe_slow );
11620 %}
11621 
11622 // fast string equals
      // Matches StrEquals over (str1, str2) with a shared count.  Delegates to
      // char_arrays_equals() with its first argument set to false (array_equals
      // passes true for the same routine).  str1, str2 and cnt are USE_KILL;
      // tmp1/tmp2 are TEMP XMM registers; tmp3 and the flags are KILLed.
11623 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11624                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11625 %{
11626   match(Set result (StrEquals (Binary str1 str2) cnt));
11627   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11628 
11629   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11630   ins_encode %{
11631     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11632                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11633                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11634   %}
11635   ins_pipe( pipe_slow );
11636 %}
11637 
11638 // fast array equals
      // Matches AryEq(ary1, ary2).  Delegates to char_arrays_equals() with its
      // first argument set to true; since the match rule has no count input,
      // the killed scratch register tmp3 (rcx_RegI) is passed in the position
      // where string_equals passes its count.  ary1/ary2 are USE_KILL, tmp1/tmp2
      // are TEMP XMM registers, tmp3, tmp4 and the flags are KILLed.
11639 instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11640                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11641 %{
11642   match(Set result (AryEq ary1 ary2));
11643   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11644   //ins_cost(300);
11645 
11646   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11647   ins_encode %{
11648     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11649                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11650                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11651   %}
11652   ins_pipe( pipe_slow );
11653 %}
11654 
11655 //----------Control Flow Instructions------------------------------------------
11656 // Signed compare Instructions
11657 
11658 // XXX more variants!!
11659 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11660 %{
        // Signed 32-bit register/register compare: matches CmpI(op1, op2) and
        // defines the flags.  Encoded as an optional REX prefix, opcode 0x3B
        // and a reg/reg ModRM byte.  Also instantiated by the expand rules of
        // minI_rReg and maxI_rReg.
11661   match(Set cr (CmpI op1 op2));
11662   effect(DEF cr, USE op1, USE op2);
11663 
11664   format %{ "cmpl    $op1, $op2" %}
11665   opcode(0x3B);  /* Opcode 3B /r */
11666   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11667   ins_pipe(ialu_cr_reg_reg);
11668 %}
11669 
11670 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11671 %{
        // Signed 32-bit compare of a register against an immI constant.
        // Uses opcode 0x81 with /7; the immediate is emitted by Con8or32, so it
        // is written as either an 8-bit or a 32-bit constant.
11672   match(Set cr (CmpI op1 op2));
11673 
11674   format %{ "cmpl    $op1, $op2" %}
11675   opcode(0x81, 0x07); /* Opcode 81 /7 */
11676   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11677   ins_pipe(ialu_cr_reg_imm);
11678 %}
11679 
11680 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11681 %{
        // Signed 32-bit compare of a register against a value loaded from
        // memory: the LoadI is folded into the compare (opcode 0x3B with a
        // reg/mem ModRM).  Carries an explicit ins_cost of 500.
11682   match(Set cr (CmpI op1 (LoadI op2)));
11683 
11684   ins_cost(500); // XXX
11685   format %{ "cmpl    $op1, $op2" %}
11686   opcode(0x3B); /* Opcode 3B /r */
11687   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11688   ins_pipe(ialu_cr_reg_mem);
11689 %}
11690 
11691 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11692 %{
        // Compare of an int register against the constant zero, implemented as
        // "testl src, src" (opcode 0x85); the zero operand is not encoded.
11693   match(Set cr (CmpI src zero));
11694 
11695   format %{ "testl   $src, $src" %}
11696   opcode(0x85);
11697   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11698   ins_pipe(ialu_cr_reg_imm);
11699 %}
11700 
11701 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11702 %{
        // Folds (src & con) compared with zero into a single test instruction:
        // opcode 0xF7 with /0 followed by the 32-bit constant.  The AndI result
        // is not written to any register.
11703   match(Set cr (CmpI (AndI src con) zero));
11704 
11705   format %{ "testl   $src, $con" %}
11706   opcode(0xF7, 0x00);
11707   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11708   ins_pipe(ialu_cr_reg_imm);
11709 %}
11710 
11711 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11712 %{
        // Folds (src & LoadI(mem)) compared with zero into "testl src, mem"
        // (opcode 0x85 with a reg/mem ModRM).
11713   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11714 
11715   format %{ "testl   $src, $mem" %}
11716   opcode(0x85);
11717   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11718   ins_pipe(ialu_cr_reg_mem);
11719 %}
11720 
11721 // Unsigned compare Instructions; really, same as signed except they
11722 // produce an rFlagsRegU instead of rFlagsReg.
      // Register/register form: matches CmpU(op1, op2) and uses the same
      // opcode (0x3B) and encoding helpers as compI_rReg.
11723 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11724 %{
11725   match(Set cr (CmpU op1 op2));
11726 
11727   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11728   opcode(0x3B); /* Opcode 3B /r */
11729   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11730   ins_pipe(ialu_cr_reg_reg);
11731 %}
11732 
11733 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11734 %{
        // Unsigned compare of an int register against an immI constant; same
        // encoding as compI_rReg_imm (0x81 /7 with an 8- or 32-bit immediate)
        // but the result is typed rFlagsRegU.
11735   match(Set cr (CmpU op1 op2));
11736 
11737   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11738   opcode(0x81,0x07); /* Opcode 81 /7 */
11739   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11740   ins_pipe(ialu_cr_reg_imm);
11741 %}
11742 
11743 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11744 %{
        // Unsigned compare of an int register against a LoadI folded into the
        // instruction (opcode 0x3B, reg/mem ModRM); explicit ins_cost of 500.
11745   match(Set cr (CmpU op1 (LoadI op2)));
11746 
11747   ins_cost(500); // XXX
11748   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11749   opcode(0x3B); /* Opcode 3B /r */
11750   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11751   ins_pipe(ialu_cr_reg_mem);
11752 %}
11753 
11754 // // // Cisc-spilled version of cmpU_rReg
11755 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11756 // //%{
11757 // //  match(Set cr (CmpU (LoadI op1) op2));
11758 // //
11759 // //  format %{ "CMPu   $op1,$op2" %}
11760 // //  ins_cost(500);
11761 // //  opcode(0x39);  /* Opcode 39 /r */
11762 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11763 // //%}
11764 
11765 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11766 %{
        // Unsigned compare of an int register against zero, implemented as
        // "testl src, src" (opcode 0x85); result is typed rFlagsRegU.
11767   match(Set cr (CmpU src zero));
11768 
11769   format %{ "testl  $src, $src\t# unsigned" %}
11770   opcode(0x85);
11771   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11772   ins_pipe(ialu_cr_reg_imm);
11773 %}
11774 
11775 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11776 %{
        // Pointer register/register compare: matches CmpP(op1, op2) and yields
        // unsigned flags.  Uses the "wide" REX helper with opcode 0x3B, shown
        // as cmpq in the format.
11777   match(Set cr (CmpP op1 op2));
11778 
11779   format %{ "cmpq    $op1, $op2\t# ptr" %}
11780   opcode(0x3B); /* Opcode 3B /r */
11781   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11782   ins_pipe(ialu_cr_reg_reg);
11783 %}
11784 
11785 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11786 %{
        // Pointer compare against a LoadP folded into the instruction (wide REX,
        // opcode 0x3B, reg/mem ModRM).  Has no predicate and an explicit
        // ins_cost of 500; compP_mem_rReg matches the same tree with a
        // predicate and no explicit cost.
11787   match(Set cr (CmpP op1 (LoadP op2)));
11788 
11789   ins_cost(500); // XXX
11790   format %{ "cmpq    $op1, $op2\t# ptr" %}
11791   opcode(0x3B); /* Opcode 3B /r */
11792   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11793   ins_pipe(ialu_cr_reg_mem);
11794 %}
11795 
11796 // // // Cisc-spilled version of cmpP_rReg
11797 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11798 // //%{
11799 // //  match(Set cr (CmpP (LoadP op1) op2));
11800 // //
11801 // //  format %{ "CMPu   $op1,$op2" %}
11802 // //  ins_cost(500);
11803 // //  opcode(0x39);  /* Opcode 39 /r */
11804 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11805 // //%}
11806 
11807 // XXX this is generalized by compP_rReg_mem???
11808 // Compare raw pointer (used in out-of-heap check).
11809 // Only works because non-oop pointers must be raw pointers
11810 // and raw pointers have no anti-dependencies.
      // The predicate inspects the bottom type of the LoadP's address input
      // (n->in(2)->in(2)) and rejects oop pointers.  The match rule and the
      // encoding are the same as compP_rReg_mem; only the predicate, the
      // missing ins_cost and the format remark differ.
11811 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11812 %{
11813   predicate(!n->in(2)->in(2)->bottom_type()->isa_oop_ptr());
11814   match(Set cr (CmpP op1 (LoadP op2)));
11815 
11816   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11817   opcode(0x3B); /* Opcode 3B /r */
11818   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11819   ins_pipe(ialu_cr_reg_mem);
11820 %}
11821 
11822 // This will generate a signed flags result. This should be OK since
11823 // any compare to a zero should be eq/neq.
      // Matches CmpP of a pointer register against immP0 and implements it as
      // "testq src, src" (wide REX, opcode 0x85).
11824 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11825 %{
11826   match(Set cr (CmpP src zero));
11827 
11828   format %{ "testq   $src, $src\t# ptr" %}
11829   opcode(0x85);
11830   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11831   ins_pipe(ialu_cr_reg_imm);
11832 %}
11833 
11834 // This will generate a signed flags result. This should be OK since
11835 // any compare to a zero should be eq/neq.
      // Null test of a pointer loaded from memory, without loading it into a
      // register.  Selected when compressed oops are off or the narrow oop
      // base is non-NULL; testP_mem_reg0 carries the complementary predicate.
      // The encoding is wide REX + F7 /0 with the 32-bit constant 0xFFFFFFFF;
      // the format prints the mask as 64 bits of ones (presumably because the
      // CPU sign-extends the immediate -- confirm against the ISA reference).
11836 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11837 %{
11838   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11839   match(Set cr (CmpP (LoadP op) zero));
11840 
11841   ins_cost(500); // XXX
11842   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11843   opcode(0xF7); /* Opcode F7 /0 */
11844   ins_encode(REX_mem_wide(op),
11845              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11846   ins_pipe(ialu_cr_reg_imm);
11847 %}
11848 
11849 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11850 %{
        // Null test of a pointer in memory for the configuration where
        // compressed oops are on and the narrow oop base is NULL.  As the
        // format string notes, R12 (the heap base register) is zero in that
        // case, so the memory operand is compared against R12 directly.
11851   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
11852   match(Set cr (CmpP (LoadP mem) zero));
11853 
11854   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11855   ins_encode %{
11856     __ cmpq(r12, $mem$$Address);
11857   %}
11858   ins_pipe(ialu_cr_reg_mem);
11859 %}
11860 
11861 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11862 %{
        // Compressed-pointer register/register compare: matches CmpN(op1, op2)
        // and emits a 32-bit cmpl through the assembler.
11863   match(Set cr (CmpN op1 op2));
11864 
11865   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11866   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11867   ins_pipe(ialu_cr_reg_reg);
11868 %}
11869 
11870 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11871 %{
        // Compressed-pointer compare of a register against a LoadN folded into
        // the instruction as a memory operand (32-bit cmpl).
11872   match(Set cr (CmpN src (LoadN mem)));
11873 
11874   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11875   ins_encode %{
11876     __ cmpl($src$$Register, $mem$$Address);
11877   %}
11878   ins_pipe(ialu_cr_reg_mem);
11879 %}
11880 
11881 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compressed-pointer compare of a register against an immN constant.
        // The constant is passed as a jobject to cmp_narrow_oop(), which emits
        // the actual compare.
11882   match(Set cr (CmpN op1 op2));
11883 
11884   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11885   ins_encode %{
11886     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11887   %}
11888   ins_pipe(ialu_cr_reg_imm);
11889 %}
11890 
11891 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11892 %{
        // Compressed-pointer compare between an immN constant and a LoadN from
        // memory, emitted via cmp_narrow_oop(Address, jobject).
        // NOTE(review): the match rule has the constant as the first CmpN input
        // while the emitted compare has the memory operand first -- presumably
        // fine because narrow oops are only tested for eq/ne; confirm.
11893   match(Set cr (CmpN src (LoadN mem)));
11894 
11895   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11896   ins_encode %{
11897     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11898   %}
11899   ins_pipe(ialu_cr_reg_mem);
11900 %}
11901 
11902 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null test of a compressed pointer held in a register, implemented as
        // "testl src, src"; the immN0 operand is not encoded.
11903   match(Set cr (CmpN src zero));
11904 
11905   format %{ "testl   $src, $src\t# compressed ptr" %}
11906   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11907   ins_pipe(ialu_cr_reg_imm);
11908 %}
11909 
11910 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11911 %{
        // Null test of a compressed pointer loaded from memory, used when the
        // narrow oop base is non-NULL (testN_mem_reg0 handles the NULL-base
        // case).  The 32-bit memory operand is compared directly against the
        // constant 0, so ZF is set exactly when the loaded narrow oop is null.
        // A compare against 0xFFFFFFFF would instead set ZF only for an
        // all-ones value, which is not the condition this rule matches.
11912   predicate(Universe::narrow_oop_base() != NULL);
11913   match(Set cr (CmpN (LoadN mem) zero));
11914 
11915   ins_cost(500); // XXX
11916   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11917   ins_encode %{
11918     __ cmpl($mem$$Address, (int)0);
11919   %}
11920   ins_pipe(ialu_cr_reg_mem);
11921 %}
11922 
11923 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11924 %{
        // Null test of a compressed pointer in memory for the configuration
        // where the narrow oop base is NULL.  As the format string notes, R12
        // (the heap base register) is zero then, so the 32-bit memory operand
        // is compared against R12 instead of an immediate.
11925   predicate(Universe::narrow_oop_base() == NULL);
11926   match(Set cr (CmpN (LoadN mem) zero));
11927 
11928   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11929   ins_encode %{
11930     __ cmpl(r12, $mem$$Address);
11931   %}
11932   ins_pipe(ialu_cr_reg_mem);
11933 %}
11934 
11935 // Yanked all unsigned pointer compare operations.
11936 // Pointer compares are done with CmpP which is already unsigned.
11937 
11938 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11939 %{
        // Signed 64-bit register/register compare: matches CmpL(op1, op2);
        // wide REX prefix + opcode 0x3B, shown as cmpq.
11940   match(Set cr (CmpL op1 op2));
11941 
11942   format %{ "cmpq    $op1, $op2" %}
11943   opcode(0x3B);  /* Opcode 3B /r */
11944   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11945   ins_pipe(ialu_cr_reg_reg);
11946 %}
11947 
11948 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11949 %{
        // Signed 64-bit compare of a register against an immL32 constant.
        // Uses opcode 0x81 /7 via the wide OpcSErm helper; Con8or32 emits the
        // constant as an 8- or 32-bit immediate.
11950   match(Set cr (CmpL op1 op2));
11951 
11952   format %{ "cmpq    $op1, $op2" %}
11953   opcode(0x81, 0x07); /* Opcode 81 /7 */
11954   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11955   ins_pipe(ialu_cr_reg_imm);
11956 %}
11957 
11958 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11959 %{
        // Signed 64-bit compare of a register against a LoadL folded into the
        // instruction (wide REX, opcode 0x3B, reg/mem ModRM).  Unlike the int
        // and pointer reg/mem compares above, no ins_cost is given here.
11960   match(Set cr (CmpL op1 (LoadL op2)));
11961 
11962   format %{ "cmpq    $op1, $op2" %}
11963   opcode(0x3B); /* Opcode 3B /r */
11964   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11965   ins_pipe(ialu_cr_reg_mem);
11966 %}
11967 
11968 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11969 %{
        // Compare of a long register against zero, implemented as
        // "testq src, src" (wide REX, opcode 0x85).
11970   match(Set cr (CmpL src zero));
11971 
11972   format %{ "testq   $src, $src" %}
11973   opcode(0x85);
11974   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11975   ins_pipe(ialu_cr_reg_imm);
11976 %}
11977 
11978 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11979 %{
        // Folds (src & con) compared with zero into one testq: wide REX,
        // opcode 0xF7 /0 and a 32-bit immediate.  The constant operand class is
        // immL32, matching the 32-bit immediate field that Con32 emits.
11980   match(Set cr (CmpL (AndL src con) zero));
11981 
11982   format %{ "testq   $src, $con\t# long" %}
11983   opcode(0xF7, 0x00);
11984   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11985   ins_pipe(ialu_cr_reg_imm);
11986 %}
11987 
11988 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11989 %{
        // Folds (src & LoadL(mem)) compared with zero into "testq src, mem"
        // (wide REX, opcode 0x85, reg/mem ModRM).
11990   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11991 
11992   format %{ "testq   $src, $mem" %}
11993   opcode(0x85);
11994   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11995   ins_pipe(ialu_cr_reg_mem);
11996 %}
11997 
11998 // Manifest a CmpL result in an integer register.  Very painful.
11999 // This is the test to avoid.
      // Matches CmpL3(src1, src2) and kills the flags.  Per the format text the
      // sequence compares the operands, preloads $dst with -1, branches over
      // the rest when "less", and otherwise materializes the "not equal" bit
      // (setne + movzbl).  The bytes themselves come from the cmpl3_flag
      // encoding class.
12000 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12001 %{
12002   match(Set dst (CmpL3 src1 src2));
12003   effect(KILL flags);
12004 
12005   ins_cost(275); // XXX
12006   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12007             "movl    $dst, -1\n\t"
12008             "jl,s    done\n\t"
12009             "setne   $dst\n\t"
12010             "movzbl  $dst, $dst\n\t"
12011     "done:" %}
12012   ins_encode(cmpl3_flag(src1, src2, dst));
12013   ins_pipe(pipe_slow);
12014 %}
12015 
12016 //----------Max and Min--------------------------------------------------------
12017 // Min Instructions
12018 
12019 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12020 %{
        // Helper with no match rule: it is instantiated from the expand block
        // of minI_rReg.  Conditionally moves $src into $dst using opcode bytes
        // 0x0F 0x4F, reading the flags produced by the preceding compare.
12021   effect(USE_DEF dst, USE src, USE cr);
12022 
12023   format %{ "cmovlgt $dst, $src\t# min" %}
12024   opcode(0x0F, 0x4F);
12025   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12026   ins_pipe(pipe_cmov_reg);
12027 %}
12028 
12029 
12030 instruct minI_rReg(rRegI dst, rRegI src)
12031 %{
        // dst = MinI(dst, src).  Has no encoding of its own: it expands into
        // compI_rReg (dst vs. src, into a fresh flags register) followed by
        // cmovI_reg_g, which replaces dst with src when dst is greater.
12032   match(Set dst (MinI dst src));
12033 
12034   ins_cost(200);
12035   expand %{
12036     rFlagsReg cr;
12037     compI_rReg(cr, dst, src);
12038     cmovI_reg_g(dst, src, cr);
12039   %}
12040 %}
12041 
12042 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12043 %{
        // Helper with no match rule: it is instantiated from the expand block
        // of maxI_rReg.  Conditionally moves $src into $dst using opcode bytes
        // 0x0F 0x4C, reading the flags produced by the preceding compare.
12044   effect(USE_DEF dst, USE src, USE cr);
12045 
12046   format %{ "cmovllt $dst, $src\t# max" %}
12047   opcode(0x0F, 0x4C);
12048   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12049   ins_pipe(pipe_cmov_reg);
12050 %}
12051 
12052 
12053 instruct maxI_rReg(rRegI dst, rRegI src)
12054 %{
        // dst = MaxI(dst, src).  Has no encoding of its own: it expands into
        // compI_rReg (dst vs. src, into a fresh flags register) followed by
        // cmovI_reg_l, which replaces dst with src when dst is less.
12055   match(Set dst (MaxI dst src));
12056 
12057   ins_cost(200);
12058   expand %{
12059     rFlagsReg cr;
12060     compI_rReg(cr, dst, src);
12061     cmovI_reg_l(dst, src, cr);
12062   %}
12063 %}
12064 
12065 // ============================================================================
12066 // Branch Instructions
12067 
12068 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional jump: matches Goto and is declared as
      // 5 bytes (opcode 0xE9 plus the label displacement emitted by Lbl).
      // jmpDir_short is the 2-byte replacement for this rule.
12069 instruct jmpDir(label labl)
12070 %{
12071   match(Goto);
12072   effect(USE labl);
12073 
12074   ins_cost(300);
12075   format %{ "jmp     $labl" %}
12076   size(5);
12077   opcode(0xE9);
12078   ins_encode(OpcP, Lbl(labl));
12079   ins_pipe(pipe_jmp);
12080   ins_pc_relative(1);
12081 %}
12082 
12083 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the signed conditional branch: matches If on a cmpOp /
      // rFlagsReg pair and is declared as 6 bytes (0x0F, 0x80-based condition
      // byte, then the displacement), all emitted by the Jcc encoding class.
12084 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12085 %{
12086   match(If cop cr);
12087   effect(USE labl);
12088 
12089   ins_cost(300);
12090   format %{ "j$cop     $labl" %}
12091   size(6);
12092   opcode(0x0F, 0x80);
12093   ins_encode(Jcc(cop, labl));
12094   ins_pipe(pipe_jcc);
12095   ins_pc_relative(1);
12096 %}
12097 
12098 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon (6-byte Jcc), but matches CountedLoopEnd
      // instead of If, i.e. the back-branch of a counted loop.
12099 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12100 %{
12101   match(CountedLoopEnd cop cr);
12102   effect(USE labl);
12103 
12104   ins_cost(300);
12105   format %{ "j$cop     $labl\t# loop end" %}
12106   size(6);
12107   opcode(0x0F, 0x80);
12108   ins_encode(Jcc(cop, labl));
12109   ins_pipe(pipe_jcc);
12110   ins_pc_relative(1);
12111 %}
12112 
12113 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Unsigned variant of jmpLoopEnd: matches CountedLoopEnd on a cmpOpU /
      // rFlagsRegU pair; the encoding (6-byte Jcc) is unchanged.
12114 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12115   match(CountedLoopEnd cop cmp);
12116   effect(USE labl);
12117 
12118   ins_cost(300);
12119   format %{ "j$cop,u   $labl\t# loop end" %}
12120   size(6);
12121   opcode(0x0F, 0x80);
12122   ins_encode(Jcc(cop, labl));
12123   ins_pipe(pipe_jcc);
12124   ins_pc_relative(1);
12125 %}
12126 
12127 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch on a cmpOpUCF / rFlagsRegUCF pair.  Encoding is
        // the same 6-byte Jcc as jmpLoopEndU; note the lower ins_cost (200
        // versus 300 for the plain unsigned form).
12128   match(CountedLoopEnd cop cmp);
12129   effect(USE labl);
12130 
12131   ins_cost(200);
12132   format %{ "j$cop,u   $labl\t# loop end" %}
12133   size(6);
12134   opcode(0x0F, 0x80);
12135   ins_encode(Jcc(cop, labl));
12136   ins_pipe(pipe_jcc);
12137   ins_pc_relative(1);
12138 %}
12139 
12140 // Jump Direct Conditional - using unsigned comparison
      // Long (6-byte) conditional branch matching If on a cmpOpU / rFlagsRegU
      // pair; emitted by the same Jcc encoding class as the signed jmpCon.
12141 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12142   match(If cop cmp);
12143   effect(USE labl);
12144 
12145   ins_cost(300);
12146   format %{ "j$cop,u  $labl" %}
12147   size(6);
12148   opcode(0x0F, 0x80);
12149   ins_encode(Jcc(cop, labl));
12150   ins_pipe(pipe_jcc);
12151   ins_pc_relative(1);
12152 %}
12153 
12154 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If-branch on a cmpOpUCF / rFlagsRegUCF pair.  Same 6-byte Jcc
        // encoding as jmpConU, with a lower ins_cost (200 versus 300).
12155   match(If cop cmp);
12156   effect(USE labl);
12157 
12158   ins_cost(200);
12159   format %{ "j$cop,u  $labl" %}
12160   size(6);
12161   opcode(0x0F, 0x80);
12162   ins_encode(Jcc(cop, labl));
12163   ins_pipe(pipe_jcc);
12164   ins_pc_relative(1);
12165 %}
12166 
12167 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // If-branch for cmpOpUCF2 conditions, emitted as TWO long conditional
        // jumps (12 bytes total): a parity jump followed by the j$cop jump.
        // Only notEqual and equal are handled; anything else hits
        // ShouldNotReachHere().
        //   notEqual: both jumps go to $labl.
        //   equal:    the parity jump skips over the second jump ("done").
12168   match(If cop cmp);
12169   effect(USE labl);
12170 
12171   ins_cost(200);
12172   format %{ $$template
12173     if ($cop$$cmpcode == Assembler::notEqual) {
12174       $$emit$$"jp,u   $labl\n\t"
12175       $$emit$$"j$cop,u   $labl"
12176     } else {
12177       $$emit$$"jp,u   done\n\t"
12178       $$emit$$"j$cop,u   $labl\n\t"
12179       $$emit$$"done:"
12180     }
12181   %}
12182   size(12);
12183   opcode(0x0F, 0x80);
12184   ins_encode %{
12185     Label* l = $labl$$label;
          // First jump: primary opcode byte, then the secondary byte combined
          // with the parity condition.
12186     $$$emit8$primary;
12187     emit_cc(cbuf, $secondary, Assembler::parity);
12188     int parity_disp = -1;
12189     if ($cop$$cmpcode == Assembler::notEqual) {
12190        // the two jumps 6 bytes apart so the jump distances are too
            // Displacement is relative to the end of the 4-byte field about to
            // be emitted; a NULL label yields displacement 0.
12191        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12192     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip exactly the 6-byte conditional jump that follows.
12193        parity_disp = 6;
12194     } else {
12195        ShouldNotReachHere();
12196     }
12197     emit_d32(cbuf, parity_disp);
          // Second jump: same opcode bytes, this time with the real condition.
12198     $$$emit8$primary;
12199     emit_cc(cbuf, $secondary, $cop$$cmpcode);
12200     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
12201     emit_d32(cbuf, disp);
12202   %}
12203   ins_pipe(pipe_jcc);
12204   ins_pc_relative(1);
12205 %}
12206 
12207 // ============================================================================
12208 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12209 // superklass array for an instance of the superklass.  Set a hidden
12210 // internal cache on a hit (cache is checked with exposed code in
12211 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12212 // encoding ALSO sets flags.
12213 
12214 instruct partialSubtypeCheck(rdi_RegP result,
12215                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12216                              rFlagsReg cr)
12217 %{
        // Value-producing form of the secondary-supers scan: matches a bare
        // PartialSubtypeCheck(sub, super) and leaves the outcome in $result.
        // rcx and the flags are KILLed.  The format text below is only the
        // listing shown for this node; the bytes are emitted by
        // enc_PartialSubtypeCheck(), which reads the opcode() value to decide
        // whether to clear RDI on a hit.
12218   match(Set result (PartialSubtypeCheck sub super));
12219   effect(KILL rcx, KILL cr);
12220 
12221   ins_cost(1100);  // slightly larger than the next version
12222   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12223             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12224             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12225             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12226             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12227             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12228             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12229     "miss:\t" %}
12230 
12231   opcode(0x1); // Force a XOR of RDI
12232   ins_encode(enc_PartialSubtypeCheck());
12233   ins_pipe(pipe_slow);
12234 %}
12235 
12236 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12237                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12238                                      immP0 zero,
12239                                      rdi_RegP result)
12240 %{
        // Flags-producing form of the secondary-supers scan: matches
        // CmpP(PartialSubtypeCheck(sub, super), zero) so the consumer can
        // branch on the flags directly.  rcx and result (rdi) are KILLed.
        // Uses the same enc_PartialSubtypeCheck() encoding as
        // partialSubtypeCheck, but with opcode(0x0) so RDI is not cleared.
12241   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12242   effect(KILL rcx, KILL result);
12243 
12244   ins_cost(1000);
12245   format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
12246             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
12247             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
12248             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12249             "jne,s   miss\t\t# Missed: flags nz\n\t"
12250             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
12251     "miss:\t" %}
12252 
12253   opcode(0x0); // No need to XOR RDI
12254   ins_encode(enc_PartialSubtypeCheck());
12255   ins_pipe(pipe_slow);
12256 %}
12257 
12258 // ============================================================================
12259 // Branch Instructions -- short offset versions
12260 //
12261 // These instructions are used to replace jumps of a long offset (the default
12262 // match) with jumps of a shorter offset.  These instructions are all tagged
12263 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12264 // match rules in general matching.  Instead, the ADLC generates a conversion
12265 // method in the MachNode which can be used to do in-place replacement of the
12266 // long variant with the shorter variant.  The compiler will determine if a
12267 // branch can be taken by the is_short_branch_offset() predicate in the machine
12268 // specific code section of the file.
12269 
12270 // Jump Direct - Label defines a relative address from JMP+1
      // Short (2-byte) replacement for jmpDir: opcode 0xEB followed by the
      // displacement emitted by LblShort.  Tagged ins_short_branch(1).
12271 instruct jmpDir_short(label labl) %{
12272   match(Goto);
12273   effect(USE labl);
12274 
12275   ins_cost(300);
12276   format %{ "jmp,s   $labl" %}
12277   size(2);
12278   opcode(0xEB);
12279   ins_encode(OpcP, LblShort(labl));
12280   ins_pipe(pipe_jmp);
12281   ins_pc_relative(1);
12282   ins_short_branch(1);
12283 %}
12284 
12285 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpCon: 0x70-based condition byte plus
      // the displacement, emitted by JccShort.  Tagged ins_short_branch(1).
12286 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12287   match(If cop cr);
12288   effect(USE labl);
12289 
12290   ins_cost(300);
12291   format %{ "j$cop,s   $labl" %}
12292   size(2);
12293   opcode(0x70);
12294   ins_encode(JccShort(cop, labl));
12295   ins_pipe(pipe_jcc);
12296   ins_pc_relative(1);
12297   ins_short_branch(1);
12298 %}
12299 
12300 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpLoopEnd: matches CountedLoopEnd and
      // is emitted by JccShort.  Tagged ins_short_branch(1).
12301 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12302   match(CountedLoopEnd cop cr);
12303   effect(USE labl);
12304 
12305   ins_cost(300);
12306   format %{ "j$cop,s   $labl\t# loop end" %}
12307   size(2);
12308   opcode(0x70);
12309   ins_encode(JccShort(cop, labl));
12310   ins_pipe(pipe_jcc);
12311   ins_pc_relative(1);
12312   ins_short_branch(1);
12313 %}
12314 
12315 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpLoopEndU: CountedLoopEnd on a
      // cmpOpU / rFlagsRegU pair, emitted by JccShort.
12316 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12317   match(CountedLoopEnd cop cmp);
12318   effect(USE labl);
12319 
12320   ins_cost(300);
12321   format %{ "j$cop,us  $labl\t# loop end" %}
12322   size(2);
12323   opcode(0x70);
12324   ins_encode(JccShort(cop, labl));
12325   ins_pipe(pipe_jcc);
12326   ins_pc_relative(1);
12327   ins_short_branch(1);
12328 %}
12329 
12330 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) replacement for jmpLoopEndUCF, emitted by JccShort.
        // Note the ins_cost here is 300 while the long form declares 200.
12331   match(CountedLoopEnd cop cmp);
12332   effect(USE labl);
12333 
12334   ins_cost(300);
12335   format %{ "j$cop,us  $labl\t# loop end" %}
12336   size(2);
12337   opcode(0x70);
12338   ins_encode(JccShort(cop, labl));
12339   ins_pipe(pipe_jcc);
12340   ins_pc_relative(1);
12341   ins_short_branch(1);
12342 %}
12343 
12344 // Jump Direct Conditional - using unsigned comparison
      // Short (2-byte) replacement for jmpConU: If on a cmpOpU / rFlagsRegU
      // pair, emitted by JccShort.  Tagged ins_short_branch(1).
12345 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12346   match(If cop cmp);
12347   effect(USE labl);
12348 
12349   ins_cost(300);
12350   format %{ "j$cop,us  $labl" %}
12351   size(2);
12352   opcode(0x70);
12353   ins_encode(JccShort(cop, labl));
12354   ins_pipe(pipe_jcc);
12355   ins_pc_relative(1);
12356   ins_short_branch(1);
12357 %}
12358 
12359 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) replacement for jmpConUCF, emitted by JccShort.
        // Note the ins_cost here is 300 while the long form declares 200.
12360   match(If cop cmp);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "j$cop,us  $labl" %}
12365   size(2);
12366   opcode(0x70);
12367   ins_encode(JccShort(cop, labl));
12368   ins_pipe(pipe_jcc);
12369   ins_pc_relative(1);
12370   ins_short_branch(1);
12371 %}
12372 
12373 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short replacement for jmpConUCF2: two 2-byte conditional jumps
        // (4 bytes total), a parity jump followed by the j$cop jump.
        // Only notEqual and equal are handled; anything else hits
        // ShouldNotReachHere().
        //   notEqual: both jumps go to $labl.
        //   equal:    the parity jump skips over the second jump ("done").
12374   match(If cop cmp);
12375   effect(USE labl);
12376 
12377   ins_cost(300);
12378   format %{ $$template
12379     if ($cop$$cmpcode == Assembler::notEqual) {
12380       $$emit$$"jp,u,s   $labl\n\t"
12381       $$emit$$"j$cop,u,s   $labl"
12382     } else {
12383       $$emit$$"jp,u,s   done\n\t"
12384       $$emit$$"j$cop,u,s  $labl\n\t"
12385       $$emit$$"done:"
12386     }
12387   %}
12388   size(4);
12389   opcode(0x70);
12390   ins_encode %{
12391     Label* l = $labl$$label;
          // First jump: primary opcode combined with the parity condition.
12392     emit_cc(cbuf, $primary, Assembler::parity);
12393     int parity_disp = -1;
12394     if ($cop$$cmpcode == Assembler::notEqual) {
            // Displacement is relative to the end of the 1-byte field about to
            // be emitted; a NULL label yields displacement 0.
12395       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12396     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip exactly the 2-byte conditional jump that follows.
12397       parity_disp = 2;
12398     } else {
12399       ShouldNotReachHere();
12400     }
12401     emit_d8(cbuf, parity_disp);
          // Second jump: same primary opcode with the real condition.
12402     emit_cc(cbuf, $primary, $cop$$cmpcode);
12403     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
12404     emit_d8(cbuf, disp);
          // Both displacements must fit in a signed byte; note the checks run
          // after the bytes have already been emitted.
12405     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12406     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12407   %}
12408   ins_pipe(pipe_jcc);
12409   ins_pc_relative(1);
12410   ins_short_branch(1);
12411 %}
12412 
12413 // ============================================================================
12414 // inlined locking and unlocking
12415 
12416 instruct cmpFastLock(rFlagsReg cr,
12417                      rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
12418 %{
        // Inlined monitor-enter fast path: matches FastLock(object, box) and
        // reports its outcome through the flags register.  tmp (rax_RegI) and
        // scr are TEMPs handed to the Fast_Lock encoding class, which emits the
        // actual code.
12419   match(Set cr (FastLock object box));
12420   effect(TEMP tmp, TEMP scr);
12421 
12422   ins_cost(300);
12423   format %{ "fastlock $object,$box,$tmp,$scr" %}
12424   ins_encode(Fast_Lock(object, box, tmp, scr));
12425   ins_pipe(pipe_slow);
12426   ins_pc_relative(1);
12427 %}
12428 
12429 instruct cmpFastUnlock(rFlagsReg cr,
12430                        rRegP object, rax_RegP box, rRegP tmp)
12431 %{
        // Inlined monitor-exit fast path: matches FastUnlock(object, box) and
        // reports its outcome through the flags register.  Here box uses the
        // rax_RegP class and tmp is a TEMP; the Fast_Unlock encoding class
        // emits the actual code.
12432   match(Set cr (FastUnlock object box));
12433   effect(TEMP tmp);
12434 
12435   ins_cost(300);
12436   format %{ "fastunlock $object, $box, $tmp" %}
12437   ins_encode(Fast_Unlock(object, box, tmp));
12438   ins_pipe(pipe_slow);
12439   ins_pc_relative(1);
12440 %}
12441 
12442 
12443 // ============================================================================
12444 // Safepoint Instructions
12445 instruct safePoint_poll(rFlagsReg cr)
12446 %{
        // Matches SafePoint.  Per the format text this is a RIP-relative
        // "testl rax, [poll page]" read; it is declared as exactly 6 bytes and
        // kills the flags.  The bytes are emitted by enc_safepoint_poll.
12447   match(SafePoint);
12448   effect(KILL cr);
12449 
12450   format %{ "testl   rax, [rip + #offset_to_poll_page]\t"
12451             "# Safepoint: poll for GC" %}
12452   size(6); // Opcode + ModRM + Disp32 == 6 bytes
12453   ins_cost(125);
12454   ins_encode(enc_safepoint_poll);
12455   ins_pipe(ialu_reg_mem);
12456 %}
12457 
12458 // ============================================================================
12459 // Procedure Call/Return Instructions
12460 // Call Java Static Instruction
12461 // Note: If this code changes, the corresponding ret_addr_offset() and
12462 //       compute_padding() functions will have to be adjusted.
      // Matches CallStaticJava nodes that are NOT method-handle invokes (see
      // the predicate); CallStaticJavaHandle covers the other case.  Encoded
      // as Java_Static_Call followed by call_epilog, with ins_alignment(4).
12463 instruct CallStaticJavaDirect(method meth) %{
12464   match(CallStaticJava);
12465   predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
12466   effect(USE meth);
12467 
12468   ins_cost(300);
12469   format %{ "call,static " %}
12470   opcode(0xE8); /* E8 cd */
12471   ins_encode(Java_Static_Call(meth), call_epilog);
12472   ins_pipe(pipe_slow);
12473   ins_pc_relative(1);
12474   ins_alignment(4);
12475 %}
12476 
12477 // Call Java Static Instruction (method handle version)
12478 // Note: If this code changes, the corresponding ret_addr_offset() and
12479 //       compute_padding() functions will have to be adjusted.
12480 instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
12481   match(CallStaticJava);
12482   predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke()); // complement of the CallStaticJavaDirect predicate
12483   effect(USE meth); // rbp_mh_SP_save is not named in effect() or ins_encode() in this rule
12484   // RBP is saved by all callees (for interpreter stack correction).
12485   // We use it here for a similar purpose, in {preserve,restore}_SP.
12486   // The call is bracketed by preserve_SP before it and restore_SP after it.
12487   ins_cost(300);
12488   format %{ "call,static/MethodHandle " %}
12489   opcode(0xE8); /* E8 cd */
12490   ins_encode(preserve_SP,
12491              Java_Static_Call(meth),
12492              restore_SP,
12493              call_epilog);
12494   ins_pipe(pipe_slow);
12495   ins_pc_relative(1);
12496   ins_alignment(4);
12497 %}
12498 
12499 // Call Java Dynamic Instruction
12500 // Note: If this code changes, the corresponding ret_addr_offset() and
12501 //       compute_padding() functions will have to be adjusted.
12502 instruct CallDynamicJavaDirect(method meth)
12503 %{
12504   match(CallDynamicJava);
12505   effect(USE meth);
12506   // The format shows a movq of Universe::non_oop_word() into rax ahead of the call.
12507   ins_cost(300);
12508   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12509             "call,dynamic " %}
12510   opcode(0xE8); /* E8 cd */
12511   ins_encode(Java_Dynamic_Call(meth), call_epilog); // NOTE(review): the movq is presumably emitted inside Java_Dynamic_Call - confirm
12512   ins_pipe(pipe_slow);
12513   ins_pc_relative(1);
12514   ins_alignment(4);
12515 %}
12516 
12517 // Call Runtime Instruction
12518 instruct CallRuntimeDirect(method meth)
12519 %{
12520   match(CallRuntime);
12521   effect(USE meth);
12522   // Unlike the Java call rules above, this rule has no call_epilog and no ins_alignment.
12523   ins_cost(300);
12524   format %{ "call,runtime " %}
12525   opcode(0xE8); /* E8 cd */
12526   ins_encode(Java_To_Runtime(meth)); // same encoding as the CallLeaf and CallLeafNoFP rules
12527   ins_pipe(pipe_slow);
12528   ins_pc_relative(1);
12529 %}
12530 
12531 // Call runtime without safepoint
12532 instruct CallLeafDirect(method meth)
12533 %{
12534   match(CallLeaf);
12535   effect(USE meth);
12536   // Differs from CallRuntimeDirect only in the matched node and the format text.
12537   ins_cost(300);
12538   format %{ "call_leaf,runtime " %}
12539   opcode(0xE8); /* E8 cd */
12540   ins_encode(Java_To_Runtime(meth)); // same encoding as CallRuntimeDirect
12541   ins_pipe(pipe_slow);
12542   ins_pc_relative(1);
12543 %}
12544 
12545 // Call runtime without safepoint (rule for CallLeafNoFP nodes)
12546 instruct CallLeafNoFPDirect(method meth)
12547 %{
12548   match(CallLeafNoFP);
12549   effect(USE meth);
12550   // Differs from CallLeafDirect only in the matched node and the format text.
12551   ins_cost(300);
12552   format %{ "call_leaf_nofp,runtime " %}
12553   opcode(0xE8); /* E8 cd */
12554   ins_encode(Java_To_Runtime(meth)); // same encoding as CallRuntimeDirect and CallLeafDirect
12555   ins_pipe(pipe_slow);
12556   ins_pc_relative(1);
12557 %}
12558 
12559 // Return Instruction
12560 // Remove the return address & jump to it.
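12561 // Notice: We always emit a nop after a ret to make sure there is room
12562 // for safepoint patching.  NOTE(review): the encoding below lists only opcode 0xC3 via OpcP; confirm where the nop is emitted.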
12563 instruct Ret()
12564 %{
12565   match(Return);
12566   // No operands; the encoding lists only the primary opcode.
12567   format %{ "ret" %}
12568   opcode(0xC3); // single-byte near return
12569   ins_encode(OpcP); // emits the primary opcode set by opcode() above
12570   ins_pipe(pipe_jmp);
12571 %}
12572 
12573 // Tail Call; Jump from runtime stub to Java code.
12574 // Also known as an 'interprocedural jump'.
12575 // Target of jump will eventually return to caller.
12576 // TailJump below removes the return address.
12577 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12578 %{
12579   match(TailCall jump_target method_oop);
12580   // method_oop is constrained to operand class rbx_RegP and is not named in the encoding.
12581   ins_cost(300);
12582   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12583   opcode(0xFF, 0x4); /* Opcode FF /4 */
12584   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target)); // REX prefix, opcode, then the register/opcode-extension byte
12585   ins_pipe(pipe_jmp);
12586 %}
12587 
12588 // Tail Jump; remove the return address; jump to target.
12589 // TailCall above leaves the return address around.
12590 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12591 %{
12592   match(TailJump jump_target ex_oop);
12593   // ex_oop is constrained to operand class rax_RegP and is not named in the encoding.
12594   ins_cost(300);
12595   format %{ "popq    rdx\t# pop return address\n\t"
12596             "jmp     $jump_target" %}
12597   opcode(0xFF, 0x4); /* Opcode FF /4 */
12598   ins_encode(Opcode(0x5a), // popq rdx
12599              REX_reg(jump_target), OpcP, reg_opc(jump_target)); // NOTE(review): the pop overwrites rdx - confirm jump_target can never be allocated to rdx
12600   ins_pipe(pipe_jmp);
12601 %}
12602 
12603 // Create exception oop: created by stack-crawling runtime code.
12604 // Created exception is now available to this handler, and is setup
12605 // just prior to jumping to this handler.  No code emitted.
12606 instruct CreateException(rax_RegP ex_oop)
12607 %{
12608   match(Set ex_oop (CreateEx)); // the result operand is constrained to operand class rax_RegP
12609   // Zero-size rule: the encoding below is empty, so it emits no bytes.
12610   size(0);
12611   // use the following format syntax
12612   format %{ "# exception oop is in rax; no code emitted" %}
12613   ins_encode();
12614   ins_pipe(empty);
12615 %}
12616 
12617 // Rethrow exception:
12618 // The exception oop will come in the first argument position.
12619 // Then JUMP (not call) to the rethrow stub code.
12620 instruct RethrowException()
12621 %{
12622   match(Rethrow);
12623   // No operands are declared; all code comes from the enc_rethrow encoding defined outside this section.
12624   // use the following format syntax
12625   format %{ "jmp     rethrow_stub" %}
12626   ins_encode(enc_rethrow);
12627   ins_pipe(pipe_jmp);
12628 %}
12629 
12630 
12631 //----------PEEPHOLE RULES-----------------------------------------------------
12632 // These must follow all instruction definitions as they use the names
12633 // defined in the instructions definitions.
12634 //
12635 // peepmatch ( root_instr_name [preceding_instruction]* );
12636 //
12637 // peepconstraint %{
12638 // (instruction_number.operand_name relational_op instruction_number.operand_name
12639 //  [, ...] );
12640 // // instruction numbers are zero-based using left to right order in peepmatch
12641 //
12642 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12643 // // provide an instruction_number.operand_name for each operand that appears
12644 // // in the replacement instruction's match rule
12645 //
12646 // ---------VM FLAGS---------------------------------------------------------
12647 //
12648 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12649 //
12650 // Each peephole rule is given an identifying number starting with zero and
12651 // increasing by one in the order seen by the parser.  An individual peephole
12652 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12653 // on the command-line.
12654 //
12655 // ---------CURRENT LIMITATIONS----------------------------------------------
12656 //
12657 // Only match adjacent instructions in same basic block
12658 // Only equality constraints
12659 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12660 // Only one replacement instruction
12661 //
12662 // ---------EXAMPLE----------------------------------------------------------
12663 //
12664 // // pertinent parts of existing instructions in architecture description
12665 // instruct movI(rRegI dst, rRegI src)
12666 // %{
12667 //   match(Set dst (CopyI src));
12668 // %}
12669 //
12670 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12671 // %{
12672 //   match(Set dst (AddI dst src));
12673 //   effect(KILL cr);
12674 // %}
12675 //
12676 // // Change (inc mov) to lea
12677 // peephole %{
12678 //   // increment preceded by register-register move
12679 //   peepmatch ( incI_rReg movI );
12680 //   // require that the destination register of the increment
12681 //   // match the destination register of the move
12682 //   peepconstraint ( 0.dst == 1.dst );
12683 //   // construct a replacement instruction that sets
12684 //   // the destination to ( move's source register + one )
12685 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12686 // %}
12687 //
12688 
12689 // Implementation no longer uses movX instructions since
12690 // machine-independent system no longer uses CopyX nodes.
12691 //
12692 // peephole
12693 // %{
12694 //   peepmatch (incI_rReg movI);
12695 //   peepconstraint (0.dst == 1.dst);
12696 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12697 // %}
12698 
12699 // peephole
12700 // %{
12701 //   peepmatch (decI_rReg movI);
12702 //   peepconstraint (0.dst == 1.dst);
12703 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12704 // %}
12705 
12706 // peephole
12707 // %{
12708 //   peepmatch (addI_rReg_imm movI);
12709 //   peepconstraint (0.dst == 1.dst);
12710 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12711 // %}
12712 
12713 // peephole
12714 // %{
12715 //   peepmatch (incL_rReg movL);
12716 //   peepconstraint (0.dst == 1.dst);
12717 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12718 // %}
12719 
12720 // peephole
12721 // %{
12722 //   peepmatch (decL_rReg movL);
12723 //   peepconstraint (0.dst == 1.dst);
12724 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12725 // %}
12726 
12727 // peephole
12728 // %{
12729 //   peepmatch (addL_rReg_imm movL);
12730 //   peepconstraint (0.dst == 1.dst);
12731 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12732 // %}
12733 
12734 // peephole
12735 // %{
12736 //   peepmatch (addP_rReg_imm movP);
12737 //   peepconstraint (0.dst == 1.dst);
12738 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12739 // %}
12740 
12741 // // Change load of spilled value to only a spill
12742 // instruct storeI(memory mem, rRegI src)
12743 // %{
12744 //   match(Set mem (StoreI mem src));
12745 // %}
12746 //
12747 // instruct loadI(rRegI dst, memory mem)
12748 // %{
12749 //   match(Set dst (LoadI mem));
12750 // %}
12751 //
12752 
12753 peephole
12754 %{
12755   peepmatch (loadI storeI); // instruction 0 = loadI (root), instruction 1 = storeI (the preceding instruction)
12756   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads back the register and memory operand just stored
12757   peepreplace (storeI(1.mem 1.mem 1.src)); // replacement is a storeI built only from the store's operands
12758 %}
12759 
12760 peephole
12761 %{
12762   peepmatch (loadL storeL); // instruction 0 = loadL (root), instruction 1 = storeL (the preceding instruction)
12763   peepconstraint (1.src == 0.dst, 1.mem == 0.mem); // the load reads back the register and memory operand just stored
12764   peepreplace (storeL(1.mem 1.mem 1.src)); // replacement is a storeL built only from the store's operands
12765 %}
12766 
12767 //----------SMARTSPILL RULES---------------------------------------------------
12768 // These must follow all instruction definitions as they use the names
12769 // defined in the instructions definitions.