1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Empty register class.
 170 reg_class no_reg();
 171 
 172 // Class for all pointer registers (including RSP and RBP)
 173 reg_class any_reg_with_rbp(RAX, RAX_H,
 174                            RDX, RDX_H,
 175                            RBP, RBP_H,
 176                            RDI, RDI_H,
 177                            RSI, RSI_H,
 178                            RCX, RCX_H,
 179                            RBX, RBX_H,
 180                            RSP, RSP_H,
 181                            R8,  R8_H,
 182                            R9,  R9_H,
 183                            R10, R10_H,
 184                            R11, R11_H,
 185                            R12, R12_H,
 186                            R13, R13_H,
 187                            R14, R14_H,
 188                            R15, R15_H);
 189 
 190 // Class for all pointer registers (including RSP, but excluding RBP)
 191 reg_class any_reg_no_rbp(RAX, RAX_H,
 192                          RDX, RDX_H,
 193                          RDI, RDI_H,
 194                          RSI, RSI_H,
 195                          RCX, RCX_H,
 196                          RBX, RBX_H,
 197                          RSP, RSP_H,
 198                          R8,  R8_H,
 199                          R9,  R9_H,
 200                          R10, R10_H,
 201                          R11, R11_H,
 202                          R12, R12_H,
 203                          R13, R13_H,
 204                          R14, R14_H,
 205                          R15, R15_H);
 206 
 207 // Dynamic register class that selects at runtime between register classes
 208 // any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer).
 209 // Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
 210 reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
 211 
 212 // Class for all pointer registers (excluding RSP)
 213 reg_class ptr_reg_with_rbp(RAX, RAX_H,
 214                            RDX, RDX_H,
 215                            RBP, RBP_H,
 216                            RDI, RDI_H,
 217                            RSI, RSI_H,
 218                            RCX, RCX_H,
 219                            RBX, RBX_H,
 220                            R8,  R8_H,
 221                            R9,  R9_H,
 222                            R10, R10_H,
 223                            R11, R11_H,
 224                            R13, R13_H,
 225                            R14, R14_H);
 226 
 227 // Class for all pointer registers (excluding RSP and RBP)
 228 reg_class ptr_reg_no_rbp(RAX, RAX_H,
 229                          RDX, RDX_H,
 230                          RDI, RDI_H,
 231                          RSI, RSI_H,
 232                          RCX, RCX_H,
 233                          RBX, RBX_H,
 234                          R8,  R8_H,
 235                          R9,  R9_H,
 236                          R10, R10_H,
 237                          R11, R11_H,
 238                          R13, R13_H,
 239                          R14, R14_H);
 240 
 241 // Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
 242 reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
 243 
 244 // Class for all pointer registers (excluding RAX and RSP)
 245 reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
 246                                   RBP, RBP_H,
 247                                   RDI, RDI_H,
 248                                   RSI, RSI_H,
 249                                   RCX, RCX_H,
 250                                   RBX, RBX_H,
 251                                   R8,  R8_H,
 252                                   R9,  R9_H,
 253                                   R10, R10_H,
 254                                   R11, R11_H,
 255                                   R13, R13_H,
 256                                   R14, R14_H);
 257 
 258 // Class for all pointer registers (excluding RAX, RSP, and RBP)
 259 reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
 260                                 RDI, RDI_H,
 261                                 RSI, RSI_H,
 262                                 RCX, RCX_H,
 263                                 RBX, RBX_H,
 264                                 R8,  R8_H,
 265                                 R9,  R9_H,
 266                                 R10, R10_H,
 267                                 R11, R11_H,
 268                                 R13, R13_H,
 269                                 R14, R14_H);
 270 
 271 // Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
 272 reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
 273 
 274 // Class for all pointer registers (excluding RAX, RBX, and RSP)
 275 reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
 276                                       RBP, RBP_H,
 277                                       RDI, RDI_H,
 278                                       RSI, RSI_H,
 279                                       RCX, RCX_H,
 280                                       R8,  R8_H,
 281                                       R9,  R9_H,
 282                                       R10, R10_H,
 283                                       R11, R11_H,
 284                                       R13, R13_H,
 285                                       R14, R14_H);
 286 
 287 // Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
 288 reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
 289                                     RDI, RDI_H,
 290                                     RSI, RSI_H,
 291                                     RCX, RCX_H,
 292                                     R8,  R8_H,
 293                                     R9,  R9_H,
 294                                     R10, R10_H,
 295                                     R11, R11_H,
 296                                     R13, R13_H,
 297                                     R14, R14_H);
 298 
 299 // Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
 300 reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 301 
 302 // Singleton class for RAX pointer register
 303 reg_class ptr_rax_reg(RAX, RAX_H);
 304 
 305 // Singleton class for RBX pointer register
 306 reg_class ptr_rbx_reg(RBX, RBX_H);
 307 
 308 // Singleton class for RSI pointer register
 309 reg_class ptr_rsi_reg(RSI, RSI_H);
 310 
 311 // Singleton class for RDI pointer register
 312 reg_class ptr_rdi_reg(RDI, RDI_H);
 313 
 314 // Singleton class for stack pointer
 315 reg_class ptr_rsp_reg(RSP, RSP_H);
 316 
 317 // Singleton class for TLS pointer
 318 reg_class ptr_r15_reg(R15, R15_H);
 319 
 320 // Class for all long registers (excluding RSP)
 321 reg_class long_reg_with_rbp(RAX, RAX_H,
 322                             RDX, RDX_H,
 323                             RBP, RBP_H,
 324                             RDI, RDI_H,
 325                             RSI, RSI_H,
 326                             RCX, RCX_H,
 327                             RBX, RBX_H,
 328                             R8,  R8_H,
 329                             R9,  R9_H,
 330                             R10, R10_H,
 331                             R11, R11_H,
 332                             R13, R13_H,
 333                             R14, R14_H);
 334 
 335 // Class for all long registers (excluding RSP and RBP)
 336 reg_class long_reg_no_rbp(RAX, RAX_H,
 337                           RDX, RDX_H,
 338                           RDI, RDI_H,
 339                           RSI, RSI_H,
 340                           RCX, RCX_H,
 341                           RBX, RBX_H,
 342                           R8,  R8_H,
 343                           R9,  R9_H,
 344                           R10, R10_H,
 345                           R11, R11_H,
 346                           R13, R13_H,
 347                           R14, R14_H);
 348 
 349 // Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
 350 reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
 351 
 352 // Class for all long registers (excluding RAX, RDX and RSP)
 353 reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
 354                                        RDI, RDI_H,
 355                                        RSI, RSI_H,
 356                                        RCX, RCX_H,
 357                                        RBX, RBX_H,
 358                                        R8,  R8_H,
 359                                        R9,  R9_H,
 360                                        R10, R10_H,
 361                                        R11, R11_H,
 362                                        R13, R13_H,
 363                                        R14, R14_H);
 364 
 365 // Class for all long registers (excluding RAX, RDX, RSP, and RBP)
 366 reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
 367                                      RSI, RSI_H,
 368                                      RCX, RCX_H,
 369                                      RBX, RBX_H,
 370                                      R8,  R8_H,
 371                                      R9,  R9_H,
 372                                      R10, R10_H,
 373                                      R11, R11_H,
 374                                      R13, R13_H,
 375                                      R14, R14_H);
 376 
 377 // Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
 378 reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 379 
 380 // Class for all long registers (excluding RCX and RSP)
 381 reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
 382                                    RDI, RDI_H,
 383                                    RSI, RSI_H,
 384                                    RAX, RAX_H,
 385                                    RDX, RDX_H,
 386                                    RBX, RBX_H,
 387                                    R8,  R8_H,
 388                                    R9,  R9_H,
 389                                    R10, R10_H,
 390                                    R11, R11_H,
 391                                    R13, R13_H,
 392                                    R14, R14_H);
 393 
 394 // Class for all long registers (excluding RCX, RSP, and RBP)
 395 reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
 396                                  RSI, RSI_H,
 397                                  RAX, RAX_H,
 398                                  RDX, RDX_H,
 399                                  RBX, RBX_H,
 400                                  R8,  R8_H,
 401                                  R9,  R9_H,
 402                                  R10, R10_H,
 403                                  R11, R11_H,
 404                                  R13, R13_H,
 405                                  R14, R14_H);
 406 
 407 // Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
 408 reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 409 
 410 // Singleton class for RAX long register
 411 reg_class long_rax_reg(RAX, RAX_H);
 412 
 413 // Singleton class for RCX long register
 414 reg_class long_rcx_reg(RCX, RCX_H);
 415 
 416 // Singleton class for RDX long register
 417 reg_class long_rdx_reg(RDX, RDX_H);
 418 
 419 // Class for all int registers (excluding RSP)
 420 reg_class int_reg_with_rbp(RAX,
 421                            RDX,
 422                            RBP,
 423                            RDI,
 424                            RSI,
 425                            RCX,
 426                            RBX,
 427                            R8,
 428                            R9,
 429                            R10,
 430                            R11,
 431                            R13,
 432                            R14);
 433 
 434 // Class for all int registers (excluding RSP and RBP)
 435 reg_class int_reg_no_rbp(RAX,
 436                          RDX,
 437                          RDI,
 438                          RSI,
 439                          RCX,
 440                          RBX,
 441                          R8,
 442                          R9,
 443                          R10,
 444                          R11,
 445                          R13,
 446                          R14);
 447 
 448 // Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
 449 reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
 450 
 451 // Class for all int registers (excluding RCX and RSP)
 452 reg_class int_no_rcx_reg_with_rbp(RAX,
 453                                   RDX,
 454                                   RBP,
 455                                   RDI,
 456                                   RSI,
 457                                   RBX,
 458                                   R8,
 459                                   R9,
 460                                   R10,
 461                                   R11,
 462                                   R13,
 463                                   R14);
 464 
 465 // Class for all int registers (excluding RCX, RSP, and RBP)
 466 reg_class int_no_rcx_reg_no_rbp(RAX,
 467                                 RDX,
 468                                 RDI,
 469                                 RSI,
 470                                 RBX,
 471                                 R8,
 472                                 R9,
 473                                 R10,
 474                                 R11,
 475                                 R13,
 476                                 R14);
 477 
 478 // Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
 479 reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 480 
 481 // Class for all int registers (excluding RAX, RDX, and RSP)
 482 reg_class int_no_rax_rdx_reg_with_rbp(RBP,
 483                                       RDI,
 484                                       RSI,
 485                                       RCX,
 486                                       RBX,
 487                                       R8,
 488                                       R9,
 489                                       R10,
 490                                       R11,
 491                                       R13,
 492                                       R14);
 493 
 494 // Class for all int registers (excluding RAX, RDX, RSP, and RBP)
 495 reg_class int_no_rax_rdx_reg_no_rbp(RDI,
 496                                     RSI,
 497                                     RCX,
 498                                     RBX,
 499                                     R8,
 500                                     R9,
 501                                     R10,
 502                                     R11,
 503                                     R13,
 504                                     R14);
 505 
 506 // Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
 507 reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 508 
 509 // Singleton class for RAX int register
 510 reg_class int_rax_reg(RAX);
 511 
 512 // Singleton class for RBX int register
 513 reg_class int_rbx_reg(RBX);
 514 
 515 // Singleton class for RCX int register
 516 reg_class int_rcx_reg(RCX);
 517 
 518 // Singleton class for RDX int register
 519 reg_class int_rdx_reg(RDX);
 520 
 521 // Singleton class for RDI int register
 522 reg_class int_rdi_reg(RDI);
 523 
 524 // Singleton class for instruction pointer
 525 // reg_class ip_reg(RIP);
 526 
 527 %}
 528 
 529 //----------SOURCE BLOCK-------------------------------------------------------
 530 // This is a block of C++ code which provides values, functions, and
 531 // definitions necessary in the rest of the architecture description
 532 source %{
 533 #define   RELOC_IMM64    Assembler::imm_operand
 534 #define   RELOC_DISP32   Assembler::disp32_operand
 535 
 536 #define __ _masm.
 537 
 538 static bool generate_vzeroupper(Compile* C) {
 539   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 540 }
 541 
 542 static int clear_avx_size() {
 543   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 544 }
 545 
 546 // !!!!! Special hack to get all types of calls to specify the byte offset
 547 //       from the start of the call to the point where the return address
 548 //       will point.
 549 int MachCallStaticJavaNode::ret_addr_offset()
 550 {
 551   int offset = 5; // 5 bytes from start of call to where return address points
 552   offset += clear_avx_size();
 553   return offset;
 554 }
 555 
 556 int MachCallDynamicJavaNode::ret_addr_offset()
 557 {
 558   int offset = 15; // 15 bytes from start of call to where return address points
 559   offset += clear_avx_size();
 560   return offset;
 561 }
 562 
 563 int MachCallRuntimeNode::ret_addr_offset() {
 564   int offset = 13; // movq r10,#addr; callq (r10)
 565   offset += clear_avx_size();
 566   return offset;
 567 }
 568 
 569 // Indicate if the safepoint node needs the polling page as an input,
 570 // it does if the polling page is more than disp32 away.
 571 bool SafePointNode::needs_polling_address_input()
 572 {
 573   return SafepointMechanism::uses_thread_local_poll() || Assembler::is_polling_page_far();
 574 }
 575 
 576 //
 577 // Compute padding required for nodes which need alignment
 578 //
 579 
 580 // The address of the call instruction needs to be 4-byte aligned to
 581 // ensure that it does not span a cache line so that it can be patched.
 582 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 583 {
 584   current_offset += clear_avx_size(); // skip vzeroupper
 585   current_offset += 1; // skip call opcode byte
 586   return align_up(current_offset, alignment_required()) - current_offset;
 587 }
 588 
 589 // The address of the call instruction needs to be 4-byte aligned to
 590 // ensure that it does not span a cache line so that it can be patched.
 591 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 592 {
 593   current_offset += clear_avx_size(); // skip vzeroupper
 594   current_offset += 11; // skip movq instruction + call opcode byte
 595   return align_up(current_offset, alignment_required()) - current_offset;
 596 }
 597 
 598 // EMIT_RM()
 599 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 600   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 601   cbuf.insts()->emit_int8(c);
 602 }
 603 
 604 // EMIT_CC()
 605 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 606   unsigned char c = (unsigned char) (f1 | f2);
 607   cbuf.insts()->emit_int8(c);
 608 }
 609 
 610 // EMIT_OPCODE()
 611 void emit_opcode(CodeBuffer &cbuf, int code) {
 612   cbuf.insts()->emit_int8((unsigned char) code);
 613 }
 614 
 615 // EMIT_OPCODE() w/ relocation information
 616 void emit_opcode(CodeBuffer &cbuf,
 617                  int code, relocInfo::relocType reloc, int offset, int format)
 618 {
 619   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 620   emit_opcode(cbuf, code);
 621 }
 622 
 623 // EMIT_D8()
 624 void emit_d8(CodeBuffer &cbuf, int d8) {
 625   cbuf.insts()->emit_int8((unsigned char) d8);
 626 }
 627 
 628 // EMIT_D16()
 629 void emit_d16(CodeBuffer &cbuf, int d16) {
 630   cbuf.insts()->emit_int16(d16);
 631 }
 632 
 633 // EMIT_D32()
 634 void emit_d32(CodeBuffer &cbuf, int d32) {
 635   cbuf.insts()->emit_int32(d32);
 636 }
 637 
 638 // EMIT_D64()
 639 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 640   cbuf.insts()->emit_int64(d64);
 641 }
 642 
 643 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 644 void emit_d32_reloc(CodeBuffer& cbuf,
 645                     int d32,
 646                     relocInfo::relocType reloc,
 647                     int format)
 648 {
 649   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 650   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 651   cbuf.insts()->emit_int32(d32);
 652 }
 653 
 654 // emit 32 bit value and construct relocation entry from RelocationHolder
 655 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 656 #ifdef ASSERT
 657   if (rspec.reloc()->type() == relocInfo::oop_type &&
 658       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 659     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 660     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop((intptr_t)d32))), "cannot embed scavengable oops in code");
 661   }
 662 #endif
 663   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 664   cbuf.insts()->emit_int32(d32);
 665 }
 666 
 667 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 668   address next_ip = cbuf.insts_end() + 4;
 669   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 670                  external_word_Relocation::spec(addr),
 671                  RELOC_DISP32);
 672 }
 673 
 674 
 675 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 676 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 677   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 678   cbuf.insts()->emit_int64(d64);
 679 }
 680 
 681 // emit 64 bit value and construct relocation entry from RelocationHolder
 682 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 683 #ifdef ASSERT
 684   if (rspec.reloc()->type() == relocInfo::oop_type &&
 685       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 686     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 687     assert(oopDesc::is_oop(cast_to_oop(d64)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d64))),
 688            "cannot embed scavengable oops in code");
 689   }
 690 #endif
 691   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 692   cbuf.insts()->emit_int64(d64);
 693 }
 694 
 695 // Access stack slot for load or store
 696 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 697 {
 698   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 699   if (-0x80 <= disp && disp < 0x80) {
 700     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 701     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 702     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 703   } else {
 704     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 705     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 706     emit_d32(cbuf, disp);     // Displacement // R/M byte
 707   }
 708 }
 709 
 710    // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM byte, the optional SIB byte and the optional displacement
// for an instruction whose operands are a register and a memory location
// described by (base, index, scale, disp).
//
//   reg        - register number; only the low 3 bits go into the ModRM
//                reg field.
//   base       - base register number, or -1 as a flag for an absolute
//                address (only tested on the no-SIB, 32-bit displacement path).
//   index      - index register number; 0x4 means "no index".
//   scale      - value written to the SIB scale field.
//   disp       - displacement.
//   disp_reloc - asserted to be relocInfo::none.  The branches below that
//                handle a relocated displacement are only reachable when
//                asserts are compiled out.
//
// This function writes no REX prefix and no opcode; only the low 3 bits of
// each register number are encoded here.
void encode_RegMem(CodeBuffer &cbuf,
                   int reg,
                   int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
{
  assert(disp_reloc == relocInfo::none, "cannot have disp");
  // Low 3 bits of each register number, as used in ModRM/SIB fields.
  int regenc = reg & 7;
  int baseenc = base & 7;
  int indexenc = index & 7;

  // There is no index & no scale, use form without SIB byte
  // (RSP and R12 as base are excluded here and fall through to the SIB form.)
  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      emit_rm(cbuf, 0x0, regenc, baseenc); // *
    } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
      // If 8-bit displacement, mode 0x1
      emit_rm(cbuf, 0x1, regenc, baseenc); // *
      emit_d8(cbuf, disp);
    } else {
      // If 32-bit displacement
      if (base == -1) { // Special flag for absolute address
        // mod = 00 with r/m = 101: 32-bit displacement, no base register field.
        emit_rm(cbuf, 0x0, regenc, 0x5); // *
        if (disp_reloc != relocInfo::none) {
          // Note: relocInfo::oop_type is passed regardless of disp_reloc's value.
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      } else {
        // Normal base + offset
        emit_rm(cbuf, 0x2, regenc, baseenc); // *
        if (disp_reloc != relocInfo::none) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  } else {
    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      // If no displacement
      emit_rm(cbuf, 0x0, regenc, 0x4); // *
      emit_rm(cbuf, scale, indexenc, baseenc);
    } else {
      if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
        // If 8-bit displacement, mode 0x1
        emit_rm(cbuf, 0x1, regenc, 0x4); // *
        emit_rm(cbuf, scale, indexenc, baseenc);
        emit_d8(cbuf, disp);
      } else {
        // If 32-bit displacement
        // Both arms below emit the same bytes: when base == 0x04,
        // baseenc (base & 7) is also 0x04.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
        } else {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, baseenc); // *
        }
        if (disp_reloc != relocInfo::none) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  }
}
 779 
 780 // This could be in MacroAssembler but it's fairly C2 specific
 781 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 782   Label exit;
 783   __ jccb(Assembler::noParity, exit);
 784   __ pushf();
 785   //
 786   // comiss/ucomiss instructions set ZF,PF,CF flags and
 787   // zero OF,AF,SF for NaN values.
 788   // Fixup flags by zeroing ZF,PF so that compare of NaN
 789   // values returns 'less than' result (CF is set).
 790   // Leave the rest of flags unchanged.
 791   //
 792   //    7 6 5 4 3 2 1 0
 793   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 794   //    0 0 1 0 1 0 1 1   (0x2B)
 795   //
 796   __ andq(Address(rsp, 0), 0xffffff2b);
 797   __ popf();
 798   __ bind(exit);
 799 }
 800 
 801 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 802   Label done;
 803   __ movl(dst, -1);
 804   __ jcc(Assembler::parity, done);
 805   __ jcc(Assembler::below, done);
 806   __ setb(Assembler::notEqual, dst);
 807   __ movzbl(dst, dst);
 808   __ bind(done);
 809 }
 810 
 811 
 812 //=============================================================================
// The constant base node's output register mask is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 814 
// Offset of the constant table base.  Always 0 on this platform because
// constants are addressed absolutely.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
 818 
// This node is never expanded after register allocation.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Must never be called: requires_postalloc_expand() returns false for this
// node, so reaching this body is treated as a fatal error.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
 823 
// Emits no code for the constant base node.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
 827 
// Encoded size in bytes: 0, matching the empty encoding in emit().
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
 831 
#ifndef PRODUCT
// Non-PRODUCT only: print a one-line placeholder for this node to st.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 837 
 838 
 839 //=============================================================================
 840 #ifndef PRODUCT
 841 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 842   Compile* C = ra_->C;
 843 
 844   int framesize = C->frame_size_in_bytes();
 845   int bangsize = C->bang_size_in_bytes();
 846   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 847   // Remove wordSize for return addr which is already pushed.
 848   framesize -= wordSize;
 849 
 850   if (C->need_stack_bang(bangsize)) {
 851     framesize -= wordSize;
 852     st->print("# stack bang (%d bytes)", bangsize);
 853     st->print("\n\t");
 854     st->print("pushq   rbp\t# Save rbp");
 855     if (PreserveFramePointer) {
 856         st->print("\n\t");
 857         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 858     }
 859     if (framesize) {
 860       st->print("\n\t");
 861       st->print("subq    rsp, #%d\t# Create frame",framesize);
 862     }
 863   } else {
 864     st->print("subq    rsp, #%d\t# Create frame",framesize);
 865     st->print("\n\t");
 866     framesize -= wordSize;
 867     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 868     if (PreserveFramePointer) {
 869       st->print("\n\t");
 870       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 871       if (framesize > 0) {
 872         st->print("\n\t");
 873         st->print("addq    rbp, #%d", framesize);
 874       }
 875     }
 876   }
 877 
 878   if (VerifyStackAtCalls) {
 879     st->print("\n\t");
 880     framesize -= wordSize;
 881     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 882 #ifdef ASSERT
 883     st->print("\n\t");
 884     st->print("# stack alignment check");
 885 #endif
 886   }
 887   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 888     st->print("\n\t");
 889     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
 890     st->print("\n\t");
 891     st->print("je      fast_entry\t");
 892     st->print("\n\t");
 893     st->print("call    #nmethod_entry_barrier_stub\t");
 894     st->print("\n\tfast_entry:");
 895   }
 896   st->cr();
 897 }
 898 #endif
 899 
 900 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 901   Compile* C = ra_->C;
 902   MacroAssembler _masm(&cbuf);
 903 
 904   int framesize = C->frame_size_in_bytes();
 905   int bangsize = C->bang_size_in_bytes();
 906 
 907   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
 908 
 909   C->set_frame_complete(cbuf.insts_size());
 910 
 911   if (C->has_mach_constant_base_node()) {
 912     // NOTE: We set the table base offset here because users might be
 913     // emitted before MachConstantBaseNode.
 914     Compile::ConstantTable& constant_table = C->constant_table();
 915     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 916   }
 917 }
 918 
// Size of the prolog in bytes.  Not computed analytically; delegated to
// the generic MachNode::size().
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 924 
// Returns the constant 0.
// NOTE(review): presumably an upper bound on the relocation entries this
// node needs; the inline remark "a large enough number" reads oddly next
// to 0 -- confirm against the callers of reloc().
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 929 
 930 //=============================================================================
 931 #ifndef PRODUCT
 932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 933 {
 934   Compile* C = ra_->C;
 935   if (generate_vzeroupper(C)) {
 936     st->print("vzeroupper");
 937     st->cr(); st->print("\t");
 938   }
 939 
 940   int framesize = C->frame_size_in_bytes();
 941   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 942   // Remove word for return adr already pushed
 943   // and RBP
 944   framesize -= 2*wordSize;
 945 
 946   if (framesize) {
 947     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 948     st->print("\t");
 949   }
 950 
 951   st->print_cr("popq   rbp");
 952   if (do_polling() && C->is_method_compilation()) {
 953     st->print("\t");
 954     if (SafepointMechanism::uses_thread_local_poll()) {
 955       st->print_cr("movq   rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
 956                    "testl  rax, [rscratch1]\t"
 957                    "# Safepoint: poll for GC");
 958     } else if (Assembler::is_polling_page_far()) {
 959       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 960                    "testl  rax, [rscratch1]\t"
 961                    "# Safepoint: poll for GC");
 962     } else {
 963       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 964                    "# Safepoint: poll for GC");
 965     }
 966   }
 967 }
 968 #endif
 969 
 970 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 971 {
 972   Compile* C = ra_->C;
 973   MacroAssembler _masm(&cbuf);
 974 
 975   if (generate_vzeroupper(C)) {
 976     // Clear upper bits of YMM registers when current compiled code uses
 977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 978     __ vzeroupper();
 979   }
 980 
 981   int framesize = C->frame_size_in_bytes();
 982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 983   // Remove word for return adr already pushed
 984   // and RBP
 985   framesize -= 2*wordSize;
 986 
 987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 988 
 989   if (framesize) {
 990     emit_opcode(cbuf, Assembler::REX_W);
 991     if (framesize < 0x80) {
 992       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 993       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 994       emit_d8(cbuf, framesize);
 995     } else {
 996       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 997       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 998       emit_d32(cbuf, framesize);
 999     }
1000   }
1001 
1002   // popq rbp
1003   emit_opcode(cbuf, 0x58 | RBP_enc);
1004 
1005   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1006     __ reserved_stack_check();
1007   }
1008 
1009   if (do_polling() && C->is_method_compilation()) {
1010     MacroAssembler _masm(&cbuf);
1011     if (SafepointMechanism::uses_thread_local_poll()) {
1012       __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
1013       __ relocate(relocInfo::poll_return_type);
1014       __ testl(rax, Address(rscratch1, 0));
1015     } else {
1016       AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1017       if (Assembler::is_polling_page_far()) {
1018         __ lea(rscratch1, polling_page);
1019         __ relocate(relocInfo::poll_return_type);
1020         __ testl(rax, Address(rscratch1, 0));
1021       } else {
1022         __ testl(rax, polling_page);
1023       }
1024     }
1025   }
1026 }
1027 
// Size of the epilog in bytes.  Not computed analytically; delegated to
// the generic MachNode::size().
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1033 
// Returns the constant 2.
// NOTE(review): presumably an upper bound on the relocation entries the
// epilog needs (emit() issues poll_return_type relocations) -- confirm
// against the callers of reloc().
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1038 
// Pipeline description for the epilog: the generic MachNode pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1043 
// Returns the constant 0.
// NOTE(review): the meaning of the offset (and what it is relative to) is
// defined by the callers, which are not visible here -- confirm.
int MachEpilogNode::safepoint_offset() const
{
  return 0;
}
1048 
1049 //=============================================================================
1050 
// Coarse location class of an OptoReg, as computed by rc_class() and used
// to pick the kind of move a spill copy needs.
enum RC {
  rc_bad,    // not a valid OptoReg
  rc_int,    // general-purpose register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
1057 
1058 static enum RC rc_class(OptoReg::Name reg)
1059 {
1060   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1061 
1062   if (OptoReg::is_stack(reg)) return rc_stack;
1063 
1064   VMReg r = OptoReg::as_VMReg(reg);
1065 
1066   if (r->is_Register()) return rc_int;
1067 
1068   assert(r->is_XMMRegister(), "must be");
1069   return rc_float;
1070 }
1071 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Only forward declarations appear here; they are used by
// MachSpillCopyNode::implementation for vector register-to-register moves
// (vec_mov_helper) and vector register<->stack spills (vec_spill_helper).
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
1078 
// Copy a vector value from one stack slot to another.
//
//   cbuf       - when non-NULL, the copy is emitted into this buffer.
//                When NULL, non-PRODUCT builds print a textual rendering to
//                st instead; PRODUCT builds do nothing.
//   src_offset - source offset from rsp.
//   dst_offset - destination offset from rsp.
//   ireg       - ideal vector register kind: Op_VecS, Op_VecD, Op_VecX,
//                Op_VecY or Op_VecZ.  Anything else hits ShouldNotReachHere().
//   st         - output stream for the textual rendering.
//
// VecD and VecX are copied 8 bytes at a time with pushq/popq.  The other
// sizes go through a scratch register (rax or xmm0) whose previous contents
// are parked below rsp (at rsp-8, rsp-32 or rsp-64) and restored afterwards.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is saved to and restored from [rsp-8].
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy with one push/pop pair.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // Copy through xmm0; xmm0 is saved to and restored from [rsp-32].
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // Copy through xmm0 using evmovdquq with vector-length argument 2;
      // xmm0 is saved to and restored from [rsp-64].
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Textual rendering only; mirrors the emitted sequences above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the text says "vmovdqu" while the emitter above uses
      // evmovdquq for this case.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
1157 
// Shared worker behind MachSpillCopyNode::format() and ::emit().
//
// Moves the value of in(1) to the location assigned to this node.  The
// first/second register halves of source and destination are looked up in
// the register allocator, each is classified with rc_class() as GPR
// (rc_int), XMM (rc_float) or stack slot (rc_stack), and the matching move
// is produced.
//
//   cbuf    - when non-NULL, instructions are emitted into this buffer.
//             When NULL, non-PRODUCT builds print a textual rendering to
//             st instead; PRODUCT builds produce nothing.
//   ra_     - register allocator, queried for register pairs and stack
//             offsets.
//   do_size - not read anywhere in this body.
//   st      - output stream for the textual rendering.
//
// A move is treated as 64-bit when both the source and the destination form
// an aligned pair (first is even and second == first + 1); otherwise it is
// treated as 32-bit.
//
// Every return statement in this function returns 0.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values: handled entirely by the vector helpers; GPRs are not
  // expected on either side.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (is_load == false)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (is_load == true)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // copy through rax, parking rax's old value at [rsp-8] meanwhile.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          // The printed mnemonic depends on UseXmmLoadAndClearUpper.
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          // The printed mnemonic depends on UseXmmRegToRegMoveAll.
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    }
  }

  // No source/destination class combination matched above.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
1523 
#ifndef PRODUCT
// Non-PRODUCT only: print the spill copy as text by running
// implementation() with no code buffer.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(NULL, ra_, false, st);
}
#endif
1529 
// Emit the spill copy into cbuf by running implementation() with no
// output stream.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}
1533 
// Size of the spill copy in bytes, delegated to the generic
// MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1537 
1538 //=============================================================================
#ifndef PRODUCT
// Non-PRODUCT only: print the box-lock address computation as
// "leaq <reg>, [rsp + #offset]", where offset is the stack offset of the
// first element of input 0's register mask and reg is this node's
// assigned register.
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("leaq    %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
#endif
1548 
1549 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1550 {
1551   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1552   int reg = ra_->get_encode(this);
1553   if (offset >= 0x80) {
1554     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1555     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1556     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1557     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1558     emit_d32(cbuf, offset);
1559   } else {
1560     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1561     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1562     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1563     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1564     emit_d8(cbuf, offset);
1565   }
1566 }
1567 
1568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1569 {
1570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1571   return (offset < 0x80) ? 5 : 8; // REX
1572 }
1573 
1574 //=============================================================================
1575 #ifndef PRODUCT
1576 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1577 {
1578   if (UseCompressedClassPointers) {
1579     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1580     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1581     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1582   } else {
1583     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1584                  "# Inline cache check");
1585   }
1586   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1587   st->print_cr("\tnop\t# nops to align entry point");
1588 }
1589 #endif
1590 
1591 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1592 {
1593   MacroAssembler masm(&cbuf);
1594   uint insts_size = cbuf.insts_size();
1595   if (UseCompressedClassPointers) {
1596     masm.load_klass(rscratch1, j_rarg0);
1597     masm.cmpptr(rax, rscratch1);
1598   } else {
1599     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1600   }
1601 
1602   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1603 
1604   /* WARNING these NOPs are critical so that verified entry point is properly
1605      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1606   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1607   if (OptoBreakpoint) {
1608     // Leave space for int3
1609     nops_cnt -= 1;
1610   }
1611   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1612   if (nops_cnt > 0)
1613     masm.nop(nops_cnt);
1614 }
1615 
1616 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1617 {
1618   return MachNode::size(ra_); // too many variables; just compute it
1619                               // the hard way
1620 }
1621 
1622 
1623 //=============================================================================
1624 
1625 int Matcher::regnum_to_fpu_offset(int regnum)
1626 {
1627   return regnum - 32; // The FP registers are in the second chunk
1628 }
1629 
1630 // This is UltraSparc specific, true just means we have fast l2f conversion
1631 const bool Matcher::convL2FSupported(void) {
1632   return true;
1633 }
1634 
1635 // Is this branch offset short enough that a short branch can be used?
1636 //
1637 // NOTE: If the platform does not provide any short branch variants, then
1638 //       this method should return false for offset 0.
1639 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1640   // The passed offset is relative to address of the branch.
1641   // On 86 a branch displacement is calculated relative to address
1642   // of a next instruction.
1643   offset -= br_size;
1644 
1645   // the short version of jmpConUCF2 contains multiple branches,
1646   // making the reach slightly less
1647   if (rule == jmpConUCF2_rule)
1648     return (-126 <= offset && offset <= 125);
1649   return (-128 <= offset && offset <= 127);
1650 }
1651 
1652 const bool Matcher::isSimpleConstant64(jlong value) {
1653   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1654   //return value == (int) value;  // Cf. storeImmL and immL32.
1655 
1656   // Probably always true, even if a temp register is required.
1657   return true;
1658 }
1659 
1660 // The ecx parameter to rep stosq for the ClearArray node is in words.
1661 const bool Matcher::init_array_count_is_in_bytes = false;
1662 
1663 // No additional cost for CMOVL.
1664 const int Matcher::long_cmove_cost() { return 0; }
1665 
1666 // No CMOVF/CMOVD with SSE2
1667 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1668 
1669 // Does the CPU require late expand (see block.cpp for description of late expand)?
1670 const bool Matcher::require_postalloc_expand = false;
1671 
1672 // Do we need to mask the count passed to shift instructions or does
1673 // the cpu only look at the lower 5/6 bits anyway?
1674 const bool Matcher::need_masked_shift_count = false;
1675 
1676 bool Matcher::narrow_oop_use_complex_address() {
1677   assert(UseCompressedOops, "only for compressed oops code");
1678   return (LogMinObjAlignmentInBytes <= 3);
1679 }
1680 
1681 bool Matcher::narrow_klass_use_complex_address() {
1682   assert(UseCompressedClassPointers, "only for compressed klass code");
1683   return (LogKlassAlignmentInBytes <= 3);
1684 }
1685 
1686 bool Matcher::const_oop_prefer_decode() {
1687   // Prefer ConN+DecodeN over ConP.
1688   return true;
1689 }
1690 
1691 bool Matcher::const_klass_prefer_decode() {
1692   // TODO: Either support matching DecodeNKlass (heap-based) in operand
1693   //       or condisider the following:
1694   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
1695   //return Universe::narrow_klass_base() == NULL;
1696   return true;
1697 }
1698 
1699 // Is it better to copy float constants, or load them directly from
1700 // memory?  Intel can load a float constant from a direct address,
1701 // requiring no extra registers.  Most RISCs will have to materialize
1702 // an address into a register first, so they would do better to copy
1703 // the constant from stack.
1704 const bool Matcher::rematerialize_float_constants = true; // XXX
1705 
1706 // If CPU can load and store mis-aligned doubles directly then no
1707 // fixup is needed.  Else we split the double into 2 integer pieces
1708 // and move it piece-by-piece.  Only happens when passing doubles into
1709 // C code as the Java calling convention forces doubles to be aligned.
1710 const bool Matcher::misaligned_doubles_ok = true;
1711 
1712 // No-op on amd64
1713 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1714 
1715 // Advertise here if the CPU requires explicit rounding operations to
1716 // implement the UseStrictFP mode.
1717 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1718 
1719 // Are floats conerted to double when stored to stack during deoptimization?
1720 // On x64 it is stored without convertion so we can use normal access.
1721 bool Matcher::float_in_double() { return false; }
1722 
1723 // Do ints take an entire long register or just half?
1724 const bool Matcher::int_in_long = true;
1725 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
1731 bool Matcher::can_be_java_arg(int reg)
1732 {
1733   return
1734     reg ==  RDI_num || reg == RDI_H_num ||
1735     reg ==  RSI_num || reg == RSI_H_num ||
1736     reg ==  RDX_num || reg == RDX_H_num ||
1737     reg ==  RCX_num || reg == RCX_H_num ||
1738     reg ==   R8_num || reg ==  R8_H_num ||
1739     reg ==   R9_num || reg ==  R9_H_num ||
1740     reg ==  R12_num || reg == R12_H_num ||
1741     reg == XMM0_num || reg == XMM0b_num ||
1742     reg == XMM1_num || reg == XMM1b_num ||
1743     reg == XMM2_num || reg == XMM2b_num ||
1744     reg == XMM3_num || reg == XMM3b_num ||
1745     reg == XMM4_num || reg == XMM4b_num ||
1746     reg == XMM5_num || reg == XMM5b_num ||
1747     reg == XMM6_num || reg == XMM6b_num ||
1748     reg == XMM7_num || reg == XMM7b_num;
1749 }
1750 
1751 bool Matcher::is_spillable_arg(int reg)
1752 {
1753   return can_be_java_arg(reg);
1754 }
1755 
1756 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1757   // In 64 bit mode a code which use multiply when
1758   // devisor is constant is faster than hardware
1759   // DIV instruction (it uses MulHiL).
1760   return false;
1761 }
1762 
1763 // Register for DIVI projection of divmodI
1764 RegMask Matcher::divI_proj_mask() {
1765   return INT_RAX_REG_mask();
1766 }
1767 
1768 // Register for MODI projection of divmodI
1769 RegMask Matcher::modI_proj_mask() {
1770   return INT_RDX_REG_mask();
1771 }
1772 
1773 // Register for DIVL projection of divmodL
1774 RegMask Matcher::divL_proj_mask() {
1775   return LONG_RAX_REG_mask();
1776 }
1777 
1778 // Register for MODL projection of divmodL
1779 RegMask Matcher::modL_proj_mask() {
1780   return LONG_RDX_REG_mask();
1781 }
1782 
1783 // Register for saving SP into on method handle invokes. Not used on x86_64.
1784 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1785     return NO_REG_mask();
1786 }
1787 
1788 %}
1789 
1790 //----------ENCODING BLOCK-----------------------------------------------------
1791 // This block specifies the encoding classes used by the compiler to
1792 // output byte streams.  Encoding classes are parameterized macros
1793 // used by Machine Instruction Nodes in order to generate the bit
1794 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
// COND_INTER.  REG_INTER causes an operand to generate a function
1798 // which returns its register number when queried.  CONST_INTER causes
1799 // an operand to generate a function which returns the value of the
1800 // constant when queried.  MEMORY_INTER causes an operand to generate
1801 // four functions which return the Base Register, the Index Register,
1802 // the Scale Value, and the Offset Value of the operand when queried.
1803 // COND_INTER causes an operand to generate six functions which return
1804 // the encoding code (ie - encoding bits for the instruction)
1805 // associated with each basic boolean condition for a conditional
1806 // instruction.
1807 //
1808 // Instructions specify two basic values for encoding.  Again, a
1809 // function is available to check if the constant displacement is an
1810 // oop. They use the ins_encode keyword to specify their encoding
1811 // classes (which must be a sequence of enc_class names, and their
1812 // parameters, specified in the encoding block), and they use the
1813 // opcode keyword to specify, in order, their primary, secondary, and
1814 // tertiary opcode.  Only the opcode sections which a particular
1815 // instruction needs for encoding need to be specified.
1816 encode %{
1817   // Build emit functions for each basic byte or larger field in the
1818   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1819   // from C++ code in the enc_class source block.  Emit functions will
1820   // live in the main source block for now.  In future, we can
1821   // generalize this by adding a syntax that specifies the sizes of
1822   // fields in an order, so that the adlc can build the emit functions
1823   // automagically
1824 
1825   // Emit primary opcode
1826   enc_class OpcP
1827   %{
1828     emit_opcode(cbuf, $primary);
1829   %}
1830 
1831   // Emit secondary opcode
1832   enc_class OpcS
1833   %{
1834     emit_opcode(cbuf, $secondary);
1835   %}
1836 
1837   // Emit tertiary opcode
1838   enc_class OpcT
1839   %{
1840     emit_opcode(cbuf, $tertiary);
1841   %}
1842 
1843   // Emit opcode directly
1844   enc_class Opcode(immI d8)
1845   %{
1846     emit_opcode(cbuf, $d8$$constant);
1847   %}
1848 
1849   // Emit size prefix
1850   enc_class SizePrefix
1851   %{
1852     emit_opcode(cbuf, 0x66);
1853   %}
1854 
1855   enc_class reg(rRegI reg)
1856   %{
1857     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1858   %}
1859 
1860   enc_class reg_reg(rRegI dst, rRegI src)
1861   %{
1862     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1863   %}
1864 
1865   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1866   %{
1867     emit_opcode(cbuf, $opcode$$constant);
1868     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1869   %}
1870 
1871   enc_class cdql_enc(no_rax_rdx_RegI div)
1872   %{
1873     // Full implementation of Java idiv and irem; checks for
1874     // special case as described in JVM spec., p.243 & p.271.
1875     //
1876     //         normal case                           special case
1877     //
1878     // input : rax: dividend                         min_int
1879     //         reg: divisor                          -1
1880     //
1881     // output: rax: quotient  (= rax idiv reg)       min_int
1882     //         rdx: remainder (= rax irem reg)       0
1883     //
1884     //  Code sequnce:
1885     //
1886     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1887     //    5:   75 07/08                jne    e <normal>
1888     //    7:   33 d2                   xor    %edx,%edx
1889     //  [div >= 8 -> offset + 1]
1890     //  [REX_B]
1891     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1892     //    c:   74 03/04                je     11 <done>
1893     // 000000000000000e <normal>:
1894     //    e:   99                      cltd
1895     //  [div >= 8 -> offset + 1]
1896     //  [REX_B]
1897     //    f:   f7 f9                   idiv   $div
1898     // 0000000000000011 <done>:
1899 
1900     // cmp    $0x80000000,%eax
1901     emit_opcode(cbuf, 0x3d);
1902     emit_d8(cbuf, 0x00);
1903     emit_d8(cbuf, 0x00);
1904     emit_d8(cbuf, 0x00);
1905     emit_d8(cbuf, 0x80);
1906 
1907     // jne    e <normal>
1908     emit_opcode(cbuf, 0x75);
1909     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
1910 
1911     // xor    %edx,%edx
1912     emit_opcode(cbuf, 0x33);
1913     emit_d8(cbuf, 0xD2);
1914 
1915     // cmp    $0xffffffffffffffff,%ecx
1916     if ($div$$reg >= 8) {
1917       emit_opcode(cbuf, Assembler::REX_B);
1918     }
1919     emit_opcode(cbuf, 0x83);
1920     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1921     emit_d8(cbuf, 0xFF);
1922 
1923     // je     11 <done>
1924     emit_opcode(cbuf, 0x74);
1925     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
1926 
1927     // <normal>
1928     // cltd
1929     emit_opcode(cbuf, 0x99);
1930 
1931     // idivl (note: must be emitted by the user of this rule)
1932     // <done>
1933   %}
1934 
1935   enc_class cdqq_enc(no_rax_rdx_RegL div)
1936   %{
1937     // Full implementation of Java ldiv and lrem; checks for
1938     // special case as described in JVM spec., p.243 & p.271.
1939     //
1940     //         normal case                           special case
1941     //
1942     // input : rax: dividend                         min_long
1943     //         reg: divisor                          -1
1944     //
1945     // output: rax: quotient  (= rax idiv reg)       min_long
1946     //         rdx: remainder (= rax irem reg)       0
1947     //
1948     //  Code sequnce:
1949     //
1950     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1951     //    7:   00 00 80
1952     //    a:   48 39 d0                cmp    %rdx,%rax
1953     //    d:   75 08                   jne    17 <normal>
1954     //    f:   33 d2                   xor    %edx,%edx
1955     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1956     //   15:   74 05                   je     1c <done>
1957     // 0000000000000017 <normal>:
1958     //   17:   48 99                   cqto
1959     //   19:   48 f7 f9                idiv   $div
1960     // 000000000000001c <done>:
1961 
1962     // mov    $0x8000000000000000,%rdx
1963     emit_opcode(cbuf, Assembler::REX_W);
1964     emit_opcode(cbuf, 0xBA);
1965     emit_d8(cbuf, 0x00);
1966     emit_d8(cbuf, 0x00);
1967     emit_d8(cbuf, 0x00);
1968     emit_d8(cbuf, 0x00);
1969     emit_d8(cbuf, 0x00);
1970     emit_d8(cbuf, 0x00);
1971     emit_d8(cbuf, 0x00);
1972     emit_d8(cbuf, 0x80);
1973 
1974     // cmp    %rdx,%rax
1975     emit_opcode(cbuf, Assembler::REX_W);
1976     emit_opcode(cbuf, 0x39);
1977     emit_d8(cbuf, 0xD0);
1978 
1979     // jne    17 <normal>
1980     emit_opcode(cbuf, 0x75);
1981     emit_d8(cbuf, 0x08);
1982 
1983     // xor    %edx,%edx
1984     emit_opcode(cbuf, 0x33);
1985     emit_d8(cbuf, 0xD2);
1986 
1987     // cmp    $0xffffffffffffffff,$div
1988     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1989     emit_opcode(cbuf, 0x83);
1990     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1991     emit_d8(cbuf, 0xFF);
1992 
1993     // je     1e <done>
1994     emit_opcode(cbuf, 0x74);
1995     emit_d8(cbuf, 0x05);
1996 
1997     // <normal>
1998     // cqto
1999     emit_opcode(cbuf, Assembler::REX_W);
2000     emit_opcode(cbuf, 0x99);
2001 
2002     // idivq (note: must be emitted by the user of this rule)
2003     // <done>
2004   %}
2005 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
2007   enc_class OpcSE(immI imm)
2008   %{
2009     // Emit primary opcode and set sign-extend bit
2010     // Check for 8-bit immediate, and set sign extend bit in opcode
2011     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2012       emit_opcode(cbuf, $primary | 0x02);
2013     } else {
2014       // 32-bit immediate
2015       emit_opcode(cbuf, $primary);
2016     }
2017   %}
2018 
2019   enc_class OpcSErm(rRegI dst, immI imm)
2020   %{
2021     // OpcSEr/m
2022     int dstenc = $dst$$reg;
2023     if (dstenc >= 8) {
2024       emit_opcode(cbuf, Assembler::REX_B);
2025       dstenc -= 8;
2026     }
2027     // Emit primary opcode and set sign-extend bit
2028     // Check for 8-bit immediate, and set sign extend bit in opcode
2029     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2030       emit_opcode(cbuf, $primary | 0x02);
2031     } else {
2032       // 32-bit immediate
2033       emit_opcode(cbuf, $primary);
2034     }
2035     // Emit r/m byte with secondary opcode, after primary opcode.
2036     emit_rm(cbuf, 0x3, $secondary, dstenc);
2037   %}
2038 
2039   enc_class OpcSErm_wide(rRegL dst, immI imm)
2040   %{
2041     // OpcSEr/m
2042     int dstenc = $dst$$reg;
2043     if (dstenc < 8) {
2044       emit_opcode(cbuf, Assembler::REX_W);
2045     } else {
2046       emit_opcode(cbuf, Assembler::REX_WB);
2047       dstenc -= 8;
2048     }
2049     // Emit primary opcode and set sign-extend bit
2050     // Check for 8-bit immediate, and set sign extend bit in opcode
2051     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2052       emit_opcode(cbuf, $primary | 0x02);
2053     } else {
2054       // 32-bit immediate
2055       emit_opcode(cbuf, $primary);
2056     }
2057     // Emit r/m byte with secondary opcode, after primary opcode.
2058     emit_rm(cbuf, 0x3, $secondary, dstenc);
2059   %}
2060 
2061   enc_class Con8or32(immI imm)
2062   %{
2063     // Check for 8-bit immediate, and set sign extend bit in opcode
2064     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2065       $$$emit8$imm$$constant;
2066     } else {
2067       // 32-bit immediate
2068       $$$emit32$imm$$constant;
2069     }
2070   %}
2071 
2072   enc_class opc2_reg(rRegI dst)
2073   %{
2074     // BSWAP
2075     emit_cc(cbuf, $secondary, $dst$$reg);
2076   %}
2077 
2078   enc_class opc3_reg(rRegI dst)
2079   %{
2080     // BSWAP
2081     emit_cc(cbuf, $tertiary, $dst$$reg);
2082   %}
2083 
2084   enc_class reg_opc(rRegI div)
2085   %{
2086     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2087     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2088   %}
2089 
2090   enc_class enc_cmov(cmpOp cop)
2091   %{
2092     // CMOV
2093     $$$emit8$primary;
2094     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2095   %}
2096 
2097   enc_class enc_PartialSubtypeCheck()
2098   %{
2099     Register Rrdi = as_Register(RDI_enc); // result register
2100     Register Rrax = as_Register(RAX_enc); // super class
2101     Register Rrcx = as_Register(RCX_enc); // killed
2102     Register Rrsi = as_Register(RSI_enc); // sub class
2103     Label miss;
2104     const bool set_cond_codes = true;
2105 
2106     MacroAssembler _masm(&cbuf);
2107     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2108                                      NULL, &miss,
2109                                      /*set_cond_codes:*/ true);
2110     if ($primary) {
2111       __ xorptr(Rrdi, Rrdi);
2112     }
2113     __ bind(miss);
2114   %}
2115 
2116   enc_class clear_avx %{
2117     debug_only(int off0 = cbuf.insts_size());
2118     if (generate_vzeroupper(Compile::current())) {
2119       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
2120       // Clear upper bits of YMM registers when current compiled code uses
2121       // wide vectors to avoid AVX <-> SSE transition penalty during call.
2122       MacroAssembler _masm(&cbuf);
2123       __ vzeroupper();
2124     }
2125     debug_only(int off1 = cbuf.insts_size());
2126     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2127   %}
2128 
2129   enc_class Java_To_Runtime(method meth) %{
2130     // No relocation needed
2131     MacroAssembler _masm(&cbuf);
2132     __ mov64(r10, (int64_t) $meth$$method);
2133     __ call(r10);
2134   %}
2135 
2136   enc_class Java_To_Interpreter(method meth)
2137   %{
2138     // CALL Java_To_Interpreter
2139     // This is the instruction starting address for relocation info.
2140     cbuf.set_insts_mark();
2141     $$$emit8$primary;
2142     // CALL directly to the runtime
2143     emit_d32_reloc(cbuf,
2144                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2145                    runtime_call_Relocation::spec(),
2146                    RELOC_DISP32);
2147   %}
2148 
2149   enc_class Java_Static_Call(method meth)
2150   %{
2151     // JAVA STATIC CALL
2152     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2153     // determine who we intended to call.
2154     cbuf.set_insts_mark();
2155     $$$emit8$primary;
2156 
2157     if (!_method) {
2158       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2159                      runtime_call_Relocation::spec(),
2160                      RELOC_DISP32);
2161     } else {
2162       int method_index = resolved_method_index(cbuf);
2163       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
2164                                                   : static_call_Relocation::spec(method_index);
2165       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2166                      rspec, RELOC_DISP32);
2167       // Emit stubs for static call.
2168       address mark = cbuf.insts_mark();
2169       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
2170       if (stub == NULL) {
2171         ciEnv::current()->record_failure("CodeCache is full");
2172         return;
2173       }
2174 #if INCLUDE_AOT
2175       CompiledStaticCall::emit_to_aot_stub(cbuf, mark);
2176 #endif
2177     }
2178   %}
2179 
2180   enc_class Java_Dynamic_Call(method meth) %{
2181     MacroAssembler _masm(&cbuf);
2182     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
2183   %}
2184 
2185   enc_class Java_Compiled_Call(method meth)
2186   %{
2187     // JAVA COMPILED CALL
2188     int disp = in_bytes(Method:: from_compiled_offset());
2189 
2190     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2191     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2192 
2193     // callq *disp(%rax)
2194     cbuf.set_insts_mark();
2195     $$$emit8$primary;
2196     if (disp < 0x80) {
2197       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2198       emit_d8(cbuf, disp); // Displacement
2199     } else {
2200       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2201       emit_d32(cbuf, disp); // Displacement
2202     }
2203   %}
2204 
2205   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2206   %{
2207     // SAL, SAR, SHR
2208     int dstenc = $dst$$reg;
2209     if (dstenc >= 8) {
2210       emit_opcode(cbuf, Assembler::REX_B);
2211       dstenc -= 8;
2212     }
2213     $$$emit8$primary;
2214     emit_rm(cbuf, 0x3, $secondary, dstenc);
2215     $$$emit8$shift$$constant;
2216   %}
2217 
2218   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2219   %{
2220     // SAL, SAR, SHR
2221     int dstenc = $dst$$reg;
2222     if (dstenc < 8) {
2223       emit_opcode(cbuf, Assembler::REX_W);
2224     } else {
2225       emit_opcode(cbuf, Assembler::REX_WB);
2226       dstenc -= 8;
2227     }
2228     $$$emit8$primary;
2229     emit_rm(cbuf, 0x3, $secondary, dstenc);
2230     $$$emit8$shift$$constant;
2231   %}
2232 
2233   enc_class load_immI(rRegI dst, immI src)
2234   %{
2235     int dstenc = $dst$$reg;
2236     if (dstenc >= 8) {
2237       emit_opcode(cbuf, Assembler::REX_B);
2238       dstenc -= 8;
2239     }
2240     emit_opcode(cbuf, 0xB8 | dstenc);
2241     $$$emit32$src$$constant;
2242   %}
2243 
2244   enc_class load_immL(rRegL dst, immL src)
2245   %{
2246     int dstenc = $dst$$reg;
2247     if (dstenc < 8) {
2248       emit_opcode(cbuf, Assembler::REX_W);
2249     } else {
2250       emit_opcode(cbuf, Assembler::REX_WB);
2251       dstenc -= 8;
2252     }
2253     emit_opcode(cbuf, 0xB8 | dstenc);
2254     emit_d64(cbuf, $src$$constant);
2255   %}
2256 
2257   enc_class load_immUL32(rRegL dst, immUL32 src)
2258   %{
2259     // same as load_immI, but this time we care about zeroes in the high word
2260     int dstenc = $dst$$reg;
2261     if (dstenc >= 8) {
2262       emit_opcode(cbuf, Assembler::REX_B);
2263       dstenc -= 8;
2264     }
2265     emit_opcode(cbuf, 0xB8 | dstenc);
2266     $$$emit32$src$$constant;
2267   %}
2268 
2269   enc_class load_immL32(rRegL dst, immL32 src)
2270   %{
2271     int dstenc = $dst$$reg;
2272     if (dstenc < 8) {
2273       emit_opcode(cbuf, Assembler::REX_W);
2274     } else {
2275       emit_opcode(cbuf, Assembler::REX_WB);
2276       dstenc -= 8;
2277     }
2278     emit_opcode(cbuf, 0xC7);
2279     emit_rm(cbuf, 0x03, 0x00, dstenc);
2280     $$$emit32$src$$constant;
2281   %}
2282 
2283   enc_class load_immP31(rRegP dst, immP32 src)
2284   %{
2285     // same as load_immI, but this time we care about zeroes in the high word
2286     int dstenc = $dst$$reg;
2287     if (dstenc >= 8) {
2288       emit_opcode(cbuf, Assembler::REX_B);
2289       dstenc -= 8;
2290     }
2291     emit_opcode(cbuf, 0xB8 | dstenc);
2292     $$$emit32$src$$constant;
2293   %}
2294 
2295   enc_class load_immP(rRegP dst, immP src)
2296   %{
2297     int dstenc = $dst$$reg;
2298     if (dstenc < 8) {
2299       emit_opcode(cbuf, Assembler::REX_W);
2300     } else {
2301       emit_opcode(cbuf, Assembler::REX_WB);
2302       dstenc -= 8;
2303     }
2304     emit_opcode(cbuf, 0xB8 | dstenc);
2305     // This next line should be generated from ADLC
2306     if ($src->constant_reloc() != relocInfo::none) {
2307       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2308     } else {
2309       emit_d64(cbuf, $src$$constant);
2310     }
2311   %}
2312 
2313   enc_class Con32(immI src)
2314   %{
2315     // Output immediate
2316     $$$emit32$src$$constant;
2317   %}
2318 
2319   enc_class Con32F_as_bits(immF src)
2320   %{
2321     // Output Float immediate bits
2322     jfloat jf = $src$$constant;
2323     jint jf_as_bits = jint_cast(jf);
2324     emit_d32(cbuf, jf_as_bits);
2325   %}
2326 
2327   enc_class Con16(immI src)
2328   %{
2329     // Output immediate
2330     $$$emit16$src$$constant;
2331   %}
2332 
2333   // How is this different from Con32??? XXX
2334   enc_class Con_d32(immI src)
2335   %{
2336     emit_d32(cbuf,$src$$constant);
2337   %}
2338 
2339   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2340     // Output immediate memory reference
2341     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2342     emit_d32(cbuf, 0x00);
2343   %}
2344 
2345   enc_class lock_prefix()
2346   %{
2347     emit_opcode(cbuf, 0xF0); // lock
2348   %}
2349 
2350   enc_class REX_mem(memory mem)
2351   %{
2352     if ($mem$$base >= 8) {
2353       if ($mem$$index < 8) {
2354         emit_opcode(cbuf, Assembler::REX_B);
2355       } else {
2356         emit_opcode(cbuf, Assembler::REX_XB);
2357       }
2358     } else {
2359       if ($mem$$index >= 8) {
2360         emit_opcode(cbuf, Assembler::REX_X);
2361       }
2362     }
2363   %}
2364 
2365   enc_class REX_mem_wide(memory mem)
2366   %{
2367     if ($mem$$base >= 8) {
2368       if ($mem$$index < 8) {
2369         emit_opcode(cbuf, Assembler::REX_WB);
2370       } else {
2371         emit_opcode(cbuf, Assembler::REX_WXB);
2372       }
2373     } else {
2374       if ($mem$$index < 8) {
2375         emit_opcode(cbuf, Assembler::REX_W);
2376       } else {
2377         emit_opcode(cbuf, Assembler::REX_WX);
2378       }
2379     }
2380   %}
2381 
2382   // for byte regs
2383   enc_class REX_breg(rRegI reg)
2384   %{
2385     if ($reg$$reg >= 4) {
2386       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2387     }
2388   %}
2389 
2390   // for byte regs
2391   enc_class REX_reg_breg(rRegI dst, rRegI src)
2392   %{
2393     if ($dst$$reg < 8) {
2394       if ($src$$reg >= 4) {
2395         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2396       }
2397     } else {
2398       if ($src$$reg < 8) {
2399         emit_opcode(cbuf, Assembler::REX_R);
2400       } else {
2401         emit_opcode(cbuf, Assembler::REX_RB);
2402       }
2403     }
2404   %}
2405 
2406   // for byte regs
2407   enc_class REX_breg_mem(rRegI reg, memory mem)
2408   %{
2409     if ($reg$$reg < 8) {
2410       if ($mem$$base < 8) {
2411         if ($mem$$index >= 8) {
2412           emit_opcode(cbuf, Assembler::REX_X);
2413         } else if ($reg$$reg >= 4) {
2414           emit_opcode(cbuf, Assembler::REX);
2415         }
2416       } else {
2417         if ($mem$$index < 8) {
2418           emit_opcode(cbuf, Assembler::REX_B);
2419         } else {
2420           emit_opcode(cbuf, Assembler::REX_XB);
2421         }
2422       }
2423     } else {
2424       if ($mem$$base < 8) {
2425         if ($mem$$index < 8) {
2426           emit_opcode(cbuf, Assembler::REX_R);
2427         } else {
2428           emit_opcode(cbuf, Assembler::REX_RX);
2429         }
2430       } else {
2431         if ($mem$$index < 8) {
2432           emit_opcode(cbuf, Assembler::REX_RB);
2433         } else {
2434           emit_opcode(cbuf, Assembler::REX_RXB);
2435         }
2436       }
2437     }
2438   %}
2439 
2440   enc_class REX_reg(rRegI reg)
2441   %{
2442     if ($reg$$reg >= 8) {
2443       emit_opcode(cbuf, Assembler::REX_B);
2444     }
2445   %}
2446 
2447   enc_class REX_reg_wide(rRegI reg)
2448   %{
2449     if ($reg$$reg < 8) {
2450       emit_opcode(cbuf, Assembler::REX_W);
2451     } else {
2452       emit_opcode(cbuf, Assembler::REX_WB);
2453     }
2454   %}
2455 
2456   enc_class REX_reg_reg(rRegI dst, rRegI src)
2457   %{
2458     if ($dst$$reg < 8) {
2459       if ($src$$reg >= 8) {
2460         emit_opcode(cbuf, Assembler::REX_B);
2461       }
2462     } else {
2463       if ($src$$reg < 8) {
2464         emit_opcode(cbuf, Assembler::REX_R);
2465       } else {
2466         emit_opcode(cbuf, Assembler::REX_RB);
2467       }
2468     }
2469   %}
2470 
2471   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2472   %{
2473     if ($dst$$reg < 8) {
2474       if ($src$$reg < 8) {
2475         emit_opcode(cbuf, Assembler::REX_W);
2476       } else {
2477         emit_opcode(cbuf, Assembler::REX_WB);
2478       }
2479     } else {
2480       if ($src$$reg < 8) {
2481         emit_opcode(cbuf, Assembler::REX_WR);
2482       } else {
2483         emit_opcode(cbuf, Assembler::REX_WRB);
2484       }
2485     }
2486   %}
2487 
2488   enc_class REX_reg_mem(rRegI reg, memory mem)
2489   %{
2490     if ($reg$$reg < 8) {
2491       if ($mem$$base < 8) {
2492         if ($mem$$index >= 8) {
2493           emit_opcode(cbuf, Assembler::REX_X);
2494         }
2495       } else {
2496         if ($mem$$index < 8) {
2497           emit_opcode(cbuf, Assembler::REX_B);
2498         } else {
2499           emit_opcode(cbuf, Assembler::REX_XB);
2500         }
2501       }
2502     } else {
2503       if ($mem$$base < 8) {
2504         if ($mem$$index < 8) {
2505           emit_opcode(cbuf, Assembler::REX_R);
2506         } else {
2507           emit_opcode(cbuf, Assembler::REX_RX);
2508         }
2509       } else {
2510         if ($mem$$index < 8) {
2511           emit_opcode(cbuf, Assembler::REX_RB);
2512         } else {
2513           emit_opcode(cbuf, Assembler::REX_RXB);
2514         }
2515       }
2516     }
2517   %}
2518 
2519   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2520   %{
2521     if ($reg$$reg < 8) {
2522       if ($mem$$base < 8) {
2523         if ($mem$$index < 8) {
2524           emit_opcode(cbuf, Assembler::REX_W);
2525         } else {
2526           emit_opcode(cbuf, Assembler::REX_WX);
2527         }
2528       } else {
2529         if ($mem$$index < 8) {
2530           emit_opcode(cbuf, Assembler::REX_WB);
2531         } else {
2532           emit_opcode(cbuf, Assembler::REX_WXB);
2533         }
2534       }
2535     } else {
2536       if ($mem$$base < 8) {
2537         if ($mem$$index < 8) {
2538           emit_opcode(cbuf, Assembler::REX_WR);
2539         } else {
2540           emit_opcode(cbuf, Assembler::REX_WRX);
2541         }
2542       } else {
2543         if ($mem$$index < 8) {
2544           emit_opcode(cbuf, Assembler::REX_WRB);
2545         } else {
2546           emit_opcode(cbuf, Assembler::REX_WRXB);
2547         }
2548       }
2549     }
2550   %}
2551 
2552   enc_class reg_mem(rRegI ereg, memory mem)
2553   %{
2554     // High registers handle in encode_RegMem
2555     int reg = $ereg$$reg;
2556     int base = $mem$$base;
2557     int index = $mem$$index;
2558     int scale = $mem$$scale;
2559     int disp = $mem$$disp;
2560     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2561 
2562     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2563   %}
2564 
2565   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2566   %{
2567     int rm_byte_opcode = $rm_opcode$$constant;
2568 
2569     // High registers handle in encode_RegMem
2570     int base = $mem$$base;
2571     int index = $mem$$index;
2572     int scale = $mem$$scale;
2573     int displace = $mem$$disp;
2574 
2575     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2576                                             // working with static
2577                                             // globals
2578     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2579                   disp_reloc);
2580   %}
2581 
2582   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2583   %{
2584     int reg_encoding = $dst$$reg;
2585     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2586     int index        = 0x04;            // 0x04 indicates no index
2587     int scale        = 0x00;            // 0x00 indicates no scale
2588     int displace     = $src1$$constant; // 0x00 indicates no displacement
2589     relocInfo::relocType disp_reloc = relocInfo::none;
2590     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2591                   disp_reloc);
2592   %}
2593 
2594   enc_class neg_reg(rRegI dst)
2595   %{
2596     int dstenc = $dst$$reg;
2597     if (dstenc >= 8) {
2598       emit_opcode(cbuf, Assembler::REX_B);
2599       dstenc -= 8;
2600     }
2601     // NEG $dst
2602     emit_opcode(cbuf, 0xF7);
2603     emit_rm(cbuf, 0x3, 0x03, dstenc);
2604   %}
2605 
2606   enc_class neg_reg_wide(rRegI dst)
2607   %{
2608     int dstenc = $dst$$reg;
2609     if (dstenc < 8) {
2610       emit_opcode(cbuf, Assembler::REX_W);
2611     } else {
2612       emit_opcode(cbuf, Assembler::REX_WB);
2613       dstenc -= 8;
2614     }
2615     // NEG $dst
2616     emit_opcode(cbuf, 0xF7);
2617     emit_rm(cbuf, 0x3, 0x03, dstenc);
2618   %}
2619 
2620   enc_class setLT_reg(rRegI dst)
2621   %{
2622     int dstenc = $dst$$reg;
2623     if (dstenc >= 8) {
2624       emit_opcode(cbuf, Assembler::REX_B);
2625       dstenc -= 8;
2626     } else if (dstenc >= 4) {
2627       emit_opcode(cbuf, Assembler::REX);
2628     }
2629     // SETLT $dst
2630     emit_opcode(cbuf, 0x0F);
2631     emit_opcode(cbuf, 0x9C);
2632     emit_rm(cbuf, 0x3, 0x0, dstenc);
2633   %}
2634 
2635   enc_class setNZ_reg(rRegI dst)
2636   %{
2637     int dstenc = $dst$$reg;
2638     if (dstenc >= 8) {
2639       emit_opcode(cbuf, Assembler::REX_B);
2640       dstenc -= 8;
2641     } else if (dstenc >= 4) {
2642       emit_opcode(cbuf, Assembler::REX);
2643     }
2644     // SETNZ $dst
2645     emit_opcode(cbuf, 0x0F);
2646     emit_opcode(cbuf, 0x95);
2647     emit_rm(cbuf, 0x3, 0x0, dstenc);
2648   %}
2649 
2650 
2651   // Compare the lonogs and set -1, 0, or 1 into dst
2652   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2653   %{
2654     int src1enc = $src1$$reg;
2655     int src2enc = $src2$$reg;
2656     int dstenc = $dst$$reg;
2657 
2658     // cmpq $src1, $src2
2659     if (src1enc < 8) {
2660       if (src2enc < 8) {
2661         emit_opcode(cbuf, Assembler::REX_W);
2662       } else {
2663         emit_opcode(cbuf, Assembler::REX_WB);
2664       }
2665     } else {
2666       if (src2enc < 8) {
2667         emit_opcode(cbuf, Assembler::REX_WR);
2668       } else {
2669         emit_opcode(cbuf, Assembler::REX_WRB);
2670       }
2671     }
2672     emit_opcode(cbuf, 0x3B);
2673     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2674 
2675     // movl $dst, -1
2676     if (dstenc >= 8) {
2677       emit_opcode(cbuf, Assembler::REX_B);
2678     }
2679     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2680     emit_d32(cbuf, -1);
2681 
2682     // jl,s done
2683     emit_opcode(cbuf, 0x7C);
2684     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2685 
2686     // setne $dst
2687     if (dstenc >= 4) {
2688       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2689     }
2690     emit_opcode(cbuf, 0x0F);
2691     emit_opcode(cbuf, 0x95);
2692     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2693 
2694     // movzbl $dst, $dst
2695     if (dstenc >= 4) {
2696       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2697     }
2698     emit_opcode(cbuf, 0x0F);
2699     emit_opcode(cbuf, 0xB6);
2700     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2701   %}
2702 
  // Move a double through the stack into $dst: fstp_d stores to [rsp],
  // movdbl loads that slot into $dst's XMM register, then rsp is raised
  // by 8.
  // NOTE(review): presumably pairs with Push_SrcXD, which lowers rsp by 8
  // beforehand -- confirm at the use sites.
  enc_class Push_ResultXD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
2709 
  // Move $src through the stack: lower rsp by 8, store $src's XMM register
  // to [rsp] with movdbl, then fld_d loads from that slot.
  // rsp is left lowered by 8 on exit; this block does not restore it.
  enc_class Push_SrcXD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}
2716 
2717 
  // Emit a jump (opcode 0xE9 with a 32-bit displacement) to
  // OptoRuntime::rethrow_stub(), recorded with a runtime-call relocation.
  enc_class enc_rethrow()
  %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9); // jmp entry
    // insts_end() is read after the opcode byte has been emitted; the extra
    // "- 4" accounts for the four displacement bytes, so the offset is
    // relative to the end of the whole instruction.
    emit_d32_reloc(cbuf,
                   (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_DISP32);
  %}
2727 
2728 %}
2729 
2730 
2731 
2732 //----------FRAME--------------------------------------------------------------
2733 // Definition of frame structure and management information.
2734 //
2735 //  S T A C K   L A Y O U T    Allocators stack-slot number
2736 //                             |   (to get allocators register number
2737 //  G  Owned by    |        |  v    add OptoReg::stack0())
2738 //  r   CALLER     |        |
2739 //  o     |        +--------+      pad to even-align allocators stack-slot
2740 //  w     V        |  pad0  |        numbers; owned by CALLER
2741 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2742 //  h     ^        |   in   |  5
2743 //        |        |  args  |  4   Holes in incoming args owned by SELF
2744 //  |     |        |        |  3
2745 //  |     |        +--------+
2746 //  V     |        | old out|      Empty on Intel, window on Sparc
2747 //        |    old |preserve|      Must be even aligned.
2748 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2749 //        |        |   in   |  3   area for Intel ret address
2750 //     Owned by    |preserve|      Empty on Sparc.
2751 //       SELF      +--------+
2752 //        |        |  pad2  |  2   pad to align old SP
2753 //        |        +--------+  1
2754 //        |        | locks  |  0
2755 //        |        +--------+----> OptoReg::stack0(), even aligned
2756 //        |        |  pad1  | 11   pad to align new SP
2757 //        |        +--------+
2758 //        |        |        | 10
2759 //        |        | spills |  9   spills
2760 //        V        |        |  8   (pad0 slot for callee)
2761 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2762 //        ^        |  out   |  7
2763 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2764 //     Owned by    +--------+
2765 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2766 //        |    new |preserve|      Must be even-aligned.
2767 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2768 //        |        |        |
2769 //
2770 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2771 //         known from SELF's arguments and the Java calling convention.
2772 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
2780 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2781 //         even aligned with pad0 as needed.
2782 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2783 //         region 6-11 is even aligned; it may be padded out more so that
2784 //         the region from SP to FP meets the minimum stack alignment.
2785 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2786 //         alignment.  Region 11, pad1, may be dynamically extended so that
2787 //         SP meets the minimum alignment.
2788 
// Frame description: stack direction, special registers, slot counts,
// return-address location, calling conventions, and the registers used
// for return values.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // The following registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // NOTE(review): the value below is 4 (plus 2 when VerifyStackAtCalls is
  // set), not the 2 mentioned above -- presumably it also covers a saved
  // frame pointer; confirm against the prolog code.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot is computed as: (in_preserve + fixed slots, rounded up to the
  // stack alignment in slots) minus 2.
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    // The return value of c_calling_convention is deliberately discarded.
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Both tables are indexed by ideal_reg.  lo[] is the second member of
    // the returned pair and hi[] the first; hi[] is OptoReg::Bad for the
    // N, I and F entries.  The two leading zero entries are unlabelled
    // placeholders.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    // NOTE(review): the "- 6" presumably counts the excluded leaf types;
    // update it together with the tables if ideal register types change.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
2889 
2890 //----------ATTRIBUTES---------------------------------------------------------
2891 //----------Operand Attributes-------------------------------------------------
// Operands in this file that need a different cost state their own
// op_cost(...); the value here is presumably the default for the rest.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// NOTE(review): the parenthesized values are presumably per-instruction
// defaults that individual instructions may override -- confirm in ADLC.
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
2907 
2908 //----------OPERANDS-----------------------------------------------------------
2909 // Operand definitions must precede instruction definitions for correct parsing
2910 // in the ADLC because operands constitute user defined types which are used in
2911 // instruction definitions.
2912 
2913 //----------Simple Operands----------------------------------------------------
2914 // Immediate Operands
// Integer Immediate: matches any ConI (no predicate); op_cost 10.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
2924 
// Constant for test vs zero: ConI whose value is exactly 0; op_cost 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2935 
// Constant for increment: ConI whose value is exactly 1; op_cost 0.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2946 
// Constant for decrement: ConI whose value is exactly -1; op_cost 0.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2957 
// Valid scale values for addressing modes: ConI in the range 0..3
// inclusive.  No op_cost is given here.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
2967 
// Int immediate that fits a signed byte: -0x80 <= value < 0x80; op_cost 5.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2977 
// Int immediate that fits an unsigned byte: 0 <= value <= 255; op_cost 5.
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2987 
// Int immediate that fits a signed 16-bit value: -32768 <= value <= 32767;
// op_cost 10.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
2997 
// Int Immediate non-negative: ConI with value >= 0; op_cost 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3008 
// Constant for long shifts: ConI whose value is exactly 32; op_cost 0.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3019 
// Constant for long shifts: ConI whose value is exactly 64; op_cost 0.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3030 
// Pointer Immediate: matches any ConP (no predicate); op_cost 10.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3040 
// NULL Pointer Immediate: ConP whose get_ptr() is 0; op_cost 5.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3051 
// Narrow Pointer Immediate: matches any ConN (no predicate); op_cost 10.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3060 
// Narrow klass immediate: matches any ConNKlass (no predicate); op_cost 10.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3068 
// Narrow NULL Pointer Immediate: ConN whose get_narrowcon() is 0; op_cost 5.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3078 
// Pointer immediate whose type carries no relocation (relocInfo::none) and
// whose value shifted right by 31 is zero; op_cost 5.
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3089 
3090 
// Long Immediate: matches any ConL (no predicate); op_cost 20.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
3100 
// Long Immediate 8-bit: ConL with -0x80 <= value < 0x80; op_cost 5.
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3111 
// Long Immediate 32-bit unsigned: ConL whose value is unchanged by a
// round trip through (unsigned int); op_cost 10.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3122 
// Long Immediate 32-bit signed: ConL whose value is unchanged by a
// round trip through (int); op_cost 15.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3133 
// Long Immediate zero: ConL whose value is exactly 0; op_cost 10.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3144 
// Constant for increment: ConL whose value is exactly 1.
// No op_cost is given here.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3154 
// Constant for decrement: ConL whose value is exactly -1.
// No op_cost is given here.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3164 
// Long Immediate: the value 10 (ConL == 10).  No op_cost is given here.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3174 
// Long immediate from 0 to 127 inclusive (0 <= value < 0x80); op_cost 10.
// Used for a shorter form of long mul by 10.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3186 
// Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF;
// op_cost 20.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3197 
// Float Immediate zero: ConF for which jint_cast(value) == 0; op_cost 5.
// NOTE(review): jint_cast presumably reinterprets the float's bits, in
// which case only +0.0f (not -0.0f) is accepted -- confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3208 
// Float Immediate: matches any ConF (no predicate); op_cost 15.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3218 
// Double Immediate zero: ConD for which jlong_cast(value) == 0; op_cost 5.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, in
// which case only +0.0 (not -0.0) is accepted -- confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3229 
// Double Immediate: matches any ConD (no predicate); op_cost 15.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3239 
3240 // Immediates for special shifts (sign extend)
3241 
// Int constant 16 (ConI == 16).  No op_cost is given here.
// Listed under the special-shift (sign extend) immediates.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3251 
// Int constant 24 (ConI == 24).  No op_cost is given here.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3260 
// Constant for byte-wide masking: ConI whose value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3270 
// Constant for short-wide masking: ConI whose value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3280 
// Constant for byte-wide masking: ConL whose value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3290 
// Constant for short-wide masking: ConL whose value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3300 
3301 // Register Operands
// Integer Register: allocated from int_reg.  Matches RegI and also the
// fixed-register operands rax/rbx/rcx/rdx/rdi_RegI.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3317 
// Special Registers
// Int operand allocated from int_rax_reg; formatted as "RAX".
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3328 
// Special Registers
// Int operand allocated from int_rbx_reg; formatted as "RBX".
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
3339 
// Int operand allocated from int_rcx_reg; formatted as "RCX".
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
3349 
// Int operand allocated from int_rdx_reg; formatted as "RDX".
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
3359 
// Int operand allocated from int_rdi_reg; formatted as "RDI".
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
3369 
// Int operand allocated from int_no_rcx_reg.  Matches RegI and the
// fixed-register operands rax/rbx/rdx/rdi_RegI (rcx_RegI is not listed).
operand no_rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  match(RegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3382 
// Int operand allocated from int_no_rax_rdx_reg.  Matches RegI and the
// fixed-register operands rbx/rcx/rdi_RegI (rax and rdx are not listed).
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3394 
// Pointer Register allocated from any_reg.  Matches RegP, the
// fixed-register pointer operands listed below, and rRegP.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3411 
// General pointer register operand allocated from ptr_reg.  Matches RegP
// and the fixed-register pointer operands listed below.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}
3426 
// Narrow-pointer register operand: matches RegN and is allocated from
// int_reg, the same class rRegI uses.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
3434 
3435 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3436 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3437 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
3438 // The output of an instruction is controlled by the allocator, which respects
3439 // register class masks, not match rules.  Unless an instruction mentions
3440 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3441 // by the allocator as an input.
3442 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
3443 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
3444 // result, RBP is not included in the output of the instruction either.
3445 
// Pointer operand allocated from ptr_no_rax_reg.  Matches RegP and the
// fixed-register operands rbx/rsi/rdi_RegP.
operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3457 
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
// Allocated from ptr_reg_no_rbp; matches RegP and rbx/rsi/rdi_RegP.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3471 
// Pointer operand allocated from ptr_no_rax_rbx_reg.  Matches RegP and the
// fixed-register operands rsi/rdi_RegP.
operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3482 
// Special Registers
// Return a pointer value
// Pointer operand allocated from ptr_rax_reg; matches RegP and rRegP.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3494 
// Special Registers
// Return a compressed pointer value
// Narrow-pointer operand allocated from int_rax_reg; matches RegN and rRegN.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
3506 
// Used in AtomicAdd
// Pointer operand allocated from ptr_rbx_reg; matches RegP and rRegP.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3517 
// Pointer operand allocated from ptr_rsi_reg; matches RegP and rRegP.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3527 
// Used in rep stosq
// Pointer operand allocated from ptr_rdi_reg; matches RegP and rRegP.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3538 
// Pointer operand allocated from ptr_r15_reg; matches RegP and rRegP.
// The Q&A comment earlier in this file describes r15 as the read-only
// TLS register.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3548 
// Long register operand allocated from long_reg.  Matches RegL and the
// fixed-register operands rax_RegL and rdx_RegL.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3559 
// Special Registers
// Long operand allocated from long_no_rax_rdx_reg; matches RegL and rRegL.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3570 
// Long operand that matches RegL, rRegL and rdx_RegL.
// NOTE(review): the allocation class is long_no_rax_rdx_reg, the same one
// no_rax_rdx_RegL uses, even though rdx_RegL is listed as a match --
// confirm this is intended.
operand no_rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3581 
// Long operand allocated from long_no_rcx_reg; matches RegL and rRegL.
operand no_rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3591 
// Long operand allocated from long_rax_reg; formatted as "RAX".
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3601 
// Long operand allocated from long_rcx_reg; matches RegL and rRegL.
// Unlike rax_RegL, the format string is empty.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3611 
// Long operand allocated from long_rdx_reg; matches RegL and rRegL.
// Unlike rax_RegL, the format string is empty.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3621 
3622 // Flags register, used as output of compare instructions
3623 operand rFlagsReg()
3624 %{
3625   constraint(ALLOC_IN_RC(int_flags));
3626   match(RegFlags);
3627 
3628   format %{ "RFLAGS" %}
3629   interface(REG_INTER);
3630 %}
3631 
3632 // Flags register, used as output of FLOATING POINT compare instructions
3633 operand rFlagsRegU()
3634 %{
3635   constraint(ALLOC_IN_RC(int_flags));
3636   match(RegFlags);
3637 
3638   format %{ "RFLAGS_U" %}
3639   interface(REG_INTER);
3640 %}
3641 
3642 operand rFlagsRegUCF() %{
3643   constraint(ALLOC_IN_RC(int_flags));
3644   match(RegFlags);
3645   predicate(false);
3646 
3647   format %{ "RFLAGS_U_CF" %}
3648   interface(REG_INTER);
3649 %}
3650 
3651 // Float register operands
3652 operand regF() %{
3653    constraint(ALLOC_IN_RC(float_reg));
3654    match(RegF);
3655 
3656    format %{ %}
3657    interface(REG_INTER);
3658 %}
3659 
3660 // Float register operands (float_reg_vl register class)
3661 operand vlRegF() %{
3662    constraint(ALLOC_IN_RC(float_reg_vl));
3663    match(RegF);
3664 
3665    format %{ %}
3666    interface(REG_INTER);
3667 %}
3668 
3669 // Double register operands
3670 operand regD() %{
3671    constraint(ALLOC_IN_RC(double_reg));
3672    match(RegD);
3673 
3674    format %{ %}
3675    interface(REG_INTER);
3676 %}
3677 
3678 // Double register operands (double_reg_vl register class)
3679 operand vlRegD() %{
3680    constraint(ALLOC_IN_RC(double_reg_vl));
3681    match(RegD);
3682 
3683    format %{ %}
3684    interface(REG_INTER);
3685 %}
3686 
3687 // Vectors
3688 operand vecS() %{
3689   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
3690   match(VecS);
3691 
3692   format %{ %}
3693   interface(REG_INTER);
3694 %}
3695 
3696 // Vectors
3697 operand legVecS() %{
3698   constraint(ALLOC_IN_RC(vectors_reg_legacy));
3699   match(VecS);
3700 
3701   format %{ %}
3702   interface(REG_INTER);
3703 %}
3704 
3705 operand vecD() %{
3706   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
3707   match(VecD);
3708 
3709   format %{ %}
3710   interface(REG_INTER);
3711 %}
3712 
3713 operand legVecD() %{
3714   constraint(ALLOC_IN_RC(vectord_reg_legacy));
3715   match(VecD);
3716 
3717   format %{ %}
3718   interface(REG_INTER);
3719 %}
3720 
3721 operand vecX() %{
3722   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
3723   match(VecX);
3724 
3725   format %{ %}
3726   interface(REG_INTER);
3727 %}
3728 
3729 operand legVecX() %{
3730   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
3731   match(VecX);
3732 
3733   format %{ %}
3734   interface(REG_INTER);
3735 %}
3736 
3737 operand vecY() %{
3738   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
3739   match(VecY);
3740 
3741   format %{ %}
3742   interface(REG_INTER);
3743 %}
3744 
3745 operand legVecY() %{
3746   constraint(ALLOC_IN_RC(vectory_reg_legacy));
3747   match(VecY);
3748 
3749   format %{ %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 //----------Memory Operands----------------------------------------------------
3754 // Direct Memory Operand
3755 // operand direct(immP addr)
3756 // %{
3757 //   match(addr);
3758 
3759 //   format %{ "[$addr]" %}
3760 //   interface(MEMORY_INTER) %{
3761 //     base(0xFFFFFFFF);
3762 //     index(0x4);
3763 //     scale(0x0);
3764 //     disp($addr);
3765 //   %}
3766 // %}
3767 
3768 // Indirect Memory Operand: the address is the base register alone
3769 operand indirect(any_RegP reg)
3770 %{
3771   constraint(ALLOC_IN_RC(ptr_reg));
3772   match(reg);
3773 
3774   format %{ "[$reg]" %}
3775   interface(MEMORY_INTER) %{
3776     base($reg);
3777     index(0x4);  // No Index
3778     scale(0x0);  // No Scale
3779     disp(0x0);   // No Displacement
3780   %}
3781 %}
3782 
3783 // Indirect Memory Plus Short (8-bit) Offset Operand
3784 operand indOffset8(any_RegP reg, immL8 off)
3785 %{
3786   constraint(ALLOC_IN_RC(ptr_reg));
3787   match(AddP reg off);
3788 
3789   format %{ "[$reg + $off (8-bit)]" %}
3790   interface(MEMORY_INTER) %{
3791     base($reg);
3792     index(0x4);  // No Index
3793     scale(0x0);  // No Scale
3794     disp($off);
3795   %}
3796 %}
3797 
3798 // Indirect Memory Plus Long (32-bit) Offset Operand
3799 operand indOffset32(any_RegP reg, immL32 off)
3800 %{
3801   constraint(ALLOC_IN_RC(ptr_reg));
3802   match(AddP reg off);
3803 
3804   format %{ "[$reg + $off (32-bit)]" %}
3805   interface(MEMORY_INTER) %{
3806     base($reg);
3807     index(0x4);  // No Index
3808     scale(0x0);  // No Scale
3809     disp($off);
3810   %}
3811 %}
3812 
3813 // Indirect Memory Plus Index Register Plus Offset Operand
3814 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3815 %{
3816   constraint(ALLOC_IN_RC(ptr_reg));
3817   match(AddP (AddP reg lreg) off);
3818 
3819   op_cost(10);
3820   format %{"[$reg + $off + $lreg]" %}
3821   interface(MEMORY_INTER) %{
3822     base($reg);
3823     index($lreg);
3824     scale(0x0);
3825     disp($off);
3826   %}
3827 %}
3828 
3829 // Indirect Memory Plus Index Register Operand
3830 operand indIndex(any_RegP reg, rRegL lreg)
3831 %{
3832   constraint(ALLOC_IN_RC(ptr_reg));
3833   match(AddP reg lreg);
3834 
3835   op_cost(10);
3836   format %{"[$reg + $lreg]" %}
3837   interface(MEMORY_INTER) %{
3838     base($reg);
3839     index($lreg);
3840     scale(0x0);
3841     disp(0x0);
3842   %}
3843 %}
3844 
3845 // Indirect Memory Times Scale Plus Index Register
3846 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3847 %{
3848   constraint(ALLOC_IN_RC(ptr_reg));
3849   match(AddP reg (LShiftL lreg scale));
3850 
3851   op_cost(10);
3852   format %{"[$reg + $lreg << $scale]" %}
3853   interface(MEMORY_INTER) %{
3854     base($reg);
3855     index($lreg);
3856     scale($scale);
3857     disp(0x0);
3858   %}
3859 %}
3860 
3861 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
3862 %{
3863   constraint(ALLOC_IN_RC(ptr_reg));
3864   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3865   match(AddP reg (LShiftL (ConvI2L idx) scale));
3866 
3867   op_cost(10);
3868   format %{"[$reg + pos $idx << $scale]" %}
3869   interface(MEMORY_INTER) %{
3870     base($reg);
3871     index($idx);
3872     scale($scale);
3873     disp(0x0);
3874   %}
3875 %}
3876 
3877 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
3878 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3879 %{
3880   constraint(ALLOC_IN_RC(ptr_reg));
3881   match(AddP (AddP reg (LShiftL lreg scale)) off);
3882 
3883   op_cost(10);
3884   format %{"[$reg + $off + $lreg << $scale]" %}
3885   interface(MEMORY_INTER) %{
3886     base($reg);
3887     index($lreg);
3888     scale($scale);
3889     disp($off);
3890   %}
3891 %}
3892 
3893 // Indirect Memory Plus Positive Index Register Plus Offset Operand
3894 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
3895 %{
3896   constraint(ALLOC_IN_RC(ptr_reg));
3897   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
3898   match(AddP (AddP reg (ConvI2L idx)) off);
3899 
3900   op_cost(10);
3901   format %{"[$reg + $off + $idx]" %}
3902   interface(MEMORY_INTER) %{
3903     base($reg);
3904     index($idx);
3905     scale(0x0);
3906     disp($off);
3907   %}
3908 %}
3909 
3910 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
3911 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3912 %{
3913   constraint(ALLOC_IN_RC(ptr_reg));
3914   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3915   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3916 
3917   op_cost(10);
3918   format %{"[$reg + $off + $idx << $scale]" %}
3919   interface(MEMORY_INTER) %{
3920     base($reg);
3921     index($idx);
3922     scale($scale);
3923     disp($off);
3924   %}
3925 %}
3926 
3927 // Indirect Narrow Oop Plus Offset Operand
3928 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
3929 // so we can't free r12 even when Universe::narrow_oop_base() == NULL.
3930 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3931   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3932   constraint(ALLOC_IN_RC(ptr_reg));
3933   match(AddP (DecodeN reg) off);
3934 
3935   op_cost(10);
3936   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3937   interface(MEMORY_INTER) %{
3938     base(0xc); // R12
3939     index($reg);
3940     scale(0x3); // the "<< 3" of the format; predicate requires a times_8 narrow oop shift
3941     disp($off);
3942   %}
3943 %}
3944 
3945 // Indirect Memory Operand whose base is a DecodeN of a narrow register (narrow oop shift == 0 only)
3946 operand indirectNarrow(rRegN reg)
3947 %{
3948   predicate(Universe::narrow_oop_shift() == 0);
3949   constraint(ALLOC_IN_RC(ptr_reg));
3950   match(DecodeN reg);
3951 
3952   format %{ "[$reg]" %}
3953   interface(MEMORY_INTER) %{
3954     base($reg);
3955     index(0x4);  // No Index
3956     scale(0x0);  // No Scale
3957     disp(0x0);   // No Displacement
3958   %}
3959 %}
3960 
3961 // Indirect Memory Plus Short (8-bit) Offset Operand, DecodeN base (narrow oop shift == 0 only)
3962 operand indOffset8Narrow(rRegN reg, immL8 off)
3963 %{
3964   predicate(Universe::narrow_oop_shift() == 0);
3965   constraint(ALLOC_IN_RC(ptr_reg));
3966   match(AddP (DecodeN reg) off);
3967 
3968   format %{ "[$reg + $off (8-bit)]" %}
3969   interface(MEMORY_INTER) %{
3970     base($reg);
3971     index(0x4);  // No Index
3972     scale(0x0);  // No Scale
3973     disp($off);
3974   %}
3975 %}
3976 
3977 // Indirect Memory Plus Long (32-bit) Offset Operand, DecodeN base (narrow oop shift == 0 only)
3978 operand indOffset32Narrow(rRegN reg, immL32 off)
3979 %{
3980   predicate(Universe::narrow_oop_shift() == 0);
3981   constraint(ALLOC_IN_RC(ptr_reg));
3982   match(AddP (DecodeN reg) off);
3983 
3984   format %{ "[$reg + $off (32-bit)]" %}
3985   interface(MEMORY_INTER) %{
3986     base($reg);
3987     index(0x4);  // No Index
3988     scale(0x0);  // No Scale
3989     disp($off);
3990   %}
3991 %}
3992 
3993 // Indirect Memory Plus Index Register Plus Offset Operand
3994 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3995 %{
3996   predicate(Universe::narrow_oop_shift() == 0);
3997   constraint(ALLOC_IN_RC(ptr_reg));
3998   match(AddP (AddP (DecodeN reg) lreg) off);
3999 
4000   op_cost(10);
4001   format %{"[$reg + $off + $lreg]" %}
4002   interface(MEMORY_INTER) %{
4003     base($reg);
4004     index($lreg);
4005     scale(0x0);
4006     disp($off);
4007   %}
4008 %}
4009 
4010 // Indirect Memory Plus Index Register Operand
4011 operand indIndexNarrow(rRegN reg, rRegL lreg)
4012 %{
4013   predicate(Universe::narrow_oop_shift() == 0);
4014   constraint(ALLOC_IN_RC(ptr_reg));
4015   match(AddP (DecodeN reg) lreg);
4016 
4017   op_cost(10);
4018   format %{"[$reg + $lreg]" %}
4019   interface(MEMORY_INTER) %{
4020     base($reg);
4021     index($lreg);
4022     scale(0x0);
4023     disp(0x0);
4024   %}
4025 %}
4026 
4027 // Indirect Memory Times Scale Plus Index Register
4028 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4029 %{
4030   predicate(Universe::narrow_oop_shift() == 0);
4031   constraint(ALLOC_IN_RC(ptr_reg));
4032   match(AddP (DecodeN reg) (LShiftL lreg scale));
4033 
4034   op_cost(10);
4035   format %{"[$reg + $lreg << $scale]" %}
4036   interface(MEMORY_INTER) %{
4037     base($reg);
4038     index($lreg);
4039     scale($scale);
4040     disp(0x0);
4041   %}
4042 %}
4043 
4044 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4045 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4046 %{
4047   predicate(Universe::narrow_oop_shift() == 0);
4048   constraint(ALLOC_IN_RC(ptr_reg));
4049   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4050 
4051   op_cost(10);
4052   format %{"[$reg + $off + $lreg << $scale]" %}
4053   interface(MEMORY_INTER) %{
4054     base($reg);
4055     index($lreg);
4056     scale($scale);
4057     disp($off);
4058   %}
4059 %}
4060 
4061 // Indirect Memory Plus Positive Index Register Plus Offset Operand
4062 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
4063 %{
4064   constraint(ALLOC_IN_RC(ptr_reg));
4065   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
4066   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
4067 
4068   op_cost(10);
4069   format %{"[$reg + $off + $idx]" %}
4070   interface(MEMORY_INTER) %{
4071     base($reg);
4072     index($idx);
4073     scale(0x0);
4074     disp($off);
4075   %}
4076 %}
4077 
4078 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
4079 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4080 %{
4081   constraint(ALLOC_IN_RC(ptr_reg));
4082   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4083   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4084 
4085   op_cost(10);
4086   format %{"[$reg + $off + $idx << $scale]" %}
4087   interface(MEMORY_INTER) %{
4088     base($reg);
4089     index($idx);
4090     scale($scale);
4091     disp($off);
4092   %}
4093 %}
4094 
4095 //----------Special Memory Operands--------------------------------------------
4096 // Stack Slot Operand - This operand is used for loading and storing temporary
4097 //                      values on the stack where a match requires a value to
4098 //                      flow through memory.
4099 operand stackSlotP(sRegP reg)
4100 %{
4101   constraint(ALLOC_IN_RC(stack_slots));
4102   // No match rule because this operand is only generated in matching
4103 
4104   format %{ "[$reg]" %}
4105   interface(MEMORY_INTER) %{
4106     base(0x4);   // RSP
4107     index(0x4);  // No Index
4108     scale(0x0);  // No Scale
4109     disp($reg);  // Stack Offset
4110   %}
4111 %}
4112 
4113 operand stackSlotI(sRegI reg)
4114 %{
4115   constraint(ALLOC_IN_RC(stack_slots));
4116   // No match rule because this operand is only generated in matching
4117 
4118   format %{ "[$reg]" %}
4119   interface(MEMORY_INTER) %{
4120     base(0x4);   // RSP
4121     index(0x4);  // No Index
4122     scale(0x0);  // No Scale
4123     disp($reg);  // Stack Offset
4124   %}
4125 %}
4126 
4127 operand stackSlotF(sRegF reg)
4128 %{
4129   constraint(ALLOC_IN_RC(stack_slots));
4130   // No match rule because this operand is only generated in matching
4131 
4132   format %{ "[$reg]" %}
4133   interface(MEMORY_INTER) %{
4134     base(0x4);   // RSP
4135     index(0x4);  // No Index
4136     scale(0x0);  // No Scale
4137     disp($reg);  // Stack Offset
4138   %}
4139 %}
4140 
4141 operand stackSlotD(sRegD reg)
4142 %{
4143   constraint(ALLOC_IN_RC(stack_slots));
4144   // No match rule because this operand is only generated in matching
4145 
4146   format %{ "[$reg]" %}
4147   interface(MEMORY_INTER) %{
4148     base(0x4);   // RSP
4149     index(0x4);  // No Index
4150     scale(0x0);  // No Scale
4151     disp($reg);  // Stack Offset
4152   %}
4153 %}
4154 operand stackSlotL(sRegL reg)
4155 %{
4156   constraint(ALLOC_IN_RC(stack_slots));
4157   // No match rule because this operand is only generated in matching
4158 
4159   format %{ "[$reg]" %}
4160   interface(MEMORY_INTER) %{
4161     base(0x4);   // RSP
4162     index(0x4);  // No Index
4163     scale(0x0);  // No Scale
4164     disp($reg);  // Stack Offset
4165   %}
4166 %}
4167 
4168 //----------Conditional Branch Operands----------------------------------------
4169 // Comparison Op  - This is the operation of the comparison, and is limited to
4170 //                  the following set of codes:
4171 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4172 //
4173 // Other attributes of the comparison, such as unsignedness, are specified
4174 // by the comparison instruction that sets a condition code flags register.
4175 // That result is represented by a flags operand whose subtype is appropriate
4176 // to the unsignedness (etc.) of the comparison.
4177 //
4178 // Later, the instruction which matches both the Comparison Op (a Bool) and
4179 // the flags (produced by the Cmp) specifies the coding of the comparison op
4180 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4181 
4182 // Comparison Code
4183 operand cmpOp()
4184 %{
4185   match(Bool);
4186 
4187   format %{ "" %}
4188   interface(COND_INTER) %{
4189     equal(0x4, "e");
4190     not_equal(0x5, "ne");
4191     less(0xC, "l");
4192     greater_equal(0xD, "ge");
4193     less_equal(0xE, "le");
4194     greater(0xF, "g");
4195     overflow(0x0, "o");
4196     no_overflow(0x1, "no");
4197   %}
4198 %}
4199 
4200 // Comparison Code, unsigned compare.  Used by FP also, with
4201 // C2 (unordered) turned into GT or LT already.  The other bits
4202 // C0 and C3 are turned into Carry & Zero flags.
4203 operand cmpOpU()
4204 %{
4205   match(Bool);
4206 
4207   format %{ "" %}
4208   interface(COND_INTER) %{
4209     equal(0x4, "e");
4210     not_equal(0x5, "ne");
4211     less(0x2, "b");
4212     greater_equal(0x3, "nb");
4213     less_equal(0x6, "be");
4214     greater(0x7, "nbe");
4215     overflow(0x0, "o");
4216     no_overflow(0x1, "no");
4217   %}
4218 %}
4219 
4220 
4221 // Floating comparisons that don't require any fixup for the unordered case
4222 operand cmpOpUCF() %{
4223   match(Bool);
4224   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4225             n->as_Bool()->_test._test == BoolTest::ge ||
4226             n->as_Bool()->_test._test == BoolTest::le ||
4227             n->as_Bool()->_test._test == BoolTest::gt);
4228   format %{ "" %}
4229   interface(COND_INTER) %{
4230     equal(0x4, "e");
4231     not_equal(0x5, "ne");
4232     less(0x2, "b");
4233     greater_equal(0x3, "nb");
4234     less_equal(0x6, "be");
4235     greater(0x7, "nbe");
4236     overflow(0x0, "o");
4237     no_overflow(0x1, "no");
4238   %}
4239 %}
4240 
4241 
4242 // Floating comparisons that can be fixed up with extra conditional jumps
4243 operand cmpOpUCF2() %{
4244   match(Bool);
4245   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4246             n->as_Bool()->_test._test == BoolTest::eq);
4247   format %{ "" %}
4248   interface(COND_INTER) %{
4249     equal(0x4, "e");
4250     not_equal(0x5, "ne");
4251     less(0x2, "b");
4252     greater_equal(0x3, "nb");
4253     less_equal(0x6, "be");
4254     greater(0x7, "nbe");
4255     overflow(0x0, "o");
4256     no_overflow(0x1, "no");
4257   %}
4258 %}
4259 
4260 // Operands for bound floating point register arguments
4261 operand rxmm0() %{
4262   constraint(ALLOC_IN_RC(xmm0_reg));
4263   match(VecX);
4264   format%{%}
4265   interface(REG_INTER);
4266 %}
4267 operand rxmm1() %{
4268   constraint(ALLOC_IN_RC(xmm1_reg));
4269   match(VecX);
4270   format%{%}
4271   interface(REG_INTER);
4272 %}
4273 operand rxmm2() %{
4274   constraint(ALLOC_IN_RC(xmm2_reg));
4275   match(VecX);
4276   format%{%}
4277   interface(REG_INTER);
4278 %}
4279 operand rxmm3() %{
4280   constraint(ALLOC_IN_RC(xmm3_reg));
4281   match(VecX);
4282   format%{%}
4283   interface(REG_INTER);
4284 %}
4285 operand rxmm4() %{
4286   constraint(ALLOC_IN_RC(xmm4_reg));
4287   match(VecX);
4288   format%{%}
4289   interface(REG_INTER);
4290 %}
4291 operand rxmm5() %{
4292   constraint(ALLOC_IN_RC(xmm5_reg));
4293   match(VecX);
4294   format%{%}
4295   interface(REG_INTER);
4296 %}
4297 operand rxmm6() %{
4298   constraint(ALLOC_IN_RC(xmm6_reg));
4299   match(VecX);
4300   format%{%}
4301   interface(REG_INTER);
4302 %}
4303 operand rxmm7() %{
4304   constraint(ALLOC_IN_RC(xmm7_reg));
4305   match(VecX);
4306   format%{%}
4307   interface(REG_INTER);
4308 %}
4309 operand rxmm8() %{
4310   constraint(ALLOC_IN_RC(xmm8_reg));
4311   match(VecX);
4312   format%{%}
4313   interface(REG_INTER);
4314 %}
4315 operand rxmm9() %{
4316   constraint(ALLOC_IN_RC(xmm9_reg));
4317   match(VecX);
4318   format%{%}
4319   interface(REG_INTER);
4320 %}
4321 operand rxmm10() %{
4322   constraint(ALLOC_IN_RC(xmm10_reg));
4323   match(VecX);
4324   format%{%}
4325   interface(REG_INTER);
4326 %}
4327 operand rxmm11() %{
4328   constraint(ALLOC_IN_RC(xmm11_reg));
4329   match(VecX);
4330   format%{%}
4331   interface(REG_INTER);
4332 %}
4333 operand rxmm12() %{
4334   constraint(ALLOC_IN_RC(xmm12_reg));
4335   match(VecX);
4336   format%{%}
4337   interface(REG_INTER);
4338 %}
4339 operand rxmm13() %{
4340   constraint(ALLOC_IN_RC(xmm13_reg));
4341   match(VecX);
4342   format%{%}
4343   interface(REG_INTER);
4344 %}
4345 operand rxmm14() %{
4346   constraint(ALLOC_IN_RC(xmm14_reg));
4347   match(VecX);
4348   format%{%}
4349   interface(REG_INTER);
4350 %}
4351 operand rxmm15() %{
4352   constraint(ALLOC_IN_RC(xmm15_reg));
4353   match(VecX);
4354   format%{%}
4355   interface(REG_INTER);
4356 %}
4357 operand rxmm16() %{
4358   constraint(ALLOC_IN_RC(xmm16_reg));
4359   match(VecX);
4360   format%{%}
4361   interface(REG_INTER);
4362 %}
4363 operand rxmm17() %{
4364   constraint(ALLOC_IN_RC(xmm17_reg));
4365   match(VecX);
4366   format%{%}
4367   interface(REG_INTER);
4368 %}
4369 operand rxmm18() %{
4370   constraint(ALLOC_IN_RC(xmm18_reg));
4371   match(VecX);
4372   format%{%}
4373   interface(REG_INTER);
4374 %}
4375 operand rxmm19() %{
4376   constraint(ALLOC_IN_RC(xmm19_reg));
4377   match(VecX);
4378   format%{%}
4379   interface(REG_INTER);
4380 %}
4381 operand rxmm20() %{
4382   constraint(ALLOC_IN_RC(xmm20_reg));
4383   match(VecX);
4384   format%{%}
4385   interface(REG_INTER);
4386 %}
4387 operand rxmm21() %{
4388   constraint(ALLOC_IN_RC(xmm21_reg));
4389   match(VecX);
4390   format%{%}
4391   interface(REG_INTER);
4392 %}
4393 operand rxmm22() %{
4394   constraint(ALLOC_IN_RC(xmm22_reg));
4395   match(VecX);
4396   format%{%}
4397   interface(REG_INTER);
4398 %}
4399 operand rxmm23() %{
4400   constraint(ALLOC_IN_RC(xmm23_reg));
4401   match(VecX);
4402   format%{%}
4403   interface(REG_INTER);
4404 %}
4405 operand rxmm24() %{
4406   constraint(ALLOC_IN_RC(xmm24_reg));
4407   match(VecX);
4408   format%{%}
4409   interface(REG_INTER);
4410 %}
4411 operand rxmm25() %{
4412   constraint(ALLOC_IN_RC(xmm25_reg));
4413   match(VecX);
4414   format%{%}
4415   interface(REG_INTER);
4416 %}
4417 operand rxmm26() %{
4418   constraint(ALLOC_IN_RC(xmm26_reg));
4419   match(VecX);
4420   format%{%}
4421   interface(REG_INTER);
4422 %}
4423 operand rxmm27() %{
4424   constraint(ALLOC_IN_RC(xmm27_reg));
4425   match(VecX);
4426   format%{%}
4427   interface(REG_INTER);
4428 %}
4429 operand rxmm28() %{
4430   constraint(ALLOC_IN_RC(xmm28_reg));
4431   match(VecX);
4432   format%{%}
4433   interface(REG_INTER);
4434 %}
4435 operand rxmm29() %{
4436   constraint(ALLOC_IN_RC(xmm29_reg));
4437   match(VecX);
4438   format%{%}
4439   interface(REG_INTER);
4440 %}
4441 operand rxmm30() %{
4442   constraint(ALLOC_IN_RC(xmm30_reg));
4443   match(VecX);
4444   format%{%}
4445   interface(REG_INTER);
4446 %}
4447 operand rxmm31() %{
4448   constraint(ALLOC_IN_RC(xmm31_reg));
4449   match(VecX);
4450   format%{%}
4451   interface(REG_INTER);
4452 %}
4453 
4454 //----------OPERAND CLASSES----------------------------------------------------
4455 // Operand Classes are groups of operands that are used to simplify
4456 // instruction definitions by not requiring the AD writer to specify separate
4457 // instructions for every form of operand when the instruction accepts
4458 // multiple operand types with the same basic encoding and format.  The classic
4459 // case of this is memory operands.
4460 
4461 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4462                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4463                indCompressedOopOffset,
4464                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4465                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4466                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4467 
4468 //----------PIPELINE-----------------------------------------------------------
4469 // Rules which define the behavior of the target architectures pipeline.
4470 pipeline %{
4471 
4472 //----------ATTRIBUTES---------------------------------------------------------
4473 attributes %{
4474   variable_size_instructions;        // Variable size instructions
4475   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4476   instruction_unit_size = 1;         // Instruction size unit is 1 byte
4477   instruction_fetch_unit_size = 16;  // The processor fetches one line
4478   instruction_fetch_units = 1;       // of 16 bytes
4479 
4480   // List of nop instructions
4481   nops( MachNop );
4482 %}
4483 
4484 //----------RESOURCES----------------------------------------------------------
4485 // Resources are the functional units available to the machine
4486 
4487 // Generic P2/P3 pipeline
4488 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4489 // 3 instructions decoded per cycle.
4490 // 2 load/store ops per cycle (NOTE(review): 3 MS units are declared below -- confirm),
4491 // 1 branch, 1 FPU, 3 ALU ops, only ALU0 handles mul instructions.
4492 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4493            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4494            BR, FPU,
4495            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4496 
4497 //----------PIPELINE DESCRIPTION-----------------------------------------------
4498 // Pipeline Description specifies the stages in the machine's pipeline
4499 
4500 // Generic P2/P3 pipeline
4501 pipe_desc(S0, S1, S2, S3, S4, S5);
4502 
4503 //----------PIPELINE CLASSES---------------------------------------------------
4504 // Pipeline Classes describe the stages in which input and output are
4505 // referenced by the hardware pipeline.
4506 
4507 // Naming convention: ialu or fpu
4508 // Then: _reg
4509 // Then: _reg if there is a 2nd register
4510 // Then: _long if it's a pair of instructions implementing a long
4511 // Then: _fat if it requires the big decoder
4512 //   Or: _mem if it requires the big decoder and a memory unit.
4513 
4514 // Integer ALU reg operation
4515 pipe_class ialu_reg(rRegI dst)
4516 %{
4517     single_instruction;
4518     dst    : S4(write);
4519     dst    : S3(read);
4520     DECODE : S0;        // any decoder
4521     ALU    : S3;        // any alu
4522 %}
4523 
4524 // Long ALU reg operation
4525 pipe_class ialu_reg_long(rRegL dst)
4526 %{
4527     instruction_count(2);
4528     dst    : S4(write);
4529     dst    : S3(read);
4530     DECODE : S0(2);     // any 2 decoders
4531     ALU    : S3(2);     // any 2 alus
4532 %}
4533 
4534 // Integer ALU reg operation using big decoder
4535 pipe_class ialu_reg_fat(rRegI dst)
4536 %{
4537     single_instruction;
4538     dst    : S4(write);
4539     dst    : S3(read);
4540     D0     : S0;        // big decoder only
4541     ALU    : S3;        // any alu
4542 %}
4543 
4544 // Long ALU reg operation using big decoder
4545 pipe_class ialu_reg_long_fat(rRegL dst)
4546 %{
4547     instruction_count(2);
4548     dst    : S4(write);
4549     dst    : S3(read);
4550     D0     : S0(2);     // big decoder only; twice
4551     ALU    : S3(2);     // any 2 alus
4552 %}
4553 
4554 // Integer ALU reg-reg operation
4555 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4556 %{
4557     single_instruction;
4558     dst    : S4(write);
4559     src    : S3(read);
4560     DECODE : S0;        // any decoder
4561     ALU    : S3;        // any alu
4562 %}
4563 
4564 // Long ALU reg-reg operation
4565 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4566 %{
4567     instruction_count(2);
4568     dst    : S4(write);
4569     src    : S3(read);
4570     DECODE : S0(2);     // any 2 decoders
4571     ALU    : S3(2);     // any 2 alus
4572 %}
4573 
4574 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
4575 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4576 %{
4577     single_instruction;
4578     dst    : S4(write);
4579     src    : S3(read);
4580     D0     : S0;        // big decoder only
4581     ALU    : S3;        // any alu
4582 %}
4583 
4584 // Long ALU reg-reg operation using big decoder
4585 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4586 %{
4587     instruction_count(2);
4588     dst    : S4(write);
4589     src    : S3(read);
4590     D0     : S0(2);     // big decoder only; twice
4591     ALU    : S3(2);     // any 2 alus
4592 %}
4593 
4594 // Integer ALU reg-mem operation
4595 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4596 %{
4597     single_instruction;
4598     dst    : S5(write);
4599     mem    : S3(read);
4600     D0     : S0;        // big decoder only
4601     ALU    : S4;        // any alu
4602     MEM    : S3;        // any mem
4603 %}
4604 
4605 // Integer mem operation (prefetch)
4606 pipe_class ialu_mem(memory mem)
4607 %{
4608     single_instruction;
4609     mem    : S3(read);
4610     D0     : S0;        // big decoder only
4611     MEM    : S3;        // any mem
4612 %}
4613 
4614 // Integer Store to Memory
4615 pipe_class ialu_mem_reg(memory mem, rRegI src)
4616 %{
4617     single_instruction;
4618     mem    : S3(read);
4619     src    : S5(read);
4620     D0     : S0;        // big decoder only
4621     ALU    : S4;        // any alu
4622     MEM    : S3;
4623 %}
4624 
4625 // // Long Store to Memory
4626 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4627 // %{
4628 //     instruction_count(2);
4629 //     mem    : S3(read);
4630 //     src    : S5(read);
4631 //     D0     : S0(2);          // big decoder only; twice
4632 //     ALU    : S4(2);     // any 2 alus
4633 //     MEM    : S3(2);  // Both mems
4634 // %}
4635 
4636 // Integer Store to Memory
4637 pipe_class ialu_mem_imm(memory mem)
4638 %{
4639     single_instruction;
4640     mem    : S3(read);
4641     D0     : S0;        // big decoder only
4642     ALU    : S4;        // any alu
4643     MEM    : S3;
4644 %}
4645 
4646 // Integer ALU0 reg-reg operation
4647 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4648 %{
4649     single_instruction;
4650     dst    : S4(write);
4651     src    : S3(read);
4652     D0     : S0;        // Big decoder only
4653     ALU0   : S3;        // only alu0
4654 %}
4655 
4656 // Integer ALU0 reg-mem operation
4657 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4658 %{
4659     single_instruction;
4660     dst    : S5(write);
4661     mem    : S3(read);
4662     D0     : S0;        // big decoder only
4663     ALU0   : S4;        // ALU0 only
4664     MEM    : S3;        // any mem
4665 %}
4666 
4667 // Integer ALU reg-reg operation
4668 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4669 %{
4670     single_instruction;
4671     cr     : S4(write);
4672     src1   : S3(read);
4673     src2   : S3(read);
4674     DECODE : S0;        // any decoder
4675     ALU    : S3;        // any alu
4676 %}
4677 
4678 // Integer ALU reg-imm operation
4679 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4680 %{
4681     single_instruction;
4682     cr     : S4(write);
4683     src1   : S3(read);
4684     DECODE : S0;        // any decoder
4685     ALU    : S3;        // any alu
4686 %}
4687 
4688 // Integer ALU reg-mem operation
4689 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4690 %{
4691     single_instruction;
4692     cr     : S4(write);
4693     src1   : S3(read);
4694     src2   : S3(read);
4695     D0     : S0;        // big decoder only
4696     ALU    : S4;        // any alu
4697     MEM    : S3;
4698 %}
4699 
4700 // Conditional move reg-reg
4701 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4702 %{
4703     instruction_count(4);
4704     y      : S4(read);
4705     q      : S3(read);
4706     p      : S3(read);
4707     DECODE : S0(4);     // any decoder
4708 %}
4709 
4710 // Conditional move reg-reg
4711 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4712 %{
4713     single_instruction;
4714     dst    : S4(write);
4715     src    : S3(read);
4716     cr     : S3(read);
4717     DECODE : S0;        // any decoder
4718 %}
4719 
4720 // Conditional move reg-mem: single instruction reading a memory source and the flags, writing dst
4721 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4722 %{
4723     single_instruction;
4724     dst    : S4(write); // destination written in stage S4
4725     src    : S3(read);  // memory source read in stage S3
4726     cr     : S3(read);  // flags are an input here, not an output
4727     DECODE : S0;        // any decoder
4728     MEM    : S3;        // memory resource used in stage S3
4729 %}
4730 
4731 // Conditional move reg-reg long: same operand timing as pipe_cmov_reg but on rRegL with a DECODE count of 2
4732 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4733 %{
4734     single_instruction;
4735     dst    : S4(write); // destination written in stage S4
4736     src    : S3(read);
4737     cr     : S3(read);  // flags are an input here, not an output
4738     DECODE : S0(2);     // any 2 decoders
4739 %}
4740 
4741 // XXX
4742 // // Conditional move double reg-reg
4743 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4744 // %{
4745 //     single_instruction;
4746 //     dst    : S4(write);
4747 //     src    : S3(read);
4748 //     cr     : S3(read);
4749 //     DECODE : S0;     // any decoder
4750 // %}
4751 
4752 // Float single-register operation: two instructions that read dst in S3 and use the FPU
4753 pipe_class fpu_reg(regD dst)
4754 %{
4755     instruction_count(2);
4756     dst    : S3(read);  // note: dst is declared as read only, there is no write entry
4757     DECODE : S0(2);     // any 2 decoders
4758     FPU    : S3;        // FPU resource used in stage S3
4759 %}
4760 
4761 // Float reg-reg operation: two instructions reading src in S3 and writing dst in S4
4762 pipe_class fpu_reg_reg(regD dst, regD src)
4763 %{
4764     instruction_count(2);
4765     dst    : S4(write); // destination written in stage S4
4766     src    : S3(read);  // source read in stage S3
4767     DECODE : S0(2);     // any 2 decoders
4768     FPU    : S3;        // FPU resource used in stage S3
4769 %}
4770 
4771 // Float reg-reg-reg operation: three instructions reading src1/src2 in S3 and writing dst in S4
4772 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4773 %{
4774     instruction_count(3);
4775     dst    : S4(write); // destination written in stage S4
4776     src1   : S3(read);
4777     src2   : S3(read);
4778     DECODE : S0(3);     // any 3 decoders
4779     FPU    : S3(2);     // FPU resource from stage S3 with a count of 2
4780 %}
4781 
4782 // Float operation with three register sources: four instructions reading src1..src3 in S3, writing dst in S4
4783 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4784 %{
4785     instruction_count(4);
4786     dst    : S4(write); // destination written in stage S4
4787     src1   : S3(read);
4788     src2   : S3(read);
4789     src3   : S3(read);
4790     DECODE : S0(4);     // any 4 decoders
4791     FPU    : S3(2);     // FPU resource from stage S3 with a count of 2
4792 %}
4793 
4794 // Float operation with one memory source and two register sources: four instructions, dst written in S4
4795 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4796 %{
4797     instruction_count(4);
4798     dst    : S4(write); // destination written in stage S4
4799     src1   : S3(read);  // memory source
4800     src2   : S3(read);
4801     src3   : S3(read);
4802     DECODE : S1(3);     // any 3 decoders
4803     D0     : S0;        // Big decoder only
4804     FPU    : S3(2);     // FPU resource from stage S3 with a count of 2
4805     MEM    : S3;        // memory resource used in stage S3
4806 %}
4807 
4808 // Float reg-mem operation: two instructions reading mem in S3 and writing dst in S5
4809 pipe_class fpu_reg_mem(regD dst, memory mem)
4810 %{
4811     instruction_count(2);
4812     dst    : S5(write); // destination written in stage S5
4813     mem    : S3(read);  // memory operand read in stage S3
4814     D0     : S0;        // big decoder only
4815     DECODE : S1;        // any decoder for FPU POP
4816     FPU    : S4;        // FPU resource used in stage S4
4817     MEM    : S3;        // any mem
4818 %}
4819 
4820 // Float reg-reg-mem operation: three instructions reading src1 and mem in S3, writing dst in S5
4821 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4822 %{
4823     instruction_count(3);
4824     dst    : S5(write); // destination written in stage S5
4825     src1   : S3(read);
4826     mem    : S3(read);  // memory operand read in stage S3
4827     D0     : S0;        // big decoder only
4828     DECODE : S1(2);     // any decoder for FPU POP
4829     FPU    : S4;        // FPU resource used in stage S4
4830     MEM    : S3;        // any mem
4831 %}
4832 
4833 // Float mem-reg operation: two instructions; both operands are declared as reads (src in S5, mem in S3)
4834 pipe_class fpu_mem_reg(memory mem, regD src)
4835 %{
4836     instruction_count(2);
4837     src    : S5(read);  // register source read in stage S5
4838     mem    : S3(read);  // memory operand read in stage S3
4839     DECODE : S0;        // any decoder for FPU PUSH
4840     D0     : S1;        // big decoder only
4841     FPU    : S4;        // FPU resource used in stage S4
4842     MEM    : S3;        // any mem
4843 %}
4844 
4845 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4846 %{
4847     instruction_count(3); // float mem-reg-reg class: three instructions, every operand is only read
4848     src1   : S3(read);
4849     src2   : S3(read);
4850     mem    : S3(read);  // memory operand read in stage S3
4851     DECODE : S0(2);     // any decoder for FPU PUSH
4852     D0     : S1;        // big decoder only
4853     FPU    : S4;        // FPU resource used in stage S4
4854     MEM    : S3;        // any mem
4855 %}
4856 
4857 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4858 %{
4859     instruction_count(3); // float class with two memory operands and one register: three instructions, all reads
4860     src1   : S3(read);
4861     src2   : S3(read);  // second memory operand, read in stage S3
4862     mem    : S4(read);  // mem is read one stage after the sources
4863     DECODE : S0;        // any decoder for FPU PUSH
4864     D0     : S0(2);     // big decoder only
4865     FPU    : S4;        // FPU resource used in stage S4
4866     MEM    : S3(2);     // any mem
4867 %}
4868 
4869 pipe_class fpu_mem_mem(memory dst, memory src1)
4870 %{
4871     instruction_count(2); // float mem-mem class: two instructions; no FPU resource is listed
4872     src1   : S3(read);
4873     dst    : S4(read);  // dst is declared as a read, one stage after src1
4874     D0     : S0(2);     // big decoder only
4875     MEM    : S3(2);     // any mem
4876 %}
4877 
4878 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4879 %{
4880     instruction_count(3); // float class with three memory operands: three instructions, all reads
4881     src1   : S3(read);
4882     src2   : S3(read);
4883     dst    : S4(read);  // dst is declared as a read, one stage after the sources
4884     D0     : S0(3);     // big decoder only
4885     FPU    : S4;        // FPU resource used in stage S4
4886     MEM    : S3(3);     // any mem
4887 %}
4888 
4889 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4890 %{
4891     instruction_count(3); // float mem-reg class (name suggests a constant operand too -- TODO confirm): three instructions
4892     src1   : S4(read);  // both operands are read in stage S4
4893     mem    : S4(read);
4894     DECODE : S0;        // any decoder for FPU PUSH
4895     D0     : S0(2);     // big decoder only
4896     FPU    : S4;        // FPU resource used in stage S4
4897     MEM    : S3(2);     // any mem
4898 %}
4899 
4900 // Float load constant: two instructions writing dst in S5; uses a MEM resource for the load
4901 pipe_class fpu_reg_con(regD dst)
4902 %{
4903     instruction_count(2);
4904     dst    : S5(write); // destination written in stage S5
4905     D0     : S0;        // big decoder only for the load
4906     DECODE : S1;        // any decoder for FPU POP
4907     FPU    : S4;        // FPU resource used in stage S4
4908     MEM    : S3;        // any mem
4909 %}
4910 
4911 // Float load constant combined with a register source: three instructions reading src in S3, writing dst in S5
4912 pipe_class fpu_reg_reg_con(regD dst, regD src)
4913 %{
4914     instruction_count(3);
4915     dst    : S5(write); // destination written in stage S5
4916     src    : S3(read);  // register source read in stage S3
4917     D0     : S0;        // big decoder only for the load
4918     DECODE : S1(2);     // any decoder for FPU POP
4919     FPU    : S4;        // FPU resource used in stage S4
4920     MEM    : S3;        // any mem
4921 %}
4922 
4923 // Unconditional branch: single instruction that uses only the BR resource
4924 pipe_class pipe_jmp(label labl)
4925 %{
4926     single_instruction;
4927     BR   : S3;          // branch resource used in stage S3; labl has no operand timing entry
4928 %}
4929 
4930 // Conditional branch: single instruction that reads the flags and uses the BR resource
4931 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4932 %{
4933     single_instruction;
4934     cr    : S1(read);   // flags are read in stage S1
4935     BR    : S3;         // branch resource used in stage S3
4936 %}
4937 
4938 // Allocation idiom (compare-and-exchange, per the class name): one serializing instruction with a fixed latency of 6
4939 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4940 %{
4941     instruction_count(1); force_serialization;
4942     fixed_latency(6);
4943     heap_ptr : S3(read);  // heap pointer read in stage S3
4944     DECODE   : S0(3);
4945     D0       : S2;
4946     MEM      : S3;
4947     ALU      : S3(2);
4948     dst      : S5(write); // result written in stage S5
4949     BR       : S5;        // a branch resource is also reserved in stage S5
4950 %}
4951 
4952 // Generic big/slow expanded idiom: catch-all class declaring 10 instructions, multiple bundles and latency 100
4953 pipe_class pipe_slow()
4954 %{
4955     instruction_count(10); multiple_bundles; force_serialization;
4956     fixed_latency(100);
4957     D0  : S0(2);        // takes no operands; only decoder and memory resources are listed
4958     MEM : S3(2);
4959 %}
4960 
4961 // The real do-nothing guy: a pipeline class with no operands, no resources and zero instructions
4962 pipe_class empty()
4963 %{
4964     instruction_count(0); // contributes no instructions to the schedule
4965 %}
4966 
4967 // Define the class for the Nop node: MachNop is bound to the do-nothing pipeline class 'empty'
4968 define
4969 %{
4970    MachNop = empty;
4971 %}
4972 
4973 %}
4974 
4975 //----------INSTRUCTIONS-------------------------------------------------------
4976 //
4977 // match      -- States which machine-independent subtree may be replaced
4978 //               by this instruction.
4979 // ins_cost   -- The estimated cost of this instruction is used by instruction
4980 //               selection to identify a minimum cost tree of machine
4981 //               instructions that matches a tree of machine-independent
4982 //               instructions.
4983 // format     -- A string providing the disassembly for this instruction.
4984 //               The value of an instruction's operand may be inserted
4985 //               by referring to it with a '$' prefix.
4986 // opcode     -- Three instruction opcodes may be provided.  These are referred
4987 //               to within an encode class as $primary, $secondary, and $tertiary
4988 //               respectively.  The primary opcode is commonly used to
4989 //               indicate the type of machine instruction, while secondary
4990 //               and tertiary are often used for prefix options or addressing
4991 //               modes.
4992 // ins_encode -- A list of encode classes with parameters. The encode class
4993 //               name must have been defined in an 'enc_class' specification
4994 //               in the encode section of the architecture description.
4995 
4996 
4997 //----------Load/Store/Move Instructions---------------------------------------
4998 //----------Load Instructions--------------------------------------------------
4999 
5000 // Load Byte (8 bit signed): matches LoadB and loads the byte into an int register with movsbl
5001 instruct loadB(rRegI dst, memory mem)
5002 %{
5003   match(Set dst (LoadB mem));
5004 
5005   ins_cost(125);
5006   format %{ "movsbl  $dst, $mem\t# byte" %}
5007 
5008   ins_encode %{
5009     __ movsbl($dst$$Register, $mem$$Address); // sign-extending byte load
5010   %}
5011 
5012   ins_pipe(ialu_reg_mem);
5013 %}
5014 
5015 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
5016 instruct loadB2L(rRegL dst, memory mem)
5017 %{
5018   match(Set dst (ConvI2L (LoadB mem))); // the int-to-long conversion is absorbed by the load
5019 
5020   ins_cost(125);
5021   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
5022 
5023   ins_encode %{
5024     __ movsbq($dst$$Register, $mem$$Address); // sign-extending byte load into a 64-bit register
5025   %}
5026 
5027   ins_pipe(ialu_reg_mem);
5028 %}
5029 
5030 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and loads the byte into an int register with movzbl
5031 instruct loadUB(rRegI dst, memory mem)
5032 %{
5033   match(Set dst (LoadUB mem));
5034 
5035   ins_cost(125);
5036   format %{ "movzbl  $dst, $mem\t# ubyte" %}
5037 
5038   ins_encode %{
5039     __ movzbl($dst$$Register, $mem$$Address); // zero-extending byte load
5040   %}
5041 
5042   ins_pipe(ialu_reg_mem);
5043 %}
5044 
5045 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq
5046 instruct loadUB2L(rRegL dst, memory mem)
5047 %{
5048   match(Set dst (ConvI2L (LoadUB mem))); // the int-to-long conversion is absorbed by the load
5049 
5050   ins_cost(125);
5051   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
5052 
5053   ins_encode %{
5054     __ movzbq($dst$$Register, $mem$$Address); // zero-extending byte load into a 64-bit register
5055   %}
5056 
5057   ins_pipe(ialu_reg_mem);
5058 %}
5059 
5060 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register: movzbq followed by andl
5061 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5062   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5063   effect(KILL cr); // the andl emitted below modifies the flags
5064 
5065   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
5066             "andl    $dst, right_n_bits($mask, 8)" %}
5067   ins_encode %{
5068     Register Rdst = $dst$$Register;
5069     __ movzbq(Rdst, $mem$$Address);
5070     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // mask is truncated to its low 8 bits before being applied
5071   %}
5072   ins_pipe(ialu_reg_mem);
5073 %}
5074 
5075 // Load Short (16 bit signed): matches LoadS and loads the short into an int register with movswl
5076 instruct loadS(rRegI dst, memory mem)
5077 %{
5078   match(Set dst (LoadS mem));
5079 
5080   ins_cost(125);
5081   format %{ "movswl $dst, $mem\t# short" %}
5082 
5083   ins_encode %{
5084     __ movswl($dst$$Register, $mem$$Address); // sign-extending 16-bit load
5085   %}
5086 
5087   ins_pipe(ialu_reg_mem);
5088 %}
5089 
5090 // Load Short (16 bit signed) to Byte (8 bit signed): folds (LoadS << 24) >> 24 into one movsbl
5091 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5092   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); // same constant operand is used for both shifts
5093 
5094   ins_cost(125);
5095   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5096   ins_encode %{
5097     __ movsbl($dst$$Register, $mem$$Address); // byte load from the same address replaces load + two shifts
5098   %}
5099   ins_pipe(ialu_reg_mem);
5100 %}
5101 
5102 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
5103 instruct loadS2L(rRegL dst, memory mem)
5104 %{
5105   match(Set dst (ConvI2L (LoadS mem))); // the int-to-long conversion is absorbed by the load
5106 
5107   ins_cost(125);
5108   format %{ "movswq $dst, $mem\t# short -> long" %}
5109 
5110   ins_encode %{
5111     __ movswq($dst$$Register, $mem$$Address); // sign-extending 16-bit load into a 64-bit register
5112   %}
5113 
5114   ins_pipe(ialu_reg_mem);
5115 %}
5116 
5117 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and loads into an int register with movzwl
5118 instruct loadUS(rRegI dst, memory mem)
5119 %{
5120   match(Set dst (LoadUS mem));
5121 
5122   ins_cost(125);
5123   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5124 
5125   ins_encode %{
5126     __ movzwl($dst$$Register, $mem$$Address); // zero-extending 16-bit load
5127   %}
5128 
5129   ins_pipe(ialu_reg_mem);
5130 %}
5131 
5132 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): folds (LoadUS << 24) >> 24 into one movsbl
5133 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5134   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); // same constant operand is used for both shifts
5135 
5136   ins_cost(125);
5137   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5138   ins_encode %{
5139     __ movsbl($dst$$Register, $mem$$Address); // byte load from the same address replaces load + two shifts
5140   %}
5141   ins_pipe(ialu_reg_mem);
5142 %}
5143 
5144 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
5145 instruct loadUS2L(rRegL dst, memory mem)
5146 %{
5147   match(Set dst (ConvI2L (LoadUS mem))); // the int-to-long conversion is absorbed by the load
5148 
5149   ins_cost(125);
5150   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5151 
5152   ins_encode %{
5153     __ movzwq($dst$$Register, $mem$$Address); // zero-extending 16-bit load into a 64-bit register
5154   %}
5155 
5156   ins_pipe(ialu_reg_mem);
5157 %}
5158 
5159 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the mask is folded into a byte load
5160 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5161   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5162 
5163   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5164   ins_encode %{
5165     __ movzbq($dst$$Register, $mem$$Address); // zero-extending byte load; no separate and instruction is emitted
5166   %}
5167   ins_pipe(ialu_reg_mem);
5168 %}
5169 
5170 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register: movzwq followed by andl
5171 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5172   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5173   effect(KILL cr); // the andl emitted below modifies the flags
5174 
5175   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5176             "andl    $dst, right_n_bits($mask, 16)" %}
5177   ins_encode %{
5178     Register Rdst = $dst$$Register;
5179     __ movzwq(Rdst, $mem$$Address);
5180     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // mask is truncated to its low 16 bits before being applied
5181   %}
5182   ins_pipe(ialu_reg_mem);
5183 %}
5184 
5185 // Load Integer: matches LoadI and loads 32 bits into an int register with movl
5186 instruct loadI(rRegI dst, memory mem)
5187 %{
5188   match(Set dst (LoadI mem));
5189 
5190   ins_cost(125);
5191   format %{ "movl    $dst, $mem\t# int" %}
5192 
5193   ins_encode %{
5194     __ movl($dst$$Register, $mem$$Address); // plain 32-bit load
5195   %}
5196 
5197   ins_pipe(ialu_reg_mem);
5198 %}
5199 
5200 // Load Integer (32 bit signed) to Byte (8 bit signed): folds (LoadI << 24) >> 24 into one movsbl
5201 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5202   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); // same constant operand is used for both shifts
5203 
5204   ins_cost(125);
5205   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5206   ins_encode %{
5207     __ movsbl($dst$$Register, $mem$$Address); // byte load from the same address replaces load + two shifts
5208   %}
5209   ins_pipe(ialu_reg_mem);
5210 %}
5211 
5212 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): folds LoadI & 0xFF into one movzbl
5213 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5214   match(Set dst (AndI (LoadI mem) mask));
5215 
5216   ins_cost(125);
5217   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5218   ins_encode %{
5219     __ movzbl($dst$$Register, $mem$$Address); // zero-extending byte load replaces load + and
5220   %}
5221   ins_pipe(ialu_reg_mem);
5222 %}
5223 
5224 // Load Integer (32 bit signed) to Short (16 bit signed): folds (LoadI << 16) >> 16 into one movswl
5225 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5226   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); // same constant operand is used for both shifts
5227 
5228   ins_cost(125);
5229   format %{ "movswl  $dst, $mem\t# int -> short" %}
5230   ins_encode %{
5231     __ movswl($dst$$Register, $mem$$Address); // 16-bit sign-extending load replaces load + two shifts
5232   %}
5233   ins_pipe(ialu_reg_mem);
5234 %}
5235 
5236 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): folds LoadI & 0xFFFF into one movzwl
5237 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5238   match(Set dst (AndI (LoadI mem) mask));
5239 
5240   ins_cost(125);
5241   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5242   ins_encode %{
5243     __ movzwl($dst$$Register, $mem$$Address); // 16-bit zero-extending load replaces load + and
5244   %}
5245   ins_pipe(ialu_reg_mem);
5246 %}
5247 
5248 // Load Integer into Long Register: folds ConvI2L(LoadI) into one movslq
5249 instruct loadI2L(rRegL dst, memory mem)
5250 %{
5251   match(Set dst (ConvI2L (LoadI mem))); // the int-to-long conversion is absorbed by the load
5252 
5253   ins_cost(125);
5254   format %{ "movslq  $dst, $mem\t# int -> long" %}
5255 
5256   ins_encode %{
5257     __ movslq($dst$$Register, $mem$$Address); // sign-extending 32-bit load into a 64-bit register
5258   %}
5259 
5260   ins_pipe(ialu_reg_mem);
5261 %}
5262 
5263 // Load Integer with mask 0xFF into Long Register: the mask and conversion are folded into one movzbq
5264 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5265   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5266 
5267   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5268   ins_encode %{
5269     __ movzbq($dst$$Register, $mem$$Address); // zero-extending byte load; no separate and instruction is emitted
5270   %}
5271   ins_pipe(ialu_reg_mem);
5272 %}
5273 
5274 // Load Integer with mask 0xFFFF into Long Register: the mask and conversion are folded into one movzwq
5275 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5276   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5277 
5278   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5279   ins_encode %{
5280     __ movzwq($dst$$Register, $mem$$Address); // zero-extending 16-bit load; no separate and instruction is emitted
5281   %}
5282   ins_pipe(ialu_reg_mem);
5283 %}
5284 
5285 // Load Integer with a 31-bit mask into Long Register: movl followed by andl with the full mask
5286 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5287   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5288   effect(KILL cr); // the andl emitted below modifies the flags
5289 
5290   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5291             "andl    $dst, $mask" %}
5292   ins_encode %{
5293     Register Rdst = $dst$$Register;
5294     __ movl(Rdst, $mem$$Address);
5295     __ andl(Rdst, $mask$$constant); // mask is applied unmodified (operand class immU31)
5296   %}
5297   ins_pipe(ialu_reg_mem);
5298 %}
5299 
5300 // Load Unsigned Integer into Long Register: folds ConvI2L(LoadI) & immL_32bits mask into one movl
5301 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
5302 %{
5303   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5304 
5305   ins_cost(125);
5306   format %{ "movl    $dst, $mem\t# uint -> long" %}
5307 
5308   ins_encode %{
5309     __ movl($dst$$Register, $mem$$Address); // a 32-bit load on x86-64 zero-extends into the 64-bit register
5310   %}
5311 
5312   ins_pipe(ialu_reg_mem);
5313 %}
5314 
5315 // Load Long: matches LoadL and loads 64 bits into a long register with movq
5316 instruct loadL(rRegL dst, memory mem)
5317 %{
5318   match(Set dst (LoadL mem));
5319 
5320   ins_cost(125);
5321   format %{ "movq    $dst, $mem\t# long" %}
5322 
5323   ins_encode %{
5324     __ movq($dst$$Register, $mem$$Address); // plain 64-bit load
5325   %}
5326 
5327   ins_pipe(ialu_reg_mem); // XXX
5328 %}
5329 
5330 // Load Range: matches LoadRange and loads into an int register; encoded from opcode 0x8B via enc classes
5331 instruct loadRange(rRegI dst, memory mem)
5332 %{
5333   match(Set dst (LoadRange mem));
5334 
5335   ins_cost(125); // XXX
5336   format %{ "movl    $dst, $mem\t# range" %}
5337   opcode(0x8B); // primary opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5338   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem)); // non-wide REX variant, matching the 32-bit movl in format
5339   ins_pipe(ialu_reg_mem);
5340 %}
5341 
5342 // Load Pointer: matches LoadP and loads into a pointer register; encoded from opcode 0x8B via enc classes
5343 instruct loadP(rRegP dst, memory mem)
5344 %{
5345   match(Set dst (LoadP mem));
5346 
5347   ins_cost(125); // XXX
5348   format %{ "movq    $dst, $mem\t# ptr" %}
5349   opcode(0x8B); // primary opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5350   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX variant, matching the 64-bit movq in format
5351   ins_pipe(ialu_reg_mem); // XXX
5352 %}
5353 
5354 // Load Compressed Pointer: matches LoadN and loads 32 bits into a narrow-oop register with movl
5355 instruct loadN(rRegN dst, memory mem)
5356 %{
5357    match(Set dst (LoadN mem));
5358 
5359    ins_cost(125); // XXX
5360    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5361    ins_encode %{
5362      __ movl($dst$$Register, $mem$$Address); // 32-bit load; no decoding is done here
5363    %}
5364    ins_pipe(ialu_reg_mem); // XXX
5365 %}
5366 
5367 
5368 // Load Klass Pointer: matches LoadKlass and loads into a pointer register; same encoding shape as loadP
5369 instruct loadKlass(rRegP dst, memory mem)
5370 %{
5371   match(Set dst (LoadKlass mem));
5372 
5373   ins_cost(125); // XXX
5374   format %{ "movq    $dst, $mem\t# class" %}
5375   opcode(0x8B); // primary opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5376   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX variant, matching the 64-bit movq in format
5377   ins_pipe(ialu_reg_mem); // XXX
5378 %}
5379 
5380 // Load narrow Klass Pointer: matches LoadNKlass and loads 32 bits into a narrow register with movl
5381 instruct loadNKlass(rRegN dst, memory mem)
5382 %{
5383   match(Set dst (LoadNKlass mem));
5384 
5385   ins_cost(125); // XXX
5386   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5387   ins_encode %{
5388     __ movl($dst$$Register, $mem$$Address); // 32-bit load; no decoding is done here
5389   %}
5390   ins_pipe(ialu_reg_mem); // XXX
5391 %}
5392 
5393 // Load Float: matches LoadF and loads from memory into an XMM register via the movflt macro
5394 instruct loadF(regF dst, memory mem)
5395 %{
5396   match(Set dst (LoadF mem));
5397 
5398   ins_cost(145); // XXX
5399   format %{ "movss   $dst, $mem\t# float" %}
5400   ins_encode %{
5401     __ movflt($dst$$XMMRegister, $mem$$Address); // assembler helper; format above prints it as movss
5402   %}
5403   ins_pipe(pipe_slow); // XXX
5404 %}
5405 
5406 // Move Float register-to-register: copies a regF source into a vlRegF destination (no memory access)
5407 instruct MoveF2VL(vlRegF dst, regF src) %{
5408   match(Set dst src); // operand-class conversion rule: no ideal node besides Set
5409   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5410   ins_encode %{
5411     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // XMM-to-XMM move
5412   %}
5413   ins_pipe( fpu_reg_reg );
5414 %}
5415 
5416 // Move Float register-to-register: copies a vlRegF source into a regF destination (no memory access)
5417 instruct MoveVL2F(regF dst, vlRegF src) %{
5418   match(Set dst src); // operand-class conversion rule: no ideal node besides Set
5419   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5420   ins_encode %{
5421     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // XMM-to-XMM move
5422   %}
5423   ins_pipe( fpu_reg_reg );
5424 %}
5425 
5426 // Load Double, variant selected when UseXmmLoadAndClearUpper is false (format prints movlpd)
5427 instruct loadD_partial(regD dst, memory mem)
5428 %{
5429   predicate(!UseXmmLoadAndClearUpper); // complementary to the predicate on loadD
5430   match(Set dst (LoadD mem));
5431 
5432   ins_cost(145); // XXX
5433   format %{ "movlpd  $dst, $mem\t# double" %}
5434   ins_encode %{
5435     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper call as loadD; presumably it picks the instruction from the flag -- verify in the macro assembler
5436   %}
5437   ins_pipe(pipe_slow); // XXX
5438 %}
5439 
5440 instruct loadD(regD dst, memory mem)
5441 %{
5442   predicate(UseXmmLoadAndClearUpper); // Load Double variant for when the flag is true; loadD_partial covers the other case
5443   match(Set dst (LoadD mem));
5444 
5445   ins_cost(145); // XXX
5446   format %{ "movsd   $dst, $mem\t# double" %}
5447   ins_encode %{
5448     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper call as loadD_partial; presumably it picks the instruction from the flag -- verify in the macro assembler
5449   %}
5450   ins_pipe(pipe_slow); // XXX
5451 %}
5452 
5453 // Move Double register-to-register: copies a regD source into a vlRegD destination (no memory access)
5454 instruct MoveD2VL(vlRegD dst, regD src) %{
5455   match(Set dst src); // operand-class conversion rule: no ideal node besides Set
5456   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5457   ins_encode %{
5458     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // XMM-to-XMM move
5459   %}
5460   ins_pipe( fpu_reg_reg );
5461 %}
5462 
5463 // Move Double register-to-register: copies a vlRegD source into a regD destination (no memory access)
5464 instruct MoveVL2D(regD dst, vlRegD src) %{
5465   match(Set dst src); // operand-class conversion rule: no ideal node besides Set
5466   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5467   ins_encode %{
5468     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // XMM-to-XMM move
5469   %}
5470   ins_pipe( fpu_reg_reg );
5471 %}
5472 
5473 // Load Effective Address: materializes an indOffset8 address expression into a pointer register with leaq
5474 instruct leaP8(rRegP dst, indOffset8 mem)
5475 %{
5476   match(Set dst mem); // matches the address operand itself, not a load through it
5477 
5478   ins_cost(110); // XXX
5479   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5480   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5481   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5482   ins_pipe(ialu_reg_reg_fat);
5483 %}
5484 
5485 instruct leaP32(rRegP dst, indOffset32 mem)
5486 %{
5487   match(Set dst mem); // Load Effective Address of an indOffset32 operand (not a load through it)
5488 
5489   ins_cost(110);
5490   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5491   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5492   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5493   ins_pipe(ialu_reg_reg_fat);
5494 %}
5495 
5496 // instruct leaPIdx(rRegP dst, indIndex mem)
5497 // %{
5498 //   match(Set dst mem);
5499 
5500 //   ins_cost(110);
5501 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5502 //   opcode(0x8D);
5503 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5504 //   ins_pipe(ialu_reg_reg_fat);
5505 // %}
5506 
5507 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5508 %{
5509   match(Set dst mem); // Load Effective Address of an indIndexOffset operand (not a load through it)
5510 
5511   ins_cost(110);
5512   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5513   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5514   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5515   ins_pipe(ialu_reg_reg_fat);
5516 %}
5517 
5518 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5519 %{
5520   match(Set dst mem); // Load Effective Address of an indIndexScale operand (not a load through it)
5521 
5522   ins_cost(110);
5523   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5524   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5525   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5526   ins_pipe(ialu_reg_reg_fat);
5527 %}
5528 
5529 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
5530 %{
5531   match(Set dst mem); // Load Effective Address of an indPosIndexScale operand (not a load through it)
5532 
5533   ins_cost(110);
5534   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
5535   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5536   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5537   ins_pipe(ialu_reg_reg_fat);
5538 %}
5539 
5540 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5541 %{
5542   match(Set dst mem); // Load Effective Address of an indIndexScaleOffset operand (not a load through it)
5543 
5544   ins_cost(110);
5545   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5546   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5547   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5548   ins_pipe(ialu_reg_reg_fat);
5549 %}
5550 
5551 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
5552 %{
5553   match(Set dst mem); // Load Effective Address of an indPosIndexOffset operand (not a load through it)
5554 
5555   ins_cost(110);
5556   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
5557   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5558   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5559   ins_pipe(ialu_reg_reg_fat);
5560 %}
5561 
5562 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5563 %{
5564   match(Set dst mem); // Load Effective Address of an indPosIndexScaleOffset operand (not a load through it)
5565 
5566   ins_cost(110);
5567   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5568   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5569   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5570   ins_pipe(ialu_reg_reg_fat);
5571 %}
5572 
5573 // Load Effective Address which uses Narrow (32-bits) oop; only enabled with compressed oops and a non-zero shift
5574 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5575 %{
5576   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); // the *Narrow lea rules below cover the shift == 0 case
5577   match(Set dst mem);
5578 
5579   ins_cost(110);
5580   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5581   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5582   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5583   ins_pipe(ialu_reg_reg_fat);
5584 %}
5585 
5586 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5587 %{
5588   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5589   match(Set dst mem);
5590 
5591   ins_cost(110); // XXX
5592   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5593   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5594   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5595   ins_pipe(ialu_reg_reg_fat);
5596 %}
5597 
5598 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5599 %{
5600   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5601   match(Set dst mem);
5602 
5603   ins_cost(110);
5604   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5605   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5606   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5607   ins_pipe(ialu_reg_reg_fat);
5608 %}
5609 
5610 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5611 %{
5612   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5613   match(Set dst mem);
5614 
5615   ins_cost(110);
5616   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5617   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5618   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5619   ins_pipe(ialu_reg_reg_fat);
5620 %}
5621 
5622 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5623 %{
5624   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5625   match(Set dst mem);
5626 
5627   ins_cost(110);
5628   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5629   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5630   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5631   ins_pipe(ialu_reg_reg_fat);
5632 %}
5633 
5634 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5635 %{
5636   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5637   match(Set dst mem);
5638 
5639   ins_cost(110);
5640   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5641   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5642   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5643   ins_pipe(ialu_reg_reg_fat);
5644 %}
5645 
5646 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
5647 %{
5648   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5649   match(Set dst mem);
5650 
5651   ins_cost(110);
5652   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
5653   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5654   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5655   ins_pipe(ialu_reg_reg_fat);
5656 %}
5657 
5658 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5659 %{
5660   predicate(Universe::narrow_oop_shift() == 0); // Narrow lea variant: only valid when the narrow oop shift is zero
5661   match(Set dst mem);
5662 
5663   ins_cost(110);
5664   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5665   opcode(0x8D); // LEA opcode; presumably emitted by OpcP below -- enc classes are defined elsewhere
5666   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5667   ins_pipe(ialu_reg_reg_fat);
5668 %}
5669 
5670 instruct loadConI(rRegI dst, immI src)
5671 %{
5672   match(Set dst src); // Load Int Constant: general case for any immI; no explicit ins_cost is given
5673 
5674   format %{ "movl    $dst, $src\t# int" %}
5675   ins_encode(load_immI(dst, src)); // enc class defined elsewhere in the file
5676   ins_pipe(ialu_reg_fat); // XXX
5677 %}
5678 
5679 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5680 %{
5681   match(Set dst src); // Load Int Constant zero: cheaper special case using xor of dst with itself
5682   effect(KILL cr); // xor modifies the flags
5683 
5684   ins_cost(50);
5685   format %{ "xorl    $dst, $dst\t# int" %}
5686   opcode(0x33); /* + rd */
5687   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // dst is passed as both operands
5688   ins_pipe(ialu_reg);
5689 %}
5690 
5691 instruct loadConL(rRegL dst, immL src)
5692 %{
5693   match(Set dst src); // Load Long Constant: general case for any immL
5694 
5695   ins_cost(150); // highest cost of the long-constant rules, so the special cases below win when they apply
5696   format %{ "movq    $dst, $src\t# long" %}
5697   ins_encode(load_immL(dst, src)); // enc class defined elsewhere in the file
5698   ins_pipe(ialu_reg);
5699 %}
5700 
5701 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5702 %{
5703   match(Set dst src); // Load Long Constant zero: special case using xor of dst with itself
5704   effect(KILL cr); // xor modifies the flags
5705 
5706   ins_cost(50);
5707   format %{ "xorl    $dst, $dst\t# long" %}
5708   opcode(0x33); /* + rd */
5709   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor, which on x86-64 also clears the upper 32 bits
5710   ins_pipe(ialu_reg); // XXX
5711 %}
5712 
5713 instruct loadConUL32(rRegL dst, immUL32 src)
5714 %{
5715   match(Set dst src); // Load Long Constant that fits the immUL32 operand (unsigned 32-bit per the format text)
5716 
5717   ins_cost(60);
5718   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5719   ins_encode(load_immUL32(dst, src)); // enc class defined elsewhere in the file
5720   ins_pipe(ialu_reg);
5721 %}
5722 
5723 instruct loadConL32(rRegL dst, immL32 src)
5724 %{
5725   match(Set dst src); // Load Long Constant that fits the immL32 operand (32-bit per the format text)
5726 
5727   ins_cost(70);
5728   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5729   ins_encode(load_immL32(dst, src)); // enc class defined elsewhere in the file
5730   ins_pipe(ialu_reg);
5731 %}
5732 
5733 instruct loadConP(rRegP dst, immP con) %{
5734   match(Set dst con); // Load Pointer Constant: general case for any immP; no explicit ins_cost is given
5735 
5736   format %{ "movq    $dst, $con\t# ptr" %}
5737   ins_encode(load_immP(dst, con)); // enc class defined elsewhere in the file
5738   ins_pipe(ialu_reg_fat); // XXX
5739 %}
5740 
5741 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5742 %{
5743   match(Set dst src); // Load Pointer Constant zero (immP0): special case using xor of dst with itself
5744   effect(KILL cr); // xor modifies the flags
5745 
5746   ins_cost(50);
5747   format %{ "xorl    $dst, $dst\t# ptr" %}
5748   opcode(0x33); /* + rd */
5749   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor, which on x86-64 also clears the upper 32 bits
5750   ins_pipe(ialu_reg);
5751 %}
5752 
5753 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5754 %{
5755   match(Set dst src); // Load Pointer Constant that fits the immP31 operand (positive 32-bit per the format text)
5756   effect(KILL cr); // NOTE(review): flags declared killed although the format shows a plain movl -- confirm against load_immP31
5757 
5758   ins_cost(60);
5759   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5760   ins_encode(load_immP31(dst, src)); // enc class defined elsewhere in the file
5761   ins_pipe(ialu_reg);
5762 %}
5763 
5764 instruct loadConF(regF dst, immF con) %{
5765   match(Set dst con); // Load Float Constant: general case for any immF
5766   ins_cost(125);
5767   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5768   ins_encode %{
5769     __ movflt($dst$$XMMRegister, $constantaddress($con)); // value is read from the constant table entry for con
5770   %}
5771   ins_pipe(pipe_slow);
5772 %}
5773 
5774 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5775   match(Set dst src); // Load compressed NULL constant (immN0) by clearing dst
5776   effect(KILL cr); // xor modifies the flags
5777   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5778   ins_encode %{
5779     __ xorq($dst$$Register, $dst$$Register); // dst is both operands; src is never encoded
5780   %}
5781   ins_pipe(ialu_reg);
5782 %}
5783 
5784 instruct loadConN(rRegN dst, immN src) %{
5785   match(Set dst src); // Load compressed oop constant: general case for any immN
5786 
5787   ins_cost(125);
5788   format %{ "movl    $dst, $src\t# compressed ptr" %}
5789   ins_encode %{
5790     address con = (address)$src$$constant;
5791     if (con == NULL) {
5792       ShouldNotReachHere(); // a NULL constant is not expected to reach this rule
5793     } else {
5794       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); // constant is handed to the assembler as a jobject
5795     }
5796   %}
5797   ins_pipe(ialu_reg_fat); // XXX
5798 %}
5799 
5800 instruct loadConNKlass(rRegN dst, immNKlass src) %{
5801   match(Set dst src); // Load compressed klass constant: matches any immNKlass
5802 
5803   ins_cost(125);
5804   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5805   ins_encode %{
5806     address con = (address)$src$$constant;
5807     if (con == NULL) {
5808       ShouldNotReachHere(); // a NULL klass constant is not expected to reach this rule
5809     } else {
5810       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant); // constant is handed to the assembler as a Klass pointer
5811     }
5812   %}
5813   ins_pipe(ialu_reg_fat); // XXX
5814 %}
5815 
// Load the float constant matched by immF0 by XOR-ing the XMM register with
// itself (xorps), avoiding a constant-table load. ins_cost(100) is lower
// than loadConF's 125.
5816 instruct loadConF0(regF dst, immF0 src)
5817 %{
5818   match(Set dst src);
5819   ins_cost(100);
5820 
5821   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5822   ins_encode %{
5823     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5824   %}
5825   ins_pipe(pipe_slow);
5826 %}
5827 
5828 // Use the same format since predicate() can not be used here.
// Load a double constant (immD) into an XMM register from the constant
// table: movdbl reads from $constantaddress($con).
5829 instruct loadConD(regD dst, immD con) %{
5830   match(Set dst con);
5831   ins_cost(125);
5832   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5833   ins_encode %{
5834     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5835   %}
5836   ins_pipe(pipe_slow);
5837 %}
5838 
// Load the double constant matched by immD0 by XOR-ing the XMM register
// with itself (xorpd), avoiding a constant-table load. ins_cost(100) is
// lower than loadConD's 125.
5839 instruct loadConD0(regD dst, immD0 src)
5840 %{
5841   match(Set dst src);
5842   ins_cost(100);
5843 
5844   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5845   ins_encode %{
5846     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5847   %}
5848   ins_pipe(pipe_slow);
5849 %}
5850 
// Reload an int from a stack slot into a general register.
// Opcode 0x8B with a non-wide REX prefix, i.e. a 32-bit move.
5851 instruct loadSSI(rRegI dst, stackSlotI src)
5852 %{
5853   match(Set dst src);
5854 
5855   ins_cost(125);
5856   format %{ "movl    $dst, $src\t# int stk" %}
5857   opcode(0x8B);
5858   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5859   ins_pipe(ialu_reg_mem);
5860 %}
5861 
// Reload a long from a stack slot into a general register.
// Same opcode as loadSSI (0x8B) but with the wide REX prefix encoding,
// i.e. a 64-bit move.
5862 instruct loadSSL(rRegL dst, stackSlotL src)
5863 %{
5864   match(Set dst src);
5865 
5866   ins_cost(125);
5867   format %{ "movq    $dst, $src\t# long stk" %}
5868   opcode(0x8B);
5869   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5870   ins_pipe(ialu_reg_mem);
5871 %}
5872 
// Reload a pointer from a stack slot into a pointer register.
// Encoded exactly like loadSSL: opcode 0x8B with the wide REX prefix.
5873 instruct loadSSP(rRegP dst, stackSlotP src)
5874 %{
5875   match(Set dst src);
5876 
5877   ins_cost(125);
5878   format %{ "movq    $dst, $src\t# ptr stk" %}
5879   opcode(0x8B);
5880   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5881   ins_pipe(ialu_reg_mem);
5882 %}
5883 
// Reload a float from a stack slot into an XMM register.
// The slot is addressed as rsp + the operand's displacement.
5884 instruct loadSSF(regF dst, stackSlotF src)
5885 %{
5886   match(Set dst src);
5887 
5888   ins_cost(125);
5889   format %{ "movss   $dst, $src\t# float stk" %}
5890   ins_encode %{
5891     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5892   %}
5893   ins_pipe(pipe_slow); // XXX
5894 %}
5895 
5896 // Use the same format since predicate() can not be used here.
// Reload a double from a stack slot into an XMM register.
// The slot is addressed as rsp + the operand's displacement.
5897 instruct loadSSD(regD dst, stackSlotD src)
5898 %{
5899   match(Set dst src);
5900 
5901   ins_cost(125);
5902   format %{ "movsd   $dst, $src\t# double stk" %}
5903   ins_encode  %{
5904     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5905   %}
5906   ins_pipe(pipe_slow); // XXX
5907 %}
5908 
5909 // Prefetch instructions for allocation.
5910 // Must be safe to execute with invalid address (cannot fault).
5911 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 3:
// emits PREFETCHW on the given memory operand.
5912 instruct prefetchAlloc( memory mem ) %{
5913   predicate(AllocatePrefetchInstr==3);
5914   match(PrefetchAllocation mem);
5915   ins_cost(125);
5916 
5917   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5918   ins_encode %{
5919     __ prefetchw($mem$$Address);
5920   %}
5921   ins_pipe(ialu_mem);
5922 %}
5923 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 0:
// emits PREFETCHNTA on the given memory operand.
5924 instruct prefetchAllocNTA( memory mem ) %{
5925   predicate(AllocatePrefetchInstr==0);
5926   match(PrefetchAllocation mem);
5927   ins_cost(125);
5928 
5929   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5930   ins_encode %{
5931     __ prefetchnta($mem$$Address);
5932   %}
5933   ins_pipe(ialu_mem);
5934 %}
5935 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 1:
// emits PREFETCHT0 on the given memory operand.
5936 instruct prefetchAllocT0( memory mem ) %{
5937   predicate(AllocatePrefetchInstr==1);
5938   match(PrefetchAllocation mem);
5939   ins_cost(125);
5940 
5941   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5942   ins_encode %{
5943     __ prefetcht0($mem$$Address);
5944   %}
5945   ins_pipe(ialu_mem);
5946 %}
5947 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 2:
// emits PREFETCHT2 on the given memory operand.
5948 instruct prefetchAllocT2( memory mem ) %{
5949   predicate(AllocatePrefetchInstr==2);
5950   match(PrefetchAllocation mem);
5951   ins_cost(125);
5952 
5953   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5954   ins_encode %{
5955     __ prefetcht2($mem$$Address);
5956   %}
5957   ins_pipe(ialu_mem);
5958 %}
5959 
5960 //----------Store Instructions-------------------------------------------------
5961 
5962 // Store Byte
// Stores the low byte of an int register to memory (opcode 0x88).
// Uses the REX_breg_mem prefix encoding rather than REX_reg_mem --
// presumably so that every register's low byte is encodable; confirm
// against the enc_class definition.
5963 instruct storeB(memory mem, rRegI src)
5964 %{
5965   match(Set mem (StoreB mem src));
5966 
5967   ins_cost(125); // XXX
5968   format %{ "movb    $mem, $src\t# byte" %}
5969   opcode(0x88);
5970   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5971   ins_pipe(ialu_mem_reg);
5972 %}
5973 
5974 // Store Char/Short
// Stores the low 16 bits of an int register to memory: the 32-bit store
// opcode 0x89 preceded by SizePrefix. The prefix is emitted before the REX
// byte, as the ins_encode order shows.
5975 instruct storeC(memory mem, rRegI src)
5976 %{
5977   match(Set mem (StoreC mem src));
5978 
5979   ins_cost(125); // XXX
5980   format %{ "movw    $mem, $src\t# char/short" %}
5981   opcode(0x89);
5982   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5983   ins_pipe(ialu_mem_reg);
5984 %}
5985 
5986 // Store Integer
// Stores an int register to memory: opcode 0x89 with a non-wide REX prefix.
5987 instruct storeI(memory mem, rRegI src)
5988 %{
5989   match(Set mem (StoreI mem src));
5990 
5991   ins_cost(125); // XXX
5992   format %{ "movl    $mem, $src\t# int" %}
5993   opcode(0x89);
5994   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5995   ins_pipe(ialu_mem_reg);
5996 %}
5997 
5998 // Store Long
// Stores a long register to memory: opcode 0x89 with the wide REX prefix.
5999 instruct storeL(memory mem, rRegL src)
6000 %{
6001   match(Set mem (StoreL mem src));
6002 
6003   ins_cost(125); // XXX
6004   format %{ "movq    $mem, $src\t# long" %}
6005   opcode(0x89);
6006   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6007   ins_pipe(ialu_mem_reg); // XXX
6008 %}
6009 
6010 // Store Pointer
// Stores a pointer register (operand class any_RegP) to memory: opcode 0x89
// with the wide REX prefix, the same encoding as storeL.
6011 instruct storeP(memory mem, any_RegP src)
6012 %{
6013   match(Set mem (StoreP mem src));
6014 
6015   ins_cost(125); // XXX
6016   format %{ "movq    $mem, $src\t# ptr" %}
6017   opcode(0x89);
6018   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
6019   ins_pipe(ialu_mem_reg);
6020 %}
6021 
// Store the immP0 pointer constant by storing R12 instead of an immediate.
// Only selected when compressed oops are on and both the narrow-oop base
// and the narrow-klass base are NULL; the format string documents that R12
// (heap base) is zero in that configuration. Cheaper (125) than the
// immediate form storeImmP (150).
6022 instruct storeImmP0(memory mem, immP0 zero)
6023 %{
6024   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6025   match(Set mem (StoreP mem zero));
6026 
6027   ins_cost(125); // XXX
6028   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
6029   ins_encode %{
6030     __ movq($mem$$Address, r12);
6031   %}
6032   ins_pipe(ialu_mem_reg);
6033 %}
6034 
6035 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matches pointer constants of operand class immP31 and stores them with
// opcode C7 /0 under a wide REX prefix, followed by a 32-bit immediate
// (Con32).
6036 instruct storeImmP(memory mem, immP31 src)
6037 %{
6038   match(Set mem (StoreP mem src));
6039 
6040   ins_cost(150); // XXX
6041   format %{ "movq    $mem, $src\t# ptr" %}
6042   opcode(0xC7); /* C7 /0 */
6043   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6044   ins_pipe(ialu_mem_imm);
6045 %}
6046 
6047 // Store Compressed Pointer
// Stores a narrow-oop register to memory with a 32-bit movl.
6048 instruct storeN(memory mem, rRegN src)
6049 %{
6050   match(Set mem (StoreN mem src));
6051 
6052   ins_cost(125); // XXX
6053   format %{ "movl    $mem, $src\t# compressed ptr" %}
6054   ins_encode %{
6055     __ movl($mem$$Address, $src$$Register);
6056   %}
6057   ins_pipe(ialu_mem_reg);
6058 %}
6059 
// Store a compressed klass pointer held in a narrow register to memory
// with a 32-bit movl (same encoding as storeN, different ideal node).
6060 instruct storeNKlass(memory mem, rRegN src)
6061 %{
6062   match(Set mem (StoreNKlass mem src));
6063 
6064   ins_cost(125); // XXX
6065   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6066   ins_encode %{
6067     __ movl($mem$$Address, $src$$Register);
6068   %}
6069   ins_pipe(ialu_mem_reg);
6070 %}
6071 
// Store the immN0 compressed-pointer constant by storing the low 32 bits
// of R12. Only selected when the narrow-oop base and narrow-klass base are
// both NULL; the format string documents that R12 (heap base) is zero then.
// Unlike the sibling *0 rules, the predicate does not test
// UseCompressedOops explicitly.
6072 instruct storeImmN0(memory mem, immN0 zero)
6073 %{
6074   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
6075   match(Set mem (StoreN mem zero));
6076 
6077   ins_cost(125); // XXX
6078   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6079   ins_encode %{
6080     __ movl($mem$$Address, r12);
6081   %}
6082   ins_pipe(ialu_mem_reg);
6083 %}
6084 
// Store a compressed oop constant (immN) directly to memory.
// A NULL constant is stored as a literal 32-bit zero; any other constant
// goes through set_narrow_oop.
6085 instruct storeImmN(memory mem, immN src)
6086 %{
6087   match(Set mem (StoreN mem src));
6088 
6089   ins_cost(150); // XXX
6090   format %{ "movl    $mem, $src\t# compressed ptr" %}
6091   ins_encode %{
6092     address con = (address)$src$$constant;
6093     if (con == NULL) {
6094       __ movl($mem$$Address, (int32_t)0);
6095     } else {
6096       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6097     }
6098   %}
6099   ins_pipe(ialu_mem_imm);
6100 %}
6101 
// Store a compressed klass constant (immNKlass) directly to memory via
// set_narrow_klass. Unlike storeImmN there is no special case for NULL.
6102 instruct storeImmNKlass(memory mem, immNKlass src)
6103 %{
6104   match(Set mem (StoreNKlass mem src));
6105 
6106   ins_cost(150); // XXX
6107   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6108   ins_encode %{
6109     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6110   %}
6111   ins_pipe(ialu_mem_imm);
6112 %}
6113 
6114 // Store Integer Immediate
// Zero case: store the low 32 bits of R12 instead of an immediate.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
// Cost 125 is lower than storeImmI's 150.
6115 instruct storeImmI0(memory mem, immI0 zero)
6116 %{
6117   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6118   match(Set mem (StoreI mem zero));
6119 
6120   ins_cost(125); // XXX
6121   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6122   ins_encode %{
6123     __ movl($mem$$Address, r12);
6124   %}
6125   ins_pipe(ialu_mem_reg);
6126 %}
6127 
// Store an int immediate to memory: opcode C7 /0 with a non-wide REX
// prefix, followed by the 32-bit immediate (Con32).
6128 instruct storeImmI(memory mem, immI src)
6129 %{
6130   match(Set mem (StoreI mem src));
6131 
6132   ins_cost(150);
6133   format %{ "movl    $mem, $src\t# int" %}
6134   opcode(0xC7); /* C7 /0 */
6135   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6136   ins_pipe(ialu_mem_imm);
6137 %}
6138 
6139 // Store Long Immediate
// Zero case: store all 64 bits of R12 instead of an immediate.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
// Cost 125 is lower than storeImmL's 150.
6140 instruct storeImmL0(memory mem, immL0 zero)
6141 %{
6142   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6143   match(Set mem (StoreL mem zero));
6144 
6145   ins_cost(125); // XXX
6146   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6147   ins_encode %{
6148     __ movq($mem$$Address, r12);
6149   %}
6150   ins_pipe(ialu_mem_reg);
6151 %}
6152 
// Store a long constant of operand class immL32 to memory: opcode C7 /0
// under a wide REX prefix, followed by a 32-bit immediate (Con32).
6153 instruct storeImmL(memory mem, immL32 src)
6154 %{
6155   match(Set mem (StoreL mem src));
6156 
6157   ins_cost(150);
6158   format %{ "movq    $mem, $src\t# long" %}
6159   opcode(0xC7); /* C7 /0 */
6160   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6161   ins_pipe(ialu_mem_imm);
6162 %}
6163 
6164 // Store Short/Char Immediate
// Zero case: store the low 16 bits of R12 instead of an immediate.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
6165 instruct storeImmC0(memory mem, immI0 zero)
6166 %{
6167   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6168   match(Set mem (StoreC mem zero));
6169 
6170   ins_cost(125); // XXX
6171   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6172   ins_encode %{
6173     __ movw($mem$$Address, r12);
6174   %}
6175   ins_pipe(ialu_mem_reg);
6176 %}
6177 
// Store a 16-bit immediate (immI16) to a short/char location.
// Only enabled when the UseStoreImmI16 flag is set. Encoded as the 32-bit
// immediate store (C7 /0) with SizePrefix in front and a 16-bit immediate
// (Con16).
6178 instruct storeImmI16(memory mem, immI16 src)
6179 %{
6180   predicate(UseStoreImmI16);
6181   match(Set mem (StoreC mem src));
6182 
6183   ins_cost(150);
6184   format %{ "movw    $mem, $src\t# short/char" %}
6185   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6186   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6187   ins_pipe(ialu_mem_imm);
6188 %}
6189 
6190 // Store Byte Immediate
// Zero case: store the low byte of R12 instead of an immediate.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
// The format annotation reads "byte" to match the movb / StoreB being
// emitted (it previously carried storeImmC0's "short/char" text).
6191 instruct storeImmB0(memory mem, immI0 zero)
6192 %{
6193   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6194   match(Set mem (StoreB mem zero));
6195 
6196   ins_cost(125); // XXX
6197   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6198   ins_encode %{
6199     __ movb($mem$$Address, r12);
6200   %}
6201   ins_pipe(ialu_mem_reg);
6202 %}
6203 
// Store an 8-bit immediate (immI8) to a byte location: opcode C6 /0 with a
// non-wide REX prefix; the immediate is emitted through Con8or32.
6204 instruct storeImmB(memory mem, immI8 src)
6205 %{
6206   match(Set mem (StoreB mem src));
6207 
6208   ins_cost(150); // XXX
6209   format %{ "movb    $mem, $src\t# byte" %}
6210   opcode(0xC6); /* C6 /0 */
6211   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6212   ins_pipe(ialu_mem_imm);
6213 %}
6214 
6215 // Store CMS card-mark Immediate
// Register form: write a zero card-mark byte by storing the low byte of
// R12. Only selected when compressed oops are on and both narrow bases are
// NULL; the format string documents that R12 (heap base) is zero then.
// Cost 125 is lower than the immediate form storeImmCM0 (150).
6216 instruct storeImmCM0_reg(memory mem, immI0 zero)
6217 %{
6218   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6219   match(Set mem (StoreCM mem zero));
6220 
6221   ins_cost(125); // XXX
6222   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6223   ins_encode %{
6224     __ movb($mem$$Address, r12);
6225   %}
6226   ins_pipe(ialu_mem_reg);
6227 %}
6228 
// Immediate form of the zero card-mark store: opcode C6 /0 with the
// immediate emitted through Con8or32. Has no predicate, so it is available
// in every configuration.
6229 instruct storeImmCM0(memory mem, immI0 src)
6230 %{
6231   match(Set mem (StoreCM mem src));
6232 
6233   ins_cost(150); // XXX
6234   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6235   opcode(0xC6); /* C6 /0 */
6236   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6237   ins_pipe(ialu_mem_imm);
6238 %}
6239 
6240 // Store Float
// Stores an XMM float register to memory via movflt.
6241 instruct storeF(memory mem, regF src)
6242 %{
6243   match(Set mem (StoreF mem src));
6244 
6245   ins_cost(95); // XXX
6246   format %{ "movss   $mem, $src\t# float" %}
6247   ins_encode %{
6248     __ movflt($mem$$Address, $src$$XMMRegister);
6249   %}
6250   ins_pipe(pipe_slow); // XXX
6251 %}
6252 
6253 // Store immediate Float value (it is faster than store from XMM register)
// Zero case: store the low 32 bits of R12 as the float 0.0 bit pattern.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
// Cost 25 is lower than both storeF_imm (50) and storeF (95).
6254 instruct storeF0(memory mem, immF0 zero)
6255 %{
6256   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6257   match(Set mem (StoreF mem zero));
6258 
6259   ins_cost(25); // XXX
6260   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6261   ins_encode %{
6262     __ movl($mem$$Address, r12);
6263   %}
6264   ins_pipe(ialu_mem_reg);
6265 %}
6266 
// Store a float constant (immF) to memory as an integer immediate:
// opcode C7 /0 with the constant emitted by Con32F_as_bits.
// Cost 50 is lower than the XMM-register store storeF (95).
6267 instruct storeF_imm(memory mem, immF src)
6268 %{
6269   match(Set mem (StoreF mem src));
6270 
6271   ins_cost(50);
6272   format %{ "movl    $mem, $src\t# float" %}
6273   opcode(0xC7); /* C7 /0 */
6274   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6275   ins_pipe(ialu_mem_imm);
6276 %}
6277 
6278 // Store Double
// Stores an XMM double register to memory via movdbl.
6279 instruct storeD(memory mem, regD src)
6280 %{
6281   match(Set mem (StoreD mem src));
6282 
6283   ins_cost(95); // XXX
6284   format %{ "movsd   $mem, $src\t# double" %}
6285   ins_encode %{
6286     __ movdbl($mem$$Address, $src$$XMMRegister);
6287   %}
6288   ins_pipe(pipe_slow); // XXX
6289 %}
6290 
6291 // Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form: opcode C7 /0 under a wide REX prefix, with the constant
// emitted by Con32F_as_bits. Selected when compressed oops are off or the
// narrow-oop base is non-NULL.
// NOTE(review): this predicate is not the exact complement of storeD0's
// (it does not look at narrow_klass_base), so with a NULL oop base and a
// non-NULL klass base neither zero-store rule applies -- confirm intended.
6292 instruct storeD0_imm(memory mem, immD0 src)
6293 %{
6294   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
6295   match(Set mem (StoreD mem src));
6296 
6297   ins_cost(50);
6298   format %{ "movq    $mem, $src\t# double 0." %}
6299   opcode(0xC7); /* C7 /0 */
6300   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6301   ins_pipe(ialu_mem_imm);
6302 %}
6303 
// Store double 0.0 by storing all 64 bits of R12.
// Only selected when compressed oops are on and both narrow bases are NULL;
// the format string documents that R12 (heap base) is zero then.
// Cost 25 is lower than both storeD0_imm (50) and storeD (95).
6304 instruct storeD0(memory mem, immD0 zero)
6305 %{
6306   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6307   match(Set mem (StoreD mem zero));
6308 
6309   ins_cost(25); // XXX
6310   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6311   ins_encode %{
6312     __ movq($mem$$Address, r12);
6313   %}
6314   ins_pipe(ialu_mem_reg);
6315 %}
6316 
// Spill an int register to a stack slot: opcode 0x89 with a non-wide REX
// prefix. Mirror image of loadSSI.
6317 instruct storeSSI(stackSlotI dst, rRegI src)
6318 %{
6319   match(Set dst src);
6320 
6321   ins_cost(100);
6322   format %{ "movl    $dst, $src\t# int stk" %}
6323   opcode(0x89);
6324   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6325   ins_pipe( ialu_mem_reg );
6326 %}
6327 
// Spill a long register to a stack slot: opcode 0x89 with the wide REX
// prefix. Mirror image of loadSSL.
6328 instruct storeSSL(stackSlotL dst, rRegL src)
6329 %{
6330   match(Set dst src);
6331 
6332   ins_cost(100);
6333   format %{ "movq    $dst, $src\t# long stk" %}
6334   opcode(0x89);
6335   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6336   ins_pipe(ialu_mem_reg);
6337 %}
6338 
// Spill a pointer register to a stack slot: opcode 0x89 with the wide REX
// prefix. Mirror image of loadSSP.
6339 instruct storeSSP(stackSlotP dst, rRegP src)
6340 %{
6341   match(Set dst src);
6342 
6343   ins_cost(100);
6344   format %{ "movq    $dst, $src\t# ptr stk" %}
6345   opcode(0x89);
6346   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6347   ins_pipe(ialu_mem_reg);
6348 %}
6349 
// Spill an XMM float register to a stack slot addressed as
// rsp + the operand's displacement.
6350 instruct storeSSF(stackSlotF dst, regF src)
6351 %{
6352   match(Set dst src);
6353 
6354   ins_cost(95); // XXX
6355   format %{ "movss   $dst, $src\t# float stk" %}
6356   ins_encode %{
6357     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6358   %}
6359   ins_pipe(pipe_slow); // XXX
6360 %}
6361 
// Spill an XMM double register to a stack slot addressed as
// rsp + the operand's displacement.
6362 instruct storeSSD(stackSlotD dst, regD src)
6363 %{
6364   match(Set dst src);
6365 
6366   ins_cost(95); // XXX
6367   format %{ "movsd   $dst, $src\t# double stk" %}
6368   ins_encode %{
6369     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6370   %}
6371   ins_pipe(pipe_slow); // XXX
6372 %}
6373 
6374 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of a 32-bit int in place (dst is both input and
// output of ReverseBytesI). Two-byte opcode 0F C8+reg with a non-wide REX
// prefix.
6375 instruct bytes_reverse_int(rRegI dst) %{
6376   match(Set dst (ReverseBytesI dst));
6377 
6378   format %{ "bswapl  $dst" %}
6379   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6380   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6381   ins_pipe( ialu_reg );
6382 %}
6383 
// Reverse the byte order of a 64-bit long in place (dst is both input and
// output of ReverseBytesL). Same opcode as bytes_reverse_int but with the
// wide REX prefix.
6384 instruct bytes_reverse_long(rRegL dst) %{
6385   match(Set dst (ReverseBytesL dst));
6386 
6387   format %{ "bswapq  $dst" %}
6388   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6389   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6390   ins_pipe( ialu_reg);
6391 %}
6392 
// Reverse the two bytes of an unsigned 16-bit value in place.
// A 32-bit bswap moves the swapped halfword into the upper 16 bits; the
// logical right shift by 16 brings it back down with zero fill.
// The flags register is declared killed.
6393 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
6394   match(Set dst (ReverseBytesUS dst));
6395   effect(KILL cr);
6396 
6397   format %{ "bswapl  $dst\n\t"
6398             "shrl    $dst,16\n\t" %}
6399   ins_encode %{
6400     __ bswapl($dst$$Register);
6401     __ shrl($dst$$Register, 16);
6402   %}
6403   ins_pipe( ialu_reg );
6404 %}
6405 
// Reverse the two bytes of a signed 16-bit value in place.
// A 32-bit bswap moves the swapped halfword into the upper 16 bits; the
// arithmetic right shift by 16 brings it back down with sign extension.
// The flags register is declared killed.
// The format string prints "sarl" so the listing names the instruction that
// is actually emitted, matching the "shrl" of the unsigned sibling rule.
6406 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
6407   match(Set dst (ReverseBytesS dst));
6408   effect(KILL cr);
6409 
6410   format %{ "bswapl  $dst\n\t"
6411             "sarl    $dst,16\n\t" %}
6412   ins_encode %{
6413     __ bswapl($dst$$Register);
6414     __ sarl($dst$$Register, 16);
6415   %}
6416   ins_pipe( ialu_reg );
6417 %}
6418 
6419 //---------- Zeros Count Instructions ------------------------------------------
6420 
// Count leading zeros of an int with a single lzcntl.
// Used only when UseCountLeadingZerosInstruction is set; otherwise
// countLeadingZerosI_bsr applies. The flags register is declared killed.
6421 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6422   predicate(UseCountLeadingZerosInstruction);
6423   match(Set dst (CountLeadingZerosI src));
6424   effect(KILL cr);
6425 
6426   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6427   ins_encode %{
6428     __ lzcntl($dst$$Register, $src$$Register);
6429   %}
6430   ins_pipe(ialu_reg);
6431 %}
6432 
// Count leading zeros of an int without lzcnt, using bsr.
// Used only when UseCountLeadingZerosInstruction is not set.
// Computes 31 - (index of highest set bit); when src is zero the index is
// forced to -1, so the result is 32. The flags register is declared killed.
6433 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
6434   predicate(!UseCountLeadingZerosInstruction);
6435   match(Set dst (CountLeadingZerosI src));
6436   effect(KILL cr);
6437 
6438   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6439             "jnz     skip\n\t"
6440             "movl    $dst, -1\n"
6441       "skip:\n\t"
6442             "negl    $dst\n\t"
6443             "addl    $dst, 31" %}
6444   ins_encode %{
6445     Register Rdst = $dst$$Register;
6446     Register Rsrc = $src$$Register;
6447     Label skip;
6448     __ bsrl(Rdst, Rsrc);                // Rdst = index of highest set bit
6449     __ jccb(Assembler::notZero, skip);  // src != 0: index is valid
6450     __ movl(Rdst, -1);                  // src == 0: substitute index -1
6451     __ bind(skip);
6452     __ negl(Rdst);
6453     __ addl(Rdst, BitsPerInt - 1);      // Rdst = (BitsPerInt - 1) - index
6454   %}
6455   ins_pipe(ialu_reg);
6456 %}
6457 
// Count leading zeros of a long with a single lzcntq; the result is an int
// register. Used only when UseCountLeadingZerosInstruction is set.
// The flags register is declared killed.
6458 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6459   predicate(UseCountLeadingZerosInstruction);
6460   match(Set dst (CountLeadingZerosL src));
6461   effect(KILL cr);
6462 
6463   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6464   ins_encode %{
6465     __ lzcntq($dst$$Register, $src$$Register);
6466   %}
6467   ins_pipe(ialu_reg);
6468 %}
6469 
// Count leading zeros of a long without lzcnt, using bsrq.
// Used only when UseCountLeadingZerosInstruction is not set.
// Computes 63 - (index of highest set bit); when src is zero the index is
// forced to -1, so the result is 64. The flags register is declared killed.
6470 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
6471   predicate(!UseCountLeadingZerosInstruction);
6472   match(Set dst (CountLeadingZerosL src));
6473   effect(KILL cr);
6474 
6475   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6476             "jnz     skip\n\t"
6477             "movl    $dst, -1\n"
6478       "skip:\n\t"
6479             "negl    $dst\n\t"
6480             "addl    $dst, 63" %}
6481   ins_encode %{
6482     Register Rdst = $dst$$Register;
6483     Register Rsrc = $src$$Register;
6484     Label skip;
6485     __ bsrq(Rdst, Rsrc);                // Rdst = index of highest set bit
6486     __ jccb(Assembler::notZero, skip);  // src != 0: index is valid
6487     __ movl(Rdst, -1);                  // src == 0: substitute index -1
6488     __ bind(skip);
6489     __ negl(Rdst);
6490     __ addl(Rdst, BitsPerLong - 1);     // Rdst = (BitsPerLong - 1) - index
6491   %}
6492   ins_pipe(ialu_reg);
6493 %}
6494 
// Count trailing zeros of an int with a single tzcntl.
// Used only when UseCountTrailingZerosInstruction is set; otherwise
// countTrailingZerosI_bsf applies. The flags register is declared killed.
6495 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6496   predicate(UseCountTrailingZerosInstruction);
6497   match(Set dst (CountTrailingZerosI src));
6498   effect(KILL cr);
6499 
6500   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
6501   ins_encode %{
6502     __ tzcntl($dst$$Register, $src$$Register);
6503   %}
6504   ins_pipe(ialu_reg);
6505 %}
6506 
// Count trailing zeros of an int without tzcnt, using bsf.
// Used only when UseCountTrailingZerosInstruction is not set.
// bsf gives the index of the lowest set bit, which is the answer for a
// non-zero src; for a zero src the result is set to BitsPerInt (32).
// The flags register is declared killed.
6507 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
6508   predicate(!UseCountTrailingZerosInstruction);
6509   match(Set dst (CountTrailingZerosI src));
6510   effect(KILL cr);
6511 
6512   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6513             "jnz     done\n\t"
6514             "movl    $dst, 32\n"
6515       "done:" %}
6516   ins_encode %{
6517     Register Rdst = $dst$$Register;
6518     Label done;
6519     __ bsfl(Rdst, $src$$Register);
6520     __ jccb(Assembler::notZero, done);  // src != 0: bsf result is final
6521     __ movl(Rdst, BitsPerInt);          // src == 0: every bit is a trailing zero
6522     __ bind(done);
6523   %}
6524   ins_pipe(ialu_reg);
6525 %}
6526 
// Count trailing zeros of a long with a single tzcntq; the result is an int
// register. Used only when UseCountTrailingZerosInstruction is set.
// The flags register is declared killed.
6527 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6528   predicate(UseCountTrailingZerosInstruction);
6529   match(Set dst (CountTrailingZerosL src));
6530   effect(KILL cr);
6531 
6532   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
6533   ins_encode %{
6534     __ tzcntq($dst$$Register, $src$$Register);
6535   %}
6536   ins_pipe(ialu_reg);
6537 %}
6538 
// Count trailing zeros of a long without tzcnt, using bsfq.
// Used only when UseCountTrailingZerosInstruction is not set.
// bsfq gives the index of the lowest set bit, which is the answer for a
// non-zero src; for a zero src the result is set to BitsPerLong (64).
// The flags register is declared killed.
6539 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
6540   predicate(!UseCountTrailingZerosInstruction);
6541   match(Set dst (CountTrailingZerosL src));
6542   effect(KILL cr);
6543 
6544   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6545             "jnz     done\n\t"
6546             "movl    $dst, 64\n"
6547       "done:" %}
6548   ins_encode %{
6549     Register Rdst = $dst$$Register;
6550     Label done;
6551     __ bsfq(Rdst, $src$$Register);
6552     __ jccb(Assembler::notZero, done);  // src != 0: bsf result is final
6553     __ movl(Rdst, BitsPerLong);         // src == 0: every bit is a trailing zero
6554     __ bind(done);
6555   %}
6556   ins_pipe(ialu_reg);
6557 %}
6558 
6559 
6560 //---------- Population Count Instructions -------------------------------------
6561 
// Population count of an int register (popcntl).
// Only available when UsePopCountInstruction is set; no fallback rule is
// defined in this chunk. The flags register is declared killed.
6562 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
6563   predicate(UsePopCountInstruction);
6564   match(Set dst (PopCountI src));
6565   effect(KILL cr);
6566 
6567   format %{ "popcnt  $dst, $src" %}
6568   ins_encode %{
6569     __ popcntl($dst$$Register, $src$$Register);
6570   %}
6571   ins_pipe(ialu_reg);
6572 %}
6573 
// Population count of an int loaded from memory: the LoadI is folded into
// the popcntl memory operand. Only available when UsePopCountInstruction is
// set. The flags register is declared killed.
6574 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6575   predicate(UsePopCountInstruction);
6576   match(Set dst (PopCountI (LoadI mem)));
6577   effect(KILL cr);
6578 
6579   format %{ "popcnt  $dst, $mem" %}
6580   ins_encode %{
6581     __ popcntl($dst$$Register, $mem$$Address);
6582   %}
6583   ins_pipe(ialu_reg);
6584 %}
6585 
6586 // Note: Long.bitCount(long) returns an int.
// Population count of a long register (popcntq) into an int register.
// Only available when UsePopCountInstruction is set. The flags register is
// declared killed.
6587 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
6588   predicate(UsePopCountInstruction);
6589   match(Set dst (PopCountL src));
6590   effect(KILL cr);
6591 
6592   format %{ "popcnt  $dst, $src" %}
6593   ins_encode %{
6594     __ popcntq($dst$$Register, $src$$Register);
6595   %}
6596   ins_pipe(ialu_reg);
6597 %}
6598 
6599 // Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: the LoadL is folded into
// the popcntq memory operand, and the result goes to an int register.
// Only available when UsePopCountInstruction is set. The flags register is
// declared killed.
6600 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6601   predicate(UsePopCountInstruction);
6602   match(Set dst (PopCountL (LoadL mem)));
6603   effect(KILL cr);
6604 
6605   format %{ "popcnt  $dst, $mem" %}
6606   ins_encode %{
6607     __ popcntq($dst$$Register, $mem$$Address);
6608   %}
6609   ins_pipe(ialu_reg);
6610 %}
6611 
6612 
6613 //----------MemBar Instructions-----------------------------------------------
6614 // Memory barrier flavors
6615 
// Acquire barrier: matches both MemBarAcquire and LoadFence and emits no
// code (size 0, empty encoding, zero cost).
6616 instruct membar_acquire()
6617 %{
6618   match(MemBarAcquire);
6619   match(LoadFence);
6620   ins_cost(0);
6621 
6622   size(0);
6623   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6624   ins_encode();
6625   ins_pipe(empty);
6626 %}
6627 
// Acquire barrier associated with locking: emits no code. Per the format
// string, the CMPXCHG already executed in FastLock makes it unnecessary.
6628 instruct membar_acquire_lock()
6629 %{
6630   match(MemBarAcquireLock);
6631   ins_cost(0);
6632 
6633   size(0);
6634   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6635   ins_encode();
6636   ins_pipe(empty);
6637 %}
6638 
// Release barrier: matches both MemBarRelease and StoreFence and emits no
// code (size 0, empty encoding, zero cost).
6639 instruct membar_release()
6640 %{
6641   match(MemBarRelease);
6642   match(StoreFence);
6643   ins_cost(0);
6644 
6645   size(0);
6646   format %{ "MEMBAR-release ! (empty encoding)" %}
6647   ins_encode();
6648   ins_pipe(empty);
6649 %}
6650 
// Release barrier associated with unlocking: emits no code. Per the format
// string, the FastUnlock that follows makes it unnecessary.
6651 instruct membar_release_lock()
6652 %{
6653   match(MemBarReleaseLock);
6654   ins_cost(0);
6655 
6656   size(0);
6657   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6658   ins_encode();
6659   ins_pipe(empty);
6660 %}
6661 
// Volatile (StoreLoad) barrier: the only barrier in this group that emits
// code. Calls membar(Assembler::StoreLoad); the format string shows the
// resulting instruction as "lock addl [rsp + #0], 0".
// The flags register is declared killed, and the high cost (400) lets the
// zero-cost unnecessary_membar_volatile win whenever its predicate holds.
6662 instruct membar_volatile(rFlagsReg cr) %{
6663   match(MemBarVolatile);
6664   effect(KILL cr);
6665   ins_cost(400);
6666 
6667   format %{
6668     $$template
6669     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6670   %}
6671   ins_encode %{
6672     __ membar(Assembler::StoreLoad);
6673   %}
6674   ins_pipe(pipe_slow);
6675 %}
6676 
// Elided volatile barrier: matches MemBarVolatile when
// Matcher::post_store_load_barrier(n) reports true for the node, and emits
// no code (size 0, empty encoding, zero cost).
6677 instruct unnecessary_membar_volatile()
6678 %{
6679   match(MemBarVolatile);
6680   predicate(Matcher::post_store_load_barrier(n));
6681   ins_cost(0);
6682 
6683   size(0);
6684   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6685   ins_encode();
6686   ins_pipe(empty);
6687 %}
6688 
// StoreStore barrier: emits no code (size 0, empty encoding, zero cost).
6689 instruct membar_storestore() %{
6690   match(MemBarStoreStore);
6691   ins_cost(0);
6692 
6693   size(0);
6694   format %{ "MEMBAR-storestore (empty encoding)" %}
6695   ins_encode( );
6696   ins_pipe(empty);
6697 %}
6698 
6699 //----------Move Instructions--------------------------------------------------
6700 
// Reinterpret a long as a pointer (CastX2P). Emits a register move only
// when the allocator assigned different registers to dst and src;
// otherwise no instruction is generated.
6701 instruct castX2P(rRegP dst, rRegL src)
6702 %{
6703   match(Set dst (CastX2P src));
6704 
6705   format %{ "movq    $dst, $src\t# long->ptr" %}
6706   ins_encode %{
6707     if ($dst$$reg != $src$$reg) {
6708       __ movptr($dst$$Register, $src$$Register);
6709     }
6710   %}
6711   ins_pipe(ialu_reg_reg); // XXX
6712 %}
6713 
// Reinterpret a pointer as a long (CastP2X). Emits a register move only
// when the allocator assigned different registers to dst and src;
// otherwise no instruction is generated.
6714 instruct castP2X(rRegL dst, rRegP src)
6715 %{
6716   match(Set dst (CastP2X src));
6717 
6718   format %{ "movq    $dst, $src\t# ptr -> long" %}
6719   ins_encode %{
6720     if ($dst$$reg != $src$$reg) {
6721       __ movptr($dst$$Register, $src$$Register);
6722     }
6723   %}
6724   ins_pipe(ialu_reg_reg); // XXX
6725 %}
6726 
6727 // Convert oop into int for vectors alignment masking
// Collapses the ConvL2I(CastP2X(src)) subtree into a single 32-bit movl,
// which keeps only the low 32 bits of the pointer.
6728 instruct convP2I(rRegI dst, rRegP src)
6729 %{
6730   match(Set dst (ConvL2I (CastP2X src)));
6731 
6732   format %{ "movl    $dst, $src\t# ptr -> int" %}
6733   ins_encode %{
6734     __ movl($dst$$Register, $src$$Register);
6735   %}
6736   ins_pipe(ialu_reg_reg); // XXX
6737 %}
6738 
6739 // Convert compressed oop into int for vectors alignment masking
6740 // in case of 32bit oops (heap < 4Gb).
// Collapses ConvL2I(CastP2X(DecodeN(src))) into a single 32-bit movl of the
// narrow value; only valid, and only selected, when the narrow-oop shift
// is 0.
6741 instruct convN2I(rRegI dst, rRegN src)
6742 %{
6743   predicate(Universe::narrow_oop_shift() == 0);
6744   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6745 
6746   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6747   ins_encode %{
6748     __ movl($dst$$Register, $src$$Register);
6749   %}
6750   ins_pipe(ialu_reg_reg); // XXX
6751 %}
6752 
6753 // Convert oop pointer into compressed form
// General (possibly-NULL) case: selected when the node's pointer type is
// not TypePtr::NotNull. Copies src to dst if they differ, then encodes dst
// in place. The flags register is declared killed.
6754 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
6755   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6756   match(Set dst (EncodeP src));
6757   effect(KILL cr);
6758   format %{ "encode_heap_oop $dst,$src" %}
6759   ins_encode %{
6760     Register s = $src$$Register;
6761     Register d = $dst$$Register;
6762     if (s != d) {
6763       __ movq(d, s);
6764     }
6765     __ encode_heap_oop(d);
6766   %}
6767   ins_pipe(ialu_reg_long);
6768 %}
6769 
// Compress an oop that is known to be non-NULL: selected when the node's
// pointer type is TypePtr::NotNull. Uses the two-register
// encode_heap_oop_not_null form, so no explicit copy is needed.
// The flags register is declared killed.
6770 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6771   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6772   match(Set dst (EncodeP src));
6773   effect(KILL cr);
6774   format %{ "encode_heap_oop_not_null $dst,$src" %}
6775   ins_encode %{
6776     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6777   %}
6778   ins_pipe(ialu_reg_long);
6779 %}
6780 
// Decompress a narrow oop that may be NULL: selected when the node's
// pointer type is neither TypePtr::NotNull nor TypePtr::Constant.
// Copies src to dst if they differ, then decodes dst in place.
// The flags register is declared killed.
6781 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
6782   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6783             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6784   match(Set dst (DecodeN src));
6785   effect(KILL cr);
6786   format %{ "decode_heap_oop $dst,$src" %}
6787   ins_encode %{
6788     Register s = $src$$Register;
6789     Register d = $dst$$Register;
6790     if (s != d) {
6791       __ movq(d, s);
6792     }
6793     __ decode_heap_oop(d);
6794   %}
6795   ins_pipe(ialu_reg_long);
6796 %}
6797 
// Decompress a narrow oop known to be non-NULL: selected when the node's
// pointer type is TypePtr::NotNull or TypePtr::Constant (the complement of
// decodeHeapOop's predicate). Picks the two-register or the in-place
// overload of decode_heap_oop_not_null depending on whether dst and src are
// the same register. The flags register is declared killed.
6798 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6799   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6800             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6801   match(Set dst (DecodeN src));
6802   effect(KILL cr);
6803   format %{ "decode_heap_oop_not_null $dst,$src" %}
6804   ins_encode %{
6805     Register s = $src$$Register;
6806     Register d = $dst$$Register;
6807     if (s != d) {
6808       __ decode_heap_oop_not_null(d, s);
6809     } else {
6810       __ decode_heap_oop_not_null(d);
6811     }
6812   %}
6813   ins_pipe(ialu_reg_long);
6814 %}
6815 
// Compress a klass pointer (EncodePKlass) with the two-register
// encode_klass_not_null form. There is no nullable variant of this rule in
// this chunk. The flags register is declared killed.
6816 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6817   match(Set dst (EncodePKlass src));
6818   effect(KILL cr);
6819   format %{ "encode_klass_not_null $dst,$src" %}
6820   ins_encode %{
6821     __ encode_klass_not_null($dst$$Register, $src$$Register);
6822   %}
6823   ins_pipe(ialu_reg_long);
6824 %}
6825 
// Decompress a narrow klass pointer (DecodeNKlass). Picks the two-register
// or the in-place overload of decode_klass_not_null depending on whether
// dst and src are the same register. The flags register is declared killed.
6826 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6827   match(Set dst (DecodeNKlass src));
6828   effect(KILL cr);
6829   format %{ "decode_klass_not_null $dst,$src" %}
6830   ins_encode %{
6831     Register s = $src$$Register;
6832     Register d = $dst$$Register;
6833     if (s != d) {
6834       __ decode_klass_not_null(d, s);
6835     } else {
6836       __ decode_klass_not_null(d);
6837     }
6838   %}
6839   ins_pipe(ialu_reg_long);
6840 %}
6841 
6842 
6843 //----------Conditional Move---------------------------------------------------
6844 // Jump
6845 // dummy instruction for generating temp registers
// Table jump on (switch_val << shift): loads the jump-table base from
// $constantaddress into the TEMP register dest, then jumps indirectly
// through [dest + switch_val * scale].
// predicate(false) means the matcher never selects this rule.
// NOTE(review): dest is declared rRegI but holds a 64-bit table address
// (lea) -- confirm the register class is intended.
6846 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6847   match(Jump (LShiftL switch_val shift));
6848   ins_cost(350);
6849   predicate(false);
6850   effect(TEMP dest);
6851 
6852   format %{ "leaq    $dest, [$constantaddress]\n\t"
6853             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6854   ins_encode %{
6855     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6856     // to do that and the compiler is using that register as one it can allocate.
6857     // So we build it all by hand.
6858     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6859     // ArrayAddress dispatch(table, index);
6860     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6861     __ lea($dest$$Register, $constantaddress);
6862     __ jmp(dispatch);
6863   %}
6864   ins_pipe(pipe_jmp);
6865 %}
6866 
// Table jump on a shifted index plus constant offset:
// Jump (AddL (LShiftL switch_val shift) offset).  dest is a TEMP holding the
// table base.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest)
%{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) would do this, but the macro assembler needs r10 for it
    // and the compiler may have allocated r10.  Build the dispatch by hand instead
    // of:
    //   Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    //   ArrayAddress dispatch(table, index);
    Register table_base = $dest$$Register;
    Register index      = $switch_val$$Register;
    Address::ScaleFactor scale = (Address::ScaleFactor) $shift$$constant;
    int disp = (int) $offset$$constant;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, scale, disp));
  %}
  ins_pipe(pipe_jmp);
%}
6886 
// Table jump on an unscaled index: Jump switch_val.  dest is a TEMP holding
// the table base.
instruct jumpXtnd(rRegL switch_val, rRegI dest)
%{
  match(Jump switch_val);
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) would do this, but the macro assembler needs r10 for it
    // and the compiler may have allocated r10.  Build the dispatch by hand instead
    // of:
    //   Address index(noreg, switch_reg, Address::times_1);
    //   ArrayAddress dispatch(table, index);
    Register table_base = $dest$$Register;
    Register index      = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, Address::times_1));
  %}
  ins_pipe(pipe_jmp);
%}
6906 
6907 // Conditional move
// CMoveI, signed condition (cmpOp on rFlagsReg), register source: cmovl.
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
6918 
// CMoveI, unsigned condition (cmpOpU on rFlagsRegU), register source: cmovl.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
6928 
// CMoveI on cmpOpUCF/rFlagsRegUCF operands: expands to cmovI_regU.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
6936 
6937 // Conditional move
// CMoveI, signed condition, source folded from a LoadI memory operand: cmovl.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
6947 
6948 // Conditional move
// CMoveI, unsigned condition, source folded from a LoadI memory operand: cmovl.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
6959 
// CMoveI with a LoadI source on cmpOpUCF/rFlagsRegUCF operands: expands to
// cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);

  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
6967 
6968 // Conditional move
// CMoveN (compressed pointer), signed condition, register source: cmovl.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
6979 
6980 // Conditional move
// CMoveN (compressed pointer), unsigned condition, register source: cmovl.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
6991 
// CMoveN on cmpOpUCF/rFlagsRegUCF operands: expands to cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
6999 
7000 // Conditional move
// CMoveP, signed condition, register source: cmovq (wide REX prefix).
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7011 
7012 // Conditional move
// CMoveP, unsigned condition, register source: cmovq (wide REX prefix).
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7023 
// CMoveP on cmpOpUCF/rFlagsRegUCF operands: expands to cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7031 
7032 // DISABLED: Requires the ADLC to emit a bottom_type call that
7033 // correctly meets the two pointer arguments; one is an incoming
7034 // register but the other is a memory operand.  ALSO appears to
7035 // be buggy with implicit null checks.
7036 //
7037 //// Conditional move
7038 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
7039 //%{
7040 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7041 //  ins_cost(250);
7042 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7043 //  opcode(0x0F,0x40);
7044 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7045 //  ins_pipe( pipe_cmov_mem );
7046 //%}
7047 //
7048 //// Conditional move
7049 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7050 //%{
7051 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7052 //  ins_cost(250);
7053 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7054 //  opcode(0x0F,0x40);
7055 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7056 //  ins_pipe( pipe_cmov_mem );
7057 //%}
7058 
// CMoveL, signed condition, register source: cmovq (wide REX prefix).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);  // XXX
%}
7069 
// CMoveL, signed condition, source folded from a LoadL memory operand: cmovq.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem);  // XXX
%}
7080 
// CMoveL, unsigned condition, register source: cmovq (wide REX prefix).
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_reg_wide(dst, src),
             enc_cmov(cop),
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg); // XXX
%}
7091 
// CMoveL on cmpOpUCF/rFlagsRegUCF operands: expands to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7099 
// CMoveL, unsigned condition, source folded from a LoadL memory operand: cmovq.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200); // XXX

  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  opcode(0x0F, 0x40);
  ins_encode(REX_reg_mem_wide(dst, src),
             enc_cmov(cop),
             reg_mem(dst, src));
  ins_pipe(pipe_cmov_mem); // XXX
%}
7110 
// CMoveL with a LoadL source on cmpOpUCF/rFlagsRegUCF operands: expands to
// cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);

  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
7118 
// CMoveF, signed condition: emitted as a short branch around a movss.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // The branch skips the move, so it tests the inverse of the cmov condition
    // (condition code with its low bit flipped).
    Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7136 
7137 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7138 // %{
7139 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7140 
7141 //   ins_cost(200); // XXX
7142 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7143 //             "movss     $dst, $src\n"
7144 //     "skip:" %}
7145 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7146 //   ins_pipe(pipe_slow);
7147 // %}
7148 
// CMoveF, unsigned condition: emitted as a short branch around a movss.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // The branch skips the move, so it tests the inverse of the cmov condition
    // (condition code with its low bit flipped).
    Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7166 
// CMoveF on cmpOpUCF/rFlagsRegUCF operands: expands to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
7174 
// CMoveD, signed condition: emitted as a short branch around a movsd.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // The branch skips the move, so it tests the inverse of the cmov condition
    // (condition code with its low bit flipped).
    Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7192 
// CMoveD, unsigned condition: emitted as a short branch around a movsd.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200); // XXX

  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    // The branch skips the move, so it tests the inverse of the cmov condition
    // (condition code with its low bit flipped).
    Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(skip_cond, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
7210 
// CMoveD on cmpOpUCF/rFlagsRegUCF operands: expands to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);

  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
7218 
7219 //----------Arithmetic Instructions--------------------------------------------
7220 //----------Addition Instructions----------------------------------------------
7221 
// int add, register += register: addl.  Kills the flags register.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7232 
// int add, register += immediate: addl.  Kills the flags register.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7243 
// int add, register += LoadI from memory: addl.  Kills the flags register.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x03);
  ins_encode(REX_reg_mem(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
7255 
// int read-modify-write: StoreI of (LoadI dst + src) back to the same memory
// operand, as a single addl.  Kills the flags register.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7267 
// int read-modify-write with an immediate: StoreI of (LoadI dst + src) back to
// the same memory operand, as a single addl.  Kills the flags register.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addl    $dst, $src\t# int" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7279 
// int increment of a register: AddI with the immI1 operand becomes incl when
// UseIncDec is set.  Kills the flags register.
instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7291 
// int increment in memory: StoreI of (LoadI dst + immI1) becomes incl when
// UseIncDec is set.  Kills the flags register.
instruct incI_mem(memory dst, immI1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "incl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
7304 
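// Decrement is matched as AddI of the -1 immediate (immI_M1): C2 canonicalizes
// "x - 1" into "x + (-1)", so no SubI form reaches the matcher here.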
// int decrement of a register: AddI with the immI_M1 operand becomes decl when
// UseIncDec is set.  Kills the flags register.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (AddI dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7317 
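// Decrement is matched as AddI of the -1 immediate (immI_M1): C2 canonicalizes
// "x - 1" into "x + (-1)", so no SubI form reaches the matcher here.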
// int decrement in memory: StoreI of (LoadI dst + immI_M1) becomes decl when
// UseIncDec is set.  Kills the flags register.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "decl    $dst\t# int" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
7331 
// int add of register and immediate into a separate destination, via leal with
// a 0x67 prefix.  No flags effect is declared for this rule.
instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(Opcode(0x67),
             REX_reg_reg(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
7342 
// long add, register += register: addq.  Kills the flags register.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7353 
// long add, register += 32-bit immediate (immL32): addq.  Kills the flags
// register.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7364 
// long add, register += LoadL from memory: addq.  Kills the flags register.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x03);
  ins_encode(REX_reg_mem_wide(dst, src),
             OpcP,
             reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
7376 
// long read-modify-write: StoreL of (LoadL dst + src) back to the same memory
// operand, as a single addq.  Kills the flags register.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode(REX_reg_mem_wide(src, dst),
             OpcP,
             reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
7388 
// long read-modify-write with an immL32 immediate: StoreL of (LoadL dst + src)
// back to the same memory operand, as a single addq.  Kills the flags register.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode(REX_mem_wide(dst),
             OpcSE(src),
             RM_opc_mem(0x00, dst),
             Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
7401 
// long increment of a register: AddL with the immL1 operand becomes incq when
// UseIncDec is set.  Kills the flags register.
//
// dst is a long register operand (rRegL), like every other AddL rule in this
// section (addL_rReg, addL_rReg_imm, decL_rReg).  It was previously declared
// rRegI, which does not agree with the long value produced by AddL and with
// the wide (REX.W) encoding emitted below.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF, 0x00); // FF /0
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7413 
// long increment in memory: StoreL of (LoadL dst + immL1) becomes incq when
// UseIncDec is set.  Kills the flags register.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "incq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}
7426 
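// Decrement is matched as AddL of the -1 immediate (immL_M1): C2 canonicalizes
// "x - 1" into "x + (-1)", so no SubL form reaches the matcher here.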
// long decrement of a register: AddL with the immL_M1 operand becomes decq
// when UseIncDec is set.  Kills the flags register.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (AddL dst src));
  predicate(UseIncDec);
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF, 0x01); // FF /1
  ins_encode(REX_reg_wide(dst),
             OpcP,
             reg_opc(dst));
  ins_pipe(ialu_reg);
%}
7439 
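// Decrement is matched as AddL of the -1 immediate (immL_M1): C2 canonicalizes
// "x - 1" into "x + (-1)", so no SubL form reaches the matcher here.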
// long decrement in memory: StoreL of (LoadL dst + immL_M1) becomes decq when
// UseIncDec is set.  Kills the flags register.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);
  ins_cost(125); // XXX

  format %{ "decq    $dst\t# long" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode(REX_mem_wide(dst),
             OpcP,
             RM_opc_mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}
7453 
// long add of register and immL32 immediate into a separate destination, via
// leaq.  No flags effect is declared for this rule.
instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) %{
  match(Set dst (AddL src0 src1));
  ins_cost(110);

  format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1)); // XXX
  ins_pipe(ialu_reg_reg);
%}
7464 
// pointer add, pointer register += long register: addq.  Kills the flags
// register.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x03);
  ins_encode(REX_reg_reg_wide(dst, src),
             OpcP,
             reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
7475 
// pointer add, pointer register += immL32 immediate: addq.  Kills the flags
// register.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode(OpcSErm_wide(dst, src),
             Con8or32(src));
  ins_pipe(ialu_reg);
%}
7486 
7487 // XXX addP mem ops ????
7488 
// pointer add of register and immL32 immediate into a separate destination,
// via leaq.  No flags effect is declared for this rule.
instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode(REX_reg_reg_wide(dst, src0),
             OpcP,
             reg_lea(dst, src0, src1));// XXX
  ins_pipe(ialu_reg_reg);
%}
7499 
// CheckCastPP on a pointer register: zero-size rule with an empty encoding.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7509 
// CastPP on a pointer register: zero-size rule with an empty encoding.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7519 
// CastII on an int register: zero-size, zero-cost rule with an empty encoding.
instruct castII(rRegI dst) %{
  match(Set dst (CastII dst));
  ins_cost(0);

  format %{ "# castII of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
7530 
7531 // LoadP-locked same as a regular LoadP when used with compare-swap
// LoadPLocked: encoded as an ordinary 64-bit movq load from memory.
instruct loadPLocked(rRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));
  ins_cost(125); // XXX

  format %{ "movq    $dst, $mem\t# ptr locked" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, mem),
             OpcP,
             reg_mem(dst, mem));
  ins_pipe(ialu_reg_mem); // XXX
%}
7542 
7543 // Conditional-store of the updated heap-top.
7544 // Used during allocation of the shared heap.
7545 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7546 
// StorePConditional: lock cmpxchgq of newval into heap_top_ptr, comparing
// against oldval in rax.  The result of the rule is the flags register.
instruct storePConditional(memory heap_top_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
            "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, heap_top_ptr),
             OpcP,
             OpcS,
             reg_mem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}
7562 
7563 // Conditional-store of an int value.
7564 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreIConditional: lock cmpxchgl of newval into mem, comparing against
// oldval in rax.  The result is the flags register; oldval is killed.
instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem(newval, mem),
             OpcP,
             OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
7578 
7579 // Conditional-store of a long value.
7580 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// StoreLConditional: lock cmpxchgq of newval into mem, comparing against
// oldval in rax.  The result is the flags register; oldval is killed.
instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
  opcode(0x0F, 0xB1);
  ins_encode(lock_prefix,
             REX_reg_mem_wide(newval, mem),
             OpcP,
             OpcS,
             reg_mem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}
7594 
7595 
7596 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// CompareAndSwapP / WeakCompareAndSwapP: lock cmpxchgq, then the outcome is
// turned into an int in res with sete + movzbl.  Requires supports_cx8().
// Kills the flags register and oldval (rax).
instruct compareAndSwapP(rRegI res, memory mem_ptr,
                         rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(// lock cmpxchgq
             lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7621 
// CompareAndSwapL / WeakCompareAndSwapL: lock cmpxchgq, then the outcome is
// turned into an int in res with sete + movzbl.  Requires supports_cx8().
// Kills the flags register and oldval (rax).
instruct compareAndSwapL(rRegI res, memory mem_ptr,
                         rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(// lock cmpxchgq
             lock_prefix,
             REX_reg_mem_wide(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7646 
// CompareAndSwapI / WeakCompareAndSwapI: lock cmpxchgl, then the outcome is
// turned into an int in res with sete + movzbl.
// Kills the flags register and oldval (rax).
instruct compareAndSwapI(rRegI res, memory mem_ptr,
                         rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(// lock cmpxchgl
             lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7670 
// CompareAndSwapB / WeakCompareAndSwapB: lock cmpxchgb (opcode 0F B0 with a
// byte-register REX), then the outcome is turned into an int in res with
// sete + movzbl.  Kills the flags register and oldval (rax).
instruct compareAndSwapB(rRegI res, memory mem_ptr,
                         rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB0);
  ins_encode(// lock cmpxchgb
             lock_prefix,
             REX_breg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7694 
// CompareAndSwapS / WeakCompareAndSwapS: lock cmpxchgw (SizePrefix ahead of
// the 0F B1 opcode), then the outcome is turned into an int in res with
// sete + movzbl.  Kills the flags register and oldval (rax).
instruct compareAndSwapS(rRegI res, memory mem_ptr,
                         rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(// lock cmpxchgw
             lock_prefix,
             SizePrefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7719 
// CompareAndSwapN / WeakCompareAndSwapN (compressed pointers): lock cmpxchgl,
// then the outcome is turned into an int in res with sete + movzbl.
// Kills the flags register and oldval (rax).
instruct compareAndSwapN(rRegI res, memory mem_ptr,
                         rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  opcode(0x0F, 0xB1);
  ins_encode(// lock cmpxchgl
             lock_prefix,
             REX_reg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr),
             // sete
             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res),
             // movzbl
             REX_reg_breg(res, res),
             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
  ins_pipe(pipe_cmpxchg);
%}
7742 
// CompareAndExchangeB: lock cmpxchgb; the rule's result is oldval (rax) itself.
// Kills the flags register.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  opcode(0x0F, 0xB0);
  ins_encode(// lock cmpxchg
             lock_prefix,
             REX_breg_mem(newval, mem_ptr),
             OpcP, OpcS,
             reg_mem(newval, mem_ptr));
  ins_pipe(pipe_cmpxchg);
%}
7761 
// Short (16-bit) compare-and-exchange; $oldval (rax) is both input and result.
// Differs from the int variant only by the SizePrefix entry in the encoding
// (presumably the operand-size override; the enc_class is defined elsewhere).
7762 instruct compareAndExchangeS(
7763                          memory mem_ptr,
7764                          rax_RegI oldval, rRegI newval,
7765                          rFlagsReg cr)
7766 %{
7767   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7768   effect(KILL cr);
7769 
7770   format %{ "cmpxchgw $mem_ptr,$newval\t# "
7771             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7772   opcode(0x0F, 0xB1);
7773   ins_encode(lock_prefix,
7774              SizePrefix,
7775              REX_reg_mem(newval, mem_ptr),
7776              OpcP, OpcS,
7777              reg_mem(newval, mem_ptr) // lock cmpxchg
7778              );
7779   ins_pipe( pipe_cmpxchg );
7780 %}
7781 
// Int (32-bit) compare-and-exchange.  $oldval is pinned to rax and redefined by
// the match rule with the CompareAndExchangeI result; flags are killed.
7782 instruct compareAndExchangeI(
7783                          memory mem_ptr,
7784                          rax_RegI oldval, rRegI newval,
7785                          rFlagsReg cr)
7786 %{
7787   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7788   effect(KILL cr);
7789 
7790   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7791             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7792   opcode(0x0F, 0xB1);
7793   ins_encode(lock_prefix,
7794              REX_reg_mem(newval, mem_ptr),
7795              OpcP, OpcS,
7796              reg_mem(newval, mem_ptr) // lock cmpxchg
7797              );
7798   ins_pipe( pipe_cmpxchg );
7799 %}
7800 
// Long (64-bit) compare-and-exchange; $oldval (rax_RegL) is input and result.
// Uses the _wide REX helper where the int variant uses REX_reg_mem, and is only
// selectable when VM_Version::supports_cx8() holds.
7801 instruct compareAndExchangeL(
7802                          memory mem_ptr,
7803                          rax_RegL oldval, rRegL newval,
7804                          rFlagsReg cr)
7805 %{
7806   predicate(VM_Version::supports_cx8());
7807   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7808   effect(KILL cr);
7809 
7810   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7811             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7812   opcode(0x0F, 0xB1);
7813   ins_encode(lock_prefix,
7814              REX_reg_mem_wide(newval, mem_ptr),
7815              OpcP, OpcS,
7816              reg_mem(newval, mem_ptr)  // lock cmpxchg
7817             );
7818   ins_pipe( pipe_cmpxchg );
7819 %}
7820 
// Narrow-oop (N) compare-and-exchange.  Same encoding entries as the int
// variant (format: cmpxchgl); $oldval in rax is input and result.
7821 instruct compareAndExchangeN(
7822                           memory mem_ptr,
7823                           rax_RegN oldval, rRegN newval,
7824                           rFlagsReg cr) %{
7825   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
7826   effect(KILL cr);
7827 
7828   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7829             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7830   opcode(0x0F, 0xB1);
7831   ins_encode(lock_prefix,
7832              REX_reg_mem(newval, mem_ptr),
7833              OpcP, OpcS,
7834              reg_mem(newval, mem_ptr)  // lock cmpxchg
7835           );
7836   ins_pipe( pipe_cmpxchg );
7837 %}
7838 
// Pointer compare-and-exchange.  Same encoding entries as the long variant
// (wide REX helper, format: cmpxchgq) and the same supports_cx8() predicate;
// $oldval in rax is input and result.
7839 instruct compareAndExchangeP(
7840                          memory mem_ptr,
7841                          rax_RegP oldval, rRegP newval,
7842                          rFlagsReg cr)
7843 %{
7844   predicate(VM_Version::supports_cx8());
7845   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7846   effect(KILL cr);
7847 
7848   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7849             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7850   opcode(0x0F, 0xB1);
7851   ins_encode(lock_prefix,
7852              REX_reg_mem_wide(newval, mem_ptr),
7853              OpcP, OpcS,
7854              reg_mem(newval, mem_ptr)  // lock cmpxchg
7855           );
7856   ins_pipe( pipe_cmpxchg );
7857 %}
7858 
// GetAndAddB whose result is unused (see predicate): a locked ADDB of an
// immediate is emitted instead of XADDB.  'dummy' only gives the match rule a
// destination; it does not appear in the encoding.
7859 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7860   predicate(n->as_LoadStore()->result_not_used());
7861   match(Set dummy (GetAndAddB mem add));
7862   effect(KILL cr);
7863   format %{ "ADDB  [$mem],$add" %}
7864   ins_encode %{
7865     __ lock();
7866     __ addb($mem$$Address, $add$$constant);
7867   %}
7868   ins_pipe( pipe_cmpxchg );
7869 %}
7870 
// GetAndAddB with a live result: locked XADDB.  $newval supplies the addend and
// is the register the match rule defines with the result.
7871 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
7872   match(Set newval (GetAndAddB mem newval));
7873   effect(KILL cr);
7874   format %{ "XADDB  [$mem],$newval" %}
7875   ins_encode %{
7876     __ lock();
7877     __ xaddb($mem$$Address, $newval$$Register);
7878   %}
7879   ins_pipe( pipe_cmpxchg );
7880 %}
7881 
// GetAndAddS whose result is unused (see predicate): a locked ADDW of an
// immediate replaces XADDW.  'dummy' is only the match rule's destination.
7882 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7883   predicate(n->as_LoadStore()->result_not_used());
7884   match(Set dummy (GetAndAddS mem add));
7885   effect(KILL cr);
7886   format %{ "ADDW  [$mem],$add" %}
7887   ins_encode %{
7888     __ lock();
7889     __ addw($mem$$Address, $add$$constant);
7890   %}
7891   ins_pipe( pipe_cmpxchg );
7892 %}
7893 
// GetAndAddS with a live result: locked XADDW.  $newval is the addend on entry
// and is defined with the match result.
7894 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
7895   match(Set newval (GetAndAddS mem newval));
7896   effect(KILL cr);
7897   format %{ "XADDW  [$mem],$newval" %}
7898   ins_encode %{
7899     __ lock();
7900     __ xaddw($mem$$Address, $newval$$Register);
7901   %}
7902   ins_pipe( pipe_cmpxchg );
7903 %}
7904 
// GetAndAddI whose result is unused (see predicate): a locked ADDL of an
// immediate replaces XADDL.  'dummy' is only the match rule's destination.
7905 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7906   predicate(n->as_LoadStore()->result_not_used());
7907   match(Set dummy (GetAndAddI mem add));
7908   effect(KILL cr);
7909   format %{ "ADDL  [$mem],$add" %}
7910   ins_encode %{
7911     __ lock();
7912     __ addl($mem$$Address, $add$$constant);
7913   %}
7914   ins_pipe( pipe_cmpxchg );
7915 %}
7916 
// GetAndAddI with a live result: locked XADDL.  $newval is the addend on entry
// and is defined with the match result.
7917 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
7918   match(Set newval (GetAndAddI mem newval));
7919   effect(KILL cr);
7920   format %{ "XADDL  [$mem],$newval" %}
7921   ins_encode %{
7922     __ lock();
7923     __ xaddl($mem$$Address, $newval$$Register);
7924   %}
7925   ins_pipe( pipe_cmpxchg );
7926 %}
7927 
// GetAndAddL whose result is unused (see predicate): a locked ADDQ of an
// immediate replaces XADDQ.  The addend operand is immL32, not a full immL.
7928 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
7929   predicate(n->as_LoadStore()->result_not_used());
7930   match(Set dummy (GetAndAddL mem add));
7931   effect(KILL cr);
7932   format %{ "ADDQ  [$mem],$add" %}
7933   ins_encode %{
7934     __ lock();
7935     __ addq($mem$$Address, $add$$constant);
7936   %}
7937   ins_pipe( pipe_cmpxchg );
7938 %}
7939 
// GetAndAddL with a live result: locked XADDQ.  $newval is the addend on entry
// and is defined with the match result.
7940 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
7941   match(Set newval (GetAndAddL mem newval));
7942   effect(KILL cr);
7943   format %{ "XADDQ  [$mem],$newval" %}
7944   ins_encode %{
7945     __ lock();
7946     __ xaddq($mem$$Address, $newval$$Register);
7947   %}
7948   ins_pipe( pipe_cmpxchg );
7949 %}
7950 
// GetAndSetB: exchanges $newval with the byte at $mem; the match rule redefines
// $newval with the result.  No lock() call and no flags effect are declared
// here (x86 XCHG with a memory operand is implicitly locked per the Intel SDM).
7951 instruct xchgB( memory mem, rRegI newval) %{
7952   match(Set newval (GetAndSetB mem newval));
7953   format %{ "XCHGB  $newval,[$mem]" %}
7954   ins_encode %{
7955     __ xchgb($newval$$Register, $mem$$Address);
7956   %}
7957   ins_pipe( pipe_cmpxchg );
7958 %}
7959 
// GetAndSetS: exchanges $newval with the 16-bit value at $mem via xchgw; the
// match rule redefines $newval with the result.  No explicit lock() is emitted.
7960 instruct xchgS( memory mem, rRegI newval) %{
7961   match(Set newval (GetAndSetS mem newval));
7962   format %{ "XCHGW  $newval,[$mem]" %}
7963   ins_encode %{
7964     __ xchgw($newval$$Register, $mem$$Address);
7965   %}
7966   ins_pipe( pipe_cmpxchg );
7967 %}
7968 
// GetAndSetI: exchanges $newval with the int at $mem via xchgl; the match rule
// redefines $newval with the result.  No explicit lock() is emitted.
7969 instruct xchgI( memory mem, rRegI newval) %{
7970   match(Set newval (GetAndSetI mem newval));
7971   format %{ "XCHGL  $newval,[$mem]" %}
7972   ins_encode %{
7973     __ xchgl($newval$$Register, $mem$$Address);
7974   %}
7975   ins_pipe( pipe_cmpxchg );
7976 %}
7977 
// GetAndSetL: exchanges $newval with the long at $mem via xchgq; the match rule
// redefines $newval with the result.  No explicit lock() is emitted.
// The format mnemonic is XCHGQ so the printed listing agrees with the 64-bit
// instruction that is actually encoded.
7978 instruct xchgL( memory mem, rRegL newval) %{
7979   match(Set newval (GetAndSetL mem newval));
7980   format %{ "XCHGQ  $newval,[$mem]" %}
7981   ins_encode %{
7982     __ xchgq($newval$$Register, $mem$$Address);
7983   %}
7984   ins_pipe( pipe_cmpxchg );
7985 %}
7986 
// GetAndSetP: exchanges $newval with the pointer at $mem via xchgq; the match
// rule redefines $newval with the result.  No explicit lock() is emitted.
7987 instruct xchgP( memory mem, rRegP newval) %{
7988   match(Set newval (GetAndSetP mem newval));
7989   format %{ "XCHGQ  $newval,[$mem]" %}
7990   ins_encode %{
7991     __ xchgq($newval$$Register, $mem$$Address);
7992   %}
7993   ins_pipe( pipe_cmpxchg );
7994 %}
7995 
// GetAndSetN: exchanges $newval with the narrow oop at $mem via xchgl; the
// match rule redefines $newval with the result.  No explicit lock() is emitted.
// The format brackets the memory operand like the other xchg rules.
7996 instruct xchgN( memory mem, rRegN newval) %{
7997   match(Set newval (GetAndSetN mem newval));
7998   format %{ "XCHGL  $newval,[$mem]" %}
7999   ins_encode %{
8000     __ xchgl($newval$$Register, $mem$$Address);
8001   %}
8002   ins_pipe( pipe_cmpxchg );
8003 %}
8004 
8005 //----------Subtraction Instructions-------------------------------------------
8006 
8007 // Integer Subtraction Instructions
// Int subtract, register - register: $dst = $dst - $src (two-address form, dst
// appears on both sides of the match rule).  Flags are killed.
8008 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8009 %{
8010   match(Set dst (SubI dst src));
8011   effect(KILL cr);
8012 
8013   format %{ "subl    $dst, $src\t# int" %}
8014   opcode(0x2B);
8015   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
8016   ins_pipe(ialu_reg_reg);
8017 %}
8018 
// Int subtract, register - immediate: $dst = $dst - $src.  Flags are killed.
// OpcSErm/Con8or32 are encoding helpers defined elsewhere; their names suggest
// the immediate is emitted as 8 or 32 bits depending on its value (confirm).
8019 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
8020 %{
8021   match(Set dst (SubI dst src));
8022   effect(KILL cr);
8023 
8024   format %{ "subl    $dst, $src\t# int" %}
8025   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8026   ins_encode(OpcSErm(dst, src), Con8or32(src));
8027   ins_pipe(ialu_reg);
8028 %}
8029 
// Int subtract with the load folded in: $dst = $dst - LoadI($src).  Costed at
// 125 and scheduled on the reg-mem pipe class.  Flags are killed.
8030 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
8031 %{
8032   match(Set dst (SubI dst (LoadI src)));
8033   effect(KILL cr);
8034 
8035   ins_cost(125);
8036   format %{ "subl    $dst, $src\t# int" %}
8037   opcode(0x2B);
8038   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
8039   ins_pipe(ialu_reg_mem);
8040 %}
8041 
// Int read-modify-write subtract: matches load / SubI / store on the same
// address $dst and performs it as one instruction with a register subtrahend.
// Note the register operand comes first in the encoding helpers (src, dst).
8042 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
8043 %{
8044   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8045   effect(KILL cr);
8046 
8047   ins_cost(150);
8048   format %{ "subl    $dst, $src\t# int" %}
8049   opcode(0x29); /* Opcode 29 /r */
8050   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
8051   ins_pipe(ialu_mem_reg);
8052 %}
8053 
// Int read-modify-write subtract of an immediate from memory: matches
// load / SubI / store on the same address $dst.  Only the primary opcode is
// given to opcode(); the /5 extension is passed literally to RM_opc_mem.
8054 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8055 %{
8056   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8057   effect(KILL cr);
8058 
8059   ins_cost(125); // XXX
8060   format %{ "subl    $dst, $src\t# int" %}
8061   opcode(0x81); /* Opcode 81 /5 id */
8062   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8063   ins_pipe(ialu_mem_imm);
8064 %}
8065 
// Long subtract, register - register: $dst = $dst - $src.  Same opcode as the
// int rule but with the _wide REX helper (format: subq).  Flags are killed.
8066 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8067 %{
8068   match(Set dst (SubL dst src));
8069   effect(KILL cr);
8070 
8071   format %{ "subq    $dst, $src\t# long" %}
8072   opcode(0x2B);
8073   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8074   ins_pipe(ialu_reg_reg);
8075 %}
8076 
// Long subtract, register - 32-bit immediate: $dst = $dst - $src (format:
// subq).  Flags are killed.  $dst is a long register (rRegL), matching the
// SubL node it defines and the other long subtract rules in this file.
8077 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8078 %{
8079   match(Set dst (SubL dst src));
8080   effect(KILL cr);
8081 
8082   format %{ "subq    $dst, $src\t# long" %}
8083   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8084   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
8085   ins_pipe(ialu_reg);
8086 %}
8087 
// Long subtract with the load folded in: $dst = $dst - LoadL($src).  Costed at
// 125.  Flags are killed.
8088 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8089 %{
8090   match(Set dst (SubL dst (LoadL src)));
8091   effect(KILL cr);
8092 
8093   ins_cost(125);
8094   format %{ "subq    $dst, $src\t# long" %}
8095   opcode(0x2B);
8096   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8097   ins_pipe(ialu_reg_mem);
8098 %}
8099 
// Long read-modify-write subtract: matches load / SubL / store on the same
// address $dst with a register subtrahend.  Flags are killed.
8100 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8101 %{
8102   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8103   effect(KILL cr);
8104 
8105   ins_cost(150);
8106   format %{ "subq    $dst, $src\t# long" %}
8107   opcode(0x29); /* Opcode 29 /r */
8108   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
8109   ins_pipe(ialu_mem_reg);
8110 %}
8111 
// Long read-modify-write subtract of a 32-bit immediate from memory: matches
// load / SubL / store on the same address $dst.  The /5 extension is passed
// literally to RM_opc_mem.  Flags are killed.
8112 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8113 %{
8114   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
8115   effect(KILL cr);
8116 
8117   ins_cost(125); // XXX
8118   format %{ "subq    $dst, $src\t# long" %}
8119   opcode(0x81); /* Opcode 81 /5 id */
8120   ins_encode(REX_mem_wide(dst),
8121              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));
8122   ins_pipe(ialu_mem_imm);
8123 %}
8124 
8125 // Subtract from a pointer
8126 // XXX hmpf???
// Matches the ideal shape AddP(dst, SubI(0, src)) - i.e. adding a negated int
// to a pointer - and emits a single 64-bit subtract (format: subq).  'zero'
// exists only to match the constant 0; it is not part of the encoding.
// NOTE(review): the encoding treats the int register $src as a 64-bit operand;
// confirm how the upper half of $src is guaranteed to be valid here.
8127 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8128 %{
8129   match(Set dst (AddP dst (SubI zero src)));
8130   effect(KILL cr);
8131 
8132   format %{ "subq    $dst, $src\t# ptr - int" %}
8133   opcode(0x2B);
8134   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
8135   ins_pipe(ialu_reg_reg);
8136 %}
8137 
// Int negate in a register: matches SubI(0, dst) and emits negl.  'zero' only
// matches the constant 0 and is not encoded.  Flags are killed.
8138 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8139 %{
8140   match(Set dst (SubI zero dst));
8141   effect(KILL cr);
8142 
8143   format %{ "negl    $dst\t# int" %}
8144   opcode(0xF7, 0x03);  // Opcode F7 /3
8145   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8146   ins_pipe(ialu_reg);
8147 %}
8148 
// Int negate in memory: matches load / SubI(0, x) / store on the same address
// and emits negl on the memory operand.  Flags are killed.
// NOTE(review): scheduled with pipe class ialu_reg although the operand is
// memory - confirm whether a memory pipe class was intended.
8149 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8150 %{
8151   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8152   effect(KILL cr);
8153 
8154   format %{ "negl    $dst\t# int" %}
8155   opcode(0xF7, 0x03);  // Opcode F7 /3
8156   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8157   ins_pipe(ialu_reg);
8158 %}
8159 
// Long negate in a register: matches SubL(0, dst) and emits negq (wide REX
// helper).  'zero' only matches the constant 0.  Flags are killed.
8160 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8161 %{
8162   match(Set dst (SubL zero dst));
8163   effect(KILL cr);
8164 
8165   format %{ "negq    $dst\t# long" %}
8166   opcode(0xF7, 0x03);  // Opcode F7 /3
8167   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8168   ins_pipe(ialu_reg);
8169 %}
8170 
// Long negate in memory: matches load / SubL(0, x) / store on the same address
// and emits negq on the memory operand.  Flags are killed.
// NOTE(review): scheduled with pipe class ialu_reg although the operand is
// memory - confirm whether a memory pipe class was intended.
8171 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8172 %{
8173   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8174   effect(KILL cr);
8175 
8176   format %{ "negq    $dst\t# long" %}
8177   opcode(0xF7, 0x03);  // Opcode F7 /3
8178   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8179   ins_pipe(ialu_reg);
8180 %}
8181 
8182 //----------Multiplication/Division Instructions-------------------------------
8183 // Integer Multiplication Instructions
8184 // Multiply Register
8185 
// Int multiply, two-address register form: $dst = $dst * $src (imull).
// Costed at 300; flags are killed.
8186 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8187 %{
8188   match(Set dst (MulI dst src));
8189   effect(KILL cr);
8190 
8191   ins_cost(300);
8192   format %{ "imull   $dst, $src\t# int" %}
8193   opcode(0x0F, 0xAF);
8194   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8195   ins_pipe(ialu_reg_reg_alu0);
8196 %}
8197 
// Int multiply by an immediate, three-operand form: $dst = $src * $imm.  Unlike
// mulI_rReg, $dst is not an input of the match rule.  Flags are killed.
8198 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8199 %{
8200   match(Set dst (MulI src imm));
8201   effect(KILL cr);
8202 
8203   ins_cost(300);
8204   format %{ "imull   $dst, $src, $imm\t# int" %}
8205   opcode(0x69); /* 69 /r id */
8206   ins_encode(REX_reg_reg(dst, src),
8207              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8208   ins_pipe(ialu_reg_reg_alu0);
8209 %}
8210 
// Int multiply with the load folded in: $dst = $dst * LoadI($src).  Costed at
// 350; flags are killed.
8211 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8212 %{
8213   match(Set dst (MulI dst (LoadI src)));
8214   effect(KILL cr);
8215 
8216   ins_cost(350);
8217   format %{ "imull   $dst, $src\t# int" %}
8218   opcode(0x0F, 0xAF);
8219   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8220   ins_pipe(ialu_reg_mem_alu0);
8221 %}
8222 
// Int multiply of a loaded value by an immediate: $dst = LoadI($src) * $imm,
// three-operand form with a memory source.  Flags are killed.
8223 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8224 %{
8225   match(Set dst (MulI (LoadI src) imm));
8226   effect(KILL cr);
8227 
8228   ins_cost(300);
8229   format %{ "imull   $dst, $src, $imm\t# int" %}
8230   opcode(0x69); /* 69 /r id */
8231   ins_encode(REX_reg_mem(dst, src),
8232              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8233   ins_pipe(ialu_reg_mem_alu0);
8234 %}
8235 
// MulAddS2I: expands into two int multiplies and an add, leaving
// dst*src1 + src2*src3 in $dst.  No encoding of its own; the three expanded
// rules supply it.
8236 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
8237 %{
8238   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
8239   effect(KILL cr, KILL src2); // src2 is overwritten by the second multiply below
8240 
8241   expand %{ mulI_rReg(dst, src1, cr);
8242            mulI_rReg(src2, src3, cr);
8243            addI_rReg(dst, src2, cr); %}
8244 %}
8245 
// Long multiply, two-address register form: $dst = $dst * $src (imulq, wide
// REX helper).  Costed at 300; flags are killed.
8246 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8247 %{
8248   match(Set dst (MulL dst src));
8249   effect(KILL cr);
8250 
8251   ins_cost(300);
8252   format %{ "imulq   $dst, $src\t# long" %}
8253   opcode(0x0F, 0xAF);
8254   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8255   ins_pipe(ialu_reg_reg_alu0);
8256 %}
8257 
// Long multiply by a 32-bit immediate, three-operand form: $dst = $src * $imm.
// Flags are killed.
8258 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8259 %{
8260   match(Set dst (MulL src imm));
8261   effect(KILL cr);
8262 
8263   ins_cost(300);
8264   format %{ "imulq   $dst, $src, $imm\t# long" %}
8265   opcode(0x69); /* 69 /r id */
8266   ins_encode(REX_reg_reg_wide(dst, src),
8267              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8268   ins_pipe(ialu_reg_reg_alu0);
8269 %}
8270 
// Long multiply with the load folded in: $dst = $dst * LoadL($src).  Costed at
// 350; flags are killed.
8271 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8272 %{
8273   match(Set dst (MulL dst (LoadL src)));
8274   effect(KILL cr);
8275 
8276   ins_cost(350);
8277   format %{ "imulq   $dst, $src\t# long" %}
8278   opcode(0x0F, 0xAF);
8279   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8280   ins_pipe(ialu_reg_mem_alu0);
8281 %}
8282 
// Long multiply of a loaded value by a 32-bit immediate:
// $dst = LoadL($src) * $imm.  Flags are killed.
8283 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8284 %{
8285   match(Set dst (MulL (LoadL src) imm));
8286   effect(KILL cr);
8287 
8288   ins_cost(300);
8289   format %{ "imulq   $dst, $src, $imm\t# long" %}
8290   opcode(0x69); /* 69 /r id */
8291   ins_encode(REX_reg_mem_wide(dst, src),
8292              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8293   ins_pipe(ialu_reg_mem_alu0);
8294 %}
8295 
// MulHiL: one-operand imulq of rax by $src.  The result operand is pinned to
// rdx; rax is consumed and clobbered (USE_KILL).  $src is constrained to a
// register class that excludes rax.  Only $src is encoded.
8296 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8297 %{
8298   match(Set dst (MulHiL src rax));
8299   effect(USE_KILL rax, KILL cr);
8300 
8301   ins_cost(300);
8302   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8303   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8304   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8305   ins_pipe(ialu_reg_reg_alu0);
8306 %}
8307 
// Int divide: dividend and quotient in rax, rdx is clobbered, divisor in any
// register other than rax/rdx.  Per the format, the idivl is bypassed when
// rax == 0x80000000 and $div == -1 (rdx is zeroed and rax left unchanged);
// otherwise cdql + idivl run.  The guard sequence itself comes from cdql_enc,
// which is defined elsewhere.
8308 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8309                    rFlagsReg cr)
8310 %{
8311   match(Set rax (DivI rax div));
8312   effect(KILL rdx, KILL cr);
8313 
8314   ins_cost(30*100+10*100); // XXX
8315   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8316             "jne,s   normal\n\t"
8317             "xorl    rdx, rdx\n\t"
8318             "cmpl    $div, -1\n\t"
8319             "je,s    done\n"
8320     "normal: cdql\n\t"
8321             "idivl   $div\n"
8322     "done:"        %}
8323   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8324   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8325   ins_pipe(ialu_reg_reg_alu0);
8326 %}
8327 
// Long divide: dividend and quotient in rax, rdx is clobbered, divisor in any
// register other than rax/rdx.  Per the format, the idivq is bypassed when
// rax == 0x8000000000000000 and $div == -1 (rdx is zeroed); otherwise
// cdqq + idivq run.  The guard sequence comes from cdqq_enc (defined elsewhere).
8328 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8329                    rFlagsReg cr)
8330 %{
8331   match(Set rax (DivL rax div));
8332   effect(KILL rdx, KILL cr);
8333 
8334   ins_cost(30*100+10*100); // XXX
8335   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8336             "cmpq    rax, rdx\n\t"
8337             "jne,s   normal\n\t"
8338             "xorl    rdx, rdx\n\t"
8339             "cmpq    $div, -1\n\t"
8340             "je,s    done\n"
8341     "normal: cdqq\n\t"
8342             "idivq   $div\n"
8343     "done:"        %}
8344   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8345   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8346   ins_pipe(ialu_reg_reg_alu0);
8347 %}
8348 
8349 // Integer DIVMOD with Register, both quotient and mod results
// Same encoding and format as divI_rReg, but the match rule has no Set and rdx
// is not killed: only the flags are listed in effect().  Presumably rax and rdx
// both carry live results of the DivModI node - confirm against the matcher.
8350 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8351                              rFlagsReg cr)
8352 %{
8353   match(DivModI rax div);
8354   effect(KILL cr);
8355 
8356   ins_cost(30*100+10*100); // XXX
8357   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8358             "jne,s   normal\n\t"
8359             "xorl    rdx, rdx\n\t"
8360             "cmpl    $div, -1\n\t"
8361             "je,s    done\n"
8362     "normal: cdql\n\t"
8363             "idivl   $div\n"
8364     "done:"        %}
8365   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8366   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8367   ins_pipe(pipe_slow);
8368 %}
8369 
8370 // Long DIVMOD with Register, both quotient and mod results
// Same encoding and format as divL_rReg, but the match rule has no Set and rdx
// is not killed: only the flags are listed in effect().  Presumably rax and rdx
// both carry live results of the DivModL node - confirm against the matcher.
8371 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8372                              rFlagsReg cr)
8373 %{
8374   match(DivModL rax div);
8375   effect(KILL cr);
8376 
8377   ins_cost(30*100+10*100); // XXX
8378   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8379             "cmpq    rax, rdx\n\t"
8380             "jne,s   normal\n\t"
8381             "xorl    rdx, rdx\n\t"
8382             "cmpq    $div, -1\n\t"
8383             "je,s    done\n"
8384     "normal: cdqq\n\t"
8385             "idivq   $div\n"
8386     "done:"        %}
8387   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8388   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8389   ins_pipe(pipe_slow);
8390 %}
8391 
8392 //----------- DivL-By-Constant-Expansions--------------------------------------
8393 // DivI cases are handled by the compiler
8394 
8395 // Magic constant, reciprocal of 10
// Helper with no match rule (effect DEF only): loads the 64-bit constant
// 0x6666666666666667 into $dst.  It is instantiated from the divL_10 expansion.
// The format prints the full 16-digit constant so the listing agrees with the
// value passed to load_immL.
8396 instruct loadConL_0x6666666666666667(rRegL dst)
8397 %{
8398   effect(DEF dst);
8399 
8400   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8401   ins_encode(load_immL(dst, 0x6666666666666667));
8402   ins_pipe(ialu_reg);
8403 %}
8404 
// Helper with no match rule, used by the divL_10 expansion: one-operand imulq
// of rax by $src.  $dst is pinned to rdx and defined; rax is consumed and
// clobbered; flags are killed.  Same encoding as mulHiL_rReg.
8405 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8406 %{
8407   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8408 
8409   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8410   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8411   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8412   ins_pipe(ialu_reg_reg_alu0);
8413 %}
8414 
// Helper with no match rule, used by the divL_10 expansion: arithmetic right
// shift of $dst by the fixed count 63 (0x3F), in place (USE_DEF).
8415 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8416 %{
8417   effect(USE_DEF dst, KILL cr);
8418 
8419   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8420   opcode(0xC1, 0x7); /* C1 /7 ib */
8421   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8422   ins_pipe(ialu_reg);
8423 %}
8424 
// Helper with no match rule, used by the divL_10 expansion: arithmetic right
// shift of $dst by the fixed count 2, in place (USE_DEF).
8425 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8426 %{
8427   effect(USE_DEF dst, KILL cr);
8428 
8429   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8430   opcode(0xC1, 0x7); /* C1 /7 ib */
8431   ins_encode(reg_opc_imm_wide(dst, 0x2));
8432   ins_pipe(ialu_reg);
8433 %}
8434 
// Long division by the constant 10 (immL10) without idiv: expands into a
// multiply-high by the magic constant followed by shift/subtract corrections.
// Result lands in rdx ($dst); rax and the flags are temporaries of the expansion.
// NOTE(review): sarL_rReg_63 is USE_DEF on its operand, so $src is overwritten
// by the expansion although this rule declares no effect on src - confirm that
// is intended.
8435 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8436 %{
8437   match(Set dst (DivL src div));
8438 
8439   ins_cost((5+8)*100);
8440   expand %{
8441     rax_RegL rax;                     // Killed temp
8442     rFlagsReg cr;                     // Killed
8443     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8444     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8445     sarL_rReg_63(src, cr);            // sarq  src, 63
8446     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8447     subL_rReg(dst, src, cr);          // subq  rdx, src
8448   %}
8449 %}
8450 
8451 //-----------------------------------------------------------------------------
8452 
// Int remainder: dividend in rax (clobbered), remainder defined in rdx, divisor
// in any register other than rax/rdx.  Same encoding and guard sequence as
// divI_rReg; per the format, rdx is zeroed when rax == 0x80000000 and
// $div == -1 and the idivl is skipped.
8453 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8454                    rFlagsReg cr)
8455 %{
8456   match(Set rdx (ModI rax div));
8457   effect(KILL rax, KILL cr);
8458 
8459   ins_cost(300); // XXX
8460   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8461             "jne,s   normal\n\t"
8462             "xorl    rdx, rdx\n\t"
8463             "cmpl    $div, -1\n\t"
8464             "je,s    done\n"
8465     "normal: cdql\n\t"
8466             "idivl   $div\n"
8467     "done:"        %}
8468   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8469   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8470   ins_pipe(ialu_reg_reg_alu0);
8471 %}
8472 
// Long remainder: dividend in rax (clobbered), remainder defined in rdx,
// divisor in any register other than rax/rdx.  Same encoding and guard sequence
// as divL_rReg; per the format, rdx is zeroed when rax == 0x8000000000000000
// and $div == -1 and the idivq is skipped.
8473 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8474                    rFlagsReg cr)
8475 %{
8476   match(Set rdx (ModL rax div));
8477   effect(KILL rax, KILL cr);
8478 
8479   ins_cost(300); // XXX
8480   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8481             "cmpq    rax, rdx\n\t"
8482             "jne,s   normal\n\t"
8483             "xorl    rdx, rdx\n\t"
8484             "cmpq    $div, -1\n\t"
8485             "je,s    done\n"
8486     "normal: cdqq\n\t"
8487             "idivq   $div\n"
8488     "done:"        %}
8489   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8490   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8491   ins_pipe(ialu_reg_reg_alu0);
8492 %}
8493 
8494 // Integer Shift Instructions
8495 // Shift Left by one
// Int register form.  $shift (immI1) only constrains the match; it is not
// encoded, since the D1 opcode form carries no immediate.  Flags are killed.
8496 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8497 %{
8498   match(Set dst (LShiftI dst shift));
8499   effect(KILL cr);
8500 
8501   format %{ "sall    $dst, $shift" %}
8502   opcode(0xD1, 0x4); /* D1 /4 */
8503   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8504   ins_pipe(ialu_reg);
8505 %}
8506 
8507 // Shift Left by one
// Int memory form: matches load / LShiftI / store on the same address $dst.
// $shift (immI1) is not encoded.  Flags are killed.
8508 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8509 %{
8510   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8511   effect(KILL cr);
8512 
8513   format %{ "sall    $dst, $shift\t" %}
8514   opcode(0xD1, 0x4); /* D1 /4 */
8515   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8516   ins_pipe(ialu_mem_imm);
8517 %}
8518 
8519 // Shift Left by 8-bit immediate
// Int register form.  The whole instruction, including the shift count, is
// produced by the reg_opc_imm helper (defined elsewhere).  Flags are killed.
8520 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8521 %{
8522   match(Set dst (LShiftI dst shift));
8523   effect(KILL cr);
8524 
8525   format %{ "sall    $dst, $shift" %}
8526   opcode(0xC1, 0x4); /* C1 /4 ib */
8527   ins_encode(reg_opc_imm(dst, shift));
8528   ins_pipe(ialu_reg);
8529 %}
8530 
8531 // Shift Left by 8-bit immediate
// Int memory form: matches load / LShiftI / store on the same address $dst;
// the shift count is appended through Con8or32.  Flags are killed.
8532 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8533 %{
8534   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8535   effect(KILL cr);
8536 
8537   format %{ "sall    $dst, $shift" %}
8538   opcode(0xC1, 0x4); /* C1 /4 ib */
8539   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8540   ins_pipe(ialu_mem_imm);
8541 %}
8542 
8543 // Shift Left by variable
// Int register form.  The count is pinned to rcx (rcx_RegI) and therefore is
// not encoded explicitly; the D3 opcode form is used.  Flags are killed.
8544 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8545 %{
8546   match(Set dst (LShiftI dst shift));
8547   effect(KILL cr);
8548 
8549   format %{ "sall    $dst, $shift" %}
8550   opcode(0xD3, 0x4); /* D3 /4 */
8551   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8552   ins_pipe(ialu_reg_reg);
8553 %}
8554 
8555 // Shift Left by variable
// Int memory form: matches load / LShiftI / store on the same address $dst
// with the count pinned to rcx (not encoded explicitly).  Flags are killed.
8556 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8557 %{
8558   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8559   effect(KILL cr);
8560 
8561   format %{ "sall    $dst, $shift" %}
8562   opcode(0xD3, 0x4); /* D3 /4 */
8563   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8564   ins_pipe(ialu_mem_reg);
8565 %}
8566 
8567 // Arithmetic shift right by one
// Int register form (RShiftI).  $shift (immI1) only constrains the match and
// is not encoded (D1 opcode form).  Flags are killed.
8568 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8569 %{
8570   match(Set dst (RShiftI dst shift));
8571   effect(KILL cr);
8572 
8573   format %{ "sarl    $dst, $shift" %}
8574   opcode(0xD1, 0x7); /* D1 /7 */
8575   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8576   ins_pipe(ialu_reg);
8577 %}
8578 
8579 // Arithmetic shift right by one
// Int memory form: matches load / RShiftI / store on the same address $dst.
// $shift (immI1) is not encoded.  Flags are killed.
8580 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8581 %{
8582   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8583   effect(KILL cr);
8584 
8585   format %{ "sarl    $dst, $shift" %}
8586   opcode(0xD1, 0x7); /* D1 /7 */
8587   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8588   ins_pipe(ialu_mem_imm);
8589 %}
8590 
8591 // Arithmetic Shift Right by 8-bit immediate
// Int register form (RShiftI).  The whole instruction, including the shift
// count, is produced by the reg_opc_imm helper.  Flags are killed.
// Scheduled on the register pipe class ialu_reg, like the other register-operand
// shift-by-immediate rules (salI_rReg_imm, shrI_rReg_imm); no memory is touched.
8592 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8593 %{
8594   match(Set dst (RShiftI dst shift));
8595   effect(KILL cr);
8596 
8597   format %{ "sarl    $dst, $shift" %}
8598   opcode(0xC1, 0x7); /* C1 /7 ib */
8599   ins_encode(reg_opc_imm(dst, shift));
8600   ins_pipe(ialu_reg);
8601 %}
8602 
8603 // Arithmetic Shift Right by 8-bit immediate
// Int memory form: matches load / RShiftI / store on the same address $dst;
// the shift count is appended through Con8or32.  Flags are killed.
8604 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8605 %{
8606   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8607   effect(KILL cr);
8608 
8609   format %{ "sarl    $dst, $shift" %}
8610   opcode(0xC1, 0x7); /* C1 /7 ib */
8611   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8612   ins_pipe(ialu_mem_imm);
8613 %}
8614 
8615 // Arithmetic Shift Right by variable
// Int register form (RShiftI).  The count is pinned to rcx (rcx_RegI) and is
// not encoded explicitly (D3 opcode form).  Flags are killed.
8616 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8617 %{
8618   match(Set dst (RShiftI dst shift));
8619   effect(KILL cr);
8620 
8621   format %{ "sarl    $dst, $shift" %}
8622   opcode(0xD3, 0x7); /* D3 /7 */
8623   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8624   ins_pipe(ialu_reg_reg);
8625 %}
8626 
8627 // Arithmetic Shift Right by variable
// Int memory form: matches load / RShiftI / store on the same address $dst
// with the count pinned to rcx (not encoded explicitly).  Flags are killed.
8628 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8629 %{
8630   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8631   effect(KILL cr);
8632 
8633   format %{ "sarl    $dst, $shift" %}
8634   opcode(0xD3, 0x7); /* D3 /7 */
8635   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8636   ins_pipe(ialu_mem_reg);
8637 %}
8638 
8639 // Logical shift right by one
// Int register form (URShiftI).  $shift (immI1) only constrains the match and
// is not encoded (D1 opcode form).  Flags are killed.
8640 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8641 %{
8642   match(Set dst (URShiftI dst shift));
8643   effect(KILL cr);
8644 
8645   format %{ "shrl    $dst, $shift" %}
8646   opcode(0xD1, 0x5); /* D1 /5 */
8647   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8648   ins_pipe(ialu_reg);
8649 %}
8650 
8651 // Logical shift right by one
// Int memory form: matches load / URShiftI / store on the same address $dst.
// $shift (immI1) is not encoded.  Flags are killed.
8652 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8653 %{
8654   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8655   effect(KILL cr);
8656 
8657   format %{ "shrl    $dst, $shift" %}
8658   opcode(0xD1, 0x5); /* D1 /5 */
8659   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8660   ins_pipe(ialu_mem_imm);
8661 %}
8662 
8663 // Logical Shift Right by 8-bit immediate
// Int register form (URShiftI).  The whole instruction, including the shift
// count, is produced by the reg_opc_imm helper.  Flags are killed.
8664 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8665 %{
8666   match(Set dst (URShiftI dst shift));
8667   effect(KILL cr);
8668 
8669   format %{ "shrl    $dst, $shift" %}
8670   opcode(0xC1, 0x5); /* C1 /5 ib */
8671   ins_encode(reg_opc_imm(dst, shift));
8672   ins_pipe(ialu_reg);
8673 %}
8674 
8675 // Logical Shift Right by 8-bit immediate
// Int memory form: matches load / URShiftI / store on the same address $dst;
// the shift count is appended through Con8or32.  Flags are killed.
8676 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8677 %{
8678   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8679   effect(KILL cr);
8680 
8681   format %{ "shrl    $dst, $shift" %}
8682   opcode(0xC1, 0x5); /* C1 /5 ib */
8683   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8684   ins_pipe(ialu_mem_imm);
8685 %}
8686 
8687 // Logical Shift Right by variable
// Int register form (URShiftI).  The count is pinned to rcx (rcx_RegI) and is
// not encoded explicitly (D3 opcode form).  Flags are killed.
8688 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8689 %{
8690   match(Set dst (URShiftI dst shift));
8691   effect(KILL cr);
8692 
8693   format %{ "shrl    $dst, $shift" %}
8694   opcode(0xD3, 0x5); /* D3 /5 */
8695   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8696   ins_pipe(ialu_reg_reg);
8697 %}
8698 
8699 // Logical Shift Right by variable
// Int memory form: matches load / URShiftI / store on the same address $dst
// with the count pinned to rcx (not encoded explicitly).  Flags are killed.
8700 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8701 %{
8702   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8703   effect(KILL cr);
8704 
8705   format %{ "shrl    $dst, $shift" %}
8706   opcode(0xD3, 0x5); /* D3 /5 */
8707   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8708   ins_pipe(ialu_mem_reg);
8709 %}
8710 
8711 // Long Shift Instructions
8712 // Shift Left by one
// Long register form (LShiftL, format: salq) using the wide REX helper.  The
// count operand is an int immediate (immI1) and is not encoded.  Flags killed.
8713 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8714 %{
8715   match(Set dst (LShiftL dst shift));
8716   effect(KILL cr);
8717 
8718   format %{ "salq    $dst, $shift" %}
8719   opcode(0xD1, 0x4); /* D1 /4 */
8720   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8721   ins_pipe(ialu_reg);
8722 %}
8723 
8724 // Shift Left by one
// Long memory form: matches load / LShiftL / store on the same address $dst.
// $shift (immI1) is not encoded.  Flags are killed.
8725 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8726 %{
8727   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8728   effect(KILL cr);
8729 
8730   format %{ "salq    $dst, $shift" %}
8731   opcode(0xD1, 0x4); /* D1 /4 */
8732   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8733   ins_pipe(ialu_mem_imm);
8734 %}
8735 
8736 // Shift Left by 8-bit immediate
     // Register form for longs: matches dst = dst << shift with an immI8
     // count, encoded through reg_opc_imm_wide (C1 /4 ib form).
     // The flags operand is declared killed.
8737 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8738 %{
8739   match(Set dst (LShiftL dst shift));
8740   effect(KILL cr);
8741
8742   format %{ "salq    $dst, $shift" %}
8743   opcode(0xC1, 0x4); /* C1 /4 ib */
8744   ins_encode(reg_opc_imm_wide(dst, shift));
8745   ins_pipe(ialu_reg);
8746 %}
8747 
8748 // Shift Left by 8-bit immediate
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) << $shift) with an immI8 count.
     // The flags operand is declared killed.
8749 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8750 %{
8751   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8752   effect(KILL cr);
8753
8754   format %{ "salq    $dst, $shift" %}
8755   opcode(0xC1, 0x4); /* C1 /4 ib */
8756   ins_encode(REX_mem_wide(dst), OpcP,
8757              RM_opc_mem(secondary, dst), Con8or32(shift));
8758   ins_pipe(ialu_mem_imm);
8759 %}
8760 
8761 // Shift Left by variable
     // Register form for longs: matches dst = dst << shift with the count
     // operand restricted to the rcx_RegI class.  Only dst appears in the
     // encoding (D3 /4 form).  The flags operand is declared killed.
8762 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8763 %{
8764   match(Set dst (LShiftL dst shift));
8765   effect(KILL cr);
8766
8767   format %{ "salq    $dst, $shift" %}
8768   opcode(0xD3, 0x4); /* D3 /4 */
8769   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8770   ins_pipe(ialu_reg_reg);
8771 %}
8772 
8773 // Shift Left by variable
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) << $shift) with the count operand restricted to the
     // rcx_RegI class.  The flags operand is declared killed.
8774 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8775 %{
8776   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8777   effect(KILL cr);
8778
8779   format %{ "salq    $dst, $shift" %}
8780   opcode(0xD3, 0x4); /* D3 /4 */
8781   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8782   ins_pipe(ialu_mem_reg);
8783 %}
8784 
8785 // Arithmetic shift right by one
     // Register form for longs: matches dst = dst >> shift where shift is an
     // immI1 constant.  The count is not encoded; the D1 /7 opcode form is
     // used with dst only.  The flags operand is declared killed.
8786 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8787 %{
8788   match(Set dst (RShiftL dst shift));
8789   effect(KILL cr);
8790
8791   format %{ "sarq    $dst, $shift" %}
8792   opcode(0xD1, 0x7); /* D1 /7 */
8793   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8794   ins_pipe(ialu_reg);
8795 %}
8796 
8797 // Arithmetic shift right by one
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >> $shift) where shift is an immI1 constant.
     // The flags operand is declared killed.
8798 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8799 %{
8800   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8801   effect(KILL cr);
8802
8803   format %{ "sarq    $dst, $shift" %}
8804   opcode(0xD1, 0x7); /* D1 /7 */
8805   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8806   ins_pipe(ialu_mem_imm);
8807 %}
8808 
8809 // Arithmetic Shift Right by 8-bit immediate
     // Register form for longs: matches dst = dst >> shift with an immI8
     // count, encoded through reg_opc_imm_wide (C1 /7 ib form).
     // The flags operand is declared killed.
     // dst is a register, so the register ALU pipeline class is used, the
     // same as salL_rReg_imm and shrL_rReg_imm.
8810 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8811 %{
8812   match(Set dst (RShiftL dst shift));
8813   effect(KILL cr);
8814
8815   format %{ "sarq    $dst, $shift" %}
8816   opcode(0xC1, 0x7); /* C1 /7 ib */
8817   ins_encode(reg_opc_imm_wide(dst, shift));
8818   ins_pipe(ialu_reg);
8819 %}
8820 
8821 // Arithmetic Shift Right by 8-bit immediate
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >> $shift) with an immI8 count.
     // The flags operand is declared killed.
8822 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8823 %{
8824   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8825   effect(KILL cr);
8826
8827   format %{ "sarq    $dst, $shift" %}
8828   opcode(0xC1, 0x7); /* C1 /7 ib */
8829   ins_encode(REX_mem_wide(dst), OpcP,
8830              RM_opc_mem(secondary, dst), Con8or32(shift));
8831   ins_pipe(ialu_mem_imm);
8832 %}
8833 
8834 // Arithmetic Shift Right by variable
     // Register form for longs: matches dst = dst >> shift with the count
     // operand restricted to the rcx_RegI class.  Only dst appears in the
     // encoding (D3 /7 form).  The flags operand is declared killed.
8835 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8836 %{
8837   match(Set dst (RShiftL dst shift));
8838   effect(KILL cr);
8839
8840   format %{ "sarq    $dst, $shift" %}
8841   opcode(0xD3, 0x7); /* D3 /7 */
8842   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8843   ins_pipe(ialu_reg_reg);
8844 %}
8845 
8846 // Arithmetic Shift Right by variable
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >> $shift) with the count operand restricted to the
     // rcx_RegI class.  The flags operand is declared killed.
8847 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8848 %{
8849   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8850   effect(KILL cr);
8851
8852   format %{ "sarq    $dst, $shift" %}
8853   opcode(0xD3, 0x7); /* D3 /7 */
8854   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8855   ins_pipe(ialu_mem_reg);
8856 %}
8857 
8858 // Logical shift right by one
     // Register form for longs: matches dst = dst >>> shift where shift is
     // an immI1 constant.  The count is not encoded; the D1 /5 opcode form
     // is used with dst only.  The flags operand is declared killed.
8859 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8860 %{
8861   match(Set dst (URShiftL dst shift));
8862   effect(KILL cr);
8863
8864   format %{ "shrq    $dst, $shift" %}
8865   opcode(0xD1, 0x5); /* D1 /5 */
8866   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8867   ins_pipe(ialu_reg);
8868 %}
8869 
8870 // Logical shift right by one
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >>> $shift) where shift is an immI1 constant.
     // The flags operand is declared killed.
8871 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8872 %{
8873   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8874   effect(KILL cr);
8875
8876   format %{ "shrq    $dst, $shift" %}
8877   opcode(0xD1, 0x5); /* D1 /5 */
8878   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8879   ins_pipe(ialu_mem_imm);
8880 %}
8881 
8882 // Logical Shift Right by 8-bit immediate
     // Register form for longs: matches dst = dst >>> shift with an immI8
     // count, encoded through reg_opc_imm_wide (C1 /5 ib form).
     // The flags operand is declared killed.
8883 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8884 %{
8885   match(Set dst (URShiftL dst shift));
8886   effect(KILL cr);
8887
8888   format %{ "shrq    $dst, $shift" %}
8889   opcode(0xC1, 0x5); /* C1 /5 ib */
8890   ins_encode(reg_opc_imm_wide(dst, shift));
8891   ins_pipe(ialu_reg);
8892 %}
8893 
8894 
8895 // Logical Shift Right by 8-bit immediate
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >>> $shift) with an immI8 count.
     // The flags operand is declared killed.
8896 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8897 %{
8898   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8899   effect(KILL cr);
8900
8901   format %{ "shrq    $dst, $shift" %}
8902   opcode(0xC1, 0x5); /* C1 /5 ib */
8903   ins_encode(REX_mem_wide(dst), OpcP,
8904              RM_opc_mem(secondary, dst), Con8or32(shift));
8905   ins_pipe(ialu_mem_imm);
8906 %}
8907 
8908 // Logical Shift Right by variable
     // Register form for longs: matches dst = dst >>> shift with the count
     // operand restricted to the rcx_RegI class.  Only dst appears in the
     // encoding (D3 /5 form).  The flags operand is declared killed.
8909 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8910 %{
8911   match(Set dst (URShiftL dst shift));
8912   effect(KILL cr);
8913
8914   format %{ "shrq    $dst, $shift" %}
8915   opcode(0xD3, 0x5); /* D3 /5 */
8916   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8917   ins_pipe(ialu_reg_reg);
8918 %}
8919 
8920 // Logical Shift Right by variable
     // In-place memory form for longs: matches a StoreL to $dst of
     // ((LoadL $dst) >>> $shift) with the count operand restricted to the
     // rcx_RegI class.  The flags operand is declared killed.
8921 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8922 %{
8923   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8924   effect(KILL cr);
8925
8926   format %{ "shrq    $dst, $shift" %}
8927   opcode(0xD3, 0x5); /* D3 /5 */
8928   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8929   ins_pipe(ialu_mem_reg);
8930 %}
8931 
8932 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8933 // This idiom is used by the compiler for the i2b bytecode.
     // The pair of shifts is replaced by a single movsbl from $src to $dst.
     // No flags operand is declared for this instruct.
8934 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8935 %{
8936   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8937
8938   format %{ "movsbl  $dst, $src\t# i2b" %}
8939   opcode(0x0F, 0xBE);
8940   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8941   ins_pipe(ialu_reg_reg);
8942 %}
8943 
8944 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8945 // This idiom is used by the compiler for the i2s bytecode.
     // The pair of shifts is replaced by a single movswl from $src to $dst.
     // No flags operand is declared for this instruct.
8946 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8947 %{
8948   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8949
8950   format %{ "movswl  $dst, $src\t# i2s" %}
8951   opcode(0x0F, 0xBF);
8952   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8953   ins_pipe(ialu_reg_reg);
8954 %}
8955 
8956 // ROL/ROR instructions
8957 
8958 // ROL expand
8959 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Int rotate-left-by-one building block.  It has no match rule; it is
       // instantiated through the expand in rolI_rReg_i1.  dst is both read
       // and written (USE_DEF) and the flags operand is declared killed.
8960   effect(KILL cr, USE_DEF dst);
8961
8962   format %{ "roll    $dst" %}
8963   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8964   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8965   ins_pipe(ialu_reg);
8966 %}
8967 
8968 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Int rotate-left by an 8-bit immediate.  It has no match rule; it is
       // instantiated through the expand in rolI_rReg_i8.  dst is USE_DEF,
       // shift is a plain USE and the flags operand is declared killed.
8969   effect(USE_DEF dst, USE shift, KILL cr);
8970
8971   format %{ "roll    $dst, $shift" %}
8972   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8973   ins_encode( reg_opc_imm(dst, shift) );
8974   ins_pipe(ialu_reg);
8975 %}
8976 
8977 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8978 %{
       // Int rotate-left by a variable count.  It has no match rule; it is
       // instantiated through the expands in rolI_rReg_Var_C0 and
       // rolI_rReg_Var_C32.  The count operand is of class rcx_RegI and dst
       // is of class no_rcx_RegI; only dst appears in the encoding.
8979   effect(USE_DEF dst, USE shift, KILL cr);
8980
8981   format %{ "roll    $dst, $shift" %}
8982   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8983   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8984   ins_pipe(ialu_reg_reg);
8985 %}
8986 // end of ROL expand
8987 
8988 // Rotate Left by one
     // Matches (dst << lshift) | (dst >>> rshift) on ints, where lshift is
     // an immI1 and rshift an immI_M1 constant, and expands to
     // rolI_rReg_imm1.
8989 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8990 %{
8991   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8992
8993   expand %{
8994     rolI_rReg_imm1(dst, cr);
8995   %}
8996 %}
8997 
8998 // Rotate Left by 8-bit immediate
     // Matches (dst << lshift) | (dst >>> rshift) on ints with immI8 counts.
     // The predicate accepts the node only when the sum of the two constants
     // it reads (presumably the two shift counts) is 0 modulo 32, i.e. the
     // shifts together form a 32-bit rotate.  Expands to rolI_rReg_imm8 with
     // the left-shift count.
8999 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9000 %{
9001   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9002   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9003
9004   expand %{
9005     rolI_rReg_imm8(dst, lshift, cr);
9006   %}
9007 %}
9008 
9009 // Rotate Left by variable
     // Matches (dst << shift) | (dst >>> (0 - shift)) on ints, with zero an
     // immI0 constant, and expands to rolI_rReg_CL.
9010 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9011 %{
9012   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9013
9014   expand %{
9015     rolI_rReg_CL(dst, shift, cr);
9016   %}
9017 %}
9018 
9019 // Rotate Left by variable
     // Matches (dst << shift) | (dst >>> (c32 - shift)) on ints, with c32 an
     // immI_32 constant, and expands to rolI_rReg_CL.
9020 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9021 %{
9022   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9023
9024   expand %{
9025     rolI_rReg_CL(dst, shift, cr);
9026   %}
9027 %}
9028 
9029 // ROR expand
9030 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
9031 %{
       // Int rotate-right-by-one building block.  It has no match rule; it
       // is instantiated through the expand in rorI_rReg_i1.  dst is USE_DEF
       // and the flags operand is declared killed.
9032   effect(USE_DEF dst, KILL cr);
9033
9034   format %{ "rorl    $dst" %}
9035   opcode(0xD1, 0x1); /* D1 /1 */
9036   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9037   ins_pipe(ialu_reg);
9038 %}
9039 
9040 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
9041 %{
       // Int rotate-right by an 8-bit immediate.  It has no match rule; it
       // is instantiated through the expand in rorI_rReg_i8.  dst is USE_DEF,
       // shift is a plain USE and the flags operand is declared killed.
9042   effect(USE_DEF dst, USE shift, KILL cr);
9043
9044   format %{ "rorl    $dst, $shift" %}
9045   opcode(0xC1, 0x1); /* C1 /1 ib */
9046   ins_encode(reg_opc_imm(dst, shift));
9047   ins_pipe(ialu_reg);
9048 %}
9049 
9050 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
9051 %{
       // Int rotate-right by a variable count.  It has no match rule; it is
       // instantiated through the expands in rorI_rReg_Var_C0 and
       // rorI_rReg_Var_C32.  The count operand is of class rcx_RegI and dst
       // is of class no_rcx_RegI; only dst appears in the encoding.
9052   effect(USE_DEF dst, USE shift, KILL cr);
9053
9054   format %{ "rorl    $dst, $shift" %}
9055   opcode(0xD3, 0x1); /* D3 /1 */
9056   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
9057   ins_pipe(ialu_reg_reg);
9058 %}
9059 // end of ROR expand
9060 
9061 // Rotate Right by one
     // Matches (dst >>> rshift) | (dst << lshift) on ints, where rshift is
     // an immI1 and lshift an immI_M1 constant, and expands to
     // rorI_rReg_imm1.
9062 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9063 %{
9064   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9065
9066   expand %{
9067     rorI_rReg_imm1(dst, cr);
9068   %}
9069 %}
9070 
9071 // Rotate Right by 8-bit immediate
     // Matches (dst >>> rshift) | (dst << lshift) on ints with immI8 counts.
     // The predicate accepts the node only when the sum of the two constants
     // it reads (presumably the two shift counts) is 0 modulo 32, i.e. the
     // shifts together form a 32-bit rotate.  Expands to rorI_rReg_imm8 with
     // the right-shift count.
9072 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9073 %{
9074   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9075   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9076
9077   expand %{
9078     rorI_rReg_imm8(dst, rshift, cr);
9079   %}
9080 %}
9081 
9082 // Rotate Right by variable
     // Matches (dst >>> shift) | (dst << (0 - shift)) on ints, with zero an
     // immI0 constant, and expands to rorI_rReg_CL.
9083 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9084 %{
9085   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9086
9087   expand %{
9088     rorI_rReg_CL(dst, shift, cr);
9089   %}
9090 %}
9091 
9092 // Rotate Right by variable
     // Matches (dst >>> shift) | (dst << (c32 - shift)) on ints, with c32 an
     // immI_32 constant, and expands to rorI_rReg_CL.
9093 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
9094 %{
9095   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9096
9097   expand %{
9098     rorI_rReg_CL(dst, shift, cr);
9099   %}
9100 %}
9101 
9102 // for long rotate
9103 // ROL expand
9104 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Long rotate-left-by-one building block.  It has no match rule; it
       // is instantiated through the expand in rolL_rReg_i1.  dst is USE_DEF
       // and the flags operand is declared killed.
9105   effect(USE_DEF dst, KILL cr);
9106
9107   format %{ "rolq    $dst" %}
9108   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
9109   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9110   ins_pipe(ialu_reg);
9111 %}
9112 
9113 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Long rotate-left by an 8-bit immediate.  It has no match rule; it
       // is instantiated through the expand in rolL_rReg_i8.  dst is USE_DEF,
       // shift is a plain USE and the flags operand is declared killed.
9114   effect(USE_DEF dst, USE shift, KILL cr);
9115
9116   format %{ "rolq    $dst, $shift" %}
9117   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
9118   ins_encode( reg_opc_imm_wide(dst, shift) );
9119   ins_pipe(ialu_reg);
9120 %}
9121 
9122 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9123 %{
       // Long rotate-left by a variable count.  It has no match rule; it is
       // instantiated through the expands in rolL_rReg_Var_C0 and
       // rolL_rReg_Var_C64.  The count operand is of class rcx_RegI and dst
       // is of class no_rcx_RegL; only dst appears in the encoding.
9124   effect(USE_DEF dst, USE shift, KILL cr);
9125
9126   format %{ "rolq    $dst, $shift" %}
9127   opcode(0xD3, 0x0); /* Opcode D3 /0 */
9128   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9129   ins_pipe(ialu_reg_reg);
9130 %}
9131 // end of ROL expand
9132 
9133 // Rotate Left by one
     // Matches (dst << lshift) | (dst >>> rshift) on longs, where lshift is
     // an immI1 and rshift an immI_M1 constant, and expands to
     // rolL_rReg_imm1.
9134 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
9135 %{
9136   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9137
9138   expand %{
9139     rolL_rReg_imm1(dst, cr);
9140   %}
9141 %}
9142 
9143 // Rotate Left by 8-bit immediate
     // Matches (dst << lshift) | (dst >>> rshift) on longs with immI8 counts.
     // The predicate accepts the node only when the sum of the two constants
     // it reads (presumably the two shift counts) is 0 modulo 64, i.e. the
     // shifts together form a 64-bit rotate.  Expands to rolL_rReg_imm8 with
     // the left-shift count.
9144 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9145 %{
9146   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9147   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9148
9149   expand %{
9150     rolL_rReg_imm8(dst, lshift, cr);
9151   %}
9152 %}
9153 
9154 // Rotate Left by variable
     // Matches (dst << shift) | (dst >>> (0 - shift)) on longs, with zero an
     // immI0 constant, and expands to rolL_rReg_CL.
9155 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9156 %{
9157   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9158
9159   expand %{
9160     rolL_rReg_CL(dst, shift, cr);
9161   %}
9162 %}
9163 
9164 // Rotate Left by variable
     // Matches (dst << shift) | (dst >>> (c64 - shift)) on longs, with c64 an
     // immI_64 constant, and expands to rolL_rReg_CL.
9165 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9166 %{
9167   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9168
9169   expand %{
9170     rolL_rReg_CL(dst, shift, cr);
9171   %}
9172 %}
9173 
9174 // ROR expand
9175 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9176 %{
       // Long rotate-right-by-one building block.  It has no match rule; it
       // is instantiated through the expand in rorL_rReg_i1.  dst is USE_DEF
       // and the flags operand is declared killed.
9177   effect(USE_DEF dst, KILL cr);
9178
9179   format %{ "rorq    $dst" %}
9180   opcode(0xD1, 0x1); /* D1 /1 */
9181   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9182   ins_pipe(ialu_reg);
9183 %}
9184 
9185 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9186 %{
       // Long rotate-right by an 8-bit immediate.  It has no match rule; it
       // is instantiated through the expand in rorL_rReg_i8.  dst is USE_DEF,
       // shift is a plain USE and the flags operand is declared killed.
9187   effect(USE_DEF dst, USE shift, KILL cr);
9188
9189   format %{ "rorq    $dst, $shift" %}
9190   opcode(0xC1, 0x1); /* C1 /1 ib */
9191   ins_encode(reg_opc_imm_wide(dst, shift));
9192   ins_pipe(ialu_reg);
9193 %}
9194 
9195 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9196 %{
       // Long rotate-right by a variable count.  It has no match rule; it is
       // instantiated through the expands in rorL_rReg_Var_C0 and
       // rorL_rReg_Var_C64.  The count operand is of class rcx_RegI and dst
       // is of class no_rcx_RegL; only dst appears in the encoding.
9197   effect(USE_DEF dst, USE shift, KILL cr);
9198
9199   format %{ "rorq    $dst, $shift" %}
9200   opcode(0xD3, 0x1); /* D3 /1 */
9201   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9202   ins_pipe(ialu_reg_reg);
9203 %}
9204 // end of ROR expand
9205 
9206 // Rotate Right by one
     // Matches (dst >>> rshift) | (dst << lshift) on longs, where rshift is
     // an immI1 and lshift an immI_M1 constant, and expands to
     // rorL_rReg_imm1.
9207 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9208 %{
9209   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9210
9211   expand %{
9212     rorL_rReg_imm1(dst, cr);
9213   %}
9214 %}
9215 
9216 // Rotate Right by 8-bit immediate
     // Matches (dst >>> rshift) | (dst << lshift) on longs with immI8 counts.
     // The predicate accepts the node only when the sum of the two constants
     // it reads (presumably the two shift counts) is 0 modulo 64, i.e. the
     // shifts together form a 64-bit rotate.  Expands to rorL_rReg_imm8 with
     // the right-shift count.
9217 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9218 %{
9219   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9220   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9221
9222   expand %{
9223     rorL_rReg_imm8(dst, rshift, cr);
9224   %}
9225 %}
9226 
9227 // Rotate Right by variable
     // Matches (dst >>> shift) | (dst << (0 - shift)) on longs, with zero an
     // immI0 constant, and expands to rorL_rReg_CL.
9228 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9229 %{
9230   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9231
9232   expand %{
9233     rorL_rReg_CL(dst, shift, cr);
9234   %}
9235 %}
9236 
9237 // Rotate Right by variable
     // Matches (dst >>> shift) | (dst << (c64 - shift)) on longs, with c64 an
     // immI_64 constant, and expands to rorL_rReg_CL.
9238 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9239 %{
9240   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9241
9242   expand %{
9243     rorL_rReg_CL(dst, shift, cr);
9244   %}
9245 %}
9246 
9247 // Logical Instructions
9248 
9249 // Integer Logical Instructions
9250 
9251 // And Instructions
9252 // And Register with Register
     // Matches dst = dst & src on ints (two-operand form, dst is also the
     // first input).  The flags operand is declared killed.
9253 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9254 %{
9255   match(Set dst (AndI dst src));
9256   effect(KILL cr);
9257
9258   format %{ "andl    $dst, $src\t# int" %}
9259   opcode(0x23);
9260   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9261   ins_pipe(ialu_reg_reg);
9262 %}
9263 
9264 // And Register with Immediate 255
     // Matches dst = dst & src where src is an immI_255 constant and emits
     // a movzbl of dst onto itself instead of an and.  No flags operand is
     // declared for this instruct.
9265 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9266 %{
9267   match(Set dst (AndI dst src));
9268
9269   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9270   opcode(0x0F, 0xB6);
9271   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9272   ins_pipe(ialu_reg);
9273 %}
9274 
9275 // And Register with Immediate 255 and promote to long
     // Matches ConvI2L(src & mask) where mask is an immI_255 constant and
     // emits a single movzbl from src into the long dst.  No flags operand
     // is declared for this instruct.
9276 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9277 %{
9278   match(Set dst (ConvI2L (AndI src mask)));
9279
9280   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9281   opcode(0x0F, 0xB6);
9282   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9283   ins_pipe(ialu_reg);
9284 %}
9285 
9286 // And Register with Immediate 65535
     // Matches dst = dst & src where src is an immI_65535 constant and emits
     // a movzwl of dst onto itself instead of an and.  No flags operand is
     // declared for this instruct.
9287 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9288 %{
9289   match(Set dst (AndI dst src));
9290
9291   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9292   opcode(0x0F, 0xB7);
9293   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9294   ins_pipe(ialu_reg);
9295 %}
9296 
9297 // And Register with Immediate 65535 and promote to long
     // Matches ConvI2L(src & mask) where mask is an immI_65535 constant and
     // emits a single movzwl from src into the long dst.  No flags operand
     // is declared for this instruct.
9298 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9299 %{
9300   match(Set dst (ConvI2L (AndI src mask)));
9301
9302   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9303   opcode(0x0F, 0xB7);
9304   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9305   ins_pipe(ialu_reg);
9306 %}
9307 
9308 // And Register with Immediate
     // Matches dst = dst & src on ints for a general immI constant.  The
     // immediate is emitted through Con8or32 (presumably 8 or 32 bits wide
     // depending on the value).  The flags operand is declared killed.
9309 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9310 %{
9311   match(Set dst (AndI dst src));
9312   effect(KILL cr);
9313
9314   format %{ "andl    $dst, $src\t# int" %}
9315   opcode(0x81, 0x04); /* Opcode 81 /4 */
9316   ins_encode(OpcSErm(dst, src), Con8or32(src));
9317   ins_pipe(ialu_reg);
9318 %}
9319 
9320 // And Register with Memory
     // Matches dst = dst & (LoadI src), folding the load into the and.
     // Declared cost is 125.  The flags operand is declared killed.
9321 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9322 %{
9323   match(Set dst (AndI dst (LoadI src)));
9324   effect(KILL cr);
9325
9326   ins_cost(125);
9327   format %{ "andl    $dst, $src\t# int" %}
9328   opcode(0x23);
9329   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9330   ins_pipe(ialu_reg_mem);
9331 %}
9332 
9333 // And Memory with Register
     // Byte form: matches a StoreB to $dst of ((LoadB $dst) & $src), i.e. an
     // in-place and on a byte in memory.  Declared cost is 150.  The flags
     // operand is declared killed.
9334 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9335 %{
9336   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
9337   effect(KILL cr);
9338
9339   ins_cost(150);
9340   format %{ "andb    $dst, $src\t# byte" %}
9341   opcode(0x20);
9342   ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
9343   ins_pipe(ialu_mem_reg);
9344 %}
9345 
9346 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9347 %{
       // Int form of And Memory with Register: matches a StoreI to $dst of
       // ((LoadI $dst) & $src), i.e. an in-place and on an int in memory.
       // Declared cost is 150.  The flags operand is declared killed.
9348   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9349   effect(KILL cr);
9350
9351   ins_cost(150);
9352   format %{ "andl    $dst, $src\t# int" %}
9353   opcode(0x21); /* Opcode 21 /r */
9354   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9355   ins_pipe(ialu_mem_reg);
9356 %}
9357 
9358 // And Memory with Immediate
     // Matches a StoreI to $dst of ((LoadI $dst) & $src) for an immI
     // constant, i.e. an in-place and on an int in memory.  Declared cost is
     // 125.  The flags operand is declared killed.
9359 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9360 %{
9361   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9362   effect(KILL cr);
9363
9364   ins_cost(125);
9365   format %{ "andl    $dst, $src\t# int" %}
9366   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9367   ins_encode(REX_mem(dst), OpcSE(src),
9368              RM_opc_mem(secondary, dst), Con8or32(src));
9369   ins_pipe(ialu_mem_imm);
9370 %}
9371 
9372 // BMI1 instructions
9373 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
       // Int and-not with a memory second operand: matches
       // (src1 ^ minus_1) & (LoadI src2) and emits andnl.  Only selectable
       // when UseBMI1Instructions is set.  Declared cost is 125.  The flags
       // operand is declared killed.
9374   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
9375   predicate(UseBMI1Instructions);
9376   effect(KILL cr);
9377
9378   ins_cost(125);
9379   format %{ "andnl  $dst, $src1, $src2" %}
9380
9381   ins_encode %{
9382     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
9383   %}
9384   ins_pipe(ialu_reg_mem);
9385 %}
9386 
9387 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
       // Int and-not, register form: matches (src1 ^ minus_1) & src2 and
       // emits andnl.  Only selectable when UseBMI1Instructions is set.
       // The flags operand is declared killed.
9388   match(Set dst (AndI (XorI src1 minus_1) src2));
9389   predicate(UseBMI1Instructions);
9390   effect(KILL cr);
9391
9392   format %{ "andnl  $dst, $src1, $src2" %}
9393
9394   ins_encode %{
9395     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
9396   %}
9397   ins_pipe(ialu_reg);
9398 %}
9399 
9400 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
       // Int register form: matches (imm_zero - src) & src, with imm_zero an
       // immI0 constant, and emits blsil.  Only selectable when
       // UseBMI1Instructions is set.  The flags operand is declared killed.
9401   match(Set dst (AndI (SubI imm_zero src) src));
9402   predicate(UseBMI1Instructions);
9403   effect(KILL cr);
9404
9405   format %{ "blsil  $dst, $src" %}
9406
9407   ins_encode %{
9408     __ blsil($dst$$Register, $src$$Register);
9409   %}
9410   ins_pipe(ialu_reg);
9411 %}
9412 
9413 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
       // Int memory form: matches (imm_zero - (LoadI src)) & (LoadI src),
       // with both loads naming the same memory operand, and emits blsil on
       // the address.  Only selectable when UseBMI1Instructions is set.
       // Declared cost is 125.  The flags operand is declared killed.
9414   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
9415   predicate(UseBMI1Instructions);
9416   effect(KILL cr);
9417
9418   ins_cost(125);
9419   format %{ "blsil  $dst, $src" %}
9420
9421   ins_encode %{
9422     __ blsil($dst$$Register, $src$$Address);
9423   %}
9424   ins_pipe(ialu_reg_mem);
9425 %}
9426 
9427 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9428 %{
       // Int memory form: matches ((LoadI src) + minus_1) ^ (LoadI src),
       // with both loads naming the same memory operand, and emits blsmskl
       // on the address.  Only selectable when UseBMI1Instructions is set.
       // Declared cost is 125.  The flags operand is declared killed.
9429   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
9430   predicate(UseBMI1Instructions);
9431   effect(KILL cr);
9432
9433   ins_cost(125);
9434   format %{ "blsmskl $dst, $src" %}
9435
9436   ins_encode %{
9437     __ blsmskl($dst$$Register, $src$$Address);
9438   %}
9439   ins_pipe(ialu_reg_mem);
9440 %}
9441 
9442 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9443 %{
       // Int register form: matches (src + minus_1) ^ src and emits blsmskl.
       // Only selectable when UseBMI1Instructions is set.  The flags operand
       // is declared killed.
9444   match(Set dst (XorI (AddI src minus_1) src));
9445   predicate(UseBMI1Instructions);
9446   effect(KILL cr);
9447
9448   format %{ "blsmskl $dst, $src" %}
9449
9450   ins_encode %{
9451     __ blsmskl($dst$$Register, $src$$Register);
9452   %}
9453
9454   ins_pipe(ialu_reg);
9455 %}
9456 
9457 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9458 %{
       // Int register form: matches (src + minus_1) & src and emits blsrl.
       // Only selectable when UseBMI1Instructions is set.  The flags operand
       // is declared killed.
9459   match(Set dst (AndI (AddI src minus_1) src) );
9460   predicate(UseBMI1Instructions);
9461   effect(KILL cr);
9462
9463   format %{ "blsrl  $dst, $src" %}
9464
9465   ins_encode %{
9466     __ blsrl($dst$$Register, $src$$Register);
9467   %}
9468
       // Both operands are registers, so the register pipeline class is
       // used, the same as blsiI_rReg_rReg and blsmskI_rReg_rReg.
9469   ins_pipe(ialu_reg);
9470 %}
9471 
9472 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9473 %{
       // Int memory form: matches ((LoadI src) + minus_1) & (LoadI src),
       // with both loads naming the same memory operand, and emits blsrl on
       // the address.  Only selectable when UseBMI1Instructions is set.
       // Declared cost is 125.  The flags operand is declared killed.
9474   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
9475   predicate(UseBMI1Instructions);
9476   effect(KILL cr);
9477
9478   ins_cost(125);
9479   format %{ "blsrl  $dst, $src" %}
9480
9481   ins_encode %{
9482     __ blsrl($dst$$Register, $src$$Address);
9483   %}
9484
       // The source is a memory operand, so the register-memory pipeline
       // class is used, the same as blsiI_rReg_mem and blsmskI_rReg_mem.
9485   ins_pipe(ialu_reg_mem);
9486 %}
9487 
9488 // Or Instructions
9489 // Or Register with Register
     // Matches dst = dst | src on ints (two-operand form, dst is also the
     // first input).  The flags operand is declared killed.
9490 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9491 %{
9492   match(Set dst (OrI dst src));
9493   effect(KILL cr);
9494
9495   format %{ "orl     $dst, $src\t# int" %}
9496   opcode(0x0B);
9497   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9498   ins_pipe(ialu_reg_reg);
9499 %}
9500 
9501 // Or Register with Immediate
     // Matches dst = dst | src on ints for a general immI constant.  The
     // immediate is emitted through Con8or32 (presumably 8 or 32 bits wide
     // depending on the value).  The flags operand is declared killed.
9502 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9503 %{
9504   match(Set dst (OrI dst src));
9505   effect(KILL cr);
9506
9507   format %{ "orl     $dst, $src\t# int" %}
9508   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9509   ins_encode(OpcSErm(dst, src), Con8or32(src));
9510   ins_pipe(ialu_reg);
9511 %}
9512 
9513 // Or Register with Memory
     // Matches dst = dst | (LoadI src), folding the load into the or.
     // Declared cost is 125.  The flags operand is declared killed.
9514 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9515 %{
9516   match(Set dst (OrI dst (LoadI src)));
9517   effect(KILL cr);
9518
9519   ins_cost(125);
9520   format %{ "orl     $dst, $src\t# int" %}
9521   opcode(0x0B);
9522   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9523   ins_pipe(ialu_reg_mem);
9524 %}
9525 
9526 // Or Memory with Register
     // Byte form: matches a StoreB to $dst of ((LoadB $dst) | $src), i.e. an
     // in-place or on a byte in memory.  Declared cost is 150.  The flags
     // operand is declared killed.
9527 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9528 %{
9529   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
9530   effect(KILL cr);
9531
9532   ins_cost(150);
9533   format %{ "orb    $dst, $src\t# byte" %}
9534   opcode(0x08);
9535   ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
9536   ins_pipe(ialu_mem_reg);
9537 %}
9538 
9539 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9540 %{
       // Int form of Or Memory with Register: matches a StoreI to $dst of
       // ((LoadI $dst) | $src), i.e. an in-place or on an int in memory.
       // Declared cost is 150.  The flags operand is declared killed.
9541   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9542   effect(KILL cr);
9543
9544   ins_cost(150);
9545   format %{ "orl     $dst, $src\t# int" %}
9546   opcode(0x09); /* Opcode 09 /r */
9547   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9548   ins_pipe(ialu_mem_reg);
9549 %}
9550 
9551 // Or Memory with Immediate
     // Matches a StoreI to $dst of ((LoadI $dst) | $src) for an immI
     // constant, i.e. an in-place or on an int in memory.  Declared cost is
     // 125.  The flags operand is declared killed.
9552 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9553 %{
9554   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9555   effect(KILL cr);
9556
9557   ins_cost(125);
9558   format %{ "orl     $dst, $src\t# int" %}
9559   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9560   ins_encode(REX_mem(dst), OpcSE(src),
9561              RM_opc_mem(secondary, dst), Con8or32(src));
9562   ins_pipe(ialu_mem_imm);
9563 %}
9564 
9565 // Xor Instructions
9566 // Xor Register with Register
     // Matches dst = dst ^ src on ints (two-operand form, dst is also the
     // first input).  The flags operand is declared killed.
9567 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9568 %{
9569   match(Set dst (XorI dst src));
9570   effect(KILL cr);
9571
9572   format %{ "xorl    $dst, $src\t# int" %}
9573   opcode(0x33);
9574   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9575   ins_pipe(ialu_reg_reg);
9576 %}
9577 
9578 // Xor Register with Immediate -1
     // Matches dst = dst ^ imm where imm is an immI_M1 constant and emits a
     // notl on dst instead of an xor.  No flags operand is declared for this
     // instruct.
     // NOTE(review): the format string prints "not" while the encoding calls
     // notl.
9579 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9580   match(Set dst (XorI dst imm));
9581
9582   format %{ "not    $dst" %}
9583   ins_encode %{
9584      __ notl($dst$$Register);
9585   %}
9586   ins_pipe(ialu_reg);
9587 %}
9588 
9589 // Xor Register with Immediate
     // Matches dst = dst ^ src on ints for a general immI constant.  The
     // immediate is emitted through Con8or32 (presumably 8 or 32 bits wide
     // depending on the value).  The flags operand is declared killed.
9590 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9591 %{
9592   match(Set dst (XorI dst src));
9593   effect(KILL cr);
9594
9595   format %{ "xorl    $dst, $src\t# int" %}
9596   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9597   ins_encode(OpcSErm(dst, src), Con8or32(src));
9598   ins_pipe(ialu_reg);
9599 %}
9600 
9601 // Xor Register with Memory
     // Matches dst = dst ^ (LoadI src), folding the load into the xor.
     // Declared cost is 125.  The flags operand is declared killed.
9602 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9603 %{
9604   match(Set dst (XorI dst (LoadI src)));
9605   effect(KILL cr);
9606
9607   ins_cost(125);
9608   format %{ "xorl    $dst, $src\t# int" %}
9609   opcode(0x33);
9610   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9611   ins_pipe(ialu_reg_mem);
9612 %}
9613 
9614 // Xor Memory with Register
     // Byte form: matches a StoreB to $dst of ((LoadB $dst) ^ $src), i.e. an
     // in-place xor on a byte in memory.  Declared cost is 150.  The flags
     // operand is declared killed.
9615 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9616 %{
9617   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
9618   effect(KILL cr);
9619
9620   ins_cost(150);
9621   format %{ "xorb    $dst, $src\t# byte" %}
9622   opcode(0x30);
9623   ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
9624   ins_pipe(ialu_mem_reg);
9625 %}
9626 
9627 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9628 %{
       // Int form of Xor Memory with Register: matches a StoreI to $dst of
       // ((LoadI $dst) ^ $src), i.e. an in-place xor on an int in memory.
       // Declared cost is 150.  The flags operand is declared killed.
9629   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9630   effect(KILL cr);
9631
9632   ins_cost(150);
9633   format %{ "xorl    $dst, $src\t# int" %}
9634   opcode(0x31); /* Opcode 31 /r */
9635   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9636   ins_pipe(ialu_mem_reg);
9637 %}
9638 
9639 // Xor Memory with Immediate
     // Matches a StoreI to $dst of ((LoadI $dst) ^ $src) for an immI
     // constant, i.e. an in-place xor on an int in memory.  Declared cost is
     // 125.  The flags operand is declared killed.
9640 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9641 %{
9642   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9643   effect(KILL cr);
9644
9645   ins_cost(125);
9646   format %{ "xorl    $dst, $src\t# int" %}
9647   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9648   ins_encode(REX_mem(dst), OpcSE(src),
9649              RM_opc_mem(secondary, dst), Con8or32(src));
9650   ins_pipe(ialu_mem_imm);
9651 %}
9652 
9653 
9654 // Long Logical Instructions
9655 
9656 // And Instructions
9657 // And Register with Register
     // Matches dst = dst & src on longs (two-operand form, dst is also the
     // first input).  The flags operand is declared killed.
9658 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9659 %{
9660   match(Set dst (AndL dst src));
9661   effect(KILL cr);
9662
9663   format %{ "andq    $dst, $src\t# long" %}
9664   opcode(0x23);
9665   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9666   ins_pipe(ialu_reg_reg);
9667 %}
9668 
9669 // And Register with Immediate 255
     // Matches dst = dst & src on longs where src is an immL_255 constant
     // and emits a movzbq of dst onto itself instead of an and.  No flags
     // operand is declared for this instruct.
9670 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9671 %{
9672   match(Set dst (AndL dst src));
9673
9674   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9675   opcode(0x0F, 0xB6);
9676   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9677   ins_pipe(ialu_reg);
9678 %}
9679 
9680 // And Register with Immediate 65535
     // Matches dst = dst & src on longs where src is an immL_65535 constant
     // and emits a movzwq of dst onto itself instead of an and.  No flags
     // operand is declared for this instruct.
9681 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9682 %{
9683   match(Set dst (AndL dst src));
9684
9685   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9686   opcode(0x0F, 0xB7);
9687   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9688   ins_pipe(ialu_reg);
9689 %}
9690 
9691 // And Register with Immediate
     // Matches dst = dst & src on longs where src is an immL32 constant.
     // The immediate is emitted through Con8or32 (presumably 8 or 32 bits
     // wide depending on the value).  The flags operand is declared killed.
9692 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9693 %{
9694   match(Set dst (AndL dst src));
9695   effect(KILL cr);
9696
9697   format %{ "andq    $dst, $src\t# long" %}
9698   opcode(0x81, 0x04); /* Opcode 81 /4 */
9699   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9700   ins_pipe(ialu_reg);
9701 %}
9702 
9703 // And Register with Memory
     // Matches dst = dst & (LoadL src), folding the load into the and.
     // Declared cost is 125.  The flags operand is declared killed.
9704 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9705 %{
9706   match(Set dst (AndL dst (LoadL src)));
9707   effect(KILL cr);
9708
9709   ins_cost(125);
9710   format %{ "andq    $dst, $src\t# long" %}
9711   opcode(0x23);
9712   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9713   ins_pipe(ialu_reg_mem);
9714 %}
9715 
9716 // And Memory with Register
     // Matches a StoreL to $dst of ((LoadL $dst) & $src), i.e. an in-place
     // and on a long in memory.  Declared cost is 150.  The flags operand is
     // declared killed.
9717 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9718 %{
9719   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9720   effect(KILL cr);
9721
9722   ins_cost(150);
9723   format %{ "andq    $dst, $src\t# long" %}
9724   opcode(0x21); /* Opcode 21 /r */
9725   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9726   ins_pipe(ialu_mem_reg);
9727 %}
9728 
9729 // And Memory with Immediate
     // Matches a StoreL to $dst of ((LoadL $dst) & $src) for an immL32
     // constant, i.e. an in-place and on a long in memory.  Declared cost is
     // 125.  The flags operand is declared killed.
9730 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9731 %{
9732   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9733   effect(KILL cr);
9734
9735   ins_cost(125);
9736   format %{ "andq    $dst, $src\t# long" %}
9737   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9738   ins_encode(REX_mem_wide(dst), OpcSE(src),
9739              RM_opc_mem(secondary, dst), Con8or32(src));
9740   ins_pipe(ialu_mem_imm);
9741 %}
9742 
9743 // BMI1 instructions
9744 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
       // Long and-not with a memory second operand: matches
       // (src1 ^ minus_1) & (LoadL src2) and emits andnq.  Only selectable
       // when UseBMI1Instructions is set.  Declared cost is 125.  The flags
       // operand is declared killed.
9745   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
9746   predicate(UseBMI1Instructions);
9747   effect(KILL cr);
9748
9749   ins_cost(125);
9750   format %{ "andnq  $dst, $src1, $src2" %}
9751
9752   ins_encode %{
9753     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
9754   %}
9755   ins_pipe(ialu_reg_mem);
9756 %}
9757 
9758 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
       // Long and-not, register form: matches (src1 ^ minus_1) & src2 and
       // emits andnq.  Only selectable when UseBMI1Instructions is set.
       // The flags operand is declared killed.
9759   match(Set dst (AndL (XorL src1 minus_1) src2));
9760   predicate(UseBMI1Instructions);
9761   effect(KILL cr);
9762
9763   format %{ "andnq  $dst, $src1, $src2" %}
9764
9765   ins_encode %{
9766     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
9767   %}
       // All operands are registers, so the register pipeline class is used,
       // the same as andnI_rReg_rReg_rReg.
9768   ins_pipe(ialu_reg);
9769 %}
9770 
// Matches (imm_zero - src) & src with src in a register and emits blsiq.
// Only selected when UseBMI1Instructions is set.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9783 
// Memory-source form of blsiq: matches (imm_zero - LoadL(src)) & LoadL(src),
// where both loads name the same memory operand, and emits blsiq with an
// address operand. Only selected when UseBMI1Instructions is set.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9797 
// Memory-source form of blsmskq: matches (LoadL(src) + minus_1) ^ LoadL(src),
// where both loads name the same memory operand. Only selected when
// UseBMI1Instructions is set.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9812 
// Register form of blsmskq: matches (src + minus_1) ^ src.
// Only selected when UseBMI1Instructions is set.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9827 
// Register form of blsrq: matches (src + minus_1) & src.
// Only selected when UseBMI1Instructions is set.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9842 
// Memory-source form of blsrq: matches (LoadL(src) + minus_1) & LoadL(src),
// where both loads name the same memory operand, and emits blsrq with an
// address operand. Only selected when UseBMI1Instructions is set.
// Because the source is read from memory this uses the register-with-memory
// pipeline class, like the other memory-source BMI1 forms.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
9858 
9859 // Or Instructions
// Or Register with Register
// Matches dst = dst | src on long registers; printed as "orq $dst, $src".
// The flags operand cr is declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9871 
// Or Register with a pointer reinterpreted as an integer.
// Matches dst = dst | CastP2X(src); the encoding is the same as orL_rReg.
// Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9882 
9883 
// Or Register with Immediate
// Matches dst = dst | src where src is an immL32 constant. OpcSErm_wide and
// Con8or32 both receive the immediate (presumably to pick an 8- or 32-bit
// immediate form -- see their enc_class definitions).
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9895 
// Or Register with Memory
// Matches dst = dst | LoadL(src), folding the 64-bit load into the OR.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9908 
// Or Memory with Register
// Matches a read-modify-write on a single memory operand:
// StoreL(dst, LoadL(dst) | src). The encoding classes are invoked with
// (src, dst), i.e. the register operand first and the memory operand second.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9921 
// Or Memory with Immediate
// Matches StoreL(dst, LoadL(dst) | src) where src is an immL32 constant.
// The secondary opcode (0x1) is handed to RM_opc_mem.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9935 
9936 // Xor Instructions
// Xor Register with Register
// Matches dst = dst ^ src on long registers; printed as "xorq $dst, $src".
// The flags operand cr is declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9948 
// Xor Register with Immediate -1
// Matches dst = dst ^ imm for the immL_M1 operand and emits notq instead of
// an xor. Unlike the other XorL forms, this instruct lists no flags operand
// and declares no KILL effect.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
     __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9959 
// Xor Register with Immediate
// Matches dst = dst ^ src where src is an immL32 constant. OpcSErm_wide and
// Con8or32 both receive the immediate (presumably to pick an 8- or 32-bit
// immediate form -- see their enc_class definitions).
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9971 
// Xor Register with Memory
// Matches dst = dst ^ LoadL(src), folding the 64-bit load into the XOR.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x33);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9984 
// Xor Memory with Register
// Matches a read-modify-write on a single memory operand:
// StoreL(dst, LoadL(dst) ^ src). The encoding classes are invoked with
// (src, dst), i.e. the register operand first and the memory operand second.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9997 
// Xor Memory with Immediate
// Matches StoreL(dst, LoadL(dst) ^ src) where src is an immL32 constant.
// The secondary opcode (0x6) is handed to RM_opc_mem.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
10011 
// Convert Int to Boolean
// Matches Conv2B on an int register. Emits three instructions in order:
// testl src,src; setnz dst; movzbl dst,dst -- so dst ends up holding the
// zero-extended "src is non-zero" byte. The flags operand cr is killed.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10027 
// Convert Pointer to Boolean
// Same sequence as convI2B, but src is a pointer register and the test uses
// the wide REX prefix (testq): testq src,src; setnz dst; movzbl dst,dst.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
             setNZ_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
  ins_pipe(pipe_slow); // XXX
%}
10043 
// General CmpLTMask: dst = CmpLTMask(p, q).
// Emits cmpl p,q; a set-on-less-than into dst; movzbl dst,dst; negl dst.
// The final negate turns the 0/1 byte into 0/-1, i.e. an all-ones mask when
// the "less than" condition held. Relatively expensive (ins_cost 400).
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
             setLT_reg(dst),
             REX_reg_breg(dst, dst), // movzbl
             Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
             neg_reg(dst));
  ins_pipe(pipe_slow);
%}
10061 
// CmpLTMask against the immI0 operand, computed in place on dst.
// A single arithmetic right shift by 31 replicates the sign bit of dst
// across the whole 32-bit register.
instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
  __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
10074 
/* Better to save a register than avoid a branch */
// Matches p = (CmpLTMask(p, q) & y) + (p - q) and implements it with a short
// branch instead of materializing the mask: subtract q from p, and add y
// only when the subtraction's "greater or equal" condition does not hold.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
            "jge    done\n\t"
            "addl   $p,$y\n"
            "done:  " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    // Skip the conditional add when p >= q.
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
10097 
/* Better to save a register than avoid a branch */
// Matches y = CmpLTMask(p, q) & y and implements it with a short branch
// instead of materializing the mask: y is left untouched when p < q and
// cleared (xorl y,y) otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
            "jlt      done\n\t"
            "xorl     $y, $y\n"
            "done:  " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    // Keep y as is when p < q; otherwise fall through and zero it.
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
10122 
10123 
10124 //---------- FP Instructions------------------------------------------------
10125 
// Float compare into flags (rFlagsRegU), register with register.
// Emits ucomiss followed by emit_cmpfp_fixup. Per the format string the
// fixup is skipped when the parity flag is clear (jnp); otherwise the flags
// are pushed, masked on the stack, and popped back ("saw NaN, set CF").
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10143 
// Float compare into flags, register with register, for the rFlagsRegUCF
// flags class. Emits only ucomiss -- no emit_cmpfp_fixup call -- and is
// costed lower (100) than the rFlagsRegU form (145).
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10154 
// Float compare into flags (rFlagsRegU), register with memory.
// Matches CmpF(src1, LoadF(src2)): the load is folded into ucomiss, which
// is then followed by emit_cmpfp_fixup (sequence shown in the format string).
instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(145);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10172 
// Float compare into flags, register with memory, for the rFlagsRegUCF
// flags class. Emits only ucomiss with an address operand; no fixup call.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10183 
// Float compare into flags (rFlagsRegU), register with float constant.
// The constant is addressed via $constantaddress($con) (the format string
// labels it "load from constant table"); ucomiss is then followed by
// emit_cmpfp_fixup.
instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(145);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10200 
// Float compare into flags, register with float constant, for the
// rFlagsRegUCF flags class. Emits only ucomiss against
// $constantaddress($con); no fixup call.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  match(Set cr (CmpF src con));
  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10210 
// Double compare into flags (rFlagsRegU), register with register.
// Emits ucomisd followed by emit_cmpfp_fixup. Per the format string the
// fixup is skipped when the parity flag is clear (jnp); otherwise the flags
// are pushed, masked on the stack, and popped back ("saw NaN, set CF").
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10228 
// Double compare into flags, register with register, for the rFlagsRegUCF
// flags class. Emits only ucomisd -- no emit_cmpfp_fixup call -- and is
// costed lower (100) than the rFlagsRegU form (145).
// The format string mirrors the single emitted instruction, with no
// trailing annotation.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10239 
// Double compare into flags (rFlagsRegU), register with memory.
// Matches CmpD(src1, LoadD(src2)): the load is folded into ucomisd, which
// is then followed by emit_cmpfp_fixup (sequence shown in the format string).
instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(145);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10257 
// Double compare into flags, register with memory, for the rFlagsRegUCF
// flags class. Emits only ucomisd with an address operand; no fixup call.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10268 
// Double compare into flags (rFlagsRegU), register with double constant.
// The constant is addressed via $constantaddress($con) (the format string
// labels it "load from constant table"); ucomisd is then followed by
// emit_cmpfp_fixup.
instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
  match(Set cr (CmpD src con));

  ins_cost(145);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10285 
// Double compare into flags, register with double constant, for the
// rFlagsRegUCF flags class. Emits only ucomisd against
// $constantaddress($con); no fixup call.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10295 
// Compare into -1,0,1
// Three-way float compare (CmpF3), register with register. Emits ucomiss
// and then emit_cmpfp3 on dst. Per the format string: dst is preset to -1
// and kept on parity (jp) or below (jb); otherwise setne/movzbl leave 0 or 1.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10316 
// Compare into -1,0,1
// Three-way float compare (CmpF3), register with memory: the LoadF of src2
// is folded into ucomiss, followed by emit_cmpfp3 on dst (sequence shown in
// the format string).
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10337 
// Compare into -1,0,1
// Three-way float compare (CmpF3), register with float constant addressed
// via $constantaddress($con), followed by emit_cmpfp3 on dst.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10357 
// Compare into -1,0,1
// Three-way double compare (CmpD3), register with register. Emits ucomisd
// and then emit_cmpfp3 on dst. Per the format string: dst is preset to -1
// and kept on parity (jp) or below (jb); otherwise setne/movzbl leave 0 or 1.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10378 
// Compare into -1,0,1
// Three-way double compare (CmpD3), register with memory: the LoadD of src2
// is folded into ucomisd, followed by emit_cmpfp3 on dst (sequence shown in
// the format string).
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10399 
// Compare into -1,0,1
// Three-way double compare (CmpD3), register with double constant addressed
// via $constantaddress($con), followed by emit_cmpfp3 on dst.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10419 
10420 //----------Arithmetic Conversion Instructions---------------------------------
10421 
// RoundFloat on a float register is matched in place (dst -> dst) and
// emits nothing: the encoding is empty, the cost is 0, and the pipeline
// class is "empty".
instruct roundFloat_nop(regF dst)
%{
  match(Set dst (RoundFloat dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
10430 
// RoundDouble on a double register is matched in place (dst -> dst) and
// emits nothing: the encoding is empty, the cost is 0, and the pipeline
// class is "empty".
instruct roundDouble_nop(regD dst)
%{
  match(Set dst (RoundDouble dst));

  ins_cost(0);
  ins_encode();
  ins_pipe(empty);
%}
10439 
// Float to double conversion, register source: emits cvtss2sd dst, src.
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10450 
// Float to double conversion with the LoadF folded in: emits cvtss2sd with
// a memory source.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10461 
// Double to float conversion, register source: emits cvtsd2ss dst, src.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10472 
// Double to float conversion with the LoadD folded in: emits cvtsd2ss with
// a memory source.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10483 
10484 // XXX do mem variants
// Float to int conversion.
// Fast path: cvttss2sil into dst. If the result equals 0x80000000, the slow
// path spills src to an 8-byte stack slot, calls the f2i_fixup stub, and
// pops the stub's result into dst.
// NOTE(review): 0x80000000 is presumably the value the hardware returns for
// NaN / out-of-range inputs -- confirm against the Intel SDM.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Label done;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    // Slow path: hand the original float to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
    __ pop($dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10511 
// Float to long conversion.
// Fast path: cvttss2siq into dst. The result is compared against the 64-bit
// value at StubRoutines::x86::double_sign_flip() (shown in the format string
// as 0x8000000000000000); on a match, src is spilled to an 8-byte stack
// slot, the f2l_fixup stub is called, and its result is popped into dst.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movss   [rsp], $src\n\t"
            "call    f2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Label done;
    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
    __ cmp64($dst$$Register,
             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
    __ jccb(Assembler::notEqual, done);
    // Slow path: hand the original float to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
    __ pop($dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10539 
// Double to int conversion.
// Fast path: cvttsd2sil into dst. If the result equals 0x80000000, the slow
// path spills src to an 8-byte stack slot, calls the d2i_fixup stub, and
// pops the stub's result into dst.
// NOTE(review): 0x80000000 is presumably the value the hardware returns for
// NaN / out-of-range inputs -- confirm against the Intel SDM.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
            "cmpl    $dst, #0x80000000\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2i_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Label done;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    // Slow path: hand the original double to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
    __ pop($dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10566 
// Double to long conversion.
// Fast path: cvttsd2siq into dst. The result is compared against the 64-bit
// value at StubRoutines::x86::double_sign_flip() (shown in the format string
// as 0x8000000000000000); on a match, src is spilled to an 8-byte stack
// slot, the d2l_fixup stub is called, and its result is popped into dst.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
            "cmpq    $dst, [0x8000000000000000]\n\t"
            "jne,s   done\n\t"
            "subq    rsp, #8\n\t"
            "movsd   [rsp], $src\n\t"
            "call    d2l_fixup\n\t"
            "popq    $dst\n"
    "done:   "%}
  ins_encode %{
    Label done;
    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
    __ cmp64($dst$$Register,
             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
    __ jccb(Assembler::notEqual, done);
    // Slow path: hand the original double to the fixup stub via the stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
    __ pop($dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10594 
// Int to float conversion, register source, via cvtsi2ssl.
// Selected only when UseXmmI2F is false; convXI2F_reg covers the other case.
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10606 
// Int to float conversion with the LoadI folded in: emits cvtsi2ssl with a
// memory source. No predicate is attached to this form.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10617 
// Int to double conversion, register source, via cvtsi2sdl.
// Selected only when UseXmmI2D is false; convXI2D_reg covers the other case.
instruct convI2D_reg_reg(regD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10629 
// Int to double conversion with the LoadI folded in: emits cvtsi2sdl with a
// memory source. No predicate is attached to this form.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10640 
// Int to float conversion used when UseXmmI2F is true: the int is first
// moved into the XMM destination with movdl and then converted in place
// with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10654 
// Int to double conversion used when UseXmmI2D is true: the int is first
// moved into the XMM destination with movdl and then converted in place
// with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10668 
// Long to float conversion, register source: emits cvtsi2ssq dst, src.
instruct convL2F_reg_reg(regF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10679 
// Long to float conversion with the LoadL folded in: emits cvtsi2ssq with a
// memory source.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10690 
// Long to double conversion, register source: emits cvtsi2sdq dst, src.
instruct convL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10701 
// Long to double conversion with the LoadL folded in: emits cvtsi2sdq with
// a memory source.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10712 
// Int to long conversion, register source: emits movslq dst, src.
// Costed at 125, so forms without an explicit ins_cost (such as the
// zero-extending variant) are presumably preferred where they also match.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10724 
10725 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10726 // %{
10727 //   match(Set dst (ConvI2L src));
10728 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10729 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10730 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10731 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10732 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10733 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10734 
10735 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10736 //   ins_encode(enc_copy(dst, src));
10737 // //   opcode(0x63); // needs REX.W
10738 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10739 //   ins_pipe(ialu_reg_reg);
10740 // %}
10741 
// Zero-extend convert int to long
// Matches ConvI2L(src) & mask, where mask is the immL_32bits operand, and
// replaces the sign-extend-then-mask pair with a single 32-bit movl.
// The move is skipped entirely when dst and src were assigned the same
// register encoding.
// The format string describes one instruction, so it carries no trailing
// line continuation.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
10755 
// Zero-extend convert int to long
// Matches ConvI2L(LoadI(src)) & mask, where mask is the immL_32bits operand,
// and replaces the load / sign-extend / mask chain with a single 32-bit movl
// from memory.
// The format string describes one instruction, so it carries no trailing
// line continuation.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10767 
// Matches src & mask on a long register, where mask is the immL_32bits
// operand, and emits a single 32-bit movl dst, src instead of an AND.
// No flags operand is listed for this form.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10778 
// Long-to-int conversion: (ConvL2I src) is emitted as a 32-bit move from the
// long register into the int register.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    const Register int_dst  = $dst$$Register;
    const Register long_src = $src$$Register;
    __ movl(int_dst, long_src);
  %}
  ins_pipe(ialu_reg_reg);
%}
10789 
10790 
// MoveF2I, float stack slot -> int register.
// Loads 32 bits from the source slot (addressed as rsp + disp) into dst.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_slot(rsp, $src$$disp);
    __ movl(dst_reg, src_slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10802 
// MoveI2F, int stack slot -> float (XMM) register.
// Loads the slot at rsp + disp into dst via movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_slot(rsp, $src$$disp);
    __ movflt(dst_xmm, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
10814 
// MoveD2L, double stack slot -> long register.
// Loads 64 bits from the source slot (rsp + disp) into dst.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_slot(rsp, $src$$disp);
    __ movq(dst_reg, src_slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10826 
// MoveL2D, long stack slot -> double (XMM) register.
// Variant selected when UseXmmLoadAndClearUpper is off; the load itself is
// issued through movdbl.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_slot(rsp, $src$$disp);
    __ movdbl(dst_xmm, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
10839 
// MoveL2D, long stack slot -> double (XMM) register.
// Variant selected when UseXmmLoadAndClearUpper is on; the load itself is
// issued through movdbl.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_slot(rsp, $src$$disp);
    __ movdbl(dst_xmm, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
10852 
10853 
// MoveF2I, float (XMM) register -> int stack slot.
// Stores src to the destination slot at rsp + disp via movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address     dst_slot(rsp, $dst$$disp);
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ movflt(dst_slot, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
10865 
// MoveI2F, int register -> float stack slot.
// Stores the 32-bit register to the destination slot at rsp + disp.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address  dst_slot(rsp, $dst$$disp);
    const Register src_reg = $src$$Register;
    __ movl(dst_slot, src_reg);
  %}
  ins_pipe( ialu_mem_reg );
%}
10877 
// MoveD2L, double (XMM) register -> long stack slot.
// Stores src to the destination slot at rsp + disp via movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule (it previously read MoveL2D_reg_stack,
  // which is a different instruct).
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10889 
// MoveL2D, long register -> double stack slot.
// Stores the 64-bit register to the destination slot at rsp + disp.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address  dst_slot(rsp, $dst$$disp);
    const Register src_reg = $src$$Register;
    __ movq(dst_slot, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
10901 
// MoveF2I, float (XMM) register -> int register, via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    dst_gpr = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ movdl(dst_gpr, src_xmm);
  %}
  ins_pipe( pipe_slow );
%}
10912 
// MoveD2L, double (XMM) register -> long register, via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    const Register    dst_gpr = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ movdq(dst_gpr, src_xmm);
  %}
  ins_pipe( pipe_slow );
%}
10923 
// MoveI2F, int register -> float (XMM) register, via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Register    src_gpr = $src$$Register;
    __ movdl(dst_xmm, src_gpr);
  %}
  ins_pipe( pipe_slow );
%}
10934 
// MoveL2D, long register -> double (XMM) register, via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Register    src_gpr = $src$$Register;
    __ movdq(dst_xmm, src_gpr);
  %}
  ins_pipe( pipe_slow );
%}
10945 
10946 
10947 // =======================================================================
// Fast clearing of an array: ClearArray nodes for which is_large() is false.
// cnt and base are consumed (USE_KILL); tmp is an XMM temporary; zero and the
// flags are killed. The format template mirrors the UseFastStosb /
// UseXMMForObjInit choices; the code itself comes from clear_mem().
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // Final argument is false here, matching the !is_large() predicate.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false);
  %}
  ins_pipe(pipe_slow);
%}
11007 
// Array clearing for ClearArray nodes for which is_large() is true.
// cnt and base are consumed (USE_KILL); tmp is an XMM temporary; zero and the
// flags are killed. The format template mirrors the UseFastStosb /
// UseXMMForObjInit choices; the code itself comes from clear_mem().
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       // Last line of this branch: no trailing "\n\t", like the other branches.
       $$emit$$"# L_end:"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Final argument is true here, matching the is_large() predicate.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}
11057 
// StrComp with encoding LL.
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed. Work is delegated to string_compare().
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11073 
// StrComp with encoding UU.
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed. Work is delegated to string_compare().
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11089 
// StrComp with encoding LU.
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed. Work is delegated to string_compare().
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}
11105 
// StrComp with encoding UL.
// Unlike the other StrComp rules, string_compare() receives the operands
// swapped: (str2, str1, cnt2, cnt1). All four are consumed (USE_KILL); tmp1 is
// an XMM temporary and the flags are killed.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legVecS tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    // Second string and its count go first.
    __ string_compare(s2_reg, s1_reg, n2_reg, n1_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11121 
// Fast substring search with a compile-time-constant substring length,
// encoding LL, available only when UseSSE42Intrinsics is set.
// The constant length picks the helper: string_indexofC8() from 16 elements
// up, string_indexof() below that.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_len = (int)$int_cnt2$$constant;
    const Register    s1_reg     = $str1$$Register;
    const Register    s2_reg     = $str2$$Register;
    const Register    n1_reg     = $cnt1$$Register;
    const Register    n2_reg     = $cnt2$$Register;
    const Register    res_reg    = $result$$Register;
    const XMMRegister xmm_vec    = $vec$$XMMRegister;
    const Register    tmp_reg    = $tmp$$Register;
    if (substr_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                        substr_len, res_reg,
                        xmm_vec, tmp_reg, StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1_reg, s2_reg, n1_reg, n2_reg,
                          substr_len, res_reg,
                          xmm_vec, tmp_reg, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11150 
// Fast substring search with a compile-time-constant substring length,
// encoding UU, available only when UseSSE42Intrinsics is set.
// The constant length picks the helper: string_indexofC8() from 8 elements
// up, string_indexof() below that.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_len = (int)$int_cnt2$$constant;
    const Register    s1_reg     = $str1$$Register;
    const Register    s2_reg     = $str2$$Register;
    const Register    n1_reg     = $cnt1$$Register;
    const Register    n2_reg     = $cnt2$$Register;
    const Register    res_reg    = $result$$Register;
    const XMMRegister xmm_vec    = $vec$$XMMRegister;
    const Register    tmp_reg    = $tmp$$Register;
    if (substr_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                        substr_len, res_reg,
                        xmm_vec, tmp_reg, StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1_reg, s2_reg, n1_reg, n2_reg,
                          substr_len, res_reg,
                          xmm_vec, tmp_reg, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11179 
// Fast substring search with a compile-time-constant substring length,
// encoding UL, available only when UseSSE42Intrinsics is set.
// The constant length picks the helper: string_indexofC8() from 8 elements
// up, string_indexof() below that.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_len = (int)$int_cnt2$$constant;
    const Register    s1_reg     = $str1$$Register;
    const Register    s2_reg     = $str2$$Register;
    const Register    n1_reg     = $cnt1$$Register;
    const Register    n2_reg     = $cnt2$$Register;
    const Register    res_reg    = $result$$Register;
    const XMMRegister xmm_vec    = $vec$$XMMRegister;
    const Register    tmp_reg    = $tmp$$Register;
    if (substr_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                        substr_len, res_reg,
                        xmm_vec, tmp_reg, StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1_reg, s2_reg, n1_reg, n2_reg,
                          substr_len, res_reg,
                          xmm_vec, tmp_reg, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11208 
// StrIndexOf with a substring length held in a register (cnt2), encoding LL,
// available only when UseSSE42Intrinsics is set. string_indexof() is given
// -1 in the constant-length position.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_vec = $vec$$XMMRegister;
    const Register    tmp_reg = $tmp$$Register;
    __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                      (-1), res_reg,
                      xmm_vec, tmp_reg, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11225 
// StrIndexOf with a substring length held in a register (cnt2), encoding UU,
// available only when UseSSE42Intrinsics is set. string_indexof() is given
// -1 in the constant-length position.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_vec = $vec$$XMMRegister;
    const Register    tmp_reg = $tmp$$Register;
    __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                      (-1), res_reg,
                      xmm_vec, tmp_reg, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11242 
// StrIndexOf with a substring length held in a register (cnt2), encoding UL,
// available only when UseSSE42Intrinsics is set. string_indexof() is given
// -1 in the constant-length position.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_vec = $vec$$XMMRegister;
    const Register    tmp_reg = $tmp$$Register;
    __ string_indexof(s1_reg, s2_reg, n1_reg, n2_reg,
                      (-1), res_reg,
                      xmm_vec, tmp_reg, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11259 
// StrIndexOfChar, available only when UseSSE42Intrinsics is set.
// str1, cnt1 and ch are consumed (USE_KILL); three XMM temporaries plus one
// general-purpose temporary are reserved, and the flags are killed.
instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    const Register    str_reg = $str1$$Register;
    const Register    cnt_reg = $cnt1$$Register;
    const Register    ch_reg  = $ch$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_a   = $vec1$$XMMRegister;
    const XMMRegister xmm_b   = $vec2$$XMMRegister;
    const XMMRegister xmm_c   = $vec3$$XMMRegister;
    const Register    tmp_reg = $tmp$$Register;
    __ string_indexof_char(str_reg, cnt_reg, ch_reg, res_reg,
                           xmm_a, xmm_b, xmm_c, tmp_reg);
  %}
  ins_pipe( pipe_slow );
%}
11273 
// Fast string equals: StrEquals over (str1, str2) with an element count.
// Delegates to arrays_equals() with its first argument false and the
// trailing "char" flag false.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    cnt_reg = $cnt$$Register;
    const Register    res_reg = $result$$Register;
    const Register    gp_tmp  = $tmp3$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    __ arrays_equals(false, s1_reg, s2_reg,
                     cnt_reg, res_reg, gp_tmp,
                     xmm_a, xmm_b, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}
11289 
// Fast array equals for AryEq nodes with encoding LL.
// Delegates to arrays_equals() with its first argument true and the trailing
// "char" flag false; tmp3 is passed in the count position.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1_reg  = $ary1$$Register;
    const Register    a2_reg  = $ary2$$Register;
    const Register    res_reg = $result$$Register;
    const Register    gp_tmp3 = $tmp3$$Register;
    const Register    gp_tmp4 = $tmp4$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    __ arrays_equals(true, a1_reg, a2_reg,
                     gp_tmp3, res_reg, gp_tmp4,
                     xmm_a, xmm_b, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}
11306 
// Fast array equals for AryEq nodes with encoding UU.
// Delegates to arrays_equals() with its first argument true and the trailing
// "char" flag true; tmp3 is passed in the count position.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                      legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    a1_reg  = $ary1$$Register;
    const Register    a2_reg  = $ary2$$Register;
    const Register    res_reg = $result$$Register;
    const Register    gp_tmp3 = $tmp3$$Register;
    const Register    gp_tmp4 = $tmp4$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    __ arrays_equals(true, a1_reg, a2_reg,
                     gp_tmp3, res_reg, gp_tmp4,
                     xmm_a, xmm_b, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}
11322 
// HasNegatives over (ary1, len), delegated to has_negatives().
// ary1 and len are consumed (USE_KILL); two XMM temporaries are reserved and
// tmp3 plus the flags are killed.
instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                      legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    ary_reg = $ary1$$Register;
    const Register    len_reg = $len$$Register;
    const Register    res_reg = $result$$Register;
    const Register    gp_tmp  = $tmp3$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    __ has_negatives(ary_reg, len_reg, res_reg, gp_tmp, xmm_a, xmm_b);
  %}
  ins_pipe( pipe_slow );
%}
11337 
// Fast char[] to byte[] compression: StrCompressedCopy, delegated to
// char_array_compress(). src, dst and len are consumed (USE_KILL); four XMM
// temporaries are reserved and tmp5 plus the flags are killed.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    src_reg = $src$$Register;
    const Register    dst_reg = $dst$$Register;
    const Register    len_reg = $len$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    const XMMRegister xmm_c   = $tmp3$$XMMRegister;
    const XMMRegister xmm_d   = $tmp4$$XMMRegister;
    const Register    gp_tmp  = $tmp5$$Register;
    const Register    res_reg = $result$$Register;
    __ char_array_compress(src_reg, dst_reg, len_reg,
                           xmm_a, xmm_b, xmm_c, xmm_d,
                           gp_tmp, res_reg);
  %}
  ins_pipe( pipe_slow );
%}
11352 
// Fast byte[] to char[] inflation: StrInflatedCopy, delegated to
// byte_array_inflate(). src, dst and len are consumed (USE_KILL); one XMM and
// one general-purpose temporary are reserved and the flags are killed.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legVecS tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register    src_reg = $src$$Register;
    const Register    dst_reg = $dst$$Register;
    const Register    len_reg = $len$$Register;
    const XMMRegister xmm_tmp = $tmp1$$XMMRegister;
    const Register    gp_tmp  = $tmp2$$Register;
    __ byte_array_inflate(src_reg, dst_reg, len_reg, xmm_tmp, gp_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11366 
// Encode char[] to byte[] in ISO_8859_1: EncodeISOArray, delegated to
// encode_iso_array(). src, dst and len are consumed (USE_KILL); four XMM
// temporaries are reserved and tmp5 plus the flags are killed.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const Register    src_reg = $src$$Register;
    const Register    dst_reg = $dst$$Register;
    const Register    len_reg = $len$$Register;
    const XMMRegister xmm_a   = $tmp1$$XMMRegister;
    const XMMRegister xmm_b   = $tmp2$$XMMRegister;
    const XMMRegister xmm_c   = $tmp3$$XMMRegister;
    const XMMRegister xmm_d   = $tmp4$$XMMRegister;
    const Register    gp_tmp  = $tmp5$$Register;
    const Register    res_reg = $result$$Register;
    __ encode_iso_array(src_reg, dst_reg, len_reg,
                        xmm_a, xmm_b, xmm_c, xmm_d,
                        gp_tmp, res_reg);
  %}
  ins_pipe( pipe_slow );
%}
11382 
11383 //----------Overflow Math Instructions-----------------------------------------
11384 
// OverflowAddI, register/register: only the flags are the result.
// The add is performed in place on op1, hence USE_KILL op1.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ addl(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
11397 
// OverflowAddI, register/immediate: only the flags are the result.
// The add is performed in place on op1, hence USE_KILL op1.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    const Register lhs = $op1$$Register;
    __ addl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
11410 
// OverflowAddL, register/register: only the flags are the result.
// The 64-bit add is performed in place on op1, hence USE_KILL op1.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ addq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
11422 
// OverflowAddL, register/immL32: only the flags are the result.
// The 64-bit add is performed in place on op1, hence USE_KILL op1.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ addq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
11434 
// OverflowSubI, register/register: emitted as a 32-bit compare, and no
// effect clause is declared for the operands.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
11445 
// OverflowSubI, register/immediate: emitted as a 32-bit compare, and no
// effect clause is declared for the operands.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
11456 
// OverflowSubL, register/register: emitted as a 64-bit compare, and no
// effect clause is declared for the operands.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg);
%}
11467 
// OverflowSubL, register/immL32: emitted as a 64-bit compare, and no
// effect clause is declared for the operands.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
11478 
// OverflowSubI whose left operand is the immI0 constant: emitted as a 32-bit
// negate performed in place on op2, hence USE_KILL op2.
instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    const Register operand = $op2$$Register;
    __ negl(operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
11490 
// OverflowSubL whose left operand is the immL0 constant: emitted as a 64-bit
// negate performed in place on op2, hence USE_KILL op2.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    const Register operand = $op2$$Register;
    __ negq(operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
11502 
// OverflowMulI, register/register: only the flags are the result.
// The multiply is performed in place on op1, hence USE_KILL op1.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ imull(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11514 
// OverflowMulI, register/immediate: the three-operand multiply writes its
// product to the TEMP register, so op1 is only a USE.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register product = $tmp$$Register;
    const Register factor  = $op1$$Register;
    __ imull(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11526 
// OverflowMulL, register/register: only the flags are the result.
// The 64-bit multiply is performed in place on op1, hence USE_KILL op1.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ imulq(lhs, rhs);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11538 
// Overflow check for long multiplication by a 32-bit immediate (immL32).
// The three-operand imulq writes the product into the TEMP register, so op1
// is left intact.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imulq(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11550 
11551 
11552 //----------Control Flow Instructions------------------------------------------
11553 // Signed compare Instructions
11554 
11555 // XXX more variants!!
// Signed int compare, register with register: matches CmpI op1 op2 and
// defines the flags in cr.  Both inputs are only read (USE).
// Encoded through the enc_class chain with primary opcode 0x3B.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11566 
// Signed int compare of a register with an int immediate (CmpI op1 op2).
// NOTE(review): OpcSErm/Con8or32 presumably pick the short sign-extended
// 8-bit immediate form when the constant fits -- confirm against the
// enc_class definitions.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x81, 0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11576 
// Signed int compare of a register with an int loaded from memory; the LoadI
// is folded into the compare's memory operand.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11587 
// Int compare against the constant zero (CmpI src zero), implemented by
// testing the register against itself; the zero operand is not encoded.
instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11597 
// Folds "(src & con) compared with zero" into a single test of the register
// against the immediate mask; src itself is not modified by the match rule.
// The constant is emitted through Con32.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11607 
// Folds "(src & LoadI mem) compared with zero" into a single test of the
// register against the memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11617 
11618 // Unsigned compare Instructions; really, same as signed except they
11619 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register with register (CmpU).  Same opcode and
// encoding chain as compI_rReg; only the flags operand class (rFlagsRegU)
// differs.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11629 
// Unsigned int compare of a register with an int immediate (CmpU).  Same
// opcode and encoding chain as compI_rReg_imm, producing rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11639 
// Unsigned int compare of a register with an int loaded from memory; the
// LoadI is folded into the compare's memory operand.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11650 
11651 // // // Cisc-spilled version of cmpU_rReg
11652 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11653 // //%{
11654 // //  match(Set cr (CmpU (LoadI op1) op2));
11655 // //
11656 // //  format %{ "CMPu   $op1,$op2" %}
11657 // //  ins_cost(500);
11658 // //  opcode(0x39);  /* Opcode 39 /r */
11659 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11660 // //%}
11661 
// Unsigned int compare against the constant zero (CmpU src zero), implemented
// by testing the register against itself; the zero operand is not encoded.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl  $src, $src\t# unsigned" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11671 
// Pointer compare, register with register (CmpP).  Produces unsigned flags
// (rFlagsRegU) and uses the *_wide REX enc_class, printed as cmpq.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11681 
// Pointer compare of a register with a pointer loaded from memory; the LoadP
// is folded into the compare's memory operand.  No predicate, unlike
// compP_mem_rReg, which matches the same tree for raw pointers only.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11692 
11693 // // // Cisc-spilled version of cmpP_rReg
11694 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11695 // //%{
11696 // //  match(Set cr (CmpP (LoadP op1) op2));
11697 // //
11698 // //  format %{ "CMPu   $op1,$op2" %}
11699 // //  ins_cost(500);
11700 // //  opcode(0x39);  /* Opcode 39 /r */
11701 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11702 // //%}
11703 
// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
//
// Same match rule and encoding as compP_rReg_mem, but without an explicit
// ins_cost and restricted by the predicate to loads whose type carries no
// relocation.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  // n->in(2) is the LoadP of the match rule; its in(2) is presumably the
  // address input -- TODO confirm against the ideal node layout.
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11718 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
//
// Pointer null check on a register: CmpP src zero is implemented by testing
// the register against itself; the zero operand is not encoded.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11730 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
//
// Pointer null check directly on memory: tests the loaded pointer against an
// all-ones mask.  Selected when compressed oops are off or the narrow oop
// base is non-NULL; testP_mem_reg0 handles the zero-base case.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  opcode(0xF7); /* Opcode F7 /0 */
  // Only a 32-bit immediate is emitted; the format shows it as 64-bit ones
  // (presumably sign-extended by the wide form -- confirm in the ISA manual).
  ins_encode(REX_mem_wide(op),
             OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
  ins_pipe(ialu_cr_reg_imm);
%}
11745 
// Pointer null check directly on memory for the zero-base configuration:
// with compressed oops on and both narrow bases NULL, the loaded pointer is
// compared with R12 (the format string notes R12_heapbase==0).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpq(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11757 
// Compare two compressed pointers held in registers (CmpN) with a 32-bit
// cmpl; the result is produced as unsigned flags.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
11766 
// Compare a compressed pointer in a register with one loaded from memory;
// the LoadN is folded into the cmpl memory operand.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Address  slot   = $mem$$Address;
    __ cmpl(narrow, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11777 
// Compare a compressed pointer in a register with a narrow oop constant.
// The constant is handed to cmp_narrow_oop as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_oop(narrow, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11787 
// Compare a narrow oop constant with a compressed pointer loaded from memory.
// The emitted compare has the memory operand first and the constant second,
// i.e. the reverse of the operand order in the match rule.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_oop(slot, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11798 
// Compare a compressed pointer in a register with a narrow klass constant.
// The constant is handed to cmp_narrow_klass as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_klass(narrow, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11808 
// Compare a narrow klass constant with a compressed klass pointer loaded from
// memory (LoadNKlass).  The emitted compare has the memory operand first and
// the constant second, i.e. the reverse of the match rule's operand order.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_klass(slot, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11819 
// Null check of a compressed pointer in a register: CmpN src zero is
// implemented by a 32-bit test of the register against itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11827 
// Null check of a compressed pointer directly in memory, used when the narrow
// oop base is non-NULL (testN_mem_reg0 covers the zero-base case).
//
// The flags must describe "loaded value == 0", so the memory operand is
// compared with zero.  The previous encoding compared it with 0xFFFFFFFF,
// which reports "equal" for an all-ones value rather than for null and did
// not match the instruction shown by the format string.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(Universe::narrow_oop_base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, 0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11840 
// Null check of a compressed pointer directly in memory for the zero-base
// configuration: with both narrow bases NULL, the loaded value is compared
// with R12 using a 32-bit cmpl (the format string notes R12_heapbase==0).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpl(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11852 
11853 // Yanked all unsigned pointer compare operations.
11854 // Pointer compares are done with CmpP which is already unsigned.
11855 
// Signed long compare, register with register (CmpL).  Uses the *_wide REX
// enc_class with primary opcode 0x3B, printed as cmpq.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11865 
// Signed long compare of a register with a constant of operand class immL32.
// NOTE(review): OpcSErm_wide/Con8or32 presumably pick the short 8-bit
// immediate form when the constant fits -- confirm against the enc_class
// definitions.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x81, 0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11875 
// Signed long compare of a register with a long loaded from memory; the LoadL
// is folded into the compare's memory operand.  Unlike the int and pointer
// memory variants, no explicit ins_cost is given here.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11885 
// Long compare against the constant zero (CmpL src zero), implemented by
// testing the register against itself; the zero operand is not encoded.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11895 
// Folds "(src & con) compared with zero" for longs into a single test of the
// register against the mask.  The mask is of operand class immL32 and is
// emitted through Con32.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11905 
// Folds "(src & LoadL mem) compared with zero" into a single test of the
// register against the memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11915 
// Variant of testL_reg_mem whose register input is a pointer: the CastP2X in
// the match rule is absorbed, so the pointer register is tested directly
// against the long in memory with the same encoding.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11925 
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
//
// Matches CmpL3 and writes the three-way result into dst.  The flags register
// is clobbered (KILL flags).  Per the format listing: dst is preset to -1 and
// kept if src1 < src2; otherwise dst becomes 0 or 1 through setne + movzbl.
// The actual bytes come from the cmpl3_flag enc_class.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode(cmpl3_flag(src1, src2, dst));
  ins_pipe(pipe_slow);
%}
11943 
11944 // Unsigned long compare Instructions; really, same as signed long except they
11945 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register with register (CmpUL).  Same opcode and
// encoding chain as compL_rReg; only the flags operand class (rFlagsRegU)
// differs.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11955 
// Unsigned long compare of a register with a constant of operand class
// immL32.  Same opcode and encoding chain as compL_rReg_imm, producing
// rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x81, 0x07); /* Opcode 81 /7 */
  ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11965 
// Unsigned long compare of a register with a long loaded from memory; the
// LoadL is folded into the compare's memory operand.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11975 
// Unsigned long compare against the constant zero (CmpUL src zero),
// implemented by testing the register against itself; the zero operand is
// not encoded.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11985 
// Compare a signed byte in memory with an 8-bit immediate: the LoadB and the
// CmpI are folded into one cmpb on the memory operand.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    Address byte_slot = $mem$$Address;
    __ cmpb(byte_slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11995 
// Folds "(LoadUB mem & imm) compared with zero" into one testb on the memory
// operand; the mask is an unsigned 8-bit immediate (immU8).
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU8 imm, immI0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    Address byte_slot = $mem$$Address;
    __ testb(byte_slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12005 
// Folds "(LoadB mem & imm) compared with zero" into one testb on the memory
// operand; the mask is a signed 8-bit immediate (immI8).
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    Address byte_slot = $mem$$Address;
    __ testb(byte_slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12015 
12016 //----------Max and Min--------------------------------------------------------
12017 // Min Instructions
12018 
// Helper for minI_rReg: conditional move of src into dst on "greater".
// Has no match rule, so it is only instantiated through an expand block.
// dst is both read and written (USE_DEF); cr supplies the condition.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  opcode(0x0F, 0x4F);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12028 
12029 
// Integer minimum: dst = min(dst, src).  Expanded into a compare of dst with
// src followed by a conditional move that replaces dst with src when dst is
// greater.  The flags register is a temporary local to the expansion.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
12041 
// Helper for maxI_rReg: conditional move of src into dst on "less".
// Has no match rule, so it is only instantiated through an expand block.
// dst is both read and written (USE_DEF); cr supplies the condition.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  opcode(0x0F, 0x4C);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12051 
12052 
// Integer maximum: dst = max(dst, src).  Expanded into a compare of dst with
// src followed by a conditional move that replaces dst with src when dst is
// less.  The flags register is a temporary local to the expansion.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
12064 
12065 // ============================================================================
12066 // Branch Instructions
12067 
// Jump Direct - Label defines a relative address from JMP+1.
// Unconditional branch for Goto nodes.  The long form of jmp is always
// emitted; the instruct declares a fixed size of 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jmp);
%}
12083 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Conditional branch for If nodes on signed flags.  The long form of jcc is
// always emitted; the instruct declares a fixed size of 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12099 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch of a counted loop on signed flags, used when the node has no
// vector mask set (the *_and_restoreMask variant handles the other case).
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12116 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch of a counted loop on unsigned flags, used when the node has no
// vector mask set.
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12132 
// Back-branch of a counted loop on rFlagsRegUCF flags with a cmpOpUCF
// condition, used when the node has no vector mask set.  Carries a lower
// cost (200) than the plain unsigned variant.
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12147 
// Mask version.
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch of a counted loop on signed flags for nodes that have a vector
// mask set: the long jcc is followed by restorevectmask, for a declared
// total size of 10 bytes.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "j$cop     $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12167 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch of a counted loop on unsigned flags for nodes that have a
// vector mask set: the long jcc is followed by restorevectmask, for a
// declared total size of 10 bytes.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "j$cop,u   $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12185 
// Back-branch of a counted loop on rFlagsRegUCF flags for nodes that have a
// vector mask set: the long jcc is followed by restorevectmask, for a
// declared total size of 10 bytes.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12202 
// Jump Direct Conditional - using unsigned comparison.
// Conditional branch for If nodes on unsigned flags; always the long jcc
// form, with a declared size of 6 bytes.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12217 
// Conditional branch for If nodes on rFlagsRegUCF flags with a cmpOpUCF
// condition; always the long jcc form, with a declared size of 6 bytes.
// Carries a lower cost (200) than jmpConU.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // long form, never shortened here
  %}
  ins_pipe(pipe_jcc);
%}
12231 
// Conditional branch for If nodes with a cmpOpUCF2 condition, which needs
// two jumps because the parity flag has to be consulted as well
// (presumably the unordered floating-point case -- confirm against the
// compare instructions that produce rFlagsRegUCF):
//   notEqual: branch to the target on parity OR on notEqual;
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Any other condition code is a bug.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u   $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u   done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cond = $cop$$cmpcode;
    if (cond == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if (cond == Assembler::notEqual) {
      // Parity set counts as "not equal": both jumps go to the target.
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12263 
12264 // ============================================================================
12265 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12266 // superklass array for an instance of the superklass.  Set a hidden
12267 // internal cache on a hit (cache is checked with exposed code in
12268 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12269 // encoding ALSO sets flags.
12270 
// Slow-path subtype check that materializes its result in RDI.
// Operands are pinned to fixed registers: sub in RSI, super in RAX, result in
// RDI; RCX and the flags are clobbered (KILL).  The opcode value 0x1 is a
// parameter to enc_PartialSubtypeCheck asking it to XOR RDI on a hit, so the
// result is zero for a hit and non-zero for a miss, as listed in the format.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  // The "Hit: rdi zero" annotation carries the same "#" comment marker as
  // the other lines of the listing (it was missing before).
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
    "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12292 
// Slow-path subtype check whose result is only compared with null: matches
// CmpP (PartialSubtypeCheck sub super) zero and delivers the outcome in the
// flags instead of a register.  RDI is still used as scratch and is therefore
// killed, as is RCX.  The opcode value 0x0 tells enc_PartialSubtypeCheck to
// skip the XOR of RDI; the slightly lower cost (1000 vs 1100) makes the
// matcher prefer this form when it applies.
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
    "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12314 
12315 // ============================================================================
12316 // Branch Instructions -- short offset versions
12317 //
12318 // These instructions are used to replace jumps of a long offset (the default
12319 // match) with jumps of a shorter offset.  These instructions are all tagged
12320 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12321 // match rules in general matching.  Instead, the ADLC generates a conversion
12322 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used by means of the is_short_branch_offset() predicate
// in the machine-specific code section of the file.
12326 
// Jump Direct - Label defines a relative address from JMP+1.
// Short (2-byte) replacement for jmpDir, emitted with jmpb and tagged with
// ins_short_branch.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
12342 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Short (2-byte) replacement for jmpCon, emitted with jccb and tagged with
// ins_short_branch.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12358 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Short (2-byte) counted-loop back-branch on signed flags, emitted with jccb
// and tagged with ins_short_branch.  Unlike the long variants it carries no
// vector-mask predicate.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12374 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Short (2-byte) counted-loop back-branch on unsigned flags, emitted with
// jccb and tagged with ins_short_branch.
instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12390 
// Short (2-byte) counted-loop back-branch on rFlagsRegUCF flags, emitted
// with jccb and tagged with ins_short_branch.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12405 
// Jump Direct Conditional - using unsigned comparison.
// Short (2-byte) replacement for jmpConU, emitted with jccb and tagged with
// ins_short_branch.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12421 
// Short (2-byte) replacement for jmpConUCF, emitted with jccb and tagged
// with ins_short_branch.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12436 
// Short replacement for jmpConUCF2: the same two-jump scheme built from two
// jccb instructions, for a declared size of 4 bytes.
//   notEqual: branch to the target on parity OR on notEqual;
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Any other condition code is a bug.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s   $labl\n\t"
      $$emit$$"j$cop,u,s   $labl"
    } else {
      $$emit$$"jp,u,s   done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    const int cond = $cop$$cmpcode;
    if (cond == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cond == Assembler::notEqual) {
      // Parity set counts as "not equal": both jumps go to the target.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12470 
12471 // ============================================================================
12472 // inlined locking and unlocking
12473 
12474 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
12475   predicate(Compile::current()->use_rtm());  // RTM flavour of FastLock: selected only when the current compile uses RTM
12476   match(Set cr (FastLock object box));  // the rule's result is the flags operand cr
12477   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12478   ins_cost(300);
12479   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12480   ins_encode %{
12481     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12482                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12483                  _counters, _rtm_counters, _stack_rtm_counters,
12484                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),  // method data of the method being compiled
12485                  true, ra_->C->profile_rtm());  // NOTE(review): the literal true is presumably fast_lock's use-RTM flag - confirm against its declaration
12486   %}
12487   ins_pipe(pipe_slow);
12488 %}
12489 
12490 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
12491   predicate(!Compile::current()->use_rtm());  // non-RTM flavour of FastLock: selected when the current compile does not use RTM
12492   match(Set cr (FastLock object box));  // the rule's result is the flags operand cr
12493   effect(TEMP tmp, TEMP scr, USE_KILL box);
12494   ins_cost(300);
12495   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
12496   ins_encode %{
12497     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12498                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);  // the slots that cmpFastLockRTM fills with cx1/cx2, RTM counters, method data and flags get noreg / NULL / false here
12499   %}
12500   ins_pipe(pipe_slow);
12501 %}
12502 
12503 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
12504   match(Set cr (FastUnlock object box));  // single FastUnlock rule (no predicate); the result is the flags operand cr
12505   effect(TEMP tmp, USE_KILL box);
12506   ins_cost(300);
12507   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
12508   ins_encode %{
12509     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());  // RTM use is decided at emit time from the compile, not by a rule predicate
12510   %}
12511   ins_pipe(pipe_slow);
12512 %}
12513 
12514 
12515 // ============================================================================
12516 // Safepoint Instructions
12517 instruct safePoint_poll(rFlagsReg cr)
12518 %{
12519   predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
12520   match(SafePoint);
12521   effect(KILL cr);
12522   // Global-page poll, polling page not far: testl against the page through an AddressLiteral (no register operand).
12523   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12524             "# Safepoint: poll for GC" %}
12525   ins_cost(125);
12526   ins_encode %{
12527     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);  // polling page address, carrying a poll_type relocation
12528     __ testl(rax, addr);  // the poll itself; cr is declared KILLed above
12529   %}
12530   ins_pipe(ialu_reg_mem);
12531 %}
12532 
12533 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12534 %{
12535   predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
12536   match(SafePoint poll);
12537   effect(KILL cr, USE poll);
12538   // Global-page poll, polling page far: the page address arrives in the poll register operand.
12539   format %{ "testl  rax, [$poll]\t"
12540             "# Safepoint: poll for GC" %}
12541   ins_cost(125);
12542   ins_encode %{
12543     __ relocate(relocInfo::poll_type);  // record a poll_type relocation right before the test is emitted
12544     __ testl(rax, Address($poll$$Register, 0));  // the poll itself: test against [poll + 0]
12545   %}
12546   ins_pipe(ialu_reg_mem);
12547 %}
12548 
12549 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
12550 %{
12551   predicate(SafepointMechanism::uses_thread_local_poll());
12552   match(SafePoint poll);
12553   effect(KILL cr, USE poll);
12554   // Thread-local poll variant: the poll address arrives in the poll register operand.
12555   format %{ "testl  rax, [$poll]\t"
12556             "# Safepoint: poll for GC" %}
12557   ins_cost(125);
12558   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
12559   ins_encode %{
12560     __ relocate(relocInfo::poll_type);  // record a poll_type relocation right before the test is emitted
12561     address pre_pc = __ pc();  // start of the poll instruction, kept for the assert below
12562     __ testl(rax, Address($poll$$Register, 0));
12563     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");  // debug check: the bytes just emitted must be recognised as a safepoint poll
12564   %}
12565   ins_pipe(ialu_reg_mem);
12566 %}
12567 
12568 // ============================================================================
12569 // Procedure Call/Return Instructions
12570 // Call Java Static Instruction
12571 // Note: If this code changes, the corresponding ret_addr_offset() and
12572 //       compute_padding() functions will have to be adjusted.
12573 instruct CallStaticJavaDirect(method meth) %{
12574   match(CallStaticJava);
12575   effect(USE meth);
12576   // Encoding runs three enc classes in order: clear_avx, Java_Static_Call(meth), call_epilog.
12577   ins_cost(300);
12578   format %{ "call,static " %}
12579   opcode(0xE8); /* E8 cd */
12580   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
12581   ins_pipe(pipe_slow);
12582   ins_alignment(4); /* NOTE(review): presumably what compute_padding() (see note above) has to honour - confirm */
12583 %}
12584 
12585 // Call Java Dynamic Instruction
12586 // Note: If this code changes, the corresponding ret_addr_offset() and
12587 //       compute_padding() functions will have to be adjusted.
12588 instruct CallDynamicJavaDirect(method meth)
12589 %{
12590   match(CallDynamicJava);
12591   effect(USE meth);
12592   // Encoding runs three enc classes in order: clear_avx, Java_Dynamic_Call(meth), call_epilog.
12593   ins_cost(300);
12594   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12595             "call,dynamic " %}
12596   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
12597   ins_pipe(pipe_slow);
12598   ins_alignment(4); /* NOTE(review): presumably what compute_padding() (see note above) has to honour - confirm */
12599 %}
12600 
12601 // Call Runtime Instruction
12602 instruct CallRuntimeDirect(method meth)
12603 %{
12604   match(CallRuntime);
12605   effect(USE meth);
12606   // Encoding: clear_avx followed by Java_To_Runtime(meth); unlike the Java call rules, no call_epilog is listed.
12607   ins_cost(300);
12608   format %{ "call,runtime " %}
12609   ins_encode(clear_avx, Java_To_Runtime(meth));
12610   ins_pipe(pipe_slow);
12611 %}
12612 
12613 // Call runtime without safepoint (matches the CallLeaf ideal node)
12614 instruct CallLeafDirect(method meth)
12615 %{
12616   match(CallLeaf);
12617   effect(USE meth);
12618   // Same encoding list as CallRuntimeDirect: clear_avx followed by Java_To_Runtime(meth).
12619   ins_cost(300);
12620   format %{ "call_leaf,runtime " %}
12621   ins_encode(clear_avx, Java_To_Runtime(meth));
12622   ins_pipe(pipe_slow);
12623 %}
12624 
12625 // Call runtime without safepoint (matches the CallLeafNoFP ideal node)
12626 instruct CallLeafNoFPDirect(method meth)
12627 %{
12628   match(CallLeafNoFP);
12629   effect(USE meth);
12630   // Encoding list is identical to CallLeafDirect; only the matched node and the format text differ.
12631   ins_cost(300);
12632   format %{ "call_leaf_nofp,runtime " %}
12633   ins_encode(clear_avx, Java_To_Runtime(meth));
12634   ins_pipe(pipe_slow);
12635 %}
12636 
12637 // Return Instruction
12638 // Remove the return address & jump to it.
12639 // NOTE(review): this comment used to say a nop is always emitted after the ret (room for
12640 // safepoint patching), but the encoding below lists only OpcP with opcode 0xC3 - confirm.
12641 instruct Ret()
12642 %{
12643   match(Return);
12644   // No operands; the whole encoding is the primary opcode byte declared by opcode().
12645   format %{ "ret" %}
12646   opcode(0xC3);
12647   ins_encode(OpcP);
12648   ins_pipe(pipe_jmp);
12649 %}
12650 
12651 // Tail Call; Jump from runtime stub to Java code.
12652 // Also known as an 'interprocedural jump'.
12653 // Target of jump will eventually return to caller.
12654 // TailJump below removes the return address.
12655 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12656 %{
12657   match(TailCall jump_target method_oop);
12658   // Indirect jmp through jump_target; method_oop is pinned to rbx by its operand class and is not referenced by the encoding.
12659   ins_cost(300);
12660   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12661   opcode(0xFF, 0x4); /* Opcode FF /4 */
12662   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12663   ins_pipe(pipe_jmp);
12664 %}
12665 
12666 // Tail Jump; remove the return address; jump to target.
12667 // TailCall above leaves the return address around.
12668 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12669 %{
12670   match(TailJump jump_target ex_oop);
12671   // Pops the return address into rdx, then does the same indirect jmp as TailCalljmpInd; ex_oop is pinned to rax by its operand class.
12672   ins_cost(300);
12673   format %{ "popq    rdx\t# pop return address\n\t"
12674             "jmp     $jump_target" %}
12675   opcode(0xFF, 0x4); /* Opcode FF /4 */
12676   ins_encode(Opcode(0x5a), // popq rdx
12677              REX_reg(jump_target), OpcP, reg_opc(jump_target)); // NOTE(review): rdx is overwritten before the jmp - confirm jump_target can never be allocated to rdx
12678   ins_pipe(pipe_jmp);
12679 %}
12680 
12681 // Create exception oop: created by stack-crawling runtime code.
12682 // Created exception is now available to this handler, and is setup
12683 // just prior to jumping to this handler.  No code emitted.
12684 instruct CreateException(rax_RegP ex_oop)
12685 %{
12686   match(Set ex_oop (CreateEx));
12687   // Pure register binding: ex_oop is pinned to rax by its operand class, size is 0 and the encoding is empty.
12688   size(0);
12689   // use the following format syntax
12690   format %{ "# exception oop is in rax; no code emitted" %}
12691   ins_encode();
12692   ins_pipe(empty);
12693 %}
12694 
12695 // Rethrow exception:
12696 // The exception oop will come in the first argument position.
12697 // Then JUMP (not call) to the rethrow stub code.
12698 instruct RethrowException()
12699 %{
12700   match(Rethrow);
12701   // No operands; all code comes from the enc_rethrow encoding class.
12702   // use the following format syntax
12703   format %{ "jmp     rethrow_stub" %}
12704   ins_encode(enc_rethrow);
12705   ins_pipe(pipe_jmp);
12706 %}
12707 
12708 // ============================================================================
12709 // This name is KNOWN by the ADLC and cannot be changed.
12710 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12711 // for this guy.
12712 instruct tlsLoadP(r15_RegP dst) %{
12713   match(Set dst (ThreadLocal));
12714   effect(DEF dst);
12715   // Emits nothing: dst is pinned to r15 by its operand class, size is 0 and the encoding is empty.
12716   size(0);
12717   format %{ "# TLS is in R15" %}
12718   ins_encode( /*empty encoding*/ );
12719   ins_pipe(ialu_reg_reg);
12720 %}
12721 
12722 
12723 //----------PEEPHOLE RULES-----------------------------------------------------
12724 // These must follow all instruction definitions as they use the names
12725 // defined in the instructions definitions.
12726 //
12727 // peepmatch ( root_instr_name [preceding_instruction]* );
12728 //
12729 // peepconstraint %{
12730 // (instruction_number.operand_name relational_op instruction_number.operand_name
12731 //  [, ...] );
12732 // // instruction numbers are zero-based using left to right order in peepmatch
12733 //
12734 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12735 // // provide an instruction_number.operand_name for each operand that appears
12736 // // in the replacement instruction's match rule
12737 //
12738 // ---------VM FLAGS---------------------------------------------------------
12739 //
12740 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12741 //
12742 // Each peephole rule is given an identifying number starting with zero and
12743 // increasing by one in the order seen by the parser.  An individual peephole
12744 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12745 // on the command-line.
12746 //
12747 // ---------CURRENT LIMITATIONS----------------------------------------------
12748 //
12749 // Only match adjacent instructions in same basic block
12750 // Only equality constraints
12751 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12752 // Only one replacement instruction
12753 //
12754 // ---------EXAMPLE----------------------------------------------------------
12755 //
12756 // // pertinent parts of existing instructions in architecture description
12757 // instruct movI(rRegI dst, rRegI src)
12758 // %{
12759 //   match(Set dst (CopyI src));
12760 // %}
12761 //
12762 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12763 // %{
12764 //   match(Set dst (AddI dst src));
12765 //   effect(KILL cr);
12766 // %}
12767 //
12768 // // Change (inc mov) to lea
12769 // peephole %{
12770 //   // increment preceded by register-register move
12771 //   peepmatch ( incI_rReg movI );
12772 //   // require that the destination register of the increment
12773 //   // match the destination register of the move
12774 //   peepconstraint ( 0.dst == 1.dst );
12775 //   // construct a replacement instruction that sets
12776 //   // the destination to ( move's source register + one )
12777 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12778 // %}
12779 //
12780 
12781 // Implementation no longer uses movX instructions since
12782 // machine-independent system no longer uses CopyX nodes.
12783 //
12784 // peephole
12785 // %{
12786 //   peepmatch (incI_rReg movI);
12787 //   peepconstraint (0.dst == 1.dst);
12788 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12789 // %}
12790 
12791 // peephole
12792 // %{
12793 //   peepmatch (decI_rReg movI);
12794 //   peepconstraint (0.dst == 1.dst);
12795 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12796 // %}
12797 
12798 // peephole
12799 // %{
12800 //   peepmatch (addI_rReg_imm movI);
12801 //   peepconstraint (0.dst == 1.dst);
12802 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
12803 // %}
12804 
12805 // peephole
12806 // %{
12807 //   peepmatch (incL_rReg movL);
12808 //   peepconstraint (0.dst == 1.dst);
12809 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12810 // %}
12811 
12812 // peephole
12813 // %{
12814 //   peepmatch (decL_rReg movL);
12815 //   peepconstraint (0.dst == 1.dst);
12816 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12817 // %}
12818 
12819 // peephole
12820 // %{
12821 //   peepmatch (addL_rReg_imm movL);
12822 //   peepconstraint (0.dst == 1.dst);
12823 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
12824 // %}
12825 
12826 // peephole
12827 // %{
12828 //   peepmatch (addP_rReg_imm movP);
12829 //   peepconstraint (0.dst == 1.dst);
12830 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
12831 // %}
12832 
12833 // // Change load of spilled value to only a spill
12834 // instruct storeI(memory mem, rRegI src)
12835 // %{
12836 //   match(Set mem (StoreI mem src));
12837 // %}
12838 //
12839 // instruct loadI(rRegI dst, memory mem)
12840 // %{
12841 //   match(Set dst (LoadI mem));
12842 // %}
12843 //
12844 
12845 peephole
12846 %{
12847   peepmatch (loadI storeI);  // instruction 0 = loadI (root), instruction 1 = the storeI listed as preceding it
12848   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the load reads back the same memory, into the same register, that the store just wrote
12849   peepreplace (storeI(1.mem 1.mem 1.src));  // keep only the store; the reload is dropped
12850 %}
12851 
12852 peephole
12853 %{
12854   peepmatch (loadL storeL);  // instruction 0 = loadL (root), instruction 1 = the storeL listed as preceding it
12855   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);  // the load reads back the same memory, into the same register, that the store just wrote
12856   peepreplace (storeL(1.mem 1.mem 1.src));  // keep only the store; the reload is dropped
12857 %}
12858 
12859 //----------SMARTSPILL RULES---------------------------------------------------
12860 // These must follow all instruction definitions as they use the names
12861 // defined in the instructions definitions.