1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Empty register class.
 170 reg_class no_reg();
 171 
 172 // Class for all pointer registers (including RSP and RBP)
 173 reg_class any_reg_with_rbp(RAX, RAX_H,
 174                            RDX, RDX_H,
 175                            RBP, RBP_H,
 176                            RDI, RDI_H,
 177                            RSI, RSI_H,
 178                            RCX, RCX_H,
 179                            RBX, RBX_H,
 180                            RSP, RSP_H,
 181                            R8,  R8_H,
 182                            R9,  R9_H,
 183                            R10, R10_H,
 184                            R11, R11_H,
 185                            R12, R12_H,
 186                            R13, R13_H,
 187                            R14, R14_H,
 188                            R15, R15_H);
 189 
 190 // Class for all pointer registers (including RSP, but excluding RBP)
 191 reg_class any_reg_no_rbp(RAX, RAX_H,
 192                          RDX, RDX_H,
 193                          RDI, RDI_H,
 194                          RSI, RSI_H,
 195                          RCX, RCX_H,
 196                          RBX, RBX_H,
 197                          RSP, RSP_H,
 198                          R8,  R8_H,
 199                          R9,  R9_H,
 200                          R10, R10_H,
 201                          R11, R11_H,
 202                          R12, R12_H,
 203                          R13, R13_H,
 204                          R14, R14_H,
 205                          R15, R15_H);
 206 
 207 // Dynamic register class that selects at runtime between register classes
 208 // any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer).
 209 // Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
 210 reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
 211 
// Class for all pointer registers (excluding RSP, R12, and R15)
 213 reg_class ptr_reg_with_rbp(RAX, RAX_H,
 214                            RDX, RDX_H,
 215                            RBP, RBP_H,
 216                            RDI, RDI_H,
 217                            RSI, RSI_H,
 218                            RCX, RCX_H,
 219                            RBX, RBX_H,
 220                            R8,  R8_H,
 221                            R9,  R9_H,
 222                            R10, R10_H,
 223                            R11, R11_H,
 224                            R13, R13_H,
 225                            R14, R14_H);
 226 
// Class for all pointer registers (excluding RSP, RBP, R12, and R15)
 228 reg_class ptr_reg_no_rbp(RAX, RAX_H,
 229                          RDX, RDX_H,
 230                          RDI, RDI_H,
 231                          RSI, RSI_H,
 232                          RCX, RCX_H,
 233                          RBX, RBX_H,
 234                          R8,  R8_H,
 235                          R9,  R9_H,
 236                          R10, R10_H,
 237                          R11, R11_H,
 238                          R13, R13_H,
 239                          R14, R14_H);
 240 
 241 // Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
 242 reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
 243 
// Class for all pointer registers (excluding RAX, RSP, R12, and R15)
 245 reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
 246                                   RBP, RBP_H,
 247                                   RDI, RDI_H,
 248                                   RSI, RSI_H,
 249                                   RCX, RCX_H,
 250                                   RBX, RBX_H,
 251                                   R8,  R8_H,
 252                                   R9,  R9_H,
 253                                   R10, R10_H,
 254                                   R11, R11_H,
 255                                   R13, R13_H,
 256                                   R14, R14_H);
 257 
// Class for all pointer registers (excluding RAX, RSP, RBP, R12, and R15)
 259 reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
 260                                 RDI, RDI_H,
 261                                 RSI, RSI_H,
 262                                 RCX, RCX_H,
 263                                 RBX, RBX_H,
 264                                 R8,  R8_H,
 265                                 R9,  R9_H,
 266                                 R10, R10_H,
 267                                 R11, R11_H,
 268                                 R13, R13_H,
 269                                 R14, R14_H);
 270 
 271 // Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
 272 reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
 273 
// Class for all pointer registers (excluding RAX, RBX, RSP, R12, and R15)
 275 reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
 276                                       RBP, RBP_H,
 277                                       RDI, RDI_H,
 278                                       RSI, RSI_H,
 279                                       RCX, RCX_H,
 280                                       R8,  R8_H,
 281                                       R9,  R9_H,
 282                                       R10, R10_H,
 283                                       R11, R11_H,
 284                                       R13, R13_H,
 285                                       R14, R14_H);
 286 
// Class for all pointer registers (excluding RAX, RBX, RSP, RBP, R12, and R15)
 288 reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
 289                                     RDI, RDI_H,
 290                                     RSI, RSI_H,
 291                                     RCX, RCX_H,
 292                                     R8,  R8_H,
 293                                     R9,  R9_H,
 294                                     R10, R10_H,
 295                                     R11, R11_H,
 296                                     R13, R13_H,
 297                                     R14, R14_H);
 298 
 299 // Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
 300 reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 301 
 302 // Singleton class for RAX pointer register
 303 reg_class ptr_rax_reg(RAX, RAX_H);
 304 
 305 // Singleton class for RBX pointer register
 306 reg_class ptr_rbx_reg(RBX, RBX_H);
 307 
 308 // Singleton class for RSI pointer register
 309 reg_class ptr_rsi_reg(RSI, RSI_H);
 310 
 311 // Singleton class for RDI pointer register
 312 reg_class ptr_rdi_reg(RDI, RDI_H);
 313 
 314 // Singleton class for stack pointer
 315 reg_class ptr_rsp_reg(RSP, RSP_H);
 316 
 317 // Singleton class for TLS pointer
 318 reg_class ptr_r15_reg(R15, R15_H);
 319 
// Class for all long registers (excluding RSP, R12, and R15)
 321 reg_class long_reg_with_rbp(RAX, RAX_H,
 322                             RDX, RDX_H,
 323                             RBP, RBP_H,
 324                             RDI, RDI_H,
 325                             RSI, RSI_H,
 326                             RCX, RCX_H,
 327                             RBX, RBX_H,
 328                             R8,  R8_H,
 329                             R9,  R9_H,
 330                             R10, R10_H,
 331                             R11, R11_H,
 332                             R13, R13_H,
 333                             R14, R14_H);
 334 
// Class for all long registers (excluding RSP, RBP, R12, and R15)
 336 reg_class long_reg_no_rbp(RAX, RAX_H,
 337                           RDX, RDX_H,
 338                           RDI, RDI_H,
 339                           RSI, RSI_H,
 340                           RCX, RCX_H,
 341                           RBX, RBX_H,
 342                           R8,  R8_H,
 343                           R9,  R9_H,
 344                           R10, R10_H,
 345                           R11, R11_H,
 346                           R13, R13_H,
 347                           R14, R14_H);
 348 
 349 // Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
 350 reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
 351 
// Class for all long registers (excluding RAX, RDX, RSP, R12, and R15)
 353 reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
 354                                        RDI, RDI_H,
 355                                        RSI, RSI_H,
 356                                        RCX, RCX_H,
 357                                        RBX, RBX_H,
 358                                        R8,  R8_H,
 359                                        R9,  R9_H,
 360                                        R10, R10_H,
 361                                        R11, R11_H,
 362                                        R13, R13_H,
 363                                        R14, R14_H);
 364 
// Class for all long registers (excluding RAX, RDX, RSP, RBP, R12, and R15)
 366 reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
 367                                      RSI, RSI_H,
 368                                      RCX, RCX_H,
 369                                      RBX, RBX_H,
 370                                      R8,  R8_H,
 371                                      R9,  R9_H,
 372                                      R10, R10_H,
 373                                      R11, R11_H,
 374                                      R13, R13_H,
 375                                      R14, R14_H);
 376 
 377 // Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
 378 reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 379 
// Class for all long registers (excluding RCX, RSP, R12, and R15)
 381 reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
 382                                    RDI, RDI_H,
 383                                    RSI, RSI_H,
 384                                    RAX, RAX_H,
 385                                    RDX, RDX_H,
 386                                    RBX, RBX_H,
 387                                    R8,  R8_H,
 388                                    R9,  R9_H,
 389                                    R10, R10_H,
 390                                    R11, R11_H,
 391                                    R13, R13_H,
 392                                    R14, R14_H);
 393 
// Class for all long registers (excluding RCX, RSP, RBP, R12, and R15)
 395 reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
 396                                  RSI, RSI_H,
 397                                  RAX, RAX_H,
 398                                  RDX, RDX_H,
 399                                  RBX, RBX_H,
 400                                  R8,  R8_H,
 401                                  R9,  R9_H,
 402                                  R10, R10_H,
 403                                  R11, R11_H,
 404                                  R13, R13_H,
 405                                  R14, R14_H);
 406 
 407 // Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
 408 reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 409 
 410 // Singleton class for RAX long register
 411 reg_class long_rax_reg(RAX, RAX_H);
 412 
 413 // Singleton class for RCX long register
 414 reg_class long_rcx_reg(RCX, RCX_H);
 415 
 416 // Singleton class for RDX long register
 417 reg_class long_rdx_reg(RDX, RDX_H);
 418 
// Class for all int registers (excluding RSP, R12, and R15)
 420 reg_class int_reg_with_rbp(RAX,
 421                            RDX,
 422                            RBP,
 423                            RDI,
 424                            RSI,
 425                            RCX,
 426                            RBX,
 427                            R8,
 428                            R9,
 429                            R10,
 430                            R11,
 431                            R13,
 432                            R14);
 433 
// Class for all int registers (excluding RSP, RBP, R12, and R15)
 435 reg_class int_reg_no_rbp(RAX,
 436                          RDX,
 437                          RDI,
 438                          RSI,
 439                          RCX,
 440                          RBX,
 441                          R8,
 442                          R9,
 443                          R10,
 444                          R11,
 445                          R13,
 446                          R14);
 447 
 448 // Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
 449 reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
 450 
// Class for all int registers (excluding RCX, RSP, R12, and R15)
 452 reg_class int_no_rcx_reg_with_rbp(RAX,
 453                                   RDX,
 454                                   RBP,
 455                                   RDI,
 456                                   RSI,
 457                                   RBX,
 458                                   R8,
 459                                   R9,
 460                                   R10,
 461                                   R11,
 462                                   R13,
 463                                   R14);
 464 
// Class for all int registers (excluding RCX, RSP, RBP, R12, and R15)
 466 reg_class int_no_rcx_reg_no_rbp(RAX,
 467                                 RDX,
 468                                 RDI,
 469                                 RSI,
 470                                 RBX,
 471                                 R8,
 472                                 R9,
 473                                 R10,
 474                                 R11,
 475                                 R13,
 476                                 R14);
 477 
 478 // Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
 479 reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 480 
// Class for all int registers (excluding RAX, RDX, RSP, R12, and R15)
 482 reg_class int_no_rax_rdx_reg_with_rbp(RBP,
 483                                       RDI,
 484                                       RSI,
 485                                       RCX,
 486                                       RBX,
 487                                       R8,
 488                                       R9,
 489                                       R10,
 490                                       R11,
 491                                       R13,
 492                                       R14);
 493 
// Class for all int registers (excluding RAX, RDX, RSP, RBP, R12, and R15)
 495 reg_class int_no_rax_rdx_reg_no_rbp(RDI,
 496                                     RSI,
 497                                     RCX,
 498                                     RBX,
 499                                     R8,
 500                                     R9,
 501                                     R10,
 502                                     R11,
 503                                     R13,
 504                                     R14);
 505 
 506 // Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
 507 reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 508 
 509 // Singleton class for RAX int register
 510 reg_class int_rax_reg(RAX);
 511 
 512 // Singleton class for RBX int register
 513 reg_class int_rbx_reg(RBX);
 514 
 515 // Singleton class for RCX int register
 516 reg_class int_rcx_reg(RCX);
 517 
// Singleton class for RDX int register
 519 reg_class int_rdx_reg(RDX);
 520 
// Singleton class for RDI int register
 522 reg_class int_rdi_reg(RDI);
 523 
 524 // Singleton class for instruction pointer
 525 // reg_class ip_reg(RIP);
 526 
 527 %}
 528 
 529 source_hpp %{
 530 #if INCLUDE_ZGC
 531 #include "gc/z/zBarrierSetAssembler.hpp"
 532 #endif
 533 %}
 534 
 535 //----------SOURCE BLOCK-------------------------------------------------------
 536 // This is a block of C++ code which provides values, functions, and
 537 // definitions necessary in the rest of the architecture description
 538 source %{
 539 #define   RELOC_IMM64    Assembler::imm_operand
 540 #define   RELOC_DISP32   Assembler::disp32_operand
 541 
 542 #define __ _masm.
 543 
 544 static bool generate_vzeroupper(Compile* C) {
 545   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 546 }
 547 
 548 static int clear_avx_size() {
 549   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 550 }
 551 
 552 // !!!!! Special hack to get all types of calls to specify the byte offset
 553 //       from the start of the call to the point where the return address
 554 //       will point.
 555 int MachCallStaticJavaNode::ret_addr_offset()
 556 {
 557   int offset = 5; // 5 bytes from start of call to where return address points
 558   offset += clear_avx_size();
 559   return offset;
 560 }
 561 
 562 int MachCallDynamicJavaNode::ret_addr_offset()
 563 {
 564   int offset = 15; // 15 bytes from start of call to where return address points
 565   offset += clear_avx_size();
 566   return offset;
 567 }
 568 
 569 int MachCallRuntimeNode::ret_addr_offset() {
 570   int offset = 13; // movq r10,#addr; callq (r10)
 571   offset += clear_avx_size();
 572   return offset;
 573 }
 574 
 575 // Indicate if the safepoint node needs the polling page as an input,
 576 // it does if the polling page is more than disp32 away.
 577 bool SafePointNode::needs_polling_address_input()
 578 {
 579   return SafepointMechanism::uses_thread_local_poll() || Assembler::is_polling_page_far();
 580 }
 581 
 582 //
 583 // Compute padding required for nodes which need alignment
 584 //
 585 
 586 // The address of the call instruction needs to be 4-byte aligned to
 587 // ensure that it does not span a cache line so that it can be patched.
 588 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 589 {
 590   current_offset += clear_avx_size(); // skip vzeroupper
 591   current_offset += 1; // skip call opcode byte
 592   return align_up(current_offset, alignment_required()) - current_offset;
 593 }
 594 
 595 // The address of the call instruction needs to be 4-byte aligned to
 596 // ensure that it does not span a cache line so that it can be patched.
 597 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 598 {
 599   current_offset += clear_avx_size(); // skip vzeroupper
 600   current_offset += 11; // skip movq instruction + call opcode byte
 601   return align_up(current_offset, alignment_required()) - current_offset;
 602 }
 603 
 604 // EMIT_RM()
 605 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 606   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 607   cbuf.insts()->emit_int8(c);
 608 }
 609 
 610 // EMIT_CC()
 611 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 612   unsigned char c = (unsigned char) (f1 | f2);
 613   cbuf.insts()->emit_int8(c);
 614 }
 615 
 616 // EMIT_OPCODE()
 617 void emit_opcode(CodeBuffer &cbuf, int code) {
 618   cbuf.insts()->emit_int8((unsigned char) code);
 619 }
 620 
 621 // EMIT_OPCODE() w/ relocation information
 622 void emit_opcode(CodeBuffer &cbuf,
 623                  int code, relocInfo::relocType reloc, int offset, int format)
 624 {
 625   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 626   emit_opcode(cbuf, code);
 627 }
 628 
 629 // EMIT_D8()
 630 void emit_d8(CodeBuffer &cbuf, int d8) {
 631   cbuf.insts()->emit_int8((unsigned char) d8);
 632 }
 633 
 634 // EMIT_D16()
 635 void emit_d16(CodeBuffer &cbuf, int d16) {
 636   cbuf.insts()->emit_int16(d16);
 637 }
 638 
 639 // EMIT_D32()
 640 void emit_d32(CodeBuffer &cbuf, int d32) {
 641   cbuf.insts()->emit_int32(d32);
 642 }
 643 
 644 // EMIT_D64()
 645 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 646   cbuf.insts()->emit_int64(d64);
 647 }
 648 
 649 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 650 void emit_d32_reloc(CodeBuffer& cbuf,
 651                     int d32,
 652                     relocInfo::relocType reloc,
 653                     int format)
 654 {
 655   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 656   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 657   cbuf.insts()->emit_int32(d32);
 658 }
 659 
 660 // emit 32 bit value and construct relocation entry from RelocationHolder
 661 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 662 #ifdef ASSERT
 663   if (rspec.reloc()->type() == relocInfo::oop_type &&
 664       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 665     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 666     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop((intptr_t)d32))), "cannot embed scavengable oops in code");
 667   }
 668 #endif
 669   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 670   cbuf.insts()->emit_int32(d32);
 671 }
 672 
 673 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 674   address next_ip = cbuf.insts_end() + 4;
 675   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 676                  external_word_Relocation::spec(addr),
 677                  RELOC_DISP32);
 678 }
 679 
 680 
 681 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 682 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 683   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 684   cbuf.insts()->emit_int64(d64);
 685 }
 686 
 687 // emit 64 bit value and construct relocation entry from RelocationHolder
 688 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 689 #ifdef ASSERT
 690   if (rspec.reloc()->type() == relocInfo::oop_type &&
 691       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 692     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 693     assert(oopDesc::is_oop(cast_to_oop(d64)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d64))),
 694            "cannot embed scavengable oops in code");
 695   }
 696 #endif
 697   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 698   cbuf.insts()->emit_int64(d64);
 699 }
 700 
 701 // Access stack slot for load or store
 702 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 703 {
 704   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 705   if (-0x80 <= disp && disp < 0x80) {
 706     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 707     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 708     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 709   } else {
 710     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 711     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 712     emit_d32(cbuf, disp);     // Displacement // R/M byte
 713   }
 714 }
 715 
 716    // rRegI ereg, memory mem) %{    // emit_reg_mem
     //
     // Emit the ModRM byte, an optional SIB byte and an optional displacement for
     // a register/memory operand pair.  No opcode or prefix bytes are emitted
     // here.
     //
     //   reg        - register encoding; only the low 3 bits go into ModRM.reg
     //   base       - base register encoding, or -1 for an absolute address
     //   index      - index register encoding; 0x4 together with scale == 0
     //                selects the form without an index
     //   scale      - value placed in the SIB scale field
     //   disp       - displacement
     //   disp_reloc - relocation type of the displacement; asserted to be
     //                relocInfo::none on entry
     //
     // NOTE(review): only the low 3 bits of reg/base/index are used here;
     // presumably the caller emits the REX prefix carrying the high bits --
     // confirm against the callers.
     // NOTE(review): because of the entry assert, every
     // `disp_reloc != relocInfo::none` branch below is unreachable in ASSERT
     // builds; those branches pass relocInfo::oop_type rather than disp_reloc.
 717 void encode_RegMem(CodeBuffer &cbuf,
 718                    int reg,
 719                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 720 {
 721   assert(disp_reloc == relocInfo::none, "cannot have disp");
 722   int regenc = reg & 7;
 723   int baseenc = base & 7;
 724   int indexenc = index & 7;
 725 
 726   // There is no index & no scale, use form without SIB byte
       // (RSP and R12 as base are excluded and always take the SIB path below)
 727   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 728     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 729     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 730       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 731     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 732       // If 8-bit displacement, mode 0x1
 733       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 734       emit_d8(cbuf, disp);
 735     } else {
 736       // If 32-bit displacement
 737       if (base == -1) { // Special flag for absolute address
 738         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 739         if (disp_reloc != relocInfo::none) {
 740           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 741         } else {
 742           emit_d32(cbuf, disp);
 743         }
 744       } else {
 745         // Normal base + offset
 746         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 747         if (disp_reloc != relocInfo::none) {
 748           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 749         } else {
 750           emit_d32(cbuf, disp);
 751         }
 752       }
 753     }
 754   } else {
 755     // Else, encode with the SIB byte
 756     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 757     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 758       // If no displacement
 759       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 760       emit_rm(cbuf, scale, indexenc, baseenc);
 761     } else {
 762       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 763         // If 8-bit displacement, mode 0x1
 764         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 765         emit_rm(cbuf, scale, indexenc, baseenc);
 766         emit_d8(cbuf, disp);
 767       } else {
 768         // If 32-bit displacement
             // Both arms below emit the same two bytes: when base == 0x04,
             // baseenc (base & 7) is 0x04 as well.
 769         if (base == 0x04 ) {
 770           emit_rm(cbuf, 0x2, regenc, 0x4);
 771           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 772         } else {
 773           emit_rm(cbuf, 0x2, regenc, 0x4);
 774           emit_rm(cbuf, scale, indexenc, baseenc); // *
 775         }
 776         if (disp_reloc != relocInfo::none) {
 777           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 778         } else {
 779           emit_d32(cbuf, disp);
 780         }
 781       }
 782     }
 783   }
 784 }
 785 
 786 // This could be in MacroAssembler but it's fairly C2 specific
     //
     // Emits a flag fixup to be placed after a comiss/ucomiss-style compare:
     // when the parity flag is set, the saved flags word on the stack is masked
     // and reloaded; otherwise the fixup is skipped with a short jump.
 787 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 788   Label exit;
       // Parity clear: nothing to fix, skip over the fixup.
 789   __ jccb(Assembler::noParity, exit);
 790   __ pushf();
 791   //
 792   // comiss/ucomiss instructions set ZF,PF,CF flags and
 793   // zero OF,AF,SF for NaN values.
 794   // Fixup flags by zeroing ZF,PF so that compare of NaN
 795   // values returns 'less than' result (CF is set).
 796   // Leave the rest of flags unchanged.
 797   //
 798   //    7 6 5 4 3 2 1 0
 799   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 800   //    0 0 1 0 1 0 1 1   (0x2B)
 801   //
       // The mask is applied in place to the flags image pushed at [rsp].
 802   __ andq(Address(rsp, 0), 0xffffff2b);
 803   __ popf();
 804   __ bind(exit);
 805 }
 806 
     // Materialize a three-way compare result in `dst` from the current flags.
     //
     // dst is preset to -1 and left at -1 when the parity flag or the "below"
     // condition is set.  Otherwise dst becomes 1 if the "not equal" condition
     // holds and 0 if it does not (setb followed by a zero-extending byte move).
 807 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 808   Label done;
 809   __ movl(dst, -1);
 810   __ jcc(Assembler::parity, done);
 811   __ jcc(Assembler::below, done);
 812   __ setb(Assembler::notEqual, dst);
 813   __ movzbl(dst, dst);
 814   __ bind(done);
 815 }
 816 
 817 
 818 //=============================================================================
     // Output register mask of MachConstantBaseNode: bound to the empty mask.
 819 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 820 
     // Offset of the constant table base; always 0 here.
 821 int Compile::ConstantTable::calculate_table_base_offset() const {
 822   return 0;  // absolute addressing, no offset
 823 }
 824 
     // This node never asks for post-allocation expansion.
 825 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
     // Must not be called: requires_postalloc_expand() returns false, so reaching
     // this function is treated as a fatal error.
 826 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 827   ShouldNotReachHere();
 828 }
 829 
     // Emits no code for the constant table base node.
 830 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 831   // Empty encoding
 832 }
 833 
     // Size in bytes of the emitted code: 0, matching the empty encoding in emit().
 834 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 835   return 0;
 836 }
 837 
 838 #ifndef PRODUCT
     // Non-PRODUCT only: prints a one-line placeholder for this node to `st`.
 839 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 840   st->print("# MachConstantBaseNode (empty encoding)");
 841 }
 842 #endif
 843 
 844 
 845 //=============================================================================
 846 #ifndef PRODUCT
     // Non-PRODUCT only: prints a textual description of the method prolog to
     // `st`.  `framesize` starts as the compiled frame size in bytes and is
     // decremented step by step so that each printed offset/amount reflects the
     // part of the frame still to be accounted for at that point.
 847 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 848   Compile* C = ra_->C;
 849 
 850   int framesize = C->frame_size_in_bytes();
 851   int bangsize = C->bang_size_in_bytes();
 852   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 853   // Remove wordSize for return addr which is already pushed.
 854   framesize -= wordSize;
 855 
 856   if (C->need_stack_bang(bangsize)) {
         // Stack-bang variant: rbp is pushed, so one more word is already
         // accounted for before the subq is printed.
 857     framesize -= wordSize;
 858     st->print("# stack bang (%d bytes)", bangsize);
 859     st->print("\n\t");
 860     st->print("pushq   rbp\t# Save rbp");
 861     if (PreserveFramePointer) {
 862         st->print("\n\t");
 863         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 864     }
 865     if (framesize) {
 866       st->print("\n\t");
 867       st->print("subq    rsp, #%d\t# Create frame",framesize);
 868     }
 869   } else {
         // No stack bang: the frame is created first and rbp is stored into
         // the slot one word below the remaining frame size.
 870     st->print("subq    rsp, #%d\t# Create frame",framesize);
 871     st->print("\n\t");
 872     framesize -= wordSize;
 873     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 874     if (PreserveFramePointer) {
 875       st->print("\n\t");
 876       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 877       if (framesize > 0) {
 878         st->print("\n\t");
 879         st->print("addq    rbp, #%d", framesize);
 880       }
 881     }
 882   }
 883 
 884   if (VerifyStackAtCalls) {
 885     st->print("\n\t");
 886     framesize -= wordSize;
 887     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 888 #ifdef ASSERT
 889     st->print("\n\t");
 890     st->print("# stack alignment check");
 891 #endif
 892   }
       // NOTE(review): the nmethod entry barrier text is printed only when
       // C->stub_function() is non-NULL.  Whether that matches the condition
       // under which the barrier is actually emitted is not visible here --
       // confirm against the code behind verified_entry().
 893   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
 894     st->print("\n\t");
 895     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
 896     st->print("\n\t");
 897     st->print("je      fast_entry\t");
 898     st->print("\n\t");
 899     st->print("call    #nmethod_entry_barrier_stub\t");
 900     st->print("\n\tfast_entry:");
 901   }
 902   st->cr();
 903 }
 904 #endif
 905 
     // Emits the method prolog into `cbuf`.
     //
     // The prolog code itself is produced by MacroAssembler::verified_entry(C);
     // afterwards the node's _verified_entry label is bound, the current
     // instruction-section size is recorded as the frame-complete offset, and,
     // if the compilation has a MachConstantBaseNode, the constant table base
     // offset is set.
 906 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 907   Compile* C = ra_->C;
 908   MacroAssembler _masm(&cbuf);
 909 
 910   __ verified_entry(C);
       // The label is bound after the code emitted by verified_entry().
 911   __ bind(*_verified_entry);
 912 
 913   C->set_frame_complete(cbuf.insts_size());
 914 
 915   if (C->has_mach_constant_base_node()) {
 916     // NOTE: We set the table base offset here because users might be
 917     // emitted before MachConstantBaseNode.
 918     Compile::ConstantTable& constant_table = C->constant_table();
 919     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 920   }
 921 }
 922 
     // Size of the prolog; delegates to the generic MachNode::size(ra_) instead
     // of computing it in closed form.
 923 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 924 {
 925   return MachNode::size(ra_); // too many variables; just compute it
 926                               // the hard way
 927 }
 928 
     // Relocation estimate reported for the prolog: always 0.
     // NOTE(review): the trailing comment says "a large enough number" although
     // the value is 0 -- confirm that no relocation entries need to be reserved.
 929 int MachPrologNode::reloc() const
 930 {
 931   return 0; // a large enough number
 932 }
 933 
 934 //=============================================================================
 935 #ifndef PRODUCT
     // Non-PRODUCT only: prints a textual description of the method epilog to
     // `st`: an optional vzeroupper, the frame teardown (addq rsp / popq rbp)
     // and, when polling is enabled for a method compilation, the return
     // safepoint poll in one of three forms.
 936 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 937 {
 938   Compile* C = ra_->C;
 939   if (generate_vzeroupper(C)) {
 940     st->print("vzeroupper");
 941     st->cr(); st->print("\t");
 942   }
 943 
 944   int framesize = C->frame_size_in_bytes();
 945   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 946   // Remove word for return adr already pushed
 947   // and RBP
 948   framesize -= 2*wordSize;
 949 
 950   if (framesize) {
 951     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 952     st->print("\t");
 953   }
 954 
 955   st->print_cr("popq   rbp");
 956   if (do_polling() && C->is_method_compilation()) {
 957     st->print("\t");
         // Poll form: thread-local poll, far polling page, or rip-relative.
 958     if (SafepointMechanism::uses_thread_local_poll()) {
 959       st->print_cr("movq   rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
 960                    "testl  rax, [rscratch1]\t"
 961                    "# Safepoint: poll for GC");
 962     } else if (Assembler::is_polling_page_far()) {
 963       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 964                    "testl  rax, [rscratch1]\t"
 965                    "# Safepoint: poll for GC");
 966     } else {
 967       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 968                    "# Safepoint: poll for GC");
 969     }
 970   }
 971 }
 972 #endif
 973 
     // Emits the method epilog into `cbuf`: an optional vzeroupper, the stack
     // restore (MacroAssembler::restore_stack), an optional reserved-stack check
     // and, when polling is enabled for a method compilation, the return
     // safepoint poll.
 974 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 975 {
 976   Compile* C = ra_->C;
 977   MacroAssembler _masm(&cbuf);
 978 
 979   if (generate_vzeroupper(C)) {
 980     // Clear upper bits of YMM registers when current compiled code uses
 981     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 982     __ vzeroupper();
 983   }
 984 
 985   __ restore_stack(C);
 986 
 987 
 988   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 989     __ reserved_stack_check();
 990   }
 991 
 992   if (do_polling() && C->is_method_compilation()) {
         // NOTE(review): this declaration shadows the _masm created at the top
         // of the function; both are constructed over the same cbuf.
 993     MacroAssembler _masm(&cbuf);
 994     if (SafepointMechanism::uses_thread_local_poll()) {
           // Load the polling page address from the thread, then poll it; the
           // poll_return relocation is recorded immediately before the testl.
 995       __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
 996       __ relocate(relocInfo::poll_return_type);
 997       __ testl(rax, Address(rscratch1, 0));
 998     } else {
 999       AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1000       if (Assembler::is_polling_page_far()) {
1001         __ lea(rscratch1, polling_page);
1002         __ relocate(relocInfo::poll_return_type);
1003         __ testl(rax, Address(rscratch1, 0));
1004       } else {
1005         __ testl(rax, polling_page);
1006       }
1007     }
1008   }
1009 }
1010 
     // Size of the epilog; delegates to the generic MachNode::size(ra_) instead
     // of computing it in closed form.
1011 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1012 {
1013   return MachNode::size(ra_); // too many variables; just compute it
1014                               // the hard way
1015 }
1016 
     // Relocation estimate reported for the epilog: always 2.
1017 int MachEpilogNode::reloc() const
1018 {
1019   return 2; // a large enough number
1020 }
1021 
     // Pipeline description for the epilog: the generic MachNode pipeline class.
1022 const Pipeline* MachEpilogNode::pipeline() const
1023 {
1024   return MachNode::pipeline_class();
1025 }
1026 
     // Safepoint offset reported for the epilog: always 0.
1027 int MachEpilogNode::safepoint_offset() const
1028 {
1029   return 0;
1030 }
1031 
1032 //=============================================================================
1033 
     // Coarse register classes used by the spill-copy code; see rc_class() for
     // how an OptoReg::Name is mapped onto them.
1034 enum RC {
1035   rc_bad,    // not a valid register name
1036   rc_int,    // general purpose register
1037   rc_float,  // XMM register
1038   rc_stack   // stack slot
1039 };
1040 
1041 static enum RC rc_class(OptoReg::Name reg)
1042 {
1043   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1044 
1045   if (OptoReg::is_stack(reg)) return rc_stack;
1046 
1047   VMReg r = OptoReg::as_VMReg(reg);
1048 
1049   if (r->is_Register()) return rc_int;
1050 
1051   assert(r->is_XMMRegister(), "must be");
1052   return rc_float;
1053 }
1054 
1055 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
     //
     // As used by MachSpillCopyNode::implementation() in this file:
     //   vec_mov_helper   - vector register -> vector register copy
     //   vec_spill_helper - vector register <-> stack slot copy; `is_load` is
     //                      passed as true for stack -> register and false for
     //                      register -> stack
     // In both, `ireg` is the ideal register kind (Op_VecS .. Op_VecZ) and
     // `cbuf` / `st` are the code buffer and output stream passed through from
     // the caller.
1056 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1057                           int src_hi, int dst_hi, uint ireg, outputStream* st);
1058 
1059 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1060                             int stack_offset, int reg, uint ireg, outputStream* st);
1061 
     // Copy a vector value from one stack slot to another.
     //
     //   cbuf       - when non-NULL, the copy is emitted as machine code
     //   src_offset - rsp-relative offset of the source slot
     //   dst_offset - rsp-relative offset of the destination slot
     //   ireg       - vector kind: Op_VecS, Op_VecD, Op_VecX, Op_VecY or Op_VecZ;
     //                any other value hits ShouldNotReachHere()
     //   st         - when cbuf is NULL (non-PRODUCT builds only), the copy is
     //                printed to this stream instead
     //
     // Strategy per kind, as emitted below:
     //   VecS       - rax is saved at [rsp - 8], used as a 32-bit temporary and
     //                then restored
     //   VecD, VecX - pushq/popq with memory operands, 8 bytes at a time
     //   VecY, VecZ - xmm0 is saved at [rsp - 32] / [rsp - 64], used as the
     //                temporary and then restored
     //
     // In PRODUCT builds the else branch is compiled out, so a NULL cbuf
     // produces no output at all.
1062 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
1063                                       int dst_offset, uint ireg, outputStream* st) {
1064   if (cbuf) {
1065     MacroAssembler _masm(cbuf);
1066     switch (ireg) {
1067     case Op_VecS:
1068       __ movq(Address(rsp, -8), rax);
1069       __ movl(rax, Address(rsp, src_offset));
1070       __ movl(Address(rsp, dst_offset), rax);
1071       __ movq(rax, Address(rsp, -8));
1072       break;
1073     case Op_VecD:
1074       __ pushq(Address(rsp, src_offset));
1075       __ popq (Address(rsp, dst_offset));
1076       break;
1077     case Op_VecX:
1078       __ pushq(Address(rsp, src_offset));
1079       __ popq (Address(rsp, dst_offset));
1080       __ pushq(Address(rsp, src_offset+8));
1081       __ popq (Address(rsp, dst_offset+8));
1082       break;
1083     case Op_VecY:
1084       __ vmovdqu(Address(rsp, -32), xmm0);
1085       __ vmovdqu(xmm0, Address(rsp, src_offset));
1086       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1087       __ vmovdqu(xmm0, Address(rsp, -32));
1088       break;
1089     case Op_VecZ:
1090       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1091       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1092       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1093       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1094       break;
1095     default:
1096       ShouldNotReachHere();
1097     }
1098 #ifndef PRODUCT
1099   } else {
         // Textual form of the sequences above.
1100     switch (ireg) {
1101     case Op_VecS:
1102       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1103                 "movl    rax, [rsp + #%d]\n\t"
1104                 "movl    [rsp + #%d], rax\n\t"
1105                 "movq    rax, [rsp - #8]",
1106                 src_offset, dst_offset);
1107       break;
1108     case Op_VecD:
1109       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1110                 "popq    [rsp + #%d]",
1111                 src_offset, dst_offset);
1112       break;
1113      case Op_VecX:
1114       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
1115                 "popq    [rsp + #%d]\n\t"
1116                 "pushq   [rsp + #%d]\n\t"
1117                 "popq    [rsp + #%d]",
1118                 src_offset, dst_offset, src_offset+8, dst_offset+8);
1119       break;
1120     case Op_VecY:
1121       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1122                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1123                 "vmovdqu [rsp + #%d], xmm0\n\t"
1124                 "vmovdqu xmm0, [rsp - #32]",
1125                 src_offset, dst_offset);
1126       break;
1127     case Op_VecZ:
           // NOTE(review): the text says vmovdqu while the emitter above uses
           // evmovdquq for this case.
1128       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1129                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1130                 "vmovdqu [rsp + #%d], xmm0\n\t"
1131                 "vmovdqu xmm0, [rsp - #64]",
1132                 src_offset, dst_offset);
1133       break;
1134     default:
1135       ShouldNotReachHere();
1136     }
1137 #endif
1138   }
1139 }
1140 
     // Shared worker behind MachSpillCopyNode::format() and ::emit().
     //
     // Looks up the source (in(1)) and destination (this) register pairs
     // assigned by the allocator, classifies each half with rc_class(), and
     // then either emits the copy into `cbuf` (when cbuf is non-NULL) or, in
     // non-PRODUCT builds, prints its textual form to `st`.
     //
     //   cbuf    - code buffer to emit into, or NULL to print instead
     //   ra_     - register allocator, used for register and stack-offset lookup
     //   do_size - not read anywhere in this function
     //   st      - output stream for the textual form; may be NULL when emitting
     //
     // Every handled path returns 0.  Vector-typed copies are delegated to the
     // vec_* helpers.  For all other copies the move is treated as 64-bit when
     // both source and destination form an aligned adjacent pair (first is even
     // and second == first + 1) and as 32-bit otherwise.  An unhandled
     // source/destination class combination falls through to the assert and
     // Unimplemented() at the end.
1141 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1142                                        PhaseRegAlloc* ra_,
1143                                        bool do_size,
1144                                        outputStream* st) const {
1145   assert(cbuf != NULL || st  != NULL, "sanity");
1146   // Get registers to move
1147   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1148   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1149   OptoReg::Name dst_second = ra_->get_reg_second(this);
1150   OptoReg::Name dst_first = ra_->get_reg_first(this);
1151 
1152   enum RC src_second_rc = rc_class(src_second);
1153   enum RC src_first_rc = rc_class(src_first);
1154   enum RC dst_second_rc = rc_class(dst_second);
1155   enum RC dst_first_rc = rc_class(dst_first);
1156 
1157   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1158          "must move at least 1 register" );
1159 
1160   if (src_first == dst_first && src_second == dst_second) {
1161     // Self copy, no move
1162     return 0;
1163   }
       // Vector values: dispatch on source/destination class to the vec_* helpers.
1164   if (bottom_type()->isa_vect() != NULL) {
1165     uint ireg = ideal_reg();
1166     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1167     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1168     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1169       // mem -> mem
1170       int src_offset = ra_->reg2offset(src_first);
1171       int dst_offset = ra_->reg2offset(dst_first);
1172       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1173     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1174       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1175     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1176       int stack_offset = ra_->reg2offset(dst_first);
1177       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1178     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1179       int stack_offset = ra_->reg2offset(src_first);
1180       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1181     } else {
1182       ShouldNotReachHere();
1183     }
1184     return 0;
1185   }
1186   if (src_first_rc == rc_stack) {
1187     // mem ->
1188     if (dst_first_rc == rc_stack) {
1189       // mem -> mem
1190       assert(src_second != dst_first, "overlap");
1191       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1192           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1193         // 64-bit
1194         int src_offset = ra_->reg2offset(src_first);
1195         int dst_offset = ra_->reg2offset(dst_first);
1196         if (cbuf) {
1197           MacroAssembler _masm(cbuf);
1198           __ pushq(Address(rsp, src_offset));
1199           __ popq (Address(rsp, dst_offset));
1200 #ifndef PRODUCT
1201         } else {
1202           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1203                     "popq    [rsp + #%d]",
1204                      src_offset, dst_offset);
1205 #endif
1206         }
1207       } else {
1208         // 32-bit
1209         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1210         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1211         // No pushl/popl, so:
             // rax is saved at [rsp - 8], used as the temporary, then restored.
1212         int src_offset = ra_->reg2offset(src_first);
1213         int dst_offset = ra_->reg2offset(dst_first);
1214         if (cbuf) {
1215           MacroAssembler _masm(cbuf);
1216           __ movq(Address(rsp, -8), rax);
1217           __ movl(rax, Address(rsp, src_offset));
1218           __ movl(Address(rsp, dst_offset), rax);
1219           __ movq(rax, Address(rsp, -8));
1220 #ifndef PRODUCT
1221         } else {
1222           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1223                     "movl    rax, [rsp + #%d]\n\t"
1224                     "movl    [rsp + #%d], rax\n\t"
1225                     "movq    rax, [rsp - #8]",
1226                      src_offset, dst_offset);
1227 #endif
1228         }
1229       }
1230       return 0;
1231     } else if (dst_first_rc == rc_int) {
1232       // mem -> gpr
1233       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1234           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1235         // 64-bit
1236         int offset = ra_->reg2offset(src_first);
1237         if (cbuf) {
1238           MacroAssembler _masm(cbuf);
1239           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1240 #ifndef PRODUCT
1241         } else {
1242           st->print("movq    %s, [rsp + #%d]\t# spill",
1243                      Matcher::regName[dst_first],
1244                      offset);
1245 #endif
1246         }
1247       } else {
1248         // 32-bit
1249         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1250         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1251         int offset = ra_->reg2offset(src_first);
1252         if (cbuf) {
1253           MacroAssembler _masm(cbuf);
1254           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1255 #ifndef PRODUCT
1256         } else {
1257           st->print("movl    %s, [rsp + #%d]\t# spill",
1258                      Matcher::regName[dst_first],
1259                      offset);
1260 #endif
1261         }
1262       }
1263       return 0;
1264     } else if (dst_first_rc == rc_float) {
1265       // mem-> xmm
1266       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1267           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1268         // 64-bit
1269         int offset = ra_->reg2offset(src_first);
1270         if (cbuf) {
1271           MacroAssembler _masm(cbuf);
1272           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1273 #ifndef PRODUCT
1274         } else {
1275           st->print("%s  %s, [rsp + #%d]\t# spill",
1276                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1277                      Matcher::regName[dst_first],
1278                      offset);
1279 #endif
1280         }
1281       } else {
1282         // 32-bit
1283         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1284         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1285         int offset = ra_->reg2offset(src_first);
1286         if (cbuf) {
1287           MacroAssembler _masm(cbuf);
1288           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1289 #ifndef PRODUCT
1290         } else {
1291           st->print("movss   %s, [rsp + #%d]\t# spill",
1292                      Matcher::regName[dst_first],
1293                      offset);
1294 #endif
1295         }
1296       }
1297       return 0;
1298     }
1299   } else if (src_first_rc == rc_int) {
1300     // gpr ->
1301     if (dst_first_rc == rc_stack) {
1302       // gpr -> mem
1303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1305         // 64-bit
1306         int offset = ra_->reg2offset(dst_first);
1307         if (cbuf) {
1308           MacroAssembler _masm(cbuf);
1309           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1310 #ifndef PRODUCT
1311         } else {
1312           st->print("movq    [rsp + #%d], %s\t# spill",
1313                      offset,
1314                      Matcher::regName[src_first]);
1315 #endif
1316         }
1317       } else {
1318         // 32-bit
1319         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1320         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1321         int offset = ra_->reg2offset(dst_first);
1322         if (cbuf) {
1323           MacroAssembler _masm(cbuf);
1324           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1325 #ifndef PRODUCT
1326         } else {
1327           st->print("movl    [rsp + #%d], %s\t# spill",
1328                      offset,
1329                      Matcher::regName[src_first]);
1330 #endif
1331         }
1332       }
1333       return 0;
1334     } else if (dst_first_rc == rc_int) {
1335       // gpr -> gpr
1336       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1337           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1338         // 64-bit
1339         if (cbuf) {
1340           MacroAssembler _masm(cbuf);
1341           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1342                   as_Register(Matcher::_regEncode[src_first]));
1343 #ifndef PRODUCT
1344         } else {
1345           st->print("movq    %s, %s\t# spill",
1346                      Matcher::regName[dst_first],
1347                      Matcher::regName[src_first]);
1348 #endif
1349         }
1350         return 0;
1351       } else {
1352         // 32-bit
1353         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1354         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1355         if (cbuf) {
1356           MacroAssembler _masm(cbuf);
1357           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1358                   as_Register(Matcher::_regEncode[src_first]));
1359 #ifndef PRODUCT
1360         } else {
1361           st->print("movl    %s, %s\t# spill",
1362                      Matcher::regName[dst_first],
1363                      Matcher::regName[src_first]);
1364 #endif
1365         }
1366         return 0;
1367       }
1368     } else if (dst_first_rc == rc_float) {
1369       // gpr -> xmm
1370       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1371           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1372         // 64-bit
1373         if (cbuf) {
1374           MacroAssembler _masm(cbuf);
1375           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1376 #ifndef PRODUCT
1377         } else {
1378           st->print("movdq   %s, %s\t# spill",
1379                      Matcher::regName[dst_first],
1380                      Matcher::regName[src_first]);
1381 #endif
1382         }
1383       } else {
1384         // 32-bit
1385         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1386         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1387         if (cbuf) {
1388           MacroAssembler _masm(cbuf);
1389           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1390 #ifndef PRODUCT
1391         } else {
1392           st->print("movdl   %s, %s\t# spill",
1393                      Matcher::regName[dst_first],
1394                      Matcher::regName[src_first]);
1395 #endif
1396         }
1397       }
1398       return 0;
1399     }
1400   } else if (src_first_rc == rc_float) {
1401     // xmm ->
1402     if (dst_first_rc == rc_stack) {
1403       // xmm -> mem
1404       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1405           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1406         // 64-bit
1407         int offset = ra_->reg2offset(dst_first);
1408         if (cbuf) {
1409           MacroAssembler _masm(cbuf);
1410           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1411 #ifndef PRODUCT
1412         } else {
1413           st->print("movsd   [rsp + #%d], %s\t# spill",
1414                      offset,
1415                      Matcher::regName[src_first]);
1416 #endif
1417         }
1418       } else {
1419         // 32-bit
1420         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1421         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1422         int offset = ra_->reg2offset(dst_first);
1423         if (cbuf) {
1424           MacroAssembler _masm(cbuf);
1425           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1426 #ifndef PRODUCT
1427         } else {
1428           st->print("movss   [rsp + #%d], %s\t# spill",
1429                      offset,
1430                      Matcher::regName[src_first]);
1431 #endif
1432         }
1433       }
1434       return 0;
1435     } else if (dst_first_rc == rc_int) {
1436       // xmm -> gpr
1437       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1438           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1439         // 64-bit
1440         if (cbuf) {
1441           MacroAssembler _masm(cbuf);
1442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1443 #ifndef PRODUCT
1444         } else {
1445           st->print("movdq   %s, %s\t# spill",
1446                      Matcher::regName[dst_first],
1447                      Matcher::regName[src_first]);
1448 #endif
1449         }
1450       } else {
1451         // 32-bit
1452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1454         if (cbuf) {
1455           MacroAssembler _masm(cbuf);
1456           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1457 #ifndef PRODUCT
1458         } else {
1459           st->print("movdl   %s, %s\t# spill",
1460                      Matcher::regName[dst_first],
1461                      Matcher::regName[src_first]);
1462 #endif
1463         }
1464       }
1465       return 0;
1466     } else if (dst_first_rc == rc_float) {
1467       // xmm -> xmm
1468       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1469           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1470         // 64-bit
1471         if (cbuf) {
1472           MacroAssembler _masm(cbuf);
1473           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1474 #ifndef PRODUCT
1475         } else {
1476           st->print("%s  %s, %s\t# spill",
1477                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1478                      Matcher::regName[dst_first],
1479                      Matcher::regName[src_first]);
1480 #endif
1481         }
1482       } else {
1483         // 32-bit
1484         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1485         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1486         if (cbuf) {
1487           MacroAssembler _masm(cbuf);
1488           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1489 #ifndef PRODUCT
1490         } else {
1491           st->print("%s  %s, %s\t# spill",
1492                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1493                      Matcher::regName[dst_first],
1494                      Matcher::regName[src_first]);
1495 #endif
1496         }
1497       }
1498       return 0;
1499     }
1500   }
1501 
       // Reached only for a source/destination class combination not handled above.
1502   assert(0," foo ");
1503   Unimplemented();
1504   return 0;
1505 }
1506 
1507 #ifndef PRODUCT
     // Non-PRODUCT only: prints the spill copy to `st` by running
     // implementation() without a code buffer.
1508 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1509   implementation(NULL, ra_, false, st);
1510 }
1511 #endif
1512 
     // Emits the spill copy into `cbuf` by running implementation() without an
     // output stream.
1513 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1514   implementation(&cbuf, ra_, false, NULL);
1515 }
1516 
     // Size of the spill copy; delegates to the generic MachNode::size(ra_).
1517 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1518   return MachNode::size(ra_);
1519 }
1520 
1521 //=============================================================================
1522 #ifndef PRODUCT
     // Non-PRODUCT only: prints the box-lock node as a leaq of the stack slot
     // address ([rsp + offset]) into the node's assigned register.  The offset
     // is derived from the first element of the node's input register mask.
1523 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1524 {
1525   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1526   int reg = ra_->get_reg_first(this);
1527   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1528             Matcher::regName[reg], offset);
1529 }
1530 #endif
1531 
1532 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1533 {
1534   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1535   int reg = ra_->get_encode(this);
1536   if (offset >= 0x80) {
1537     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1538     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1539     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1540     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1541     emit_d32(cbuf, offset);
1542   } else {
1543     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1544     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1545     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1546     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1547     emit_d8(cbuf, offset);
1548   }
1549 }
1550 
1551 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1552 {
1553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1554   return (offset < 0x80) ? 5 : 8; // REX
1555 }
1556 
1557 //=============================================================================
#ifndef PRODUCT
// Non-PRODUCT printer: only writes the node name, not the emitted code.
void MachVVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVVEPNode");
}
#endif
1564 
1565 void MachVVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1566 {
1567   // Unpack all value type args passed as oop and then jump to
1568   // the verified entry point (skipping the unverified entry).
1569   MacroAssembler masm(&cbuf);
1570   masm.unpack_value_args(ra_->C);
1571   masm.jmp(*_verified_entry);
1572 }
1573 
// Size of the emitted entry sequence; not computed here but delegated to
// the generic MachNode::size().
uint MachVVEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}
1578 
1579 //=============================================================================
1580 #ifndef PRODUCT
1581 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1582 {
1583   if (UseCompressedClassPointers) {
1584     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1585     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1586     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1587   } else {
1588     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1589                  "# Inline cache check");
1590   }
1591   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1592   st->print_cr("\tnop\t# nops to align entry point");
1593 }
1594 #endif
1595 
1596 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1597 {
1598   MacroAssembler masm(&cbuf);
1599   uint insts_size = cbuf.insts_size();
1600   if (UseCompressedClassPointers) {
1601     masm.load_klass(rscratch1, j_rarg0);
1602     masm.cmpptr(rax, rscratch1);
1603   } else {
1604     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1605   }
1606 
1607   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1608 
1609   /* WARNING these NOPs are critical so that verified entry point is properly
1610      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1611   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1612   if (OptoBreakpoint) {
1613     // Leave space for int3
1614     nops_cnt -= 1;
1615   }
1616   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1617   if (nops_cnt > 0)
1618     masm.nop(nops_cnt);
1619 }
1620 
// Size of the unverified entry point; delegated to the generic
// MachNode::size() rather than computed here.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1626 
1627 
1628 //=============================================================================
1629 
1630 int Matcher::regnum_to_fpu_offset(int regnum)
1631 {
1632   return regnum - 32; // The FP registers are in the second chunk
1633 }
1634 
// This query is UltraSparc specific; returning true just means we have a
// fast l2f conversion.  Always true here.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1639 
1640 // Is this branch offset short enough that a short branch can be used?
1641 //
1642 // NOTE: If the platform does not provide any short branch variants, then
1643 //       this method should return false for offset 0.
1644 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1645   // The passed offset is relative to address of the branch.
1646   // On 86 a branch displacement is calculated relative to address
1647   // of a next instruction.
1648   offset -= br_size;
1649 
1650   // the short version of jmpConUCF2 contains multiple branches,
1651   // making the reach slightly less
1652   if (rule == jmpConUCF2_rule)
1653     return (-126 <= offset && offset <= 125);
1654   return (-128 <= offset && offset <= 127);
1655 }
1656 
// Reports whether a 64-bit constant counts as "simple".  The value is
// ignored: every constant is reported as simple.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Disabled alternative that would accept only sign-extended 32-bit values:
  //return value == (int) value;  // Cf. storeImmL and immL32.

  // Probably always true, even if a temp register is required.
  return true;
}
1664 
// The ecx parameter to rep stosq for the ClearArray node is in words,
// not bytes, hence false.
const bool Matcher::init_array_count_is_in_bytes = false;
1667 
// No additional cost for CMOVL: the reported cost is 0.
const int Matcher::long_cmove_cost() { return 0; }
1670 
// No CMOVF/CMOVD with SSE2, so the reported cost is ConditionalMoveLimit.
const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1673 
// Does the CPU require late expand (see block.cpp for description of late expand)?
// Not on this platform.
const bool Matcher::require_postalloc_expand = false;
1676 
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?  No explicit masking is
// requested here.
const bool Matcher::need_masked_shift_count = false;
1680 
// Whether narrow oops may be folded into a complex address.  Only valid with
// compressed oops enabled (asserted).  True when the object alignment shift is
// at most 3.
// NOTE(review): presumably because an address scale factor can express a
// shift of at most 3 -- confirm.
bool Matcher::narrow_oop_use_complex_address() {
  assert(UseCompressedOops, "only for compressed oops code");
  return (LogMinObjAlignmentInBytes <= 3);
}
1685 
// Whether narrow klass pointers may be folded into a complex address.  Only
// valid with compressed class pointers enabled (asserted).  True when the
// klass alignment shift is at most 3.
bool Matcher::narrow_klass_use_complex_address() {
  assert(UseCompressedClassPointers, "only for compressed klass code");
  return (LogKlassAlignmentInBytes <= 3);
}
1690 
// Preference between the two ways of representing a constant oop.
bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP.
  return true;
}
1695 
// Preference between the two ways of representing a constant klass pointer.
// Currently unconditional; see the TODO for the considered refinement.
bool Matcher::const_klass_prefer_decode() {
  // TODO: Either support matching DecodeNKlass (heap-based) in operand
  //       or consider the following:
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  //return Universe::narrow_klass_base() == NULL;
  return true;
}
1703 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.  Hence true on this platform.
const bool Matcher::rematerialize_float_constants = true; // XXX
1710 
// If the CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
// No fixup is requested on this platform.
const bool Matcher::misaligned_doubles_ok = true;
1716 
// Platform-dependent implicit-null fixup hook.  No-op on amd64: both
// arguments are ignored.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1719 
// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  Advertised as required here.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1723 
// Are floats converted to double when stored to stack during deoptimization?
// On x64 they are stored without conversion so we can use normal access.
bool Matcher::float_in_double() { return false; }
1727 
// Do ints take an entire long register or just half?  Here: an entire one.
const bool Matcher::int_in_long = true;
1730 
1731 // Return whether or not this register is ever used as an argument.
1732 // This function is used on startup to build the trampoline stubs in
1733 // generateOptoStub.  Registers not mentioned will be killed by the VM
1734 // call in the trampoline, and arguments in those registers not be
1735 // available to the callee.
1736 bool Matcher::can_be_java_arg(int reg)
1737 {
1738   return
1739     reg ==  RDI_num || reg == RDI_H_num ||
1740     reg ==  RSI_num || reg == RSI_H_num ||
1741     reg ==  RDX_num || reg == RDX_H_num ||
1742     reg ==  RCX_num || reg == RCX_H_num ||
1743     reg ==   R8_num || reg ==  R8_H_num ||
1744     reg ==   R9_num || reg ==  R9_H_num ||
1745     reg ==  R12_num || reg == R12_H_num ||
1746     reg == XMM0_num || reg == XMM0b_num ||
1747     reg == XMM1_num || reg == XMM1b_num ||
1748     reg == XMM2_num || reg == XMM2b_num ||
1749     reg == XMM3_num || reg == XMM3b_num ||
1750     reg == XMM4_num || reg == XMM4b_num ||
1751     reg == XMM5_num || reg == XMM5b_num ||
1752     reg == XMM6_num || reg == XMM6b_num ||
1753     reg == XMM7_num || reg == XMM7b_num;
1754 }
1755 
// A register is a spillable argument exactly when can_be_java_arg() accepts it.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
1760 
// Whether long division by a constant should use an assembly sequence.
// The divisor is ignored: the answer is always false.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64 bit mode, code which uses multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
  return false;
}
1767 
// Register for DIVI projection of divmodI: the INT_RAX_REG mask.
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}
1772 
// Register for MODI projection of divmodI: the INT_RDX_REG mask.
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}
1777 
// Register for DIVL projection of divmodL: the LONG_RAX_REG mask.
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}
1782 
// Register for MODL projection of divmodL: the LONG_RDX_REG mask.
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}
1787 
// Register for saving SP into on method handle invokes. Not used on x86_64,
// so the empty NO_REG mask is returned.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
    return NO_REG_mask();
}
1792 
1793 %}
1794 
1795 //----------ENCODING BLOCK-----------------------------------------------------
1796 // This block specifies the encoding classes used by the compiler to
1797 // output byte streams.  Encoding classes are parameterized macros
1798 // used by Machine Instruction Nodes in order to generate the bit
1799 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
1803 // which returns its register number when queried.  CONST_INTER causes
1804 // an operand to generate a function which returns the value of the
1805 // constant when queried.  MEMORY_INTER causes an operand to generate
1806 // four functions which return the Base Register, the Index Register,
1807 // the Scale Value, and the Offset Value of the operand when queried.
1808 // COND_INTER causes an operand to generate six functions which return
1809 // the encoding code (ie - encoding bits for the instruction)
1810 // associated with each basic boolean condition for a conditional
1811 // instruction.
1812 //
1813 // Instructions specify two basic values for encoding.  Again, a
1814 // function is available to check if the constant displacement is an
1815 // oop. They use the ins_encode keyword to specify their encoding
1816 // classes (which must be a sequence of enc_class names, and their
1817 // parameters, specified in the encoding block), and they use the
1818 // opcode keyword to specify, in order, their primary, secondary, and
1819 // tertiary opcode.  Only the opcode sections which a particular
1820 // instruction needs for encoding need to be specified.
1821 encode %{
1822   // Build emit functions for each basic byte or larger field in the
1823   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1824   // from C++ code in the enc_class source block.  Emit functions will
1825   // live in the main source block for now.  In future, we can
1826   // generalize this by adding a syntax that specifies the sizes of
1827   // fields in an order, so that the adlc can build the emit functions
1828   // automagically
1829 
  // Emit primary opcode: writes the instruction's $primary opcode value
  // into the code buffer.
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}
1835 
  // Emit secondary opcode: writes the instruction's $secondary opcode value
  // into the code buffer.
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}
1841 
  // Emit tertiary opcode: writes the instruction's $tertiary opcode value
  // into the code buffer.
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}
1847 
  // Emit opcode directly: the opcode value is supplied as the constant
  // operand d8 rather than taken from the instruction's opcode sections.
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}
1853 
  // Emit size prefix: writes the single prefix byte 0x66.
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}
1859 
  // Emit an r/m byte with fields (0x3, 0, low three bits of reg).  Any REX
  // prefix needed for registers >= 8 is not emitted here.
  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}
1864 
  // Emit an r/m byte with fields (0x3, low three bits of dst, low three bits
  // of src).  Any REX prefix needed for registers >= 8 is not emitted here.
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
1869 
  // Emit the opcode given as a constant operand, followed by an r/m byte with
  // fields (0x3, low three bits of dst, low three bits of src).
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
1875 
  // Prologue for 32-bit signed divide/remainder.  Emits the min_int / -1
  // special-case check and the cltd; the idivl itself is NOT emitted here and
  // must follow immediately, because the branch displacements below already
  // account for its size.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:

    // cmp    $0x80000000,%eax
    emit_opcode(cbuf, 0x3d);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // jne    e <normal>
    // (displacement grows by one when the cmp below carries a REX_B prefix)
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    if ($div$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     11 <done>
    // (displacement grows by one when the following idiv carries a REX_B prefix)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);

    // <normal>
    // cltd
    emit_opcode(cbuf, 0x99);

    // idivl (note: must be emitted by the user of this rule)
    // <done>
  %}
1939 
  // Prologue for 64-bit signed divide/remainder.  Emits the min_long / -1
  // special-case check and the cqto; the idivq itself is NOT emitted here and
  // must follow immediately, because the branch displacements below already
  // account for its size.  Every instruction that names $div carries a REX
  // prefix (REX_W or REX_WB), so the displacements do not depend on $div.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:

    // mov    $0x8000000000000000,%rdx
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0xBA);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x00);
    emit_d8(cbuf, 0x80);

    // cmp    %rdx,%rax
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x39);
    emit_d8(cbuf, 0xD0);

    // jne    17 <normal>
    emit_opcode(cbuf, 0x75);
    emit_d8(cbuf, 0x08);

    // xor    %edx,%edx
    emit_opcode(cbuf, 0x33);
    emit_d8(cbuf, 0xD2);

    // cmp    $0xffffffffffffffff,$div
    emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
    emit_opcode(cbuf, 0x83);
    emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
    emit_d8(cbuf, 0xFF);

    // je     1c <done>
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);

    // <normal>
    // cqto
    emit_opcode(cbuf, Assembler::REX_W);
    emit_opcode(cbuf, 0x99);

    // idivq (note: must be emitted by the user of this rule)
    // <done>
  %}
2010 
2011   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2012   enc_class OpcSE(immI imm)
2013   %{
2014     // Emit primary opcode and set sign-extend bit
2015     // Check for 8-bit immediate, and set sign extend bit in opcode
2016     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2017       emit_opcode(cbuf, $primary | 0x02);
2018     } else {
2019       // 32-bit immediate
2020       emit_opcode(cbuf, $primary);
2021     }
2022   %}
2023 
2024   enc_class OpcSErm(rRegI dst, immI imm)
2025   %{
2026     // OpcSEr/m
2027     int dstenc = $dst$$reg;
2028     if (dstenc >= 8) {
2029       emit_opcode(cbuf, Assembler::REX_B);
2030       dstenc -= 8;
2031     }
2032     // Emit primary opcode and set sign-extend bit
2033     // Check for 8-bit immediate, and set sign extend bit in opcode
2034     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2035       emit_opcode(cbuf, $primary | 0x02);
2036     } else {
2037       // 32-bit immediate
2038       emit_opcode(cbuf, $primary);
2039     }
2040     // Emit r/m byte with secondary opcode, after primary opcode.
2041     emit_rm(cbuf, 0x3, $secondary, dstenc);
2042   %}
2043 
2044   enc_class OpcSErm_wide(rRegL dst, immI imm)
2045   %{
2046     // OpcSEr/m
2047     int dstenc = $dst$$reg;
2048     if (dstenc < 8) {
2049       emit_opcode(cbuf, Assembler::REX_W);
2050     } else {
2051       emit_opcode(cbuf, Assembler::REX_WB);
2052       dstenc -= 8;
2053     }
2054     // Emit primary opcode and set sign-extend bit
2055     // Check for 8-bit immediate, and set sign extend bit in opcode
2056     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2057       emit_opcode(cbuf, $primary | 0x02);
2058     } else {
2059       // 32-bit immediate
2060       emit_opcode(cbuf, $primary);
2061     }
2062     // Emit r/m byte with secondary opcode, after primary opcode.
2063     emit_rm(cbuf, 0x3, $secondary, dstenc);
2064   %}
2065 
2066   enc_class Con8or32(immI imm)
2067   %{
2068     // Check for 8-bit immediate, and set sign extend bit in opcode
2069     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2070       $$$emit8$imm$$constant;
2071     } else {
2072       // 32-bit immediate
2073       $$$emit32$imm$$constant;
2074     }
2075   %}
2076 
  // Emits a single byte built by emit_cc() from the secondary opcode and the
  // destination register.
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
2082 
  // Same as opc2_reg, but uses the tertiary opcode.
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
2088 
  // Emits an r/m byte with fields (0x3, secondary opcode, low three bits of
  // the register operand).  No REX prefix is emitted here.
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
2094 
  // Emits the two opcode bytes of a conditional move: the primary opcode
  // byte, then the secondary opcode combined with the operand's condition
  // code via emit_cc().
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
2101 
2102   enc_class enc_PartialSubtypeCheck()
2103   %{
2104     Register Rrdi = as_Register(RDI_enc); // result register
2105     Register Rrax = as_Register(RAX_enc); // super class
2106     Register Rrcx = as_Register(RCX_enc); // killed
2107     Register Rrsi = as_Register(RSI_enc); // sub class
2108     Label miss;
2109     const bool set_cond_codes = true;
2110 
2111     MacroAssembler _masm(&cbuf);
2112     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2113                                      NULL, &miss,
2114                                      /*set_cond_codes:*/ true);
2115     if ($primary) {
2116       __ xorptr(Rrdi, Rrdi);
2117     }
2118     __ bind(miss);
2119   %}
2120 
  // Conditionally emits vzeroupper, and asserts that the number of bytes
  // emitted equals clear_avx_size().  The off0/off1 bookkeeping is wrapped in
  // debug_only().
  enc_class clear_avx %{
    debug_only(int off0 = cbuf.insts_size());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
2133 
  // Calls a runtime entry indirectly: loads the 64-bit target address into
  // r10 and calls through it.
  enc_class Java_To_Runtime(method meth) %{
    // No relocation needed
    MacroAssembler _masm(&cbuf);
    __ mov64(r10, (int64_t) $meth$$method);
    __ call(r10);
  %}
2140 
2141   enc_class Java_To_Interpreter(method meth)
2142   %{
2143     // CALL Java_To_Interpreter
2144     // This is the instruction starting address for relocation info.
2145     cbuf.set_insts_mark();
2146     $$$emit8$primary;
2147     // CALL directly to the runtime
2148     emit_d32_reloc(cbuf,
2149                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2150                    runtime_call_Relocation::spec(),
2151                    RELOC_DISP32);
2152   %}
2153 
  // Emits a direct call (primary opcode byte + 32-bit displacement).
  //  - Without a _method: a plain runtime-call relocation is recorded.
  //  - With a _method: an opt-virtual or static call relocation (chosen by
  //    _optimized_virtual) is recorded with the resolved method index, and
  //    the to-interpreter stub is emitted.  If that stub cannot be emitted
  //    the compilation is failed with "CodeCache is full".  With INCLUDE_AOT
  //    the AOT stub is emitted as well.
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_DISP32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      // The stubs are tied to the call via the instruction mark set above.
      address mark = cbuf.insts_mark();
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
#if INCLUDE_AOT
      CompiledStaticCall::emit_to_aot_stub(cbuf, mark);
#endif
    }
  %}
2184 
  // Emits an inline-cache call to the target through MacroAssembler::ic_call,
  // passing the resolved method index.
  enc_class Java_Dynamic_Call(method meth) %{
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}
2189 
2190   enc_class Java_Compiled_Call(method meth)
2191   %{
2192     // JAVA COMPILED CALL
2193     int disp = in_bytes(Method:: from_compiled_offset());
2194 
2195     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2196     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2197 
2198     // callq *disp(%rax)
2199     cbuf.set_insts_mark();
2200     $$$emit8$primary;
2201     if (disp < 0x80) {
2202       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2203       emit_d8(cbuf, disp); // Displacement
2204     } else {
2205       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2206       emit_d32(cbuf, disp); // Displacement
2207     }
2208   %}
2209 
2210   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2211   %{
2212     // SAL, SAR, SHR
2213     int dstenc = $dst$$reg;
2214     if (dstenc >= 8) {
2215       emit_opcode(cbuf, Assembler::REX_B);
2216       dstenc -= 8;
2217     }
2218     $$$emit8$primary;
2219     emit_rm(cbuf, 0x3, $secondary, dstenc);
2220     $$$emit8$shift$$constant;
2221   %}
2222 
2223   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2224   %{
2225     // SAL, SAR, SHR
2226     int dstenc = $dst$$reg;
2227     if (dstenc < 8) {
2228       emit_opcode(cbuf, Assembler::REX_W);
2229     } else {
2230       emit_opcode(cbuf, Assembler::REX_WB);
2231       dstenc -= 8;
2232     }
2233     $$$emit8$primary;
2234     emit_rm(cbuf, 0x3, $secondary, dstenc);
2235     $$$emit8$shift$$constant;
2236   %}
2237 
2238   enc_class load_immI(rRegI dst, immI src)
2239   %{
2240     int dstenc = $dst$$reg;
2241     if (dstenc >= 8) {
2242       emit_opcode(cbuf, Assembler::REX_B);
2243       dstenc -= 8;
2244     }
2245     emit_opcode(cbuf, 0xB8 | dstenc);
2246     $$$emit32$src$$constant;
2247   %}
2248 
2249   enc_class load_immL(rRegL dst, immL src)
2250   %{
2251     int dstenc = $dst$$reg;
2252     if (dstenc < 8) {
2253       emit_opcode(cbuf, Assembler::REX_W);
2254     } else {
2255       emit_opcode(cbuf, Assembler::REX_WB);
2256       dstenc -= 8;
2257     }
2258     emit_opcode(cbuf, 0xB8 | dstenc);
2259     emit_d64(cbuf, $src$$constant);
2260   %}
2261 
2262   enc_class load_immUL32(rRegL dst, immUL32 src)
2263   %{
2264     // same as load_immI, but this time we care about zeroes in the high word
2265     int dstenc = $dst$$reg;
2266     if (dstenc >= 8) {
2267       emit_opcode(cbuf, Assembler::REX_B);
2268       dstenc -= 8;
2269     }
2270     emit_opcode(cbuf, 0xB8 | dstenc);
2271     $$$emit32$src$$constant;
2272   %}
2273 
2274   enc_class load_immL32(rRegL dst, immL32 src)
2275   %{
2276     int dstenc = $dst$$reg;
2277     if (dstenc < 8) {
2278       emit_opcode(cbuf, Assembler::REX_W);
2279     } else {
2280       emit_opcode(cbuf, Assembler::REX_WB);
2281       dstenc -= 8;
2282     }
2283     emit_opcode(cbuf, 0xC7);
2284     emit_rm(cbuf, 0x03, 0x00, dstenc);
2285     $$$emit32$src$$constant;
2286   %}
2287 
2288   enc_class load_immP31(rRegP dst, immP32 src)
2289   %{
2290     // same as load_immI, but this time we care about zeroes in the high word
2291     int dstenc = $dst$$reg;
2292     if (dstenc >= 8) {
2293       emit_opcode(cbuf, Assembler::REX_B);
2294       dstenc -= 8;
2295     }
2296     emit_opcode(cbuf, 0xB8 | dstenc);
2297     $$$emit32$src$$constant;
2298   %}
2299 
2300   enc_class load_immP(rRegP dst, immP src)
2301   %{
2302     int dstenc = $dst$$reg;
2303     if (dstenc < 8) {
2304       emit_opcode(cbuf, Assembler::REX_W);
2305     } else {
2306       emit_opcode(cbuf, Assembler::REX_WB);
2307       dstenc -= 8;
2308     }
2309     emit_opcode(cbuf, 0xB8 | dstenc);
2310     // This next line should be generated from ADLC
2311     if ($src->constant_reloc() != relocInfo::none) {
2312       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2313     } else {
2314       emit_d64(cbuf, $src$$constant);
2315     }
2316   %}
2317 
  // Emits the constant operand as a 32-bit immediate.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2323 
  // Emits a float constant as 32 bits, using jint_cast() to obtain the
  // integer that is written.
  enc_class Con32F_as_bits(immF src)
  %{
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    jint jf_as_bits = jint_cast(jf);
    emit_d32(cbuf, jf_as_bits);
  %}
2331 
  // Emits the constant operand as a 16-bit immediate.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2337 
  // How is this different from Con32??? XXX
  // Emits the constant operand through an explicit emit_d32() call rather
  // than the $$$emit32 form used by Con32.
  enc_class Con_d32(immI src)
  %{
    emit_d32(cbuf,$src$$constant);
  %}
2343 
  // Emits an r/m byte with fields (0x00, t1's register, 0x05) followed by a
  // zero 32-bit displacement.
  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
2349 
  // Emits the lock prefix byte (0xF0).
  enc_class lock_prefix()
  %{
    emit_opcode(cbuf, 0xF0); // lock
  %}
2354 
2355   enc_class REX_mem(memory mem)
2356   %{
2357     if ($mem$$base >= 8) {
2358       if ($mem$$index < 8) {
2359         emit_opcode(cbuf, Assembler::REX_B);
2360       } else {
2361         emit_opcode(cbuf, Assembler::REX_XB);
2362       }
2363     } else {
2364       if ($mem$$index >= 8) {
2365         emit_opcode(cbuf, Assembler::REX_X);
2366       }
2367     }
2368   %}
2369 
2370   enc_class REX_mem_wide(memory mem)
2371   %{
2372     if ($mem$$base >= 8) {
2373       if ($mem$$index < 8) {
2374         emit_opcode(cbuf, Assembler::REX_WB);
2375       } else {
2376         emit_opcode(cbuf, Assembler::REX_WXB);
2377       }
2378     } else {
2379       if ($mem$$index < 8) {
2380         emit_opcode(cbuf, Assembler::REX_W);
2381       } else {
2382         emit_opcode(cbuf, Assembler::REX_WX);
2383       }
2384     }
2385   %}
2386 
2387   // for byte regs
2388   enc_class REX_breg(rRegI reg)
2389   %{
2390     if ($reg$$reg >= 4) {
2391       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2392     }
2393   %}
2394 
2395   // for byte regs
2396   enc_class REX_reg_breg(rRegI dst, rRegI src)
2397   %{
2398     if ($dst$$reg < 8) {
2399       if ($src$$reg >= 4) {
2400         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2401       }
2402     } else {
2403       if ($src$$reg < 8) {
2404         emit_opcode(cbuf, Assembler::REX_R);
2405       } else {
2406         emit_opcode(cbuf, Assembler::REX_RB);
2407       }
2408     }
2409   %}
2410 
2411   // for byte regs
2412   enc_class REX_breg_mem(rRegI reg, memory mem)
2413   %{
2414     if ($reg$$reg < 8) {
2415       if ($mem$$base < 8) {
2416         if ($mem$$index >= 8) {
2417           emit_opcode(cbuf, Assembler::REX_X);
2418         } else if ($reg$$reg >= 4) {
2419           emit_opcode(cbuf, Assembler::REX);
2420         }
2421       } else {
2422         if ($mem$$index < 8) {
2423           emit_opcode(cbuf, Assembler::REX_B);
2424         } else {
2425           emit_opcode(cbuf, Assembler::REX_XB);
2426         }
2427       }
2428     } else {
2429       if ($mem$$base < 8) {
2430         if ($mem$$index < 8) {
2431           emit_opcode(cbuf, Assembler::REX_R);
2432         } else {
2433           emit_opcode(cbuf, Assembler::REX_RX);
2434         }
2435       } else {
2436         if ($mem$$index < 8) {
2437           emit_opcode(cbuf, Assembler::REX_RB);
2438         } else {
2439           emit_opcode(cbuf, Assembler::REX_RXB);
2440         }
2441       }
2442     }
2443   %}
2444 
  // Emits REX_B when the register encoding is >= 8; emits nothing otherwise.
  enc_class REX_reg(rRegI reg)
  %{
    if ($reg$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
  %}
2451 
2452   enc_class REX_reg_wide(rRegI reg)
2453   %{
2454     if ($reg$$reg < 8) {
2455       emit_opcode(cbuf, Assembler::REX_W);
2456     } else {
2457       emit_opcode(cbuf, Assembler::REX_WB);
2458     }
2459   %}
2460 
2461   enc_class REX_reg_reg(rRegI dst, rRegI src)
2462   %{
2463     // Optional REX prefix for a reg-reg form: dst encodings >= 8 select
2464     // the R variants, src encodings >= 8 the B variants.  Nothing is
2465     // emitted when both encodings are below 8.
2466     const bool high_dst = ($dst$$reg >= 8);
2467     const bool high_src = ($src$$reg >= 8);
2468     if (high_dst) {
2469       emit_opcode(cbuf, high_src ? Assembler::REX_RB : Assembler::REX_R);
2470     } else if (high_src) {
2471       emit_opcode(cbuf, Assembler::REX_B);
2472     }
2473     // else: no prefix
2474   %}
2475 
2476   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2477   %{
2478     // Mandatory prefix for the wide reg-reg form: always one of the four
2479     // W constants; dst encodings >= 8 add R, src encodings >= 8 add B.
2480     const bool high_dst = ($dst$$reg >= 8);
2481     const bool high_src = ($src$$reg >= 8);
2482     if (high_dst && high_src) {
2483       emit_opcode(cbuf, Assembler::REX_WRB);
2484     } else if (high_dst) {
2485       emit_opcode(cbuf, Assembler::REX_WR);
2486     } else if (high_src) {
2487       emit_opcode(cbuf, Assembler::REX_WB);
2488     } else {
2489       emit_opcode(cbuf, Assembler::REX_W);
2490     }
2491   %}
2492 
2493   enc_class REX_reg_mem(rRegI reg, memory mem)
2494   %{
2495     // Optional REX prefix for a register/memory form.  A reg encoding
2496     // >= 8 selects the R variants, a base >= 8 the B variants and an
2497     // index >= 8 the X variants.  When all three are below 8 no prefix
2498     // is emitted.
2499     const bool high_reg   = ($reg$$reg >= 8);
2500     const bool high_base  = ($mem$$base >= 8);
2501     const bool high_index = ($mem$$index >= 8);
2502     if (high_reg) {
2503       // reg encoding >= 8: one of the four R variants
2504       if (high_base) {
2505         emit_opcode(cbuf, high_index ? Assembler::REX_RXB
2506                                      : Assembler::REX_RB);
2507       } else {
2508         emit_opcode(cbuf, high_index ? Assembler::REX_RX
2509                                      : Assembler::REX_R);
2510       }
2511     } else if (high_base) {
2512       // reg encoding < 8, base >= 8
2513       emit_opcode(cbuf, high_index ? Assembler::REX_XB
2514                                    : Assembler::REX_B);
2515     } else if (high_index) {
2516       // only the index is >= 8
2517       emit_opcode(cbuf, Assembler::REX_X);
2518     }
2519   %}
2523 
2524   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2525   %{
2526     // Mandatory REX prefix for the wide register/memory form.  One of
2527     // the eight W constants is always emitted: a reg encoding >= 8 adds
2528     // R, an index >= 8 adds X and a base >= 8 adds B.
2529     const bool high_reg   = ($reg$$reg >= 8);
2530     const bool high_base  = ($mem$$base >= 8);
2531     const bool high_index = ($mem$$index >= 8);
2532     if (high_reg) {
2533       if (high_base) {
2534         emit_opcode(cbuf, high_index ? Assembler::REX_WRXB
2535                                      : Assembler::REX_WRB);
2536       } else {
2537         emit_opcode(cbuf, high_index ? Assembler::REX_WRX
2538                                      : Assembler::REX_WR);
2539       }
2540     } else {
2541       if (high_base) {
2542         emit_opcode(cbuf, high_index ? Assembler::REX_WXB
2543                                      : Assembler::REX_WB);
2544       } else {
2545         emit_opcode(cbuf, high_index ? Assembler::REX_WX
2546                                      : Assembler::REX_W);
2547       }
2548     }
2549   %}
2556 
2557   enc_class reg_mem(rRegI ereg, memory mem)
2558   %{
2559     // Forward the register encoding and every component of the memory
2560     // operand (base, index, scale, displacement and the displacement's
2561     // relocation type) to encode_RegMem.
2562     // High registers handle in encode_RegMem
2563     encode_RegMem(cbuf, $ereg$$reg, $mem$$base, $mem$$index, $mem$$scale,
2564                   $mem$$disp, $mem->disp_reloc());
2565   %}
2569 
2570   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2571   %{
2572     // The constant operand is handed to encode_RegMem in the position
2573     // where reg_mem passes a register encoding; the memory operand
2574     // supplies base, index, scale and displacement.
2575     // High registers handle in encode_RegMem
2576     const int opcode_ext = $rm_opcode$$constant;
2577     // disp-as-oop when working with static globals
2578     const relocInfo::relocType reloc = $mem->disp_reloc();
2579     encode_RegMem(cbuf, opcode_ext, $mem$$base, $mem$$index, $mem$$scale,
2580                   $mem$$disp, reloc);
2581   %}
2586 
2587   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2588   %{
2589     // Encodes the address [src0 + src1] for dst through encode_RegMem:
2590     // index 0x04 (no index), scale 0x00 (no scale), no relocation.
2591     // The base is src0 (0xFFFFFFFF would indicate no base).
2592     const int no_index = 0x04;
2593     const int no_scale = 0x00;
2594     const int displace = $src1$$constant; // 0x00 indicates no displacement
2595     encode_RegMem(cbuf, $dst$$reg, $src0$$reg, no_index, no_scale, displace,
2596                   relocInfo::none);
2597   %}
2598 
2599   enc_class neg_reg(rRegI dst)
2600   %{
2601     // NEG dst: optional REX_B, opcode 0xF7, then emit_rm(0x3, 0x03, enc).
2602     const int enc = $dst$$reg;
2603     const bool high_reg = (enc >= 8);
2604     if (high_reg) {
2605       emit_opcode(cbuf, Assembler::REX_B);
2606     }
2607     emit_opcode(cbuf, 0xF7);
2608     emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
2609   %}
2610 
2611   enc_class neg_reg_wide(rRegI dst)
2612   %{
2613     // Wide NEG: a prefix is always emitted, REX_W for encodings below 8
2614     // and REX_WB otherwise; the rm field gets the encoding reduced by 8.
2615     const int enc = $dst$$reg;
2616     const bool high_reg = (enc >= 8);
2617     emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
2618     // NEG dst
2619     emit_opcode(cbuf, 0xF7);
2620     emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
2621   %}
2624 
2625   enc_class setLT_reg(rRegI dst)
2626   %{
2627     // SETLT dst: bytes 0x0F 0x9C followed by emit_rm(0x3, 0x0, enc).
2628     // Encodings 4..7 get a plain REX prefix, encodings >= 8 get REX_B
2629     // and are reduced by 8 for the rm field.
2630     const int enc = $dst$$reg;
2631     if (enc >= 4) {
2632       emit_opcode(cbuf, enc < 8 ? Assembler::REX : Assembler::REX_B);
2633     }
2634     emit_opcode(cbuf, 0x0F);
2635     emit_opcode(cbuf, 0x9C);
2636     emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
2637   %}
2639 
2640   enc_class setNZ_reg(rRegI dst)
2641   %{
2642     // SETNZ dst: bytes 0x0F 0x95 followed by emit_rm(0x3, 0x0, enc).
2643     // Encodings 4..7 get a plain REX prefix, encodings >= 8 get REX_B
2644     // and are reduced by 8 for the rm field.
2645     const int enc = $dst$$reg;
2646     if (enc >= 4) {
2647       emit_opcode(cbuf, enc < 8 ? Assembler::REX : Assembler::REX_B);
2648     }
2649     emit_opcode(cbuf, 0x0F);
2650     emit_opcode(cbuf, 0x95);
2651     emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
2652   %}
2654 
2655 
2656   // Compare the longs and set -1, 0, or 1 into dst
2657   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2658   %{
2659     int src1enc = $src1$$reg;
2660     int src2enc = $src2$$reg;
2661     int dstenc = $dst$$reg;
2662 
2663     // cmpq $src1, $src2
2664     if (src1enc < 8) {
2665       if (src2enc < 8) {
2666         emit_opcode(cbuf, Assembler::REX_W);
2667       } else {
2668         emit_opcode(cbuf, Assembler::REX_WB);
2669       }
2670     } else {
2671       if (src2enc < 8) {
2672         emit_opcode(cbuf, Assembler::REX_WR);
2673       } else {
2674         emit_opcode(cbuf, Assembler::REX_WRB);
2675       }
2676     }
2677     emit_opcode(cbuf, 0x3B);
2678     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2679 
2680     // movl $dst, -1
2681     if (dstenc >= 8) {
2682       emit_opcode(cbuf, Assembler::REX_B);
2683     }
2684     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2685     emit_d32(cbuf, -1);
2686 
2687     // jl,s done
2688     emit_opcode(cbuf, 0x7C);
2689     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); // skips setne + movzbl: 3 bytes each, 4 each once they carry a prefix
2690 
2691     // setne $dst
2692     if (dstenc >= 4) { // encodings 4 and up get a prefix here
2693       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2694     }
2695     emit_opcode(cbuf, 0x0F);
2696     emit_opcode(cbuf, 0x95);
2697     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2698 
2699     // movzbl $dst, $dst
2700     if (dstenc >= 4) {
2701       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); // dst fills both emit_rm fields, hence REX_RB
2702     }
2703     emit_opcode(cbuf, 0x0F);
2704     emit_opcode(cbuf, 0xB6);
2705     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2706   %}
2707 
2708   enc_class Push_ResultXD(regD dst) %{ // moves a double from the x87 side into dst via the slot at [rsp]
2709     MacroAssembler _masm(&cbuf);
2710     __ fstp_d(Address(rsp, 0)); // x87 value -> [rsp] (presumably store-and-pop, per the fstp name)
2711     __ movdbl($dst$$XMMRegister, Address(rsp, 0)); // [rsp] -> dst XMM register
2712     __ addptr(rsp, 8); // give back the 8-byte slot (Push_SrcXD is the encoder that subtracts 8)
2713   %}
2714 
2715   enc_class Push_SrcXD(regD src) %{ // moves a double from src to the x87 side via a new slot at [rsp]
2716     MacroAssembler _masm(&cbuf);
2717     __ subptr(rsp, 8); // open an 8-byte slot; rsp stays lowered after this encoder
2718     __ movdbl(Address(rsp, 0), $src$$XMMRegister); // src XMM register -> [rsp]
2719     __ fld_d(Address(rsp, 0)); // [rsp] -> x87 (presumably a push, per the fld name)
2720   %}
2721 
2722 
2723   enc_class enc_rethrow() // jump with a 32-bit relative displacement to OptoRuntime::rethrow_stub()
2724   %{
2725     cbuf.set_insts_mark();
2726     emit_opcode(cbuf, 0xE9); // jmp entry
2727     emit_d32_reloc(cbuf,
2728                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4), // measured from the end of the 4-byte field
2729                    runtime_call_Relocation::spec(),
2730                    RELOC_DISP32);
2731   %}
2732 
2733 %}
2734 
2735 
2736 
2737 //----------FRAME--------------------------------------------------------------
2738 // Definition of frame structure and management information.
2739 //
2740 //  S T A C K   L A Y O U T    Allocators stack-slot number
2741 //                             |   (to get allocators register number
2742 //  G  Owned by    |        |  v    add OptoReg::stack0())
2743 //  r   CALLER     |        |
2744 //  o     |        +--------+      pad to even-align allocators stack-slot
2745 //  w     V        |  pad0  |        numbers; owned by CALLER
2746 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2747 //  h     ^        |   in   |  5
2748 //        |        |  args  |  4   Holes in incoming args owned by SELF
2749 //  |     |        |        |  3
2750 //  |     |        +--------+
2751 //  V     |        | old out|      Empty on Intel, window on Sparc
2752 //        |    old |preserve|      Must be even aligned.
2753 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2754 //        |        |   in   |  3   area for Intel ret address
2755 //     Owned by    |preserve|      Empty on Sparc.
2756 //       SELF      +--------+
2757 //        |        |  pad2  |  2   pad to align old SP
2758 //        |        +--------+  1
2759 //        |        | locks  |  0
2760 //        |        +--------+----> OptoReg::stack0(), even aligned
2761 //        |        |  pad1  | 11   pad to align new SP
2762 //        |        +--------+
2763 //        |        |        | 10
2764 //        |        | spills |  9   spills
2765 //        V        |        |  8   (pad0 slot for callee)
2766 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2767 //        ^        |  out   |  7
2768 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2769 //     Owned by    +--------+
2770 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2771 //        |    new |preserve|      Must be even-aligned.
2772 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2773 //        |        |        |
2774 //
2775 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2776 //         known from SELF's arguments and the Java calling convention.
2777 //         Region 6-7 is determined per call site.
2778 // Note 2: If the calling convention leaves holes in the incoming argument
2779 //         area, those holes are owned by SELF.  Holes in the outgoing area
2780 //         are owned by the CALLEE.  Holes should not be necessary in the
2781 //         incoming area, as the Java calling convention is completely under
2782 //         the control of the AD file.  Doubles can be sorted and packed to
2783 //         avoid holes.  Holes in the outgoing arguments may be necessary for
2784 //         varargs C calling conventions.
2785 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2786 //         even aligned with pad0 as needed.
2787 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2788 //         region 6-11 is even aligned; it may be padded out more so that
2789 //         the region from SP to FP meets the minimum stack alignment.
2790 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2791 //         alignment.  Region 11, pad1, may be dynamically extended so that
2792 //         SP meets the minimum alignment.
2793 
2794 frame
2795 %{
2796   // What direction does stack grow in (assumed to be same for C & Java)
2797   stack_direction(TOWARDS_LOW);
2798 
2799   // These two registers define part of the calling convention
2800   // between compiled code and the interpreter.
2801   inline_cache_reg(RAX);                // Inline Cache Register
2802   interpreter_method_oop_reg(RBX);      // Method Oop Register when
2803                                         // calling interpreter
2804 
2805   // Optional: name the operand used by cisc-spilling to access
2806   // [stack_pointer + offset]
2807   cisc_spilling_operand_name(indOffset32);
2808 
2809   // Number of stack slots consumed by locking an object
2810   sync_stack_slots(2);
2811 
2812   // Compiled code's Frame Pointer
2813   frame_pointer(RSP);
2814 
2815   // Interpreter stores its frame pointer in a register which is
2816   // stored to the stack by I2CAdaptors.
2817   // I2CAdaptors convert from interpreted java to compiled java.
2818   interpreter_frame_pointer(RBP);
2819 
2820   // Stack alignment requirement
2821   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
2822 
2823   // Number of stack slots between incoming argument block and the start of
2824   // a new frame.  The PROLOG must add this many slots to the stack.  The
2825   // EPILOG must remove this many slots.  amd64 needs two slots for
2826   // return address.
2827   in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls); // NOTE(review): base is 4 slots, not the 2 named above; presumably return address plus saved RBP, confirm
2828 
2829   // Number of outgoing stack slots killed above the out_preserve_stack_slots
2830   // for calls to C.  Supports the var-args backing area for register parms.
2831   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
2832 
2833   // The after-PROLOG location of the return address.  Location of
2834   // return address specifies a type (REG or STACK) and a number
2835   // representing the register number (i.e. - use a register name) or
2836   // stack slot.
2837   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
2838   // Otherwise, it is above the locks and verification slot and alignment word
2839   return_addr(STACK - 2 +
2840               align_up((Compile::current()->in_preserve_stack_slots() +
2841                         Compile::current()->fixed_slots()),
2842                        stack_alignment_in_slots()));
2843 
2844   // Body of function which returns an integer array locating
2845   // arguments either in registers or in stack slots.  Passed an array
2846   // of ideal registers called "sig" and a "length" count.  Stack-slot
2847   // offsets are based on outgoing arguments, i.e. a CALLER setting up
2848   // arguments for a CALLEE.  Incoming stack arguments are
2849   // automatically biased by the preserve_stack_slots field above.
2850 
2851   calling_convention
2852   %{
2853     // No difference between ingoing/outgoing just pass false
2854     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
2855   %}
2856 
2857   c_calling_convention
2858   %{
2859     // This is obviously always outgoing
2860     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
2861   %}
2862 
2863   // Location of compiled Java return values.  Same as C for now.
2864   return_value
2865   %{
2866     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
2867            "only return normal values");
2868 
2869     static const int lo[Op_RegL + 1] = { // second OptoRegPair argument, indexed by ideal_reg
2870       0,
2871       0,
2872       RAX_num,  // Op_RegN
2873       RAX_num,  // Op_RegI
2874       RAX_num,  // Op_RegP
2875       XMM0_num, // Op_RegF
2876       XMM0_num, // Op_RegD
2877       RAX_num   // Op_RegL
2878     };
2879     static const int hi[Op_RegL + 1] = { // first OptoRegPair argument; OptoReg::Bad for RegN, RegI and RegF
2880       0,
2881       0,
2882       OptoReg::Bad, // Op_RegN
2883       OptoReg::Bad, // Op_RegI
2884       RAX_H_num,    // Op_RegP
2885       OptoReg::Bad, // Op_RegF
2886       XMM0b_num,    // Op_RegD
2887       RAX_H_num     // Op_RegL
2888     };
2889     // Excluded flags and vector registers.
2890     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
2891     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
2892   %}
2893 %}
2894 
2895 //----------ATTRIBUTES---------------------------------------------------------
2896 //----------Operand Attributes-------------------------------------------------
2897 op_attrib op_cost(0);        // Required cost attribute; operands below set their own op_cost(n)
2898 
2899 //----------Instruction Attributes---------------------------------------------
2900 ins_attrib ins_cost(100);       // Required cost attribute
2901 ins_attrib ins_size(8);         // Required size attribute (in bits)
2902 ins_attrib ins_short_branch(0); // Required flag: is this instruction
2903                                 // a non-matching short branch variant
2904                                 // of some long branch?
2905 ins_attrib ins_alignment(1);    // Required alignment attribute (must
2906                                 // be a power of 2); specifies the
2907                                 // alignment that some part of the
2908                                 // instruction (not necessarily the
2909                                 // start) requires.  If > 1, a
2910                                 // compute_padding() function must be
2911                                 // provided for the instruction
2912 
2913 //----------OPERANDS-----------------------------------------------------------
2914 // Operand definitions must precede instruction definitions for correct parsing
2915 // in the ADLC because operands constitute user defined types which are used in
2916 // instruction definitions.
2917 
2918 //----------Simple Operands----------------------------------------------------
2919 // Immediate Operands
2920 // Integer Immediate: matches every int constant (ConI), no predicate
2921 operand immI()
2922 %{
2923   match(ConI);
2924 
2925   op_cost(10);
2926   format %{ %}
2927   interface(CONST_INTER);
2928 %}
2929 
2930 // Constant for test vs zero: int constant equal to 0
2931 operand immI0()
2932 %{
2933   predicate(n->get_int() == 0);
2934   match(ConI);
2935 
2936   op_cost(0);
2937   format %{ %}
2938   interface(CONST_INTER);
2939 %}
2940 
2941 // Constant for increment: int constant equal to 1
2942 operand immI1()
2943 %{
2944   predicate(n->get_int() == 1);
2945   match(ConI);
2946 
2947   op_cost(0);
2948   format %{ %}
2949   interface(CONST_INTER);
2950 %}
2951 
2952 // Constant for decrement: int constant equal to -1
2953 operand immI_M1()
2954 %{
2955   predicate(n->get_int() == -1);
2956   match(ConI);
2957 
2958   op_cost(0);
2959   format %{ %}
2960   interface(CONST_INTER);
2961 %}
2962 
2963 // Valid scale values for addressing modes: int constant in [0, 3]
2964 operand immI2()
2965 %{
2966   predicate(0 <= n->get_int() && (n->get_int() <= 3));
2967   match(ConI);
2968 
2969   format %{ %}
2970   interface(CONST_INTER);
2971 %}
2972 
2973 operand immI8() // Int constant in [-128, 127]
2974 %{
2975   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
2976   match(ConI);
2977 
2978   op_cost(5);
2979   format %{ %}
2980   interface(CONST_INTER);
2981 %}
2982 
2983 operand immU8() // Int constant in [0, 255]
2984 %{
2985   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
2986   match(ConI);
2987 
2988   op_cost(5);
2989   format %{ %}
2990   interface(CONST_INTER);
2991 %}
2992 
2993 operand immI16() // Int constant in [-32768, 32767]
2994 %{
2995   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
2996   match(ConI);
2997 
2998   op_cost(10);
2999   format %{ %}
3000   interface(CONST_INTER);
3001 %}
3002 
3003 // Int Immediate non-negative: int constant >= 0
3004 operand immU31()
3005 %{
3006   predicate(n->get_int() >= 0);
3007   match(ConI);
3008 
3009   op_cost(0);
3010   format %{ %}
3011   interface(CONST_INTER);
3012 %}
3013 
3014 // Constant for long shifts: int constant equal to 32
3015 operand immI_32()
3016 %{
3017   predicate( n->get_int() == 32 );
3018   match(ConI);
3019 
3020   op_cost(0);
3021   format %{ %}
3022   interface(CONST_INTER);
3023 %}
3024 
3025 // Constant for long shifts: int constant equal to 64
3026 operand immI_64()
3027 %{
3028   predicate( n->get_int() == 64 );
3029   match(ConI);
3030 
3031   op_cost(0);
3032   format %{ %}
3033   interface(CONST_INTER);
3034 %}
3035 
3036 // Pointer Immediate: matches every pointer constant (ConP), no predicate
3037 operand immP()
3038 %{
3039   match(ConP);
3040 
3041   op_cost(10);
3042   format %{ %}
3043   interface(CONST_INTER);
3044 %}
3045 
3046 // NULL Pointer Immediate: pointer constant whose value is 0
3047 operand immP0()
3048 %{
3049   predicate(n->get_ptr() == 0);
3050   match(ConP);
3051 
3052   op_cost(5);
3053   format %{ %}
3054   interface(CONST_INTER);
3055 %}
3056 
3057 // Narrow Pointer Immediate: matches every ConN constant, no predicate
3058 operand immN() %{
3059   match(ConN);
3060 
3061   op_cost(10);
3062   format %{ %}
3063   interface(CONST_INTER);
3064 %}
3065 
3066 operand immNKlass() %{ // matches every ConNKlass constant, no predicate
3067   match(ConNKlass);
3068 
3069   op_cost(10);
3070   format %{ %}
3071   interface(CONST_INTER);
3072 %}
3073 
3074 // Narrow NULL Pointer Immediate: ConN whose narrow constant is 0
3075 operand immN0() %{
3076   predicate(n->get_narrowcon() == 0);
3077   match(ConN);
3078 
3079   op_cost(5);
3080   format %{ %}
3081   interface(CONST_INTER);
3082 %}
3083 
3084 operand immP31() // Pointer constant without relocation whose value, shifted right by 31, is 0
3085 %{
3086   predicate(n->as_Type()->type()->reloc() == relocInfo::none
3087             && (n->get_ptr() >> 31) == 0);
3088   match(ConP);
3089 
3090   op_cost(5);
3091   format %{ %}
3092   interface(CONST_INTER);
3093 %}
3094 
3095 
3096 // Long Immediate: matches every long constant (ConL), no predicate
3097 operand immL()
3098 %{
3099   match(ConL);
3100 
3101   op_cost(20);
3102   format %{ %}
3103   interface(CONST_INTER);
3104 %}
3105 
3106 // Long Immediate 8-bit: long constant in [-128, 127]
3107 operand immL8()
3108 %{
3109   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
3110   match(ConL);
3111 
3112   op_cost(5);
3113   format %{ %}
3114   interface(CONST_INTER);
3115 %}
3116 
3117 // Long Immediate 32-bit unsigned: value is unchanged by a cast to unsigned int
3118 operand immUL32()
3119 %{
3120   predicate(n->get_long() == (unsigned int) (n->get_long()));
3121   match(ConL);
3122 
3123   op_cost(10);
3124   format %{ %}
3125   interface(CONST_INTER);
3126 %}
3127 
3128 // Long Immediate 32-bit signed: value is unchanged by a cast to int
3129 operand immL32()
3130 %{
3131   predicate(n->get_long() == (int) (n->get_long()));
3132   match(ConL);
3133 
3134   op_cost(15);
3135   format %{ %}
3136   interface(CONST_INTER);
3137 %}
3138 
3139 // Long Immediate zero: long constant equal to 0
3140 operand immL0()
3141 %{
3142   predicate(n->get_long() == 0L);
3143   match(ConL);
3144 
3145   op_cost(10);
3146   format %{ %}
3147   interface(CONST_INTER);
3148 %}
3149 
3150 // Constant for increment: long constant equal to 1
3151 operand immL1()
3152 %{
3153   predicate(n->get_long() == 1);
3154   match(ConL);
3155 
3156   format %{ %}
3157   interface(CONST_INTER);
3158 %}
3159 
3160 // Constant for decrement: long constant equal to -1
3161 operand immL_M1()
3162 %{
3163   predicate(n->get_long() == -1);
3164   match(ConL);
3165 
3166   format %{ %}
3167   interface(CONST_INTER);
3168 %}
3169 
3170 // Long Immediate: long constant equal to 10 (no op_cost given here)
3171 operand immL10()
3172 %{
3173   predicate(n->get_long() == 10);
3174   match(ConL);
3175 
3176   format %{ %}
3177   interface(CONST_INTER);
3178 %}
3179 
3180 // Long immediate from 0 to 127, both ends included.
3181 // Used for a shorter form of long mul by 10.
3182 operand immL_127()
3183 %{
3184   predicate(0 <= n->get_long() && n->get_long() < 0x80);
3185   match(ConL);
3186 
3187   op_cost(10);
3188   format %{ %}
3189   interface(CONST_INTER);
3190 %}
3191 
3192 // Long Immediate: low 32-bit mask, i.e. the long constant 0xFFFFFFFF
3193 operand immL_32bits()
3194 %{
3195   predicate(n->get_long() == 0xFFFFFFFFL);
3196   match(ConL);
3197   op_cost(20);
3198 
3199   format %{ %}
3200   interface(CONST_INTER);
3201 %}
3202 
3203 // Float Immediate zero: tested through jint_cast (presumably the raw bits, so -0.0f would not match; confirm)
3204 operand immF0()
3205 %{
3206   predicate(jint_cast(n->getf()) == 0);
3207   match(ConF);
3208 
3209   op_cost(5);
3210   format %{ %}
3211   interface(CONST_INTER);
3212 %}
3213 
3214 // Float Immediate: matches every float constant (ConF), no predicate
3215 operand immF()
3216 %{
3217   match(ConF);
3218 
3219   op_cost(15);
3220   format %{ %}
3221   interface(CONST_INTER);
3222 %}
3223 
3224 // Double Immediate zero: tested through jlong_cast (presumably the raw bits, so -0.0 would not match; confirm)
3225 operand immD0()
3226 %{
3227   predicate(jlong_cast(n->getd()) == 0);
3228   match(ConD);
3229 
3230   op_cost(5);
3231   format %{ %}
3232   interface(CONST_INTER);
3233 %}
3234 
3235 // Double Immediate: matches every double constant (ConD), no predicate
3236 operand immD()
3237 %{
3238   match(ConD);
3239 
3240   op_cost(15);
3241   format %{ %}
3242   interface(CONST_INTER);
3243 %}
3244 
3245 // Immediates for special shifts (sign extend)
3246 
3247 // Int constant equal to 16
3248 operand immI_16()
3249 %{
3250   predicate(n->get_int() == 16);
3251   match(ConI);
3252 
3253   format %{ %}
3254   interface(CONST_INTER);
3255 %}
3256 
3257 operand immI_24() // Int constant equal to 24
3258 %{
3259   predicate(n->get_int() == 24);
3260   match(ConI);
3261 
3262   format %{ %}
3263   interface(CONST_INTER);
3264 %}
3265 
3266 // Constant for byte-wide masking: int constant equal to 255
3267 operand immI_255()
3268 %{
3269   predicate(n->get_int() == 255);
3270   match(ConI);
3271 
3272   format %{ %}
3273   interface(CONST_INTER);
3274 %}
3275 
3276 // Constant for short-wide masking: int constant equal to 65535
3277 operand immI_65535()
3278 %{
3279   predicate(n->get_int() == 65535);
3280   match(ConI);
3281 
3282   format %{ %}
3283   interface(CONST_INTER);
3284 %}
3285 
3286 // Constant for byte-wide masking: long constant equal to 255
3287 operand immL_255()
3288 %{
3289   predicate(n->get_long() == 255);
3290   match(ConL);
3291 
3292   format %{ %}
3293   interface(CONST_INTER);
3294 %}
3295 
3296 // Constant for short-wide masking: long constant equal to 65535
3297 operand immL_65535()
3298 %{
3299   predicate(n->get_long() == 65535);
3300   match(ConL);
3301 
3302   format %{ %}
3303   interface(CONST_INTER);
3304 %}
3305 
3306 // Register Operands
3307 // Integer Register: allocated from int_reg; also matches the five fixed-register int operands
3308 operand rRegI()
3309 %{
3310   constraint(ALLOC_IN_RC(int_reg));
3311   match(RegI);
3312 
3313   match(rax_RegI);
3314   match(rbx_RegI);
3315   match(rcx_RegI);
3316   match(rdx_RegI);
3317   match(rdi_RegI);
3318 
3319   format %{ %}
3320   interface(REG_INTER);
3321 %}
3322 
3323 // Special Registers: int operand allocated from int_rax_reg, formatted as "RAX"
3324 operand rax_RegI()
3325 %{
3326   constraint(ALLOC_IN_RC(int_rax_reg));
3327   match(RegI);
3328   match(rRegI);
3329 
3330   format %{ "RAX" %}
3331   interface(REG_INTER);
3332 %}
3333 
3334 // Special Registers: int operand allocated from int_rbx_reg, formatted as "RBX"
3335 operand rbx_RegI()
3336 %{
3337   constraint(ALLOC_IN_RC(int_rbx_reg));
3338   match(RegI);
3339   match(rRegI);
3340 
3341   format %{ "RBX" %}
3342   interface(REG_INTER);
3343 %}
3344 
3345 operand rcx_RegI() // int operand allocated from int_rcx_reg, formatted as "RCX"
3346 %{
3347   constraint(ALLOC_IN_RC(int_rcx_reg));
3348   match(RegI);
3349   match(rRegI);
3350 
3351   format %{ "RCX" %}
3352   interface(REG_INTER);
3353 %}
3354 
3355 operand rdx_RegI() // int operand allocated from int_rdx_reg, formatted as "RDX"
3356 %{
3357   constraint(ALLOC_IN_RC(int_rdx_reg));
3358   match(RegI);
3359   match(rRegI);
3360 
3361   format %{ "RDX" %}
3362   interface(REG_INTER);
3363 %}
3364 
3365 operand rdi_RegI() // int operand allocated from int_rdi_reg, formatted as "RDI"
3366 %{
3367   constraint(ALLOC_IN_RC(int_rdi_reg));
3368   match(RegI);
3369   match(rRegI);
3370 
3371   format %{ "RDI" %}
3372   interface(REG_INTER);
3373 %}
3374 
3375 operand no_rcx_RegI() // int operand allocated from int_no_rcx_reg; the match list leaves out rcx_RegI
3376 %{
3377   constraint(ALLOC_IN_RC(int_no_rcx_reg));
3378   match(RegI);
3379   match(rax_RegI);
3380   match(rbx_RegI);
3381   match(rdx_RegI);
3382   match(rdi_RegI);
3383 
3384   format %{ %}
3385   interface(REG_INTER);
3386 %}
3387 
3388 operand no_rax_rdx_RegI() // int operand allocated from int_no_rax_rdx_reg; the match list leaves out rax_RegI and rdx_RegI
3389 %{
3390   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
3391   match(RegI);
3392   match(rbx_RegI);
3393   match(rcx_RegI);
3394   match(rdi_RegI);
3395 
3396   format %{ %}
3397   interface(REG_INTER);
3398 %}
3399 
3400 // Pointer Register: allocated from any_reg; matches rRegP and every fixed pointer operand listed
3401 operand any_RegP()
3402 %{
3403   constraint(ALLOC_IN_RC(any_reg));
3404   match(RegP);
3405   match(rax_RegP);
3406   match(rbx_RegP);
3407   match(rdi_RegP);
3408   match(rsi_RegP);
3409   match(rbp_RegP);
3410   match(r15_RegP);
3411   match(rRegP);
3412 
3413   format %{ %}
3414   interface(REG_INTER);
3415 %}
3416 
3417 operand rRegP() // general pointer operand, allocated from ptr_reg
3418 %{
3419   constraint(ALLOC_IN_RC(ptr_reg));
3420   match(RegP);
3421   match(rax_RegP);
3422   match(rbx_RegP);
3423   match(rdi_RegP);
3424   match(rsi_RegP);
3425   match(rbp_RegP);  // See Q&A below about
3426   match(r15_RegP);  // r15_RegP and rbp_RegP.
3427 
3428   format %{ %}
3429   interface(REG_INTER);
3430 %}
3431 
3432 operand rRegN() %{ // RegN operand; shares the int_reg class with rRegI
3433   constraint(ALLOC_IN_RC(int_reg));
3434   match(RegN);
3435 
3436   format %{ %}
3437   interface(REG_INTER);
3438 %}
3439 
3440 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3441 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3442 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
3443 // The output of an instruction is controlled by the allocator, which respects
3444 // register class masks, not match rules.  Unless an instruction mentions
3445 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3446 // by the allocator as an input.
3447 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
3448 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
3449 // result, RBP is not included in the output of the instruction either.
3450 
3451 operand no_rax_RegP() // pointer operand allocated from ptr_no_rax_reg
3452 %{
3453   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
3454   match(RegP);
3455   match(rbx_RegP);
3456   match(rsi_RegP);
3457   match(rdi_RegP);
3458 
3459   format %{ %}
3460   interface(REG_INTER);
3461 %}
3462 
3463 // This operand is not allowed to use RBP even if
3464 // RBP is not used to hold the frame pointer (class ptr_reg_no_rbp).
3465 operand no_rbp_RegP()
3466 %{
3467   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
3468   match(RegP);
3469   match(rbx_RegP);
3470   match(rsi_RegP);
3471   match(rdi_RegP);
3472 
3473   format %{ %}
3474   interface(REG_INTER);
3475 %}
3476 
3477 operand no_rax_rbx_RegP() // pointer operand allocated from ptr_no_rax_rbx_reg
3478 %{
3479   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
3480   match(RegP);
3481   match(rsi_RegP);
3482   match(rdi_RegP);
3483 
3484   format %{ %}
3485   interface(REG_INTER);
3486 %}
3487 
3488 // Special Registers
3489 // Return a pointer value: pointer operand allocated from ptr_rax_reg
3490 operand rax_RegP()
3491 %{
3492   constraint(ALLOC_IN_RC(ptr_rax_reg));
3493   match(RegP);
3494   match(rRegP);
3495 
3496   format %{ %}
3497   interface(REG_INTER);
3498 %}
3499 
3500 // Special Registers
3501 // Return a compressed pointer value: RegN operand allocated from int_rax_reg
3502 operand rax_RegN()
3503 %{
3504   constraint(ALLOC_IN_RC(int_rax_reg));
3505   match(RegN);
3506   match(rRegN);
3507 
3508   format %{ %}
3509   interface(REG_INTER);
3510 %}
3511 
3512 // Used in AtomicAdd: pointer operand allocated from ptr_rbx_reg
3513 operand rbx_RegP()
3514 %{
3515   constraint(ALLOC_IN_RC(ptr_rbx_reg));
3516   match(RegP);
3517   match(rRegP);
3518 
3519   format %{ %}
3520   interface(REG_INTER);
3521 %}
3522 
3523 operand rsi_RegP() // pointer operand allocated from ptr_rsi_reg
3524 %{
3525   constraint(ALLOC_IN_RC(ptr_rsi_reg));
3526   match(RegP);
3527   match(rRegP);
3528 
3529   format %{ %}
3530   interface(REG_INTER);
3531 %}
3532 
3533 // Used in rep stosq: pointer operand allocated from ptr_rdi_reg
3534 operand rdi_RegP()
3535 %{
3536   constraint(ALLOC_IN_RC(ptr_rdi_reg));
3537   match(RegP);
3538   match(rRegP);
3539 
3540   format %{ %}
3541   interface(REG_INTER);
3542 %}
3543 
3544 operand r15_RegP() // pointer operand allocated from ptr_r15_reg (see the Q&A comment after rRegN)
3545 %{
3546   constraint(ALLOC_IN_RC(ptr_r15_reg));
3547   match(RegP);
3548   match(rRegP);
3549 
3550   format %{ %}
3551   interface(REG_INTER);
3552 %}
3553 
3554 operand rRegL() // general long operand, allocated from long_reg
3555 %{
3556   constraint(ALLOC_IN_RC(long_reg));
3557   match(RegL);
3558   match(rax_RegL);
3559   match(rdx_RegL);
3560 
3561   format %{ %}
3562   interface(REG_INTER);
3563 %}
3564 
3565 // Special Registers: long operand allocated from long_no_rax_rdx_reg
3566 operand no_rax_rdx_RegL()
3567 %{
3568   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3569   match(RegL);
3570   match(rRegL);
3571 
3572   format %{ %}
3573   interface(REG_INTER);
3574 %}
3575 
3576 operand no_rax_RegL() // NOTE(review): same class as no_rax_rdx_RegL, yet rdx_RegL is listed as a match; confirm this is intended
3577 %{
3578   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3579   match(RegL);
3580   match(rRegL);
3581   match(rdx_RegL);
3582 
3583   format %{ %}
3584   interface(REG_INTER);
3585 %}
3586 
3587 operand no_rcx_RegL() // long operand allocated from long_no_rcx_reg
3588 %{
3589   constraint(ALLOC_IN_RC(long_no_rcx_reg));
3590   match(RegL);
3591   match(rRegL);
3592 
3593   format %{ %}
3594   interface(REG_INTER);
3595 %}
3596 
3597 operand rax_RegL() // long operand allocated from long_rax_reg, formatted as "RAX"
3598 %{
3599   constraint(ALLOC_IN_RC(long_rax_reg));
3600   match(RegL);
3601   match(rRegL);
3602 
3603   format %{ "RAX" %}
3604   interface(REG_INTER);
3605 %}
3606 
3607 operand rcx_RegL() // long operand allocated from long_rcx_reg (empty format, unlike rax_RegL)
3608 %{
3609   constraint(ALLOC_IN_RC(long_rcx_reg));
3610   match(RegL);
3611   match(rRegL);
3612 
3613   format %{ %}
3614   interface(REG_INTER);
3615 %}
3616 
3617 operand rdx_RegL() // long operand allocated from long_rdx_reg (empty format, unlike rax_RegL)
3618 %{
3619   constraint(ALLOC_IN_RC(long_rdx_reg));
3620   match(RegL);
3621   match(rRegL);
3622 
3623   format %{ %}
3624   interface(REG_INTER);
3625 %}
3626 
3627 // Flags register, used as output of compare instructions (class int_flags, formatted as "RFLAGS")
3628 operand rFlagsReg()
3629 %{
3630   constraint(ALLOC_IN_RC(int_flags));
3631   match(RegFlags);
3632 
3633   format %{ "RFLAGS" %}
3634   interface(REG_INTER);
3635 %}
3636 
3637 // Flags register operand (same int_flags class as rFlagsReg), used as output of FLOATING POINT compare instructions
3638 operand rFlagsRegU()
3639 %{
3640   constraint(ALLOC_IN_RC(int_flags));
3641   match(RegFlags);
3642 
3643   format %{ "RFLAGS_U" %}
3644   interface(REG_INTER);
3645 %}
3646 
3647 operand rFlagsRegUCF() %{
3648   constraint(ALLOC_IN_RC(int_flags));  // third flags operand over the same int_flags class
3649   match(RegFlags);
3650   predicate(false);  // predicate is the constant false; presumably the operand is only named explicitly by instructions - TODO confirm
3651 
3652   format %{ "RFLAGS_U_CF" %}
3653   interface(REG_INTER);
3654 %}
3655 
3656 // Float register operand: matches RegF, allocated from the float_reg class
3657 operand regF() %{
3658    constraint(ALLOC_IN_RC(float_reg));
3659    match(RegF);
3660 
3661    format %{ %}
3662    interface(REG_INTER);
3663 %}
3664 
3665 // Float register operand: matches RegF like regF, but allocated from the float_reg_vl class
3666 operand vlRegF() %{
3667    constraint(ALLOC_IN_RC(float_reg_vl));
3668    match(RegF);
3669 
3670    format %{ %}
3671    interface(REG_INTER);
3672 %}
3673 
3674 // Double register operand: matches RegD, allocated from the double_reg class
3675 operand regD() %{
3676    constraint(ALLOC_IN_RC(double_reg));
3677    match(RegD);
3678 
3679    format %{ %}
3680    interface(REG_INTER);
3681 %}
3682 
3683 // Double register operand: matches RegD like regD, but allocated from the double_reg_vl class
3684 operand vlRegD() %{
3685    constraint(ALLOC_IN_RC(double_reg_vl));
3686    match(RegD);
3687 
3688    format %{ %}
3689    interface(REG_INTER);
3690 %}
3691 
3692 // Vectors: VecS operand allocated from the vectors_reg_vlbwdq class
3693 operand vecS() %{
3694   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
3695   match(VecS);
3696 
3697   format %{ %}
3698   interface(REG_INTER);
3699 %}
3700 
3701 // Vectors: VecS operand allocated from the vectors_reg_legacy class
3702 operand legVecS() %{
3703   constraint(ALLOC_IN_RC(vectors_reg_legacy));
3704   match(VecS);
3705 
3706   format %{ %}
3707   interface(REG_INTER);
3708 %}
3709 
3710 operand vecD() %{
3711   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));  // VecD operand allocated from vectord_reg_vlbwdq
3712   match(VecD);
3713 
3714   format %{ %}
3715   interface(REG_INTER);
3716 %}
3717 
3718 operand legVecD() %{
3719   constraint(ALLOC_IN_RC(vectord_reg_legacy));  // VecD operand allocated from vectord_reg_legacy
3720   match(VecD);
3721 
3722   format %{ %}
3723   interface(REG_INTER);
3724 %}
3725 
3726 operand vecX() %{
3727   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));  // VecX operand allocated from vectorx_reg_vlbwdq
3728   match(VecX);
3729 
3730   format %{ %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 operand legVecX() %{
3735   constraint(ALLOC_IN_RC(vectorx_reg_legacy));  // VecX operand allocated from vectorx_reg_legacy
3736   match(VecX);
3737 
3738   format %{ %}
3739   interface(REG_INTER);
3740 %}
3741 
3742 operand vecY() %{
3743   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));  // VecY operand allocated from vectory_reg_vlbwdq
3744   match(VecY);
3745 
3746   format %{ %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 operand legVecY() %{
3751   constraint(ALLOC_IN_RC(vectory_reg_legacy));  // VecY operand allocated from vectory_reg_legacy
3752   match(VecY);
3753 
3754   format %{ %}
3755   interface(REG_INTER);
3756 %}
3757 
3758 //----------Memory Operands----------------------------------------------------
3759 // Direct Memory Operand
3760 // operand direct(immP addr)
3761 // %{
3762 //   match(addr);
3763 
3764 //   format %{ "[$addr]" %}
3765 //   interface(MEMORY_INTER) %{
3766 //     base(0xFFFFFFFF);
3767 //     index(0x4);
3768 //     scale(0x0);
3769 //     disp($addr);
3770 //   %}
3771 // %}
3772 
3773 // Indirect Memory Operand: the address is the pointer register itself, [$reg]
3774 operand indirect(any_RegP reg)
3775 %{
3776   constraint(ALLOC_IN_RC(ptr_reg));
3777   match(reg);
3778 
3779   format %{ "[$reg]" %}
3780   interface(MEMORY_INTER) %{
3781     base($reg);
3782     index(0x4);  // No Index
3783     scale(0x0);  // No Scale
3784     disp(0x0);   // No Displacement
3785   %}
3786 %}
3787 
3788 // Indirect Memory Plus Short Offset Operand: [$reg + $off] with an immL8 offset
3789 operand indOffset8(any_RegP reg, immL8 off)
3790 %{
3791   constraint(ALLOC_IN_RC(ptr_reg));
3792   match(AddP reg off);
3793 
3794   format %{ "[$reg + $off (8-bit)]" %}
3795   interface(MEMORY_INTER) %{
3796     base($reg);
3797     index(0x4);  // No Index
3798     scale(0x0);  // No Scale
3799     disp($off);
3800   %}
3801 %}
3802 
3803 // Indirect Memory Plus Long Offset Operand: [$reg + $off] with an immL32 offset
3804 operand indOffset32(any_RegP reg, immL32 off)
3805 %{
3806   constraint(ALLOC_IN_RC(ptr_reg));
3807   match(AddP reg off);
3808 
3809   format %{ "[$reg + $off (32-bit)]" %}
3810   interface(MEMORY_INTER) %{
3811     base($reg);
3812     index(0x4);  // No Index
3813     scale(0x0);  // No Scale
3814     disp($off);
3815   %}
3816 %}
3817 
3818 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $lreg + $off], unscaled long index
3819 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3820 %{
3821   constraint(ALLOC_IN_RC(ptr_reg));
3822   match(AddP (AddP reg lreg) off);
3823 
3824   op_cost(10);
3825   format %{"[$reg + $off + $lreg]" %}
3826   interface(MEMORY_INTER) %{
3827     base($reg);
3828     index($lreg);
3829     scale(0x0);  // No Scale
3830     disp($off);
3831   %}
3832 %}
3833 
3834 // Indirect Memory Plus Index Register Operand (no offset): [$reg + $lreg]
3835 operand indIndex(any_RegP reg, rRegL lreg)
3836 %{
3837   constraint(ALLOC_IN_RC(ptr_reg));
3838   match(AddP reg lreg);
3839 
3840   op_cost(10);
3841   format %{"[$reg + $lreg]" %}
3842   interface(MEMORY_INTER) %{
3843     base($reg);
3844     index($lreg);
3845     scale(0x0);  // No Scale
3846     disp(0x0);   // No Displacement
3847   %}
3848 %}
3849 
3850 // Indirect Memory Plus Scaled Index Register Operand: [$reg + $lreg << $scale], scale is an immI2
3851 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3852 %{
3853   constraint(ALLOC_IN_RC(ptr_reg));
3854   match(AddP reg (LShiftL lreg scale));
3855 
3856   op_cost(10);
3857   format %{"[$reg + $lreg << $scale]" %}
3858   interface(MEMORY_INTER) %{
3859     base($reg);
3860     index($lreg);
3861     scale($scale);
3862     disp(0x0);  // No Displacement
3863   %}
3864 %}
3865 
3866 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
3867 %{
3868   constraint(ALLOC_IN_RC(ptr_reg));  // [$reg + $idx << $scale] with an int index converted by ConvI2L
3869   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3870   match(AddP reg (LShiftL (ConvI2L idx) scale));  // the predicate above requires _lo >= 0 on the long type of n->in(3)->in(1) (the ConvI2L input of the LShiftL)
3871 
3872   op_cost(10);
3873   format %{"[$reg + pos $idx << $scale]" %}
3874   interface(MEMORY_INTER) %{
3875     base($reg);
3876     index($idx);  // the int register is used directly as the index
3877     scale($scale);
3878     disp(0x0);  // No Displacement
3879   %}
3880 %}
3881 
3882 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [$reg + $lreg << $scale + $off]
3883 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3884 %{
3885   constraint(ALLOC_IN_RC(ptr_reg));
3886   match(AddP (AddP reg (LShiftL lreg scale)) off);
3887 
3888   op_cost(10);
3889   format %{"[$reg + $off + $lreg << $scale]" %}
3890   interface(MEMORY_INTER) %{
3891     base($reg);
3892     index($lreg);
3893     scale($scale);
3894     disp($off);
3895   %}
3896 %}
3897 
3898 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [$reg + $idx + $off], int index via ConvI2L
3899 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
3900 %{
3901   constraint(ALLOC_IN_RC(ptr_reg));
3902   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
3903   match(AddP (AddP reg (ConvI2L idx)) off);  // the predicate above requires _lo >= 0 on the long type of n->in(2)->in(3) (the ConvI2L of the inner AddP)
3904 
3905   op_cost(10);
3906   format %{"[$reg + $off + $idx]" %}
3907   interface(MEMORY_INTER) %{
3908     base($reg);
3909     index($idx);  // the int register is used directly as the index
3910     scale(0x0);   // No Scale
3911     disp($off);
3912   %}
3913 %}
3914 
3915 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand: [$reg + $idx << $scale + $off]
3916 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3917 %{
3918   constraint(ALLOC_IN_RC(ptr_reg));
3919   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3920   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);  // the predicate above requires _lo >= 0 on the long type of the ConvI2L under the LShiftL
3921 
3922   op_cost(10);
3923   format %{"[$reg + $off + $idx << $scale]" %}
3924   interface(MEMORY_INTER) %{
3925     base($reg);
3926     index($idx);  // the int register is used directly as the index
3927     scale($scale);
3928     disp($off);
3929   %}
3930 %}
3931 
3932 // Indirect Narrow Oop Plus Offset Operand: the narrow register is the index (scaled by 8), R12 is the base
3933 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
3934 // so we can't free r12 even with Universe::narrow_oop_base() == NULL.
3935 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3936   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3937   constraint(ALLOC_IN_RC(ptr_reg));
3938   match(AddP (DecodeN reg) off);
3939 
3940   op_cost(10);
3941   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3942   interface(MEMORY_INTER) %{
3943     base(0xc); // R12
3944     index($reg);
3945     scale(0x3);  // matches the "<< 3" in the format string
3946     disp($off);
3947   %}
3948 %}
3949 
3950 // Indirect Memory Operand, narrow variant: matches DecodeN of a rRegN when narrow_oop_shift() == 0
3951 operand indirectNarrow(rRegN reg)
3952 %{
3953   predicate(Universe::narrow_oop_shift() == 0);
3954   constraint(ALLOC_IN_RC(ptr_reg));
3955   match(DecodeN reg);
3956 
3957   format %{ "[$reg]" %}
3958   interface(MEMORY_INTER) %{
3959     base($reg);  // the narrow register itself is the base
3960     index(0x4);  // No Index
3961     scale(0x0);  // No Scale
3962     disp(0x0);   // No Displacement
3963   %}
3964 %}
3965 
3966 // Indirect Memory Plus Short Offset Operand, narrow variant (DecodeN base, narrow_oop_shift() == 0)
3967 operand indOffset8Narrow(rRegN reg, immL8 off)
3968 %{
3969   predicate(Universe::narrow_oop_shift() == 0);
3970   constraint(ALLOC_IN_RC(ptr_reg));
3971   match(AddP (DecodeN reg) off);
3972 
3973   format %{ "[$reg + $off (8-bit)]" %}
3974   interface(MEMORY_INTER) %{
3975     base($reg);
3976     index(0x4);  // No Index
3977     scale(0x0);  // No Scale
3978     disp($off);
3979   %}
3980 %}
3981 
3982 // Indirect Memory Plus Long Offset Operand, narrow variant (DecodeN base, narrow_oop_shift() == 0)
3983 operand indOffset32Narrow(rRegN reg, immL32 off)
3984 %{
3985   predicate(Universe::narrow_oop_shift() == 0);
3986   constraint(ALLOC_IN_RC(ptr_reg));
3987   match(AddP (DecodeN reg) off);
3988 
3989   format %{ "[$reg + $off (32-bit)]" %}
3990   interface(MEMORY_INTER) %{
3991     base($reg);
3992     index(0x4);  // No Index
3993     scale(0x0);  // No Scale
3994     disp($off);
3995   %}
3996 %}
3997 
3998 // Indirect Memory Plus Index Register Plus Offset Operand, narrow variant (DecodeN base, narrow_oop_shift() == 0)
3999 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
4000 %{
4001   predicate(Universe::narrow_oop_shift() == 0);
4002   constraint(ALLOC_IN_RC(ptr_reg));
4003   match(AddP (AddP (DecodeN reg) lreg) off);
4004 
4005   op_cost(10);
4006   format %{"[$reg + $off + $lreg]" %}
4007   interface(MEMORY_INTER) %{
4008     base($reg);
4009     index($lreg);
4010     scale(0x0);  // No Scale
4011     disp($off);
4012   %}
4013 %}
4014 
4015 // Indirect Memory Plus Index Register Operand (no offset), narrow variant (DecodeN base, narrow_oop_shift() == 0)
4016 operand indIndexNarrow(rRegN reg, rRegL lreg)
4017 %{
4018   predicate(Universe::narrow_oop_shift() == 0);
4019   constraint(ALLOC_IN_RC(ptr_reg));
4020   match(AddP (DecodeN reg) lreg);
4021 
4022   op_cost(10);
4023   format %{"[$reg + $lreg]" %}
4024   interface(MEMORY_INTER) %{
4025     base($reg);
4026     index($lreg);
4027     scale(0x0);  // No Scale
4028     disp(0x0);   // No Displacement
4029   %}
4030 %}
4031 
4032 // Indirect Memory Plus Scaled Index Register Operand, narrow variant (DecodeN base, narrow_oop_shift() == 0)
4033 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
4034 %{
4035   predicate(Universe::narrow_oop_shift() == 0);
4036   constraint(ALLOC_IN_RC(ptr_reg));
4037   match(AddP (DecodeN reg) (LShiftL lreg scale));
4038 
4039   op_cost(10);
4040   format %{"[$reg + $lreg << $scale]" %}
4041   interface(MEMORY_INTER) %{
4042     base($reg);
4043     index($lreg);
4044     scale($scale);
4045     disp(0x0);  // No Displacement
4046   %}
4047 %}
4048 
4049 // Indirect Memory Plus Scaled Index Register Plus Offset Operand, narrow variant (DecodeN base, narrow_oop_shift() == 0)
4050 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4051 %{
4052   predicate(Universe::narrow_oop_shift() == 0);
4053   constraint(ALLOC_IN_RC(ptr_reg));
4054   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4055 
4056   op_cost(10);
4057   format %{"[$reg + $off + $lreg << $scale]" %}
4058   interface(MEMORY_INTER) %{
4059     base($reg);
4060     index($lreg);
4061     scale($scale);
4062     disp($off);
4063   %}
4064 %}
4065 
4066 // Indirect Memory Plus Positive Index Register Plus Offset Operand, narrow variant (DecodeN base)
4067 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
4068 %{
4069   constraint(ALLOC_IN_RC(ptr_reg));
4070   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
4071   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);  // the predicate above needs shift == 0 and _lo >= 0 on the ConvI2L's long type
4072 
4073   op_cost(10);
4074   format %{"[$reg + $off + $idx]" %}
4075   interface(MEMORY_INTER) %{
4076     base($reg);
4077     index($idx);  // the int register is used directly as the index
4078     scale(0x0);   // No Scale
4079     disp($off);
4080   %}
4081 %}
4082 
4083 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand, narrow variant (DecodeN base)
4084 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4085 %{
4086   constraint(ALLOC_IN_RC(ptr_reg));
4087   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4088   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);  // the predicate above needs shift == 0 and _lo >= 0 on the ConvI2L's long type
4089 
4090   op_cost(10);
4091   format %{"[$reg + $off + $idx << $scale]" %}
4092   interface(MEMORY_INTER) %{
4093     base($reg);
4094     index($idx);  // the int register is used directly as the index
4095     scale($scale);
4096     disp($off);
4097   %}
4098 %}
4099 
4100 //----------Special Memory Operands--------------------------------------------
4101 // Stack Slot Operand - This operand is used for loading and storing temporary
4102 //                      values on the stack where a match requires a value to
4103 //                      flow through memory.  One operand per value kind
4104 //                      (sRegP here); each addresses [RSP + slot offset].
4105 operand stackSlotP(sRegP reg)
4106 %{
4107   constraint(ALLOC_IN_RC(stack_slots));
4108   // No match rule because this operand is only generated in matching
4109 
4110   format %{ "[$reg]" %}
4111   interface(MEMORY_INTER) %{
4112     base(0x4);   // RSP
4113     index(0x4);  // No Index
4114     scale(0x0);  // No Scale
4115     disp($reg);  // Stack Offset
4116   %}
4117 %}
4117 
4118 operand stackSlotI(sRegI reg)
4119 %{
4120   constraint(ALLOC_IN_RC(stack_slots));  // stack-slot memory operand for an sRegI value
4121   // No match rule because this operand is only generated in matching
4122 
4123   format %{ "[$reg]" %}
4124   interface(MEMORY_INTER) %{
4125     base(0x4);   // RSP
4126     index(0x4);  // No Index
4127     scale(0x0);  // No Scale
4128     disp($reg);  // Stack Offset
4129   %}
4130 %}
4131 
4132 operand stackSlotF(sRegF reg)
4133 %{
4134   constraint(ALLOC_IN_RC(stack_slots));  // stack-slot memory operand for an sRegF value
4135   // No match rule because this operand is only generated in matching
4136 
4137   format %{ "[$reg]" %}
4138   interface(MEMORY_INTER) %{
4139     base(0x4);   // RSP
4140     index(0x4);  // No Index
4141     scale(0x0);  // No Scale
4142     disp($reg);  // Stack Offset
4143   %}
4144 %}
4145 
4146 operand stackSlotD(sRegD reg)
4147 %{
4148   constraint(ALLOC_IN_RC(stack_slots));  // stack-slot memory operand for an sRegD value
4149   // No match rule because this operand is only generated in matching
4150 
4151   format %{ "[$reg]" %}
4152   interface(MEMORY_INTER) %{
4153     base(0x4);   // RSP
4154     index(0x4);  // No Index
4155     scale(0x0);  // No Scale
4156     disp($reg);  // Stack Offset
4157   %}
4158 %}
4159 operand stackSlotL(sRegL reg)
4160 %{
4161   constraint(ALLOC_IN_RC(stack_slots));  // stack-slot memory operand for an sRegL value
4162   // No match rule because this operand is only generated in matching
4163 
4164   format %{ "[$reg]" %}
4165   interface(MEMORY_INTER) %{
4166     base(0x4);   // RSP
4167     index(0x4);  // No Index
4168     scale(0x0);  // No Scale
4169     disp($reg);  // Stack Offset
4170   %}
4171 %}
4172 
4173 //----------Conditional Branch Operands----------------------------------------
4174 // Comparison Op  - This is the operation of the comparison, and is limited to
4175 //                  the following set of codes:
4176 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4177 //
4178 // Other attributes of the comparison, such as unsignedness, are specified
4179 // by the comparison instruction that sets a condition code flags register.
4180 // That result is represented by a flags operand whose subtype is appropriate
4181 // to the unsignedness (etc.) of the comparison.
4182 //
4183 // Later, the instruction which matches both the Comparison Op (a Bool) and
4184 // the flags (produced by the Cmp) specifies the coding of the comparison op
4185 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4186 
4187 // Comparison Code: each entry pairs an encoding value with a mnemonic suffix (l/ge/le/g forms)
4188 operand cmpOp()
4189 %{
4190   match(Bool);
4191 
4192   format %{ "" %}
4193   interface(COND_INTER) %{
4194     equal(0x4, "e");
4195     not_equal(0x5, "ne");
4196     less(0xC, "l");
4197     greater_equal(0xD, "ge");
4198     less_equal(0xE, "le");
4199     greater(0xF, "g");
4200     overflow(0x0, "o");
4201     no_overflow(0x1, "no");
4202   %}
4203 %}
4204 
4205 // Comparison Code, unsigned compare (b/nb/be/nbe forms).  Used by FP also, with
4206 // C2 (unordered) turned into GT or LT already.  The other bits
4207 // C0 and C3 are turned into Carry & Zero flags.
4208 operand cmpOpU()
4209 %{
4210   match(Bool);
4211 
4212   format %{ "" %}
4213   interface(COND_INTER) %{
4214     equal(0x4, "e");
4215     not_equal(0x5, "ne");
4216     less(0x2, "b");
4217     greater_equal(0x3, "nb");
4218     less_equal(0x6, "be");
4219     greater(0x7, "nbe");
4220     overflow(0x0, "o");
4221     no_overflow(0x1, "no");
4222   %}
4223 %}
4224 
4225 
4226 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
4227 operand cmpOpUCF() %{
4228   match(Bool);
4229   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4230             n->as_Bool()->_test._test == BoolTest::ge ||
4231             n->as_Bool()->_test._test == BoolTest::le ||
4232             n->as_Bool()->_test._test == BoolTest::gt);
4233   format %{ "" %}
4234   interface(COND_INTER) %{  // same encoding table as cmpOpU
4235     equal(0x4, "e");
4236     not_equal(0x5, "ne");
4237     less(0x2, "b");
4238     greater_equal(0x3, "nb");
4239     less_equal(0x6, "be");
4240     greater(0x7, "nbe");
4241     overflow(0x0, "o");
4242     no_overflow(0x1, "no");
4243   %}
4244 %}
4245 
4246 
4247 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
4248 operand cmpOpUCF2() %{
4249   match(Bool);
4250   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4251             n->as_Bool()->_test._test == BoolTest::eq);
4252   format %{ "" %}
4253   interface(COND_INTER) %{  // same encoding table as cmpOpU
4254     equal(0x4, "e");
4255     not_equal(0x5, "ne");
4256     less(0x2, "b");
4257     greater_equal(0x3, "nb");
4258     less_equal(0x6, "be");
4259     greater(0x7, "nbe");
4260     overflow(0x0, "o");
4261     no_overflow(0x1, "no");
4262   %}
4263 %}
4264 
4265 // Operands for bound floating point register arguments: rxmmN is a VecX operand allocated from the xmmN_reg class
4266 operand rxmm0() %{
4267   constraint(ALLOC_IN_RC(xmm0_reg));  match(VecX);
4268   predicate((UseSSE > 0) && (UseAVX<= 2));  format%{%}  interface(REG_INTER);
4269 %}
4270 operand rxmm1() %{
4271   constraint(ALLOC_IN_RC(xmm1_reg));  match(VecX);
4272   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4273 %}
4274 operand rxmm2() %{
4275   constraint(ALLOC_IN_RC(xmm2_reg));  match(VecX);
4276   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4277 %}
4278 operand rxmm3() %{
4279   constraint(ALLOC_IN_RC(xmm3_reg));  match(VecX);
4280   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4281 %}
4282 operand rxmm4() %{
4283   constraint(ALLOC_IN_RC(xmm4_reg));  match(VecX);
4284   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4285 %}
4286 operand rxmm5() %{
4287   constraint(ALLOC_IN_RC(xmm5_reg));  match(VecX);
4288   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4289 %}
4290 operand rxmm6() %{
4291   constraint(ALLOC_IN_RC(xmm6_reg));  match(VecX);
4292   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4293 %}
4294 operand rxmm7() %{
4295   constraint(ALLOC_IN_RC(xmm7_reg));  match(VecX);
4296   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4297 %}
4298 operand rxmm8() %{
4299   constraint(ALLOC_IN_RC(xmm8_reg));  match(VecX);
4300   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4301 %}
4302 operand rxmm9() %{
4303   constraint(ALLOC_IN_RC(xmm9_reg));  match(VecX);
4304   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4305 %}
4306 operand rxmm10() %{
4307   constraint(ALLOC_IN_RC(xmm10_reg));  match(VecX);
4308   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4309 %}
4310 operand rxmm11() %{
4311   constraint(ALLOC_IN_RC(xmm11_reg));  match(VecX);
4312   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4313 %}
4314 operand rxmm12() %{
4315   constraint(ALLOC_IN_RC(xmm12_reg));  match(VecX);
4316   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4317 %}
4318 operand rxmm13() %{
4319   constraint(ALLOC_IN_RC(xmm13_reg));  match(VecX);
4320   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4321 %}
4322 operand rxmm14() %{
4323   constraint(ALLOC_IN_RC(xmm14_reg));  match(VecX);
4324   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4325 %}
4326 operand rxmm15() %{
4327   constraint(ALLOC_IN_RC(xmm15_reg));  match(VecX);
4328   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4329 %}
4330 operand rxmm16() %{
4331   constraint(ALLOC_IN_RC(xmm16_reg));  match(VecX);  // rxmm16..rxmm31: VecX operands bound to xmm16_reg..xmm31_reg
4332   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);  // all sixteen are predicated on UseAVX == 3
4333 %}
4334 operand rxmm17() %{
4335   constraint(ALLOC_IN_RC(xmm17_reg));  match(VecX);
4336   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4337 %}
4338 operand rxmm18() %{
4339   constraint(ALLOC_IN_RC(xmm18_reg));  match(VecX);
4340   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4341 %}
4342 operand rxmm19() %{
4343   constraint(ALLOC_IN_RC(xmm19_reg));  match(VecX);
4344   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4345 %}
4346 operand rxmm20() %{
4347   constraint(ALLOC_IN_RC(xmm20_reg));  match(VecX);
4348   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4349 %}
4350 operand rxmm21() %{
4351   constraint(ALLOC_IN_RC(xmm21_reg));  match(VecX);
4352   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4353 %}
4354 operand rxmm22() %{
4355   constraint(ALLOC_IN_RC(xmm22_reg));  match(VecX);
4356   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4357 %}
4358 operand rxmm23() %{
4359   constraint(ALLOC_IN_RC(xmm23_reg));  match(VecX);
4360   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4361 %}
4362 operand rxmm24() %{
4363   constraint(ALLOC_IN_RC(xmm24_reg));  match(VecX);
4364   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4365 %}
4366 operand rxmm25() %{
4367   constraint(ALLOC_IN_RC(xmm25_reg));  match(VecX);
4368   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4369 %}
4370 operand rxmm26() %{
4371   constraint(ALLOC_IN_RC(xmm26_reg));  match(VecX);
4372   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4373 %}
4374 operand rxmm27() %{
4375   constraint(ALLOC_IN_RC(xmm27_reg));  match(VecX);
4376   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4377 %}
4378 operand rxmm28() %{
4379   constraint(ALLOC_IN_RC(xmm28_reg));  match(VecX);
4380   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4381 %}
4382 operand rxmm29() %{
4383   constraint(ALLOC_IN_RC(xmm29_reg));  match(VecX);
4384   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4385 %}
4386 operand rxmm30() %{
4387   constraint(ALLOC_IN_RC(xmm30_reg));  match(VecX);
4388   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4389 %}
4390 operand rxmm31() %{
4391   constraint(ALLOC_IN_RC(xmm31_reg));  match(VecX);
4392   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4393 %}
4394 
4395 //----------OPERAND CLASSES----------------------------------------------------
4396 // Operand Classes are groups of operands that are used to simplify
4397 // instruction definitions by not requiring the AD writer to specify separate
4398 // instructions for every form of operand when the instruction accepts
4399 // multiple operand types with the same basic encoding and format.  The classic
4400 // case of this is memory operands: "memory" groups every addressing-mode operand below.
4401 
4402 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4403                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4404                indCompressedOopOffset,
4405                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4406                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4407                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4408 
4409 //----------PIPELINE-----------------------------------------------------------
4410 // Rules which define the behavior of the target architecture's pipeline.
4411 pipeline %{
4412 
4413 //----------ATTRIBUTES---------------------------------------------------------
4414 attributes %{
4415   variable_size_instructions;        // Variable size instructions
4416   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4417   instruction_unit_size = 1;         // Instruction size is counted in units of 1 byte
4418   instruction_fetch_unit_size = 16;  // The processor fetches one line
4419   instruction_fetch_units = 1;       // of 16 bytes
4420 
4421   // List of nop instructions
4422   nops( MachNop );
4423 %}
4424 
4425 //----------RESOURCES----------------------------------------------------------
4426 // Resources are the functional units available to the machine
4427 
4428 // Generic P2/P3 pipeline
4429 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4430 // 3 instructions decoded per cycle.
4431 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4432 // 3 ALU ops, only ALU0 handles mul instructions.  NOTE(review): MEM below lists three units (MS0..MS2), not 2 - confirm.
4433 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4434            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
4435            BR, FPU,
4436            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
4437 
4438 //----------PIPELINE DESCRIPTION-----------------------------------------------
4439 // Pipeline Description specifies the stages in the machine's pipeline
4440 
4441 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes below
4442 pipe_desc(S0, S1, S2, S3, S4, S5);
4443 
4444 //----------PIPELINE CLASSES---------------------------------------------------
4445 // Pipeline Classes describe the stages in which input and output are
4446 // referenced by the hardware pipeline.
4447 
4448 // Naming convention: ialu or fpu
4449 // Then: _reg
4450 // Then: _reg if there is a 2nd register
4451 // Then: _long if it's a pair of instructions implementing a long
4452 // Then: _fat if it requires the big decoder
4453 //   Or: _mem if it requires the big decoder and a memory unit.
4454 
4455 // Integer ALU reg operation: dst is read in S3 and written in S4
4456 pipe_class ialu_reg(rRegI dst)
4457 %{
4458     single_instruction;
4459     dst    : S4(write);
4460     dst    : S3(read);
4461     DECODE : S0;        // any decoder
4462     ALU    : S3;        // any alu
4463 %}
4464 
4465 // Long ALU reg operation: counted as 2 instructions; dst is read in S3 and written in S4
4466 pipe_class ialu_reg_long(rRegL dst)
4467 %{
4468     instruction_count(2);
4469     dst    : S4(write);
4470     dst    : S3(read);
4471     DECODE : S0(2);     // any 2 decoders
4472     ALU    : S3(2);     // both alus
4473 %}
4474 
4475 // Integer ALU reg operation using big decoder (D0 instead of DECODE); otherwise like ialu_reg
4476 pipe_class ialu_reg_fat(rRegI dst)
4477 %{
4478     single_instruction;
4479     dst    : S4(write);
4480     dst    : S3(read);
4481     D0     : S0;        // big decoder only
4482     ALU    : S3;        // any alu
4483 %}
4484 
4485 // Long ALU reg operation using big decoder (D0 instead of DECODE); otherwise like ialu_reg_long
4486 pipe_class ialu_reg_long_fat(rRegL dst)
4487 %{
4488     instruction_count(2);
4489     dst    : S4(write);
4490     dst    : S3(read);
4491     D0     : S0(2);     // big decoder only; twice
4492     ALU    : S3(2);     // any 2 alus
4493 %}
4494 
4495 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
4496 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4497 %{
4498     single_instruction;
4499     dst    : S4(write);
4500     src    : S3(read);
4501     DECODE : S0;        // any decoder
4502     ALU    : S3;        // any alu
4503 %}
4504 
4505 // Long ALU reg-reg operation: counted as 2 instructions; src is read in S3, dst is written in S4
4506 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4507 %{
4508     instruction_count(2);
4509     dst    : S4(write);
4510     src    : S3(read);
4511     DECODE : S0(2);     // any 2 decoders
4512     ALU    : S3(2);     // both alus
4513 %}
4514 
4515 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand here)
4516 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4517 %{
4518     single_instruction;
4519     dst    : S4(write);
4520     src    : S3(read);
4521     D0     : S0;        // big decoder only
4522     ALU    : S3;        // any alu
4523 %}
4524 
4525 // Long ALU reg-reg operation using big decoder (D0 instead of DECODE); otherwise like ialu_reg_reg_long
4526 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4527 %{
4528     instruction_count(2);
4529     dst    : S4(write);
4530     src    : S3(read);
4531     D0     : S0(2);     // big decoder only; twice
4532     ALU    : S3(2);     // both alus
4533 %}
4534 
4535 // Integer ALU reg-mem operation: mem is read in S3, the ALU is used in S4, dst is written in S5
4536 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4537 %{
4538     single_instruction;
4539     dst    : S5(write);
4540     mem    : S3(read);
4541     D0     : S0;        // big decoder only
4542     ALU    : S4;        // any alu
4543     MEM    : S3;        // any mem
4544 %}
4545 
4546 // Integer mem operation (prefetch): mem is read in S3; no ALU resource and no register operand
4547 pipe_class ialu_mem(memory mem)
4548 %{
4549     single_instruction;
4550     mem    : S3(read);
4551     D0     : S0;        // big decoder only
4552     MEM    : S3;        // any mem
4553 %}
4554 
4555 // Integer Store to Memory from a register: mem is read in S3, src is read in S5
4556 pipe_class ialu_mem_reg(memory mem, rRegI src)
4557 %{
4558     single_instruction;
4559     mem    : S3(read);
4560     src    : S5(read);
4561     D0     : S0;        // big decoder only
4562     ALU    : S4;        // any alu
4563     MEM    : S3;        // any mem
4564 %}
4565 
4566 // // Long Store to Memory
4567 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4568 // %{
4569 //     instruction_count(2);
4570 //     mem    : S3(read);
4571 //     src    : S5(read);
4572 //     D0     : S0(2);          // big decoder only; twice
4573 //     ALU    : S4(2);     // any 2 alus
4574 //     MEM    : S3(2);  // Both mems
4575 // %}
4576 
4577 // Integer Store to Memory with no register source (same as ialu_mem_reg minus the src operand)
4578 pipe_class ialu_mem_imm(memory mem)
4579 %{
4580     single_instruction;
4581     mem    : S3(read);
4582     D0     : S0;        // big decoder only
4583     ALU    : S4;        // any alu
4584     MEM    : S3;        // any mem
4585 %}
4586 
4587 // Integer ALU0 reg-reg operation: like ialu_reg_reg, but restricted to D0 and ALU0
4588 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4589 %{
4590     single_instruction;
4591     dst    : S4(write);
4592     src    : S3(read);
4593     D0     : S0;        // Big decoder only
4594     ALU0   : S3;        // only alu0
4595 %}
4596 
4597 // Integer ALU0 reg-mem operation: like ialu_reg_mem, but restricted to ALU0
4598 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4599 %{
4600     single_instruction;
4601     dst    : S5(write);
4602     mem    : S3(read);
4603     D0     : S0;        // big decoder only
4604     ALU0   : S4;        // ALU0 only
4605     MEM    : S3;        // any mem
4606 %}
4607 
4608 // Integer ALU reg-reg operation writing flags: src1/src2 are read in S3, cr is written in S4
4609 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4610 %{
4611     single_instruction;
4612     cr     : S4(write);
4613     src1   : S3(read);
4614     src2   : S3(read);
4615     DECODE : S0;        // any decoder
4616     ALU    : S3;        // any alu
4617 %}
4618 
4619 // Integer ALU reg-imm operation writing flags: src1 is read in S3, cr is written in S4
4620 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4621 %{
4622     single_instruction;
4623     cr     : S4(write);
4624     src1   : S3(read);
4625     DECODE : S0;        // any decoder
4626     ALU    : S3;        // any alu
4627 %}
4628 
4629 // Integer ALU reg-mem operation writing flags: src1 and the memory operand src2 are read in S3, cr is written in S4
4630 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4631 %{
4632     single_instruction;
4633     cr     : S4(write);
4634     src1   : S3(read);
4635     src2   : S3(read);
4636     D0     : S0;        // big decoder only
4637     ALU    : S4;        // any alu
4638     MEM    : S3;        // any mem
4639 %}
4640 
4641 // Four-instruction sequence reading p, q (S3) and y (S4); no operand is marked written (presumably for cmplt-style patterns - TODO confirm)
4642 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4643 %{
4644     instruction_count(4);
4645     y      : S4(read);
4646     q      : S3(read);
4647     p      : S3(read);
4648     DECODE : S0(4);     // any decoder
4649 %}
4650 
4651 // Conditional move reg-reg: reads src and the flags (cr) in S3, writes dst in S4
4652 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4653 %{
4654     single_instruction;
4655     dst    : S4(write);
4656     src    : S3(read);
4657     cr     : S3(read);
4658     DECODE : S0;        // any decoder
4659 %}
4660 
4661 // Conditional move reg-mem: reads the memory src and the flags (cr) in S3, writes dst in S4
4662 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4663 %{
4664     single_instruction;
4665     dst    : S4(write);
4666     src    : S3(read);
4667     cr     : S3(read);
4668     DECODE : S0;        // any decoder
4669     MEM    : S3;        // any mem
4670 %}
4671 
4672 // Conditional move reg-reg long: operand timing as in pipe_cmov_reg, but 2 decoders are used
4673 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4674 %{
4675     single_instruction;
4676     dst    : S4(write);
4677     src    : S3(read);
4678     cr     : S3(read);
4679     DECODE : S0(2);     // any 2 decoders
4680 %}
4681 
4682 // XXX
4683 // // Conditional move double reg-reg
4684 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4685 // %{
4686 //     single_instruction;
4687 //     dst    : S4(write);
4688 //     src    : S3(read);
4689 //     cr     : S3(read);
4690 //     DECODE : S0;     // any decoder
4691 // %}
4692 
4693 // Float single-register operation: dst is only marked as read; counts as 2 instructions
4694 pipe_class fpu_reg(regD dst)
4695 %{
4696     instruction_count(2);
4697     dst    : S3(read);
4698     DECODE : S0(2);     // any 2 decoders
4699     FPU    : S3;
4700 %}
4701 
4702 // Float reg-reg operation: reads src in S3, writes dst in S4; counts as 2 instructions
4703 pipe_class fpu_reg_reg(regD dst, regD src)
4704 %{
4705     instruction_count(2);
4706     dst    : S4(write);
4707     src    : S3(read);
4708     DECODE : S0(2);     // any 2 decoders
4709     FPU    : S3;
4710 %}
4711 
4712 // Float reg-reg-reg operation: reads src1 and src2, writes dst; counts as 3 instructions
4713 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4714 %{
4715     instruction_count(3);
4716     dst    : S4(write);
4717     src1   : S3(read);
4718     src2   : S3(read);
4719     DECODE : S0(3);     // any 3 decoders
4720     FPU    : S3(2);
4721 %}
4722 
4723 // Float four-register operation: reads src1, src2 and src3, writes dst; counts as 4 instructions
4724 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4725 %{
4726     instruction_count(4);
4727     dst    : S4(write);
4728     src1   : S3(read);
4729     src2   : S3(read);
4730     src3   : S3(read);
4731     DECODE : S0(4);     // any 4 decoders
4732     FPU    : S3(2);
4733 %}
4734 
4735 // Float reg-mem-reg-reg operation: src1 is the memory operand; counts as 4 instructions
4736 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4737 %{
4738     instruction_count(4);
4739     dst    : S4(write);
4740     src1   : S3(read);
4741     src2   : S3(read);
4742     src3   : S3(read);
4743     DECODE : S1(3);     // any 3 decoders
4744     D0     : S0;        // Big decoder only
4745     FPU    : S3(2);
4746     MEM    : S3;        // any mem
4747 %}
4748 
4749 // Float reg-mem operation: reads mem in S3, writes dst in S5; counts as 2 instructions
4750 pipe_class fpu_reg_mem(regD dst, memory mem)
4751 %{
4752     instruction_count(2);
4753     dst    : S5(write);
4754     mem    : S3(read);
4755     D0     : S0;        // big decoder only
4756     DECODE : S1;        // any decoder for FPU POP
4757     FPU    : S4;
4758     MEM    : S3;        // any mem
4759 %}
4760 
4761 // Float reg-reg-mem operation: reads src1 and mem in S3, writes dst in S5; counts as 3 instructions
4762 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4763 %{
4764     instruction_count(3);
4765     dst    : S5(write);
4766     src1   : S3(read);
4767     mem    : S3(read);
4768     D0     : S0;        // big decoder only
4769     DECODE : S1(2);     // any decoder for FPU POP
4770     FPU    : S4;
4771     MEM    : S3;        // any mem
4772 %}
4773 
4774 // Float mem-reg operation: both src (S5) and mem (S3) are marked as read; counts as 2 instructions
4775 pipe_class fpu_mem_reg(memory mem, regD src)
4776 %{
4777     instruction_count(2);
4778     src    : S5(read);
4779     mem    : S3(read);
4780     DECODE : S0;        // any decoder for FPU PUSH
4781     D0     : S1;        // big decoder only
4782     FPU    : S4;
4783     MEM    : S3;        // any mem
4784 %}
4785 
4786 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4787 %{
4788     instruction_count(3);
4789     src1   : S3(read);
4790     src2   : S3(read);
4791     mem    : S3(read);  // memory operand is marked read, like both register sources
4792     DECODE : S0(2);     // any decoder for FPU PUSH
4793     D0     : S1;        // big decoder only
4794     FPU    : S4;
4795     MEM    : S3;        // any mem
4796 %}
4797 
4798 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4799 %{
4800     instruction_count(3);
4801     src1   : S3(read);
4802     src2   : S3(read);  // second memory operand
4803     mem    : S4(read);  // read in S4, whereas src1/src2 are read in S3
4804     DECODE : S0;        // any decoder for FPU PUSH
4805     D0     : S0(2);     // big decoder only
4806     FPU    : S4;
4807     MEM    : S3(2);     // any mem
4808 %}
4809 
4810 pipe_class fpu_mem_mem(memory dst, memory src1)
4811 %{
4812     instruction_count(2);
4813     src1   : S3(read);
4814     dst    : S4(read);  // dst is a memory operand and is marked read, not write
4815     D0     : S0(2);     // big decoder only
4816     MEM    : S3(2);     // any mem
4817 %}
4818 
4819 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4820 %{
4821     instruction_count(3);
4822     src1   : S3(read);
4823     src2   : S3(read);
4824     dst    : S4(read);  // dst is a memory operand and is marked read, not write
4825     D0     : S0(3);     // big decoder only
4826     FPU    : S4;
4827     MEM    : S3(3);     // any mem
4828 %}
4829 
4830 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4831 %{
4832     instruction_count(3);
4833     src1   : S4(read);
4834     mem    : S4(read);  // memory operand is marked read in the same stage as src1
4835     DECODE : S0;        // any decoder for FPU PUSH
4836     D0     : S0(2);     // big decoder only
4837     FPU    : S4;
4838     MEM    : S3(2);     // any mem
4839 %}
4840 
4841 // Float load constant: dst is the only operand (written in S5); counts as 2 instructions
4842 pipe_class fpu_reg_con(regD dst)
4843 %{
4844     instruction_count(2);
4845     dst    : S5(write);
4846     D0     : S0;        // big decoder only for the load
4847     DECODE : S1;        // any decoder for FPU POP
4848     FPU    : S4;
4849     MEM    : S3;        // any mem
4850 %}
4851 
4852 // Float load constant with a register source: reads src in S3, writes dst in S5; counts as 3 instructions
4853 pipe_class fpu_reg_reg_con(regD dst, regD src)
4854 %{
4855     instruction_count(3);
4856     dst    : S5(write);
4857     src    : S3(read);
4858     D0     : S0;        // big decoder only for the load
4859     DECODE : S1(2);     // any decoder for FPU POP
4860     FPU    : S4;
4861     MEM    : S3;        // any mem
4862 %}
4863 
4864 // UnConditional branch: no operand timing is declared, only the BR resource in S3
4865 pipe_class pipe_jmp(label labl)
4866 %{
4867     single_instruction;
4868     BR   : S3;
4869 %}
4870 
4871 // Conditional branch: reads the flags (cr) in S1 and uses the BR resource in S3
4872 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4873 %{
4874     single_instruction;
4875     cr    : S1(read);
4876     BR    : S3;
4877 %}
4878 
4879 // Allocation idiom: one serializing instruction with fixed latency 6; reads heap_ptr, writes dst
4880 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4881 %{
4882     instruction_count(1); force_serialization;
4883     fixed_latency(6);
4884     heap_ptr : S3(read);
4885     DECODE   : S0(3);
4886     D0       : S2;
4887     MEM      : S3;
4888     ALU      : S3(2);
4889     dst      : S5(write);
4890     BR       : S5;
4891 %}
4892 
4893 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, serializing, fixed latency 100
4894 pipe_class pipe_slow()
4895 %{
4896     instruction_count(10); multiple_bundles; force_serialization;
4897     fixed_latency(100);
4898     D0  : S0(2);
4899     MEM : S3(2);
4900 %}
4901 
4902 // The real do-nothing guy: zero instructions, no operands and no resources
4903 pipe_class empty()
4904 %{
4905     instruction_count(0);
4906 %}
4907 
4908 // Define the class for the Nop node: MachNop is bound to the empty pipe_class
4909 define
4910 %{
4911    MachNop = empty;
4912 %}
4913 
4914 %}
4915 
4916 //----------INSTRUCTIONS-------------------------------------------------------
4917 //
4918 // match      -- States which machine-independent subtree may be replaced
4919 //               by this instruction.
4920 // ins_cost   -- The estimated cost of this instruction is used by instruction
4921 //               selection to identify a minimum cost tree of machine
4922 //               instructions that matches a tree of machine-independent
4923 //               instructions.
4924 // format     -- A string providing the disassembly for this instruction.
4925 //               The value of an instruction's operand may be inserted
4926 //               by referring to it with a '$' prefix.
4927 // opcode     -- Three instruction opcodes may be provided.  These are referred
4928 //               to within an encode class as $primary, $secondary, and $tertiary
4929 //               respectively.  The primary opcode is commonly used to
4930 //               indicate the type of machine instruction, while secondary
4931 //               and tertiary are often used for prefix options or addressing
4932 //               modes.
4933 // ins_encode -- A list of encode classes with parameters. The encode class
4934 //               name must have been defined in an 'enc_class' specification
4935 //               in the encode section of the architecture description.
4936 
4937 
4938 //----------Load/Store/Move Instructions---------------------------------------
4939 //----------Load Instructions--------------------------------------------------
4940 
4941 // Load Byte (8 bit signed) into an int register using a sign-extending movsbl
4942 instruct loadB(rRegI dst, memory mem)
4943 %{
4944   match(Set dst (LoadB mem));
4945 
4946   ins_cost(125);
4947   format %{ "movsbl  $dst, $mem\t# byte" %}
4948 
4949   ins_encode %{
4950     __ movsbl($dst$$Register, $mem$$Address);
4951   %}
4952 
4953   ins_pipe(ialu_reg_mem);
4954 %}
4955 
4956 // Load Byte (8 bit signed) into Long Register: matches ConvI2L of LoadB and emits a single movsbq
4957 instruct loadB2L(rRegL dst, memory mem)
4958 %{
4959   match(Set dst (ConvI2L (LoadB mem)));
4960 
4961   ins_cost(125);
4962   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4963 
4964   ins_encode %{
4965     __ movsbq($dst$$Register, $mem$$Address);
4966   %}
4967 
4968   ins_pipe(ialu_reg_mem);
4969 %}
4970 
4971 // Load Unsigned Byte (8 bit UNsigned) into an int register using a zero-extending movzbl
4972 instruct loadUB(rRegI dst, memory mem)
4973 %{
4974   match(Set dst (LoadUB mem));
4975 
4976   ins_cost(125);
4977   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4978 
4979   ins_encode %{
4980     __ movzbl($dst$$Register, $mem$$Address);
4981   %}
4982 
4983   ins_pipe(ialu_reg_mem);
4984 %}
4985 
4986 // Load Unsigned Byte (8 bit UNsigned) into Long Register: matches ConvI2L of LoadUB and emits a single movzbq
4987 instruct loadUB2L(rRegL dst, memory mem)
4988 %{
4989   match(Set dst (ConvI2L (LoadUB mem)));
4990 
4991   ins_cost(125);
4992   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4993 
4994   ins_encode %{
4995     __ movzbq($dst$$Register, $mem$$Address);
4996   %}
4997 
4998   ins_pipe(ialu_reg_mem);
4999 %}
5000 
5001 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register (two instructions; flags are declared killed)
5002 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5003   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5004   effect(KILL cr);
5005 
5006   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
5007             "andl    $dst, right_n_bits($mask, 8)" %}
5008   ins_encode %{
5009     Register Rdst = $dst$$Register;
5010     __ movzbq(Rdst, $mem$$Address);
5011     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // mask is ANDed with right_n_bits(8) before being emitted
5012   %}
5013   ins_pipe(ialu_reg_mem);
5014 %}
5015 
5016 // Load Short (16 bit signed) into an int register using a sign-extending movswl
5017 instruct loadS(rRegI dst, memory mem)
5018 %{
5019   match(Set dst (LoadS mem));
5020 
5021   ins_cost(125);
5022   format %{ "movswl $dst, $mem\t# short" %}
5023 
5024   ins_encode %{
5025     __ movswl($dst$$Register, $mem$$Address);
5026   %}
5027 
5028   ins_pipe(ialu_reg_mem);
5029 %}
5030 
5031 // Load Short (16 bit signed) to Byte (8 bit signed): the left/right shift pair by 24 is replaced by one movsbl
5032 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5033   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5034 
5035   ins_cost(125);
5036   format %{ "movsbl $dst, $mem\t# short -> byte" %}
5037   ins_encode %{
5038     __ movsbl($dst$$Register, $mem$$Address);
5039   %}
5040   ins_pipe(ialu_reg_mem);
5041 %}
5042 
5043 // Load Short (16 bit signed) into Long Register: matches ConvI2L of LoadS and emits a single movswq
5044 instruct loadS2L(rRegL dst, memory mem)
5045 %{
5046   match(Set dst (ConvI2L (LoadS mem)));
5047 
5048   ins_cost(125);
5049   format %{ "movswq $dst, $mem\t# short -> long" %}
5050 
5051   ins_encode %{
5052     __ movswq($dst$$Register, $mem$$Address);
5053   %}
5054 
5055   ins_pipe(ialu_reg_mem);
5056 %}
5057 
5058 // Load Unsigned Short/Char (16 bit UNsigned) into an int register using a zero-extending movzwl
5059 instruct loadUS(rRegI dst, memory mem)
5060 %{
5061   match(Set dst (LoadUS mem));
5062 
5063   ins_cost(125);
5064   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5065 
5066   ins_encode %{
5067     __ movzwl($dst$$Register, $mem$$Address);
5068   %}
5069 
5070   ins_pipe(ialu_reg_mem);
5071 %}
5072 
5073 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): shift pair by 24 replaced by one movsbl
5074 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5075   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5076 
5077   ins_cost(125);
5078   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5079   ins_encode %{
5080     __ movsbl($dst$$Register, $mem$$Address);
5081   %}
5082   ins_pipe(ialu_reg_mem);
5083 %}
5084 
5085 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: ConvI2L of LoadUS becomes a single movzwq
5086 instruct loadUS2L(rRegL dst, memory mem)
5087 %{
5088   match(Set dst (ConvI2L (LoadUS mem)));
5089 
5090   ins_cost(125);
5091   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5092 
5093   ins_encode %{
5094     __ movzwq($dst$$Register, $mem$$Address);
5095   %}
5096 
5097   ins_pipe(ialu_reg_mem);
5098 %}
5099 
5100 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: load and mask become one movzbq
5101 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5102   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5103 
5104   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5105   ins_encode %{
5106     __ movzbq($dst$$Register, $mem$$Address);
5107   %}
5108   ins_pipe(ialu_reg_mem);
5109 %}
5110 
5111 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register (two instructions; flags are declared killed)
5112 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5113   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5114   effect(KILL cr);
5115 
5116   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5117             "andl    $dst, right_n_bits($mask, 16)" %}
5118   ins_encode %{
5119     Register Rdst = $dst$$Register;
5120     __ movzwq(Rdst, $mem$$Address);
5121     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // mask is ANDed with right_n_bits(16) before being emitted
5122   %}
5123   ins_pipe(ialu_reg_mem);
5124 %}
5125 
5126 // Load Integer: plain 32-bit movl from memory into an int register
5127 instruct loadI(rRegI dst, memory mem)
5128 %{
5129   match(Set dst (LoadI mem));
5130 
5131   ins_cost(125);
5132   format %{ "movl    $dst, $mem\t# int" %}
5133 
5134   ins_encode %{
5135     __ movl($dst$$Register, $mem$$Address);
5136   %}
5137 
5138   ins_pipe(ialu_reg_mem);
5139 %}
5140 
5141 // Load Integer (32 bit signed) to Byte (8 bit signed): shift pair by 24 replaced by one movsbl
5142 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5143   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5144 
5145   ins_cost(125);
5146   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5147   ins_encode %{
5148     __ movsbl($dst$$Register, $mem$$Address);
5149   %}
5150   ins_pipe(ialu_reg_mem);
5151 %}
5152 
5153 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): AndI with the immI_255 mask becomes one movzbl
5154 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5155   match(Set dst (AndI (LoadI mem) mask));
5156 
5157   ins_cost(125);
5158   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5159   ins_encode %{
5160     __ movzbl($dst$$Register, $mem$$Address);
5161   %}
5162   ins_pipe(ialu_reg_mem);
5163 %}
5164 
5165 // Load Integer (32 bit signed) to Short (16 bit signed): shift pair by 16 replaced by one movswl
5166 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5167   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5168 
5169   ins_cost(125);
5170   format %{ "movswl  $dst, $mem\t# int -> short" %}
5171   ins_encode %{
5172     __ movswl($dst$$Register, $mem$$Address);
5173   %}
5174   ins_pipe(ialu_reg_mem);
5175 %}
5176 
5177 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): AndI with the immI_65535 mask becomes one movzwl
5178 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5179   match(Set dst (AndI (LoadI mem) mask));
5180 
5181   ins_cost(125);
5182   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5183   ins_encode %{
5184     __ movzwl($dst$$Register, $mem$$Address);
5185   %}
5186   ins_pipe(ialu_reg_mem);
5187 %}
5188 
5189 // Load Integer into Long Register: ConvI2L of LoadI becomes a single sign-extending movslq
5190 instruct loadI2L(rRegL dst, memory mem)
5191 %{
5192   match(Set dst (ConvI2L (LoadI mem)));
5193 
5194   ins_cost(125);
5195   format %{ "movslq  $dst, $mem\t# int -> long" %}
5196 
5197   ins_encode %{
5198     __ movslq($dst$$Register, $mem$$Address);
5199   %}
5200 
5201   ins_pipe(ialu_reg_mem);
5202 %}
5203 
5204 // Load Integer with mask 0xFF into Long Register: load, mask and conversion become one movzbq
5205 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5206   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5207 
5208   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5209   ins_encode %{
5210     __ movzbq($dst$$Register, $mem$$Address);
5211   %}
5212   ins_pipe(ialu_reg_mem);
5213 %}
5214 
5215 // Load Integer with mask 0xFFFF into Long Register: load, mask and conversion become one movzwq
5216 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5217   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5218 
5219   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5220   ins_encode %{
5221     __ movzwq($dst$$Register, $mem$$Address);
5222   %}
5223   ins_pipe(ialu_reg_mem);
5224 %}
5225 
5226 // Load Integer with a 31-bit mask into Long Register: movl followed by andl (flags are declared killed)
5227 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5228   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5229   effect(KILL cr);
5230 
5231   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5232             "andl    $dst, $mask" %}
5233   ins_encode %{
5234     Register Rdst = $dst$$Register;
5235     __ movl(Rdst, $mem$$Address);
5236     __ andl(Rdst, $mask$$constant); // mask is applied unchanged, unlike the byte/short variants
5237   %}
5238   ins_pipe(ialu_reg_mem);
5239 %}
5240 
5241 // Load Unsigned Integer into Long Register: AndL of ConvI2L(LoadI) with the immL_32bits mask becomes one movl
5242 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
5243 %{
5244   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5245 
5246   ins_cost(125);
5247   format %{ "movl    $dst, $mem\t# uint -> long" %}
5248 
5249   ins_encode %{
5250     __ movl($dst$$Register, $mem$$Address);
5251   %}
5252 
5253   ins_pipe(ialu_reg_mem);
5254 %}
5255 
5256 // Load Long: plain 64-bit movq from memory into a long register
5257 instruct loadL(rRegL dst, memory mem)
5258 %{
5259   match(Set dst (LoadL mem));
5260 
5261   ins_cost(125);
5262   format %{ "movq    $dst, $mem\t# long" %}
5263 
5264   ins_encode %{
5265     __ movq($dst$$Register, $mem$$Address);
5266   %}
5267 
5268   ins_pipe(ialu_reg_mem); // XXX
5269 %}
5270 
5271 // Load Range: 32-bit movl, encoded from opcode 0x8B through the REX_reg_mem / OpcP / reg_mem enc_classes
5272 instruct loadRange(rRegI dst, memory mem)
5273 %{
5274   match(Set dst (LoadRange mem));
5275 
5276   ins_cost(125); // XXX
5277   format %{ "movl    $dst, $mem\t# range" %}
5278   opcode(0x8B);
5279   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
5280   ins_pipe(ialu_reg_mem);
5281 %}
5282 
5283 // Load Pointer: 64-bit movq; same opcode 0x8B as loadRange but prefixed via REX_reg_mem_wide
5284 instruct loadP(rRegP dst, memory mem)
5285 %{
5286   match(Set dst (LoadP mem));
5287 
5288   ins_cost(125); // XXX
5289   format %{ "movq    $dst, $mem\t# ptr" %}
5290   opcode(0x8B);
5291   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5292   ins_pipe(ialu_reg_mem); // XXX
5293 %}
5294 
5295 // Load Compressed Pointer: the narrow value is loaded as-is with a 32-bit movl (no decoding here)
5296 instruct loadN(rRegN dst, memory mem)
5297 %{
5298    match(Set dst (LoadN mem));
5299 
5300    ins_cost(125); // XXX
5301    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5302    ins_encode %{
5303      __ movl($dst$$Register, $mem$$Address);
5304    %}
5305    ins_pipe(ialu_reg_mem); // XXX
5306 %}
5307 
5308 
5309 // Load Klass Pointer: 64-bit movq, encoded exactly like loadP (opcode 0x8B with REX_reg_mem_wide)
5310 instruct loadKlass(rRegP dst, memory mem)
5311 %{
5312   match(Set dst (LoadKlass mem));
5313 
5314   ins_cost(125); // XXX
5315   format %{ "movq    $dst, $mem\t# class" %}
5316   opcode(0x8B);
5317   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5318   ins_pipe(ialu_reg_mem); // XXX
5319 %}
5320 
5321 // Load narrow Klass Pointer: the narrow value is loaded as-is with a 32-bit movl (no decoding here)
5322 instruct loadNKlass(rRegN dst, memory mem)
5323 %{
5324   match(Set dst (LoadNKlass mem));
5325 
5326   ins_cost(125); // XXX
5327   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5328   ins_encode %{
5329     __ movl($dst$$Register, $mem$$Address);
5330   %}
5331   ins_pipe(ialu_reg_mem); // XXX
5332 %}
5333 
5334 // Load Float from memory into an XMM register via the movflt assembler helper
5335 instruct loadF(regF dst, memory mem)
5336 %{
5337   match(Set dst (LoadF mem));
5338 
5339   ins_cost(145); // XXX
5340   format %{ "movss   $dst, $mem\t# float" %}
5341   ins_encode %{
5342     __ movflt($dst$$XMMRegister, $mem$$Address);
5343   %}
5344   ins_pipe(pipe_slow); // XXX
5345 %}
5346 
5347 // Move Float from regF to vlRegF: a register-to-register movflt, not a memory load despite the format text
5348 instruct MoveF2VL(vlRegF dst, regF src) %{
5349   match(Set dst src);
5350   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5351   ins_encode %{
5352     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5353   %}
5354   ins_pipe( fpu_reg_reg );
5355 %}
5356 
5357 // Move Float from vlRegF to regF: a register-to-register movflt, not a memory load despite the format text
5358 instruct MoveVL2F(regF dst, vlRegF src) %{
5359   match(Set dst src);
5360   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
5361   ins_encode %{
5362     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5363   %}
5364   ins_pipe( fpu_reg_reg );
5365 %}
5366 
5367 // Load Double, variant selected when UseXmmLoadAndClearUpper is false (format shows movlpd)
5368 instruct loadD_partial(regD dst, memory mem)
5369 %{
5370   predicate(!UseXmmLoadAndClearUpper);
5371   match(Set dst (LoadD mem));
5372 
5373   ins_cost(145); // XXX
5374   format %{ "movlpd  $dst, $mem\t# double" %}
5375   ins_encode %{
5376     __ movdbl($dst$$XMMRegister, $mem$$Address); // NOTE(review): same call as loadD; presumably movdbl picks movlpd/movsd from the flag - confirm
5377   %}
5378   ins_pipe(pipe_slow); // XXX
5379 %}
5380 
5381 instruct loadD(regD dst, memory mem)
5382 %{
5383   predicate(UseXmmLoadAndClearUpper);
5384   match(Set dst (LoadD mem));
5385 
5386   ins_cost(145); // XXX
5387   format %{ "movsd   $dst, $mem\t# double" %}
5388   ins_encode %{
5389     __ movdbl($dst$$XMMRegister, $mem$$Address); // Load Double, variant selected when UseXmmLoadAndClearUpper is true
5390   %}
5391   ins_pipe(pipe_slow); // XXX
5392 %}
5393 
5394 // Move Double from regD to vlRegD: a register-to-register movdbl, not a memory load despite the format text
5395 instruct MoveD2VL(vlRegD dst, regD src) %{
5396   match(Set dst src);
5397   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5398   ins_encode %{
5399     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5400   %}
5401   ins_pipe( fpu_reg_reg );
5402 %}
5403 
5404 // Move Double from vlRegD to regD: a register-to-register movdbl, not a memory load despite the format text
5405 instruct MoveVL2D(regD dst, vlRegD src) %{
5406   match(Set dst src);
5407   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
5408   ins_encode %{
5409     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5410   %}
5411   ins_pipe( fpu_reg_reg );
5412 %}
5413 
5414 // Load Effective Address for the indOffset8 operand class: the address operand itself is matched and emitted as leaq
5415 instruct leaP8(rRegP dst, indOffset8 mem)
5416 %{
5417   match(Set dst mem);
5418 
5419   ins_cost(110); // XXX
5420   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5421   opcode(0x8D);
5422   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5423   ins_pipe(ialu_reg_reg_fat);
5424 %}
5425 
5426 instruct leaP32(rRegP dst, indOffset32 mem)
5427 %{
5428   match(Set dst mem);
5429 
5430   ins_cost(110);
5431   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5432   opcode(0x8D);
5433   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5434   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indOffset32) differs
5435 %}
5436 
5437 // instruct leaPIdx(rRegP dst, indIndex mem)
5438 // %{
5439 //   match(Set dst mem);
5440 
5441 //   ins_cost(110);
5442 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5443 //   opcode(0x8D);
5444 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5445 //   ins_pipe(ialu_reg_reg_fat);
5446 // %}
5447 
5448 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5449 %{
5450   match(Set dst mem);
5451 
5452   ins_cost(110);
5453   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5454   opcode(0x8D);
5455   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5456   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indIndexOffset) differs
5457 %}
5458 
5459 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5460 %{
5461   match(Set dst mem);
5462 
5463   ins_cost(110);
5464   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5465   opcode(0x8D);
5466   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5467   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indIndexScale) differs
5468 %}
5469 
5470 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
5471 %{
5472   match(Set dst mem);
5473 
5474   ins_cost(110);
5475   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
5476   opcode(0x8D);
5477   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5478   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; format tag now distinguishes this rule from leaPIdxScale
5479 %}
5480 
5481 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5482 %{
5483   match(Set dst mem);
5484 
5485   ins_cost(110);
5486   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5487   opcode(0x8D);
5488   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5489   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indIndexScaleOffset) differs
5490 %}
5491 
5492 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
5493 %{
5494   match(Set dst mem);
5495 
5496   ins_cost(110);
5497   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
5498   opcode(0x8D);
5499   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5500   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indPosIndexOffset) differs
5501 %}
5502 
5503 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5504 %{
5505   match(Set dst mem);
5506 
5507   ins_cost(110);
5508   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5509   opcode(0x8D);
5510   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5511   ins_pipe(ialu_reg_reg_fat); // same encoding as leaP8; only the operand class (indPosIndexScaleOffset) differs
5512 %}
5513 
5514 // Load Effective Address which uses Narrow (32-bits) oop; only selected with UseCompressedOops and a non-zero narrow oop shift
5515 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5516 %{
5517   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
5518   match(Set dst mem);
5519 
5520   ins_cost(110);
5521   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5522   opcode(0x8D);
5523   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5524   ins_pipe(ialu_reg_reg_fat);
5525 %}
5526 
5527 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5528 %{
5529   predicate(Universe::narrow_oop_shift() == 0);
5530   match(Set dst mem);
5531 
5532   ins_cost(110); // XXX
5533   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5534   opcode(0x8D);
5535   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5536   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5537 %}
5538 
5539 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5540 %{
5541   predicate(Universe::narrow_oop_shift() == 0);
5542   match(Set dst mem);
5543 
5544   ins_cost(110);
5545   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5546   opcode(0x8D);
5547   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5548   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5549 %}
5550 
5551 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5552 %{
5553   predicate(Universe::narrow_oop_shift() == 0);
5554   match(Set dst mem);
5555 
5556   ins_cost(110);
5557   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5558   opcode(0x8D);
5559   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5560   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5561 %}
5562 
5563 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5564 %{
5565   predicate(Universe::narrow_oop_shift() == 0);
5566   match(Set dst mem);
5567 
5568   ins_cost(110);
5569   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5570   opcode(0x8D);
5571   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5572   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5573 %}
5574 
5575 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5576 %{
5577   predicate(Universe::narrow_oop_shift() == 0);
5578   match(Set dst mem);
5579 
5580   ins_cost(110);
5581   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5582   opcode(0x8D);
5583   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5584   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5585 %}
5586 
5587 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
5588 %{
5589   predicate(Universe::narrow_oop_shift() == 0);
5590   match(Set dst mem);
5591 
5592   ins_cost(110);
5593   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
5594   opcode(0x8D);
5595   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5596   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5597 %}
5598 
5599 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5600 %{
5601   predicate(Universe::narrow_oop_shift() == 0);
5602   match(Set dst mem);
5603 
5604   ins_cost(110);
5605   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5606   opcode(0x8D);
5607   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5608   ins_pipe(ialu_reg_reg_fat); // narrow-operand lea; only selected when Universe::narrow_oop_shift() == 0
5609 %}
5610 
5611 instruct loadConI(rRegI dst, immI src)
5612 %{
5613   match(Set dst src);
5614 
5615   format %{ "movl    $dst, $src\t# int" %}
5616   ins_encode(load_immI(dst, src));
5617   ins_pipe(ialu_reg_fat); // XXX  generic int constant load; no explicit ins_cost is given here
5618 %}
5619 
5620 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5621 %{
5622   match(Set dst src);
5623   effect(KILL cr);
5624 
5625   ins_cost(50);
5626   format %{ "xorl    $dst, $dst\t# int" %}
5627   opcode(0x33); /* + rd */
5628   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5629   ins_pipe(ialu_reg); // int zero via xor of dst with itself; xor modifies the flags, hence KILL cr
5630 %}
5631 
5632 instruct loadConL(rRegL dst, immL src)
5633 %{
5634   match(Set dst src);
5635 
5636   ins_cost(150);
5637   format %{ "movq    $dst, $src\t# long" %}
5638   ins_encode(load_immL(dst, src));
5639   ins_pipe(ialu_reg); // generic long constant load; the costliest (150) of the long-constant rules in this group
5640 %}
5641 
5642 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5643 %{
5644   match(Set dst src);
5645   effect(KILL cr);
5646 
5647   ins_cost(50);
5648   format %{ "xorl    $dst, $dst\t# long" %}
5649   opcode(0x33); /* + rd */
5650   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5651   ins_pipe(ialu_reg); // XXX  long zero uses the same 32-bit xorl encoding as loadConI0 (non-wide REX_reg_reg)
5652 %}
5653 
5654 instruct loadConUL32(rRegL dst, immUL32 src)
5655 %{
5656   match(Set dst src);
5657 
5658   ins_cost(60);
5659   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5660   ins_encode(load_immUL32(dst, src));
5661   ins_pipe(ialu_reg); // long constant of class immUL32; cost 60 is below loadConL32 (70) and loadConL (150)
5662 %}
5663 
5664 instruct loadConL32(rRegL dst, immL32 src)
5665 %{
5666   match(Set dst src);
5667 
5668   ins_cost(70);
5669   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5670   ins_encode(load_immL32(dst, src));
5671   ins_pipe(ialu_reg); // long constant of class immL32; cost 70 is below the generic loadConL (150)
5672 %}
5673 
5674 instruct loadConP(rRegP dst, immP con) %{
5675   match(Set dst con);
5676 
5677   format %{ "movq    $dst, $con\t# ptr" %}
5678   ins_encode(load_immP(dst, con));
5679   ins_pipe(ialu_reg_fat); // XXX  generic pointer constant load; no explicit ins_cost is given here
5680 %}
5681 
5682 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5683 %{
5684   match(Set dst src);
5685   effect(KILL cr);
5686 
5687   ins_cost(50);
5688   format %{ "xorl    $dst, $dst\t# ptr" %}
5689   opcode(0x33); /* + rd */
5690   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst));
5691   ins_pipe(ialu_reg); // null pointer uses the same 32-bit xorl encoding as loadConI0 (non-wide REX_reg_reg)
5692 %}
5693 
5694 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5695 %{
5696   match(Set dst src);
5697   effect(KILL cr);
5698 
5699   ins_cost(60);
5700   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5701   ins_encode(load_immP31(dst, src));
5702   ins_pipe(ialu_reg); // NOTE(review): flags are declared killed although the format shows a plain movl - confirm against load_immP31
5703 %}
5704 
5705 instruct loadConF(regF dst, immF con) %{
5706   match(Set dst con);
5707   ins_cost(125);
5708   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5709   ins_encode %{
5710     __ movflt($dst$$XMMRegister, $constantaddress($con)); // float constant is read from its constant-table address
5711   %}
5712   ins_pipe(pipe_slow);
5713 %}
5714 
5715 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5716   match(Set dst src);
5717   effect(KILL cr);
5718   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5719   ins_encode %{
5720     __ xorq($dst$$Register, $dst$$Register); // compressed NULL: dst is xor-ed with itself; format now prints the same operands
5721   %}
5722   ins_pipe(ialu_reg);
5723 %}
5724 
5725 instruct loadConN(rRegN dst, immN src) %{
       // Load a non-null compressed oop constant into a narrow register.
5726   match(Set dst src);
5727 
5728   ins_cost(125);
5729   format %{ "movl    $dst, $src\t# compressed ptr" %}
5730   ins_encode %{
5731     address con = (address)$src$$constant;
5732     if (con == NULL) {
           // A null constant is treated as a matcher bug here; presumably the
           // immN0 rule (loadConN0) is expected to take that case.
5733       ShouldNotReachHere();
5734     } else {
5735       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
5736     }
5737   %}
5738   ins_pipe(ialu_reg_fat); // XXX
5739 %}
5740 
5741 instruct loadConNKlass(rRegN dst, immNKlass src) %{
       // Load a non-null compressed klass pointer constant into a narrow register.
5742   match(Set dst src);
5743 
5744   ins_cost(125);
5745   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5746   ins_encode %{
5747     address con = (address)$src$$constant;
5748     if (con == NULL) {
           // A null klass constant is not expected to reach this rule.
5749       ShouldNotReachHere();
5750     } else {
5751       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
5752     }
5753   %}
5754   ins_pipe(ialu_reg_fat); // XXX
5755 %}
5756 
5757 instruct loadConF0(regF dst, immF0 src)
5758 %{
       // Load the immF0 float constant by xor-ing the XMM register with itself,
       // avoiding a constant-table access.
5759   match(Set dst src);
       // Cheaper than the constant-table form loadConF (125).
5760   ins_cost(100);
5761 
5762   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5763   ins_encode %{
5764     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5765   %}
5766   ins_pipe(pipe_slow);
5767 %}
5768 
5769 // Use the same format since predicate() can not be used here.
5770 instruct loadConD(regD dst, immD con) %{
       // Load an arbitrary double constant into an XMM register from the constant
       // table entry created for con.
5771   match(Set dst con);
       // Costlier than loadConD0 (100), which handles the immD0 constant.
5772   ins_cost(125);
5773   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5774   ins_encode %{
5775     __ movdbl($dst$$XMMRegister, $constantaddress($con));
5776   %}
5777   ins_pipe(pipe_slow);
5778 %}
5779 
5780 instruct loadConD0(regD dst, immD0 src)
5781 %{
       // Load the immD0 double constant by xor-ing the XMM register with itself,
       // avoiding a constant-table access.
5782   match(Set dst src);
       // Cheaper than the constant-table form loadConD (125).
5783   ins_cost(100);
5784 
5785   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5786   ins_encode %{
5787     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
5788   %}
5789   ins_pipe(pipe_slow);
5790 %}
5791 
5792 instruct loadSSI(rRegI dst, stackSlotI src)
5793 %{
       // Copy an int from a stack slot operand into an int register.
5794   match(Set dst src);
5795 
5796   ins_cost(125);
5797   format %{ "movl    $dst, $src\t# int stk" %}
5798   opcode(0x8B);
       // Non-wide REX variant: the format shows a 32-bit movl.
5799   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
5800   ins_pipe(ialu_reg_mem);
5801 %}
5802 
5803 instruct loadSSL(rRegL dst, stackSlotL src)
5804 %{
       // Copy a long from a stack slot operand into a long register.
5805   match(Set dst src);
5806 
5807   ins_cost(125);
5808   format %{ "movq    $dst, $src\t# long stk" %}
5809   opcode(0x8B);
       // Same opcode as loadSSI; the _wide REX enc_class is used for the movq form.
5810   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5811   ins_pipe(ialu_reg_mem);
5812 %}
5813 
5814 instruct loadSSP(rRegP dst, stackSlotP src)
5815 %{
       // Copy a pointer from a stack slot operand into a pointer register.
5816   match(Set dst src);
5817 
5818   ins_cost(125);
5819   format %{ "movq    $dst, $src\t# ptr stk" %}
5820   opcode(0x8B);
       // Encoded exactly like loadSSL: wide REX, opcode 0x8B, reg/mem ModRM.
5821   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5822   ins_pipe(ialu_reg_mem);
5823 %}
5824 
5825 instruct loadSSF(regF dst, stackSlotF src)
5826 %{
       // Copy a float from a stack slot operand into an XMM register.
5827   match(Set dst src);
5828 
5829   ins_cost(125);
5830   format %{ "movss   $dst, $src\t# float stk" %}
5831   ins_encode %{
         // The slot is addressed as rsp plus the operand's displacement.
5832     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5833   %}
5834   ins_pipe(pipe_slow); // XXX
5835 %}
5836 
5837 // Use the same format since predicate() can not be used here.
5838 instruct loadSSD(regD dst, stackSlotD src)
5839 %{
       // Copy a double from a stack slot operand into an XMM register.
5840   match(Set dst src);
5841 
5842   ins_cost(125);
5843   format %{ "movsd   $dst, $src\t# double stk" %}
5844   ins_encode  %{
         // The slot is addressed as rsp plus the operand's displacement.
5845     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5846   %}
5847   ins_pipe(pipe_slow); // XXX
5848 %}
5849 
5850 // Prefetch instructions for allocation.
5851 // Must be safe to execute with invalid address (cannot fault).
5852 
5853 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant emitting prefetchw; selected when
       // AllocatePrefetchInstr is 3. The sibling rules cover values 0, 1 and 2.
5854   predicate(AllocatePrefetchInstr==3);
5855   match(PrefetchAllocation mem);
5856   ins_cost(125);
5857 
5858   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5859   ins_encode %{
5860     __ prefetchw($mem$$Address);
5861   %}
5862   ins_pipe(ialu_mem);
5863 %}
5864 
5865 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant emitting prefetchnta; selected when
       // AllocatePrefetchInstr is 0.
5866   predicate(AllocatePrefetchInstr==0);
5867   match(PrefetchAllocation mem);
5868   ins_cost(125);
5869 
5870   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5871   ins_encode %{
5872     __ prefetchnta($mem$$Address);
5873   %}
5874   ins_pipe(ialu_mem);
5875 %}
5876 
5877 instruct prefetchAllocT0( memory mem ) %{
       // PrefetchAllocation variant emitting prefetcht0; selected when
       // AllocatePrefetchInstr is 1.
5878   predicate(AllocatePrefetchInstr==1);
5879   match(PrefetchAllocation mem);
5880   ins_cost(125);
5881 
5882   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5883   ins_encode %{
5884     __ prefetcht0($mem$$Address);
5885   %}
5886   ins_pipe(ialu_mem);
5887 %}
5888 
5889 instruct prefetchAllocT2( memory mem ) %{
       // PrefetchAllocation variant emitting prefetcht2; selected when
       // AllocatePrefetchInstr is 2.
5890   predicate(AllocatePrefetchInstr==2);
5891   match(PrefetchAllocation mem);
5892   ins_cost(125);
5893 
5894   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5895   ins_encode %{
5896     __ prefetcht2($mem$$Address);
5897   %}
5898   ins_pipe(ialu_mem);
5899 %}
5900 
5901 //----------Store Instructions-------------------------------------------------
5902 
5903 // Store Byte
5904 instruct storeB(memory mem, rRegI src)
5905 %{
       // Store the low byte of an int register to memory (StoreB).
5906   match(Set mem (StoreB mem src));
5907 
5908   ins_cost(125); // XXX
5909   format %{ "movb    $mem, $src\t# byte" %}
5910   opcode(0x88);
       // Uses the byte-register REX enc_class (REX_breg_mem) rather than the
       // plain REX_reg_mem used by the wider stores.
5911   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5912   ins_pipe(ialu_mem_reg);
5913 %}
5914 
5915 // Store Char/Short
5916 instruct storeC(memory mem, rRegI src)
5917 %{
       // Store the low 16 bits of an int register to memory (StoreC).
5918   match(Set mem (StoreC mem src));
5919 
5920   ins_cost(125); // XXX
5921   format %{ "movw    $mem, $src\t# char/short" %}
5922   opcode(0x89);
       // Same opcode as storeI; the leading SizePrefix selects the 16-bit form.
5923   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5924   ins_pipe(ialu_mem_reg);
5925 %}
5926 
5927 // Store Integer
5928 instruct storeI(memory mem, rRegI src)
5929 %{
       // Store an int register to memory (StoreI).
5930   match(Set mem (StoreI mem src));
5931 
5932   ins_cost(125); // XXX
5933   format %{ "movl    $mem, $src\t# int" %}
5934   opcode(0x89);
       // Optional REX prefix, primary opcode, then reg/mem ModRM with src as the
       // register operand.
5935   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5936   ins_pipe(ialu_mem_reg);
5937 %}
5938 
5939 // Store Long
5940 instruct storeL(memory mem, rRegL src)
5941 %{
       // Store a long register to memory (StoreL).
5942   match(Set mem (StoreL mem src));
5943 
5944   ins_cost(125); // XXX
5945   format %{ "movq    $mem, $src\t# long" %}
5946   opcode(0x89);
       // Same opcode as storeI; the _wide REX enc_class is used for the movq form.
5947   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5948   ins_pipe(ialu_mem_reg); // XXX
5949 %}
5950 
5951 // Store Pointer
5952 instruct storeP(memory mem, any_RegP src)
5953 %{
       // Store a pointer register to memory (StoreP). The source operand class
       // is any_RegP rather than rRegP.
5954   match(Set mem (StoreP mem src));
5955 
5956   ins_cost(125); // XXX
5957   format %{ "movq    $mem, $src\t# ptr" %}
5958   opcode(0x89);
       // Encoded like storeL: wide REX, opcode 0x89, reg/mem ModRM.
5959   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5960   ins_pipe(ialu_mem_reg);
5961 %}
5962 
5963 instruct storeImmP0(memory mem, immP0 zero)
5964 %{
       // Store the immP0 pointer constant by writing r12 instead of an immediate.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
5965   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5966   match(Set mem (StoreP mem zero));
5967 
       // Cheaper than the immediate form storeImmP (150).
5968   ins_cost(125); // XXX
5969   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5970   ins_encode %{
5971     __ movq($mem$$Address, r12);
5972   %}
5973   ins_pipe(ialu_mem_reg);
5974 %}
5975 
5976 // Store NULL Pointer, mark word, or other simple pointer constant.
5977 instruct storeImmP(memory mem, immP31 src)
5978 %{
       // Store a pointer constant accepted by immP31 directly from an immediate.
5979   match(Set mem (StoreP mem src));
5980 
5981   ins_cost(150); // XXX
5982   format %{ "movq    $mem, $src\t# ptr" %}
5983   opcode(0xC7); /* C7 /0 */
       // Wide REX, opcode C7 with /0 in the ModRM reg field, then a 32-bit
       // immediate taken from src.
5984   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5985   ins_pipe(ialu_mem_imm);
5986 %}
5987 
5988 // Store Compressed Pointer
5989 instruct storeN(memory mem, rRegN src)
5990 %{
       // Store a compressed oop held in a narrow register to memory (StoreN)
       // with a 32-bit move.
5991   match(Set mem (StoreN mem src));
5992 
5993   ins_cost(125); // XXX
5994   format %{ "movl    $mem, $src\t# compressed ptr" %}
5995   ins_encode %{
5996     __ movl($mem$$Address, $src$$Register);
5997   %}
5998   ins_pipe(ialu_mem_reg);
5999 %}
6000 
6001 instruct storeNKlass(memory mem, rRegN src)
6002 %{
       // Store a compressed klass pointer held in a narrow register to memory
       // (StoreNKlass) with a 32-bit move.
6003   match(Set mem (StoreNKlass mem src));
6004 
6005   ins_cost(125); // XXX
6006   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6007   ins_encode %{
6008     __ movl($mem$$Address, $src$$Register);
6009   %}
6010   ins_pipe(ialu_mem_reg);
6011 %}
6012 
6013 instruct storeImmN0(memory mem, immN0 zero)
6014 %{
       // Store the compressed NULL constant by writing the low 32 bits of r12,
       // which the format text assumes is zero when both narrow bases are NULL.
       // NOTE(review): unlike the sibling R12-based stores, this predicate does not
       // test UseCompressedOops; presumably StoreN implies it - confirm.
6015   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
6016   match(Set mem (StoreN mem zero));
6017 
       // Cheaper than the immediate form storeImmN (150).
6018   ins_cost(125); // XXX
6019   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
6020   ins_encode %{
6021     __ movl($mem$$Address, r12);
6022   %}
6023   ins_pipe(ialu_mem_reg);
6024 %}
6025 
6026 instruct storeImmN(memory mem, immN src)
6027 %{
       // Store a compressed oop constant directly to memory.
6028   match(Set mem (StoreN mem src));
6029 
6030   ins_cost(150); // XXX
6031   format %{ "movl    $mem, $src\t# compressed ptr" %}
6032   ins_encode %{
6033     address con = (address)$src$$constant;
6034     if (con == NULL) {
           // Null constant: store a literal 32-bit zero.
6035       __ movl($mem$$Address, (int32_t)0);
6036     } else {
           // Non-null constant: let the macro assembler emit the narrow oop.
6037       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
6038     }
6039   %}
6040   ins_pipe(ialu_mem_imm);
6041 %}
6042 
6043 instruct storeImmNKlass(memory mem, immNKlass src)
6044 %{
       // Store a compressed klass pointer constant directly to memory.
6045   match(Set mem (StoreNKlass mem src));
6046 
6047   ins_cost(150); // XXX
6048   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
6049   ins_encode %{
         // No null check here, unlike storeImmN.
6050     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
6051   %}
6052   ins_pipe(ialu_mem_imm);
6053 %}
6054 
6055 // Store Integer Immediate
6056 instruct storeImmI0(memory mem, immI0 zero)
6057 %{
       // Store int zero by writing the low 32 bits of r12 instead of an immediate.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6058   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6059   match(Set mem (StoreI mem zero));
6060 
       // Cheaper than the immediate form storeImmI (150).
6061   ins_cost(125); // XXX
6062   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
6063   ins_encode %{
6064     __ movl($mem$$Address, r12);
6065   %}
6066   ins_pipe(ialu_mem_reg);
6067 %}
6068 
6069 instruct storeImmI(memory mem, immI src)
6070 %{
       // Store an int constant to memory directly from an immediate.
6071   match(Set mem (StoreI mem src));
6072 
6073   ins_cost(150);
6074   format %{ "movl    $mem, $src\t# int" %}
6075   opcode(0xC7); /* C7 /0 */
       // Optional REX, opcode C7 with /0 in the ModRM reg field, then the 32-bit
       // immediate.
6076   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6077   ins_pipe(ialu_mem_imm);
6078 %}
6079 
6080 // Store Long Immediate
6081 instruct storeImmL0(memory mem, immL0 zero)
6082 %{
       // Store long zero by writing r12 instead of an immediate.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6083   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6084   match(Set mem (StoreL mem zero));
6085 
       // Cheaper than the immediate form storeImmL (150).
6086   ins_cost(125); // XXX
6087   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
6088   ins_encode %{
6089     __ movq($mem$$Address, r12);
6090   %}
6091   ins_pipe(ialu_mem_reg);
6092 %}
6093 
6094 instruct storeImmL(memory mem, immL32 src)
6095 %{
       // Store a long constant accepted by immL32 to memory from an immediate.
6096   match(Set mem (StoreL mem src));
6097 
6098   ins_cost(150);
6099   format %{ "movq    $mem, $src\t# long" %}
6100   opcode(0xC7); /* C7 /0 */
       // Wide REX plus C7 /0; only a 32-bit immediate is emitted (Con32).
6101   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6102   ins_pipe(ialu_mem_imm);
6103 %}
6104 
6105 // Store Short/Char Immediate
6106 instruct storeImmC0(memory mem, immI0 zero)
6107 %{
       // Store a zero short/char by writing the low 16 bits of r12.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6108   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6109   match(Set mem (StoreC mem zero));
6110 
       // Cheaper than the immediate form storeImmI16 (150).
6111   ins_cost(125); // XXX
6112   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6113   ins_encode %{
6114     __ movw($mem$$Address, r12);
6115   %}
6116   ins_pipe(ialu_mem_reg);
6117 %}
6118 
6119 instruct storeImmI16(memory mem, immI16 src)
6120 %{
       // Store a 16-bit constant to memory (StoreC) from an immediate. Gated on
       // the UseStoreImmI16 flag.
6121   predicate(UseStoreImmI16);
6122   match(Set mem (StoreC mem src));
6123 
6124   ins_cost(150);
6125   format %{ "movw    $mem, $src\t# short/char" %}
6126   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
       // SizePrefix selects the 16-bit form; the immediate is emitted as 16 bits.
6127   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6128   ins_pipe(ialu_mem_imm);
6129 %}
6130 
6131 // Store Byte Immediate
6132 instruct storeImmB0(memory mem, immI0 zero)
6133 %{
       // Store a zero byte by writing the low byte of r12 instead of an immediate.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6134   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6135   match(Set mem (StoreB mem zero));
6136 
       // Cheaper than the immediate form storeImmB (150).
6137   ins_cost(125); // XXX
       // Annotated as a byte store to agree with the matched StoreB and the movb
       // that is emitted.
6138   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6139   ins_encode %{
6140     __ movb($mem$$Address, r12);
6141   %}
6142   ins_pipe(ialu_mem_reg);
6143 %}
6144 
6145 instruct storeImmB(memory mem, immI8 src)
6146 %{
       // Store an 8-bit constant to memory (StoreB) from an immediate.
6147   match(Set mem (StoreB mem src));
6148 
6149   ins_cost(150); // XXX
6150   format %{ "movb    $mem, $src\t# byte" %}
6151   opcode(0xC6); /* C6 /0 */
       // The immediate is emitted via the Con8or32 enc_class (defined elsewhere).
6152   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6153   ins_pipe(ialu_mem_imm);
6154 %}
6155 
6156 // Store CMS card-mark Immediate
6157 instruct storeImmCM0_reg(memory mem, immI0 zero)
6158 %{
       // Card-mark store of zero (StoreCM) done by writing the low byte of r12.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6159   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6160   match(Set mem (StoreCM mem zero));
6161 
       // Cheaper than the immediate form storeImmCM0 (150).
6162   ins_cost(125); // XXX
6163   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6164   ins_encode %{
6165     __ movb($mem$$Address, r12);
6166   %}
6167   ins_pipe(ialu_mem_reg);
6168 %}
6169 
6170 instruct storeImmCM0(memory mem, immI0 src)
6171 %{
       // Card-mark store of zero (StoreCM) from an immediate; unconditional
       // fallback for the r12-based storeImmCM0_reg.
6172   match(Set mem (StoreCM mem src));
6173 
6174   ins_cost(150); // XXX
6175   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6176   opcode(0xC6); /* C6 /0 */
       // Same byte-immediate encoding as storeImmB.
6177   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6178   ins_pipe(ialu_mem_imm);
6179 %}
6180 
6181 // Store Float
6182 instruct storeF(memory mem, regF src)
6183 %{
       // Store a float from an XMM register to memory (StoreF).
6184   match(Set mem (StoreF mem src));
6185 
       // Costlier than the constant forms storeF0 (25) and storeF_imm (50).
6186   ins_cost(95); // XXX
6187   format %{ "movss   $mem, $src\t# float" %}
6188   ins_encode %{
6189     __ movflt($mem$$Address, $src$$XMMRegister);
6190   %}
6191   ins_pipe(pipe_slow); // XXX
6192 %}
6193 
6194 // Store immediate Float value (it is faster than store from XMM register)
6195 instruct storeF0(memory mem, immF0 zero)
6196 %{
       // Store the immF0 float constant by writing the low 32 bits of r12.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6197   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6198   match(Set mem (StoreF mem zero));
6199 
       // Lowest cost of the StoreF rules (storeF_imm is 50, storeF is 95).
6200   ins_cost(25); // XXX
6201   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6202   ins_encode %{
6203     __ movl($mem$$Address, r12);
6204   %}
6205   ins_pipe(ialu_mem_reg);
6206 %}
6207 
6208 instruct storeF_imm(memory mem, immF src)
6209 %{
       // Store a float constant to memory as a 32-bit integer immediate, without
       // going through an XMM register.
6210   match(Set mem (StoreF mem src));
6211 
       // Cheaper than the register form storeF (95).
6212   ins_cost(50);
6213   format %{ "movl    $mem, $src\t# float" %}
6214   opcode(0xC7); /* C7 /0 */
       // The immediate comes from the Con32F_as_bits enc_class (defined elsewhere).
6215   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6216   ins_pipe(ialu_mem_imm);
6217 %}
6218 
6219 // Store Double
6220 instruct storeD(memory mem, regD src)
6221 %{
       // Store a double from an XMM register to memory (StoreD).
6222   match(Set mem (StoreD mem src));
6223 
       // Costlier than the zero-constant forms storeD0 (25) and storeD0_imm (50).
6224   ins_cost(95); // XXX
6225   format %{ "movsd   $mem, $src\t# double" %}
6226   ins_encode %{
6227     __ movdbl($mem$$Address, $src$$XMMRegister);
6228   %}
6229   ins_pipe(pipe_slow); // XXX
6230 %}
6231 
6232 // Store immediate double 0.0 (it is faster than store from XMM register)
6233 instruct storeD0_imm(memory mem, immD0 src)
6234 %{
       // Store the immD0 double constant with a 64-bit store of a 32-bit immediate.
       // Enabled when compressed oops are off or the narrow oop base is non-NULL.
       // NOTE(review): this is not the exact complement of storeD0's predicate,
       // which additionally requires a NULL narrow klass base; with compressed
       // oops on, a NULL oop base and a non-NULL klass base, neither rule is
       // enabled and the register form storeD is the only remaining match.
6235   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
6236   match(Set mem (StoreD mem src));
6237 
6238   ins_cost(50);
6239   format %{ "movq    $mem, $src\t# double 0." %}
6240   opcode(0xC7); /* C7 /0 */
6241   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6242   ins_pipe(ialu_mem_imm);
6243 %}
6244 
6245 instruct storeD0(memory mem, immD0 zero)
6246 %{
       // Store the immD0 double constant by writing r12.
       // Only enabled with compressed oops and NULL narrow oop/klass bases; the
       // format text records the assumption that r12 (heap base) is then zero.
6247   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6248   match(Set mem (StoreD mem zero));
6249 
       // Lowest cost of the StoreD rules (storeD0_imm is 50, storeD is 95).
6250   ins_cost(25); // XXX
6251   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6252   ins_encode %{
6253     __ movq($mem$$Address, r12);
6254   %}
6255   ins_pipe(ialu_mem_reg);
6256 %}
6257 
6258 instruct storeSSI(stackSlotI dst, rRegI src)
6259 %{
       // Copy an int register into a stack slot operand.
6260   match(Set dst src);
6261 
6262   ins_cost(100);
6263   format %{ "movl    $dst, $src\t# int stk" %}
6264   opcode(0x89);
       // Store form: src is the register operand, dst the memory operand.
6265   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6266   ins_pipe( ialu_mem_reg );
6267 %}
6268 
6269 instruct storeSSL(stackSlotL dst, rRegL src)
6270 %{
       // Copy a long register into a stack slot operand.
6271   match(Set dst src);
6272 
6273   ins_cost(100);
6274   format %{ "movq    $dst, $src\t# long stk" %}
6275   opcode(0x89);
       // Same opcode as storeSSI; the _wide REX enc_class is used for the movq form.
6276   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6277   ins_pipe(ialu_mem_reg);
6278 %}
6279 
6280 instruct storeSSP(stackSlotP dst, rRegP src)
6281 %{
       // Copy a pointer register into a stack slot operand.
6282   match(Set dst src);
6283 
6284   ins_cost(100);
6285   format %{ "movq    $dst, $src\t# ptr stk" %}
6286   opcode(0x89);
       // Encoded exactly like storeSSL.
6287   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6288   ins_pipe(ialu_mem_reg);
6289 %}
6290 
6291 instruct storeSSF(stackSlotF dst, regF src)
6292 %{
       // Copy a float from an XMM register into a stack slot operand.
6293   match(Set dst src);
6294 
6295   ins_cost(95); // XXX
6296   format %{ "movss   $dst, $src\t# float stk" %}
6297   ins_encode %{
         // The slot is addressed as rsp plus the operand's displacement.
6298     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6299   %}
6300   ins_pipe(pipe_slow); // XXX
6301 %}
6302 
6303 instruct storeSSD(stackSlotD dst, regD src)
6304 %{
       // Copy a double from an XMM register into a stack slot operand.
6305   match(Set dst src);
6306 
6307   ins_cost(95); // XXX
6308   format %{ "movsd   $dst, $src\t# double stk" %}
6309   ins_encode %{
         // The slot is addressed as rsp plus the operand's displacement.
6310     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6311   %}
6312   ins_pipe(pipe_slow); // XXX
6313 %}
6314 
6315 //----------BSWAP Instructions-------------------------------------------------
6316 instruct bytes_reverse_int(rRegI dst) %{
       // In-place byte reversal of an int register (ReverseBytesI); dst is both
       // input and output of the match rule.
6317   match(Set dst (ReverseBytesI dst));
6318 
6319   format %{ "bswapl  $dst" %}
6320   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
       // Optional REX, the primary opcode byte, then the secondary opcode combined
       // with the register by the opc2_reg enc_class (defined elsewhere).
6321   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6322   ins_pipe( ialu_reg );
6323 %}
6324 
6325 instruct bytes_reverse_long(rRegL dst) %{
       // In-place byte reversal of a long register (ReverseBytesL).
6326   match(Set dst (ReverseBytesL dst));
6327 
6328   format %{ "bswapq  $dst" %}
6329   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
       // Same opcodes as bytes_reverse_int; the _wide REX enc_class is used for
       // the bswapq form.
6330   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6331   ins_pipe( ialu_reg);
6332 %}
6333 
6334 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
       // In-place byte reversal of an unsigned 16-bit value (ReverseBytesUS).
6335   match(Set dst (ReverseBytesUS dst));
       // The shift writes the condition flags.
6336   effect(KILL cr);
6337 
6338   format %{ "bswapl  $dst\n\t"
6339             "shrl    $dst,16\n\t" %}
6340   ins_encode %{
         // Reverse all four bytes, then move the swapped pair from the upper half
         // down with a logical shift, which fills the upper bits with zeros.
6341     __ bswapl($dst$$Register);
6342     __ shrl($dst$$Register, 16);
6343   %}
6344   ins_pipe( ialu_reg );
6345 %}
6346 
6347 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
       // In-place byte reversal of a signed 16-bit value (ReverseBytesS).
6348   match(Set dst (ReverseBytesS dst));
       // The shift writes the condition flags.
6349   effect(KILL cr);
6350 
       // The printed mnemonic carries the l size suffix to agree with the emitted
       // sarl and with the sibling rule's shrl.
6351   format %{ "bswapl  $dst\n\t"
6352             "sarl    $dst,16\n\t" %}
6353   ins_encode %{
         // Reverse all four bytes, then move the swapped pair from the upper half
         // down with an arithmetic shift, which sign-extends the result.
6354     __ bswapl($dst$$Register);
6355     __ sarl($dst$$Register, 16);
6356   %}
6357   ins_pipe( ialu_reg );
6358 %}
6359 
6360 //---------- Zeros Count Instructions ------------------------------------------
6361 
6362 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI using a single lzcntl; selected when
       // UseCountLeadingZerosInstruction is set (the _bsr rule covers the
       // opposite case).
6363   predicate(UseCountLeadingZerosInstruction);
6364   match(Set dst (CountLeadingZerosI src));
6365   effect(KILL cr);
6366 
6367   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6368   ins_encode %{
6369     __ lzcntl($dst$$Register, $src$$Register);
6370   %}
6371   ins_pipe(ialu_reg);
6372 %}
6373 
6374 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountLeadingZerosI fallback built from bsrl; selected when
       // UseCountLeadingZerosInstruction is not set.
6375   predicate(!UseCountLeadingZerosInstruction);
6376   match(Set dst (CountLeadingZerosI src));
6377   effect(KILL cr);
6378 
6379   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6380             "jnz     skip\n\t"
6381             "movl    $dst, -1\n"
6382       "skip:\n\t"
6383             "negl    $dst\n\t"
6384             "addl    $dst, 31" %}
6385   ins_encode %{
6386     Register Rdst = $dst$$Register;
6387     Register Rsrc = $src$$Register;
6388     Label skip;
         // bsrl yields the index of the highest set bit; the branch skips the
         // fix-up when the zero flag is clear.
6389     __ bsrl(Rdst, Rsrc);
6390     __ jccb(Assembler::notZero, skip);
         // Fall-through (zero flag set): use -1 as the index so the arithmetic
         // below produces 32.
6391     __ movl(Rdst, -1);
6392     __ bind(skip);
         // result = (BitsPerInt - 1) - index
6393     __ negl(Rdst);
6394     __ addl(Rdst, BitsPerInt - 1);
6395   %}
6396   ins_pipe(ialu_reg);
6397 %}
6398 
6399 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL using a single lzcntq; the result goes to an int
       // register. Selected when UseCountLeadingZerosInstruction is set.
6400   predicate(UseCountLeadingZerosInstruction);
6401   match(Set dst (CountLeadingZerosL src));
6402   effect(KILL cr);
6403 
6404   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6405   ins_encode %{
6406     __ lzcntq($dst$$Register, $src$$Register);
6407   %}
6408   ins_pipe(ialu_reg);
6409 %}
6410 
6411 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountLeadingZerosL fallback built from bsrq; selected when
       // UseCountLeadingZerosInstruction is not set.
6412   predicate(!UseCountLeadingZerosInstruction);
6413   match(Set dst (CountLeadingZerosL src));
6414   effect(KILL cr);
6415 
6416   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6417             "jnz     skip\n\t"
6418             "movl    $dst, -1\n"
6419       "skip:\n\t"
6420             "negl    $dst\n\t"
6421             "addl    $dst, 63" %}
6422   ins_encode %{
6423     Register Rdst = $dst$$Register;
6424     Register Rsrc = $src$$Register;
6425     Label skip;
         // bsrq yields the index of the highest set bit; the branch skips the
         // fix-up when the zero flag is clear.
6426     __ bsrq(Rdst, Rsrc);
6427     __ jccb(Assembler::notZero, skip);
         // Fall-through (zero flag set): use -1 as the index so the arithmetic
         // below produces 64.
6428     __ movl(Rdst, -1);
6429     __ bind(skip);
         // result = (BitsPerLong - 1) - index, computed with 32-bit operations.
6430     __ negl(Rdst);
6431     __ addl(Rdst, BitsPerLong - 1);
6432   %}
6433   ins_pipe(ialu_reg);
6434 %}
6435 
6436 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI using a single tzcntl; selected when
       // UseCountTrailingZerosInstruction is set (the _bsf rule covers the
       // opposite case).
6437   predicate(UseCountTrailingZerosInstruction);
6438   match(Set dst (CountTrailingZerosI src));
6439   effect(KILL cr);
6440 
6441   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
6442   ins_encode %{
6443     __ tzcntl($dst$$Register, $src$$Register);
6444   %}
6445   ins_pipe(ialu_reg);
6446 %}
6447 
6448 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
       // CountTrailingZerosI fallback built from bsfl; selected when
       // UseCountTrailingZerosInstruction is not set.
6449   predicate(!UseCountTrailingZerosInstruction);
6450   match(Set dst (CountTrailingZerosI src));
6451   effect(KILL cr);
6452 
6453   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6454             "jnz     done\n\t"
6455             "movl    $dst, 32\n"
6456       "done:" %}
6457   ins_encode %{
6458     Register Rdst = $dst$$Register;
6459     Label done;
         // bsfl yields the index of the lowest set bit, which is already the
         // answer when the zero flag is clear.
6460     __ bsfl(Rdst, $src$$Register);
6461     __ jccb(Assembler::notZero, done);
         // Fall-through (zero flag set): the result is the full bit width.
6462     __ movl(Rdst, BitsPerInt);
6463     __ bind(done);
6464   %}
6465   ins_pipe(ialu_reg);
6466 %}
6467 
6468 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL using a single tzcntq; the result goes to an int
       // register. Selected when UseCountTrailingZerosInstruction is set.
6469   predicate(UseCountTrailingZerosInstruction);
6470   match(Set dst (CountTrailingZerosL src));
6471   effect(KILL cr);
6472 
6473   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
6474   ins_encode %{
6475     __ tzcntq($dst$$Register, $src$$Register);
6476   %}
6477   ins_pipe(ialu_reg);
6478 %}
6479 
6480 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
       // CountTrailingZerosL fallback built from bsfq; selected when
       // UseCountTrailingZerosInstruction is not set.
6481   predicate(!UseCountTrailingZerosInstruction);
6482   match(Set dst (CountTrailingZerosL src));
6483   effect(KILL cr);
6484 
6485   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6486             "jnz     done\n\t"
6487             "movl    $dst, 64\n"
6488       "done:" %}
6489   ins_encode %{
6490     Register Rdst = $dst$$Register;
6491     Label done;
         // bsfq yields the index of the lowest set bit, which is already the
         // answer when the zero flag is clear.
6492     __ bsfq(Rdst, $src$$Register);
6493     __ jccb(Assembler::notZero, done);
         // Fall-through (zero flag set): the result is the full bit width.
6494     __ movl(Rdst, BitsPerLong);
6495     __ bind(done);
6496   %}
6497   ins_pipe(ialu_reg);
6498 %}
6499 
6500 
6501 //---------- Population Count Instructions -------------------------------------
6502 
6503 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
       // PopCountI of an int register via popcntl; only available when
       // UsePopCountInstruction is set.
6504   predicate(UsePopCountInstruction);
6505   match(Set dst (PopCountI src));
6506   effect(KILL cr);
6507 
6508   format %{ "popcnt  $dst, $src" %}
6509   ins_encode %{
6510     __ popcntl($dst$$Register, $src$$Register);
6511   %}
6512   ins_pipe(ialu_reg);
6513 %}
6514 
6515 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountI with the LoadI folded into the instruction's memory operand;
       // only available when UsePopCountInstruction is set.
6516   predicate(UsePopCountInstruction);
6517   match(Set dst (PopCountI (LoadI mem)));
6518   effect(KILL cr);
6519 
6520   format %{ "popcnt  $dst, $mem" %}
6521   ins_encode %{
6522     __ popcntl($dst$$Register, $mem$$Address);
6523   %}
6524   ins_pipe(ialu_reg);
6525 %}
6526 
6527 // Note: Long.bitCount(long) returns an int.
6528 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
       // PopCountL of a long register via popcntq, producing an int-register
       // result; only available when UsePopCountInstruction is set.
6529   predicate(UsePopCountInstruction);
6530   match(Set dst (PopCountL src));
6531   effect(KILL cr);
6532 
6533   format %{ "popcnt  $dst, $src" %}
6534   ins_encode %{
6535     __ popcntq($dst$$Register, $src$$Register);
6536   %}
6537   ins_pipe(ialu_reg);
6538 %}
6539 
6540 // Note: Long.bitCount(long) returns an int.
6541 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
       // PopCountL with the LoadL folded into the instruction's memory operand,
       // producing an int-register result; only available when
       // UsePopCountInstruction is set.
6542   predicate(UsePopCountInstruction);
6543   match(Set dst (PopCountL (LoadL mem)));
6544   effect(KILL cr);
6545 
6546   format %{ "popcnt  $dst, $mem" %}
6547   ins_encode %{
6548     __ popcntq($dst$$Register, $mem$$Address);
6549   %}
6550   ins_pipe(ialu_reg);
6551 %}
6552 
6553 
6554 //----------MemBar Instructions-----------------------------------------------
6555 // Memory barrier flavors
6556 
6557 instruct membar_acquire()
6558 %{
       // Acquire barrier: matches both MemBarAcquire and LoadFence and emits no
       // code at all.
6559   match(MemBarAcquire);
6560   match(LoadFence);
6561   ins_cost(0);
6562 
       // Zero-length instruction with an empty encoding.
6563   size(0);
6564   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6565   ins_encode();
6566   ins_pipe(empty);
6567 %}
6568 
6569 instruct membar_acquire_lock()
6570 %{
       // Acquire barrier tied to locking (MemBarAcquireLock); emits no code. Per
       // the format text, the preceding CMPXCHG in FastLock already orders memory.
6571   match(MemBarAcquireLock);
6572   ins_cost(0);
6573 
6574   size(0);
6575   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6576   ins_encode();
6577   ins_pipe(empty);
6578 %}
6579 
6580 instruct membar_release()
6581 %{
       // Release barrier: matches both MemBarRelease and StoreFence and emits no
       // code at all.
6582   match(MemBarRelease);
6583   match(StoreFence);
6584   ins_cost(0);
6585 
       // Zero-length instruction with an empty encoding.
6586   size(0);
6587   format %{ "MEMBAR-release ! (empty encoding)" %}
6588   ins_encode();
6589   ins_pipe(empty);
6590 %}
6591 
6592 instruct membar_release_lock()
6593 %{
       // Release barrier tied to unlocking (MemBarReleaseLock); emits no code. Per
       // the format text, the FastUnlock that follows makes it unnecessary.
6594   match(MemBarReleaseLock);
6595   ins_cost(0);
6596 
6597   size(0);
6598   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6599   ins_encode();
6600   ins_pipe(empty);
6601 %}
6602 
6603 instruct membar_volatile(rFlagsReg cr) %{
       // Full MemBarVolatile barrier: the only barrier in this group that emits
       // code, via a StoreLoad membar from the macro assembler.
6604   match(MemBarVolatile);
       // The format shows a locked addl, which writes the condition flags.
6605   effect(KILL cr);
       // High cost so the zero-cost unnecessary_membar_volatile rule is
       // preferred whenever its predicate holds.
6606   ins_cost(400);
6607 
6608   format %{
6609     $$template
6610     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6611   %}
6612   ins_encode %{
6613     __ membar(Assembler::StoreLoad);
6614   %}
6615   ins_pipe(pipe_slow);
6616 %}
6617 
6618 instruct unnecessary_membar_volatile()
6619 %{
       // MemBarVolatile that can be dropped: when
       // Matcher::post_store_load_barrier reports true for the node, nothing is
       // emitted. Cost 0 versus 400 for the real barrier rule membar_volatile.
6620   match(MemBarVolatile);
6621   predicate(Matcher::post_store_load_barrier(n));
6622   ins_cost(0);
6623 
6624   size(0);
6625   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6626   ins_encode();
6627   ins_pipe(empty);
6628 %}
6629 
6630 instruct membar_storestore() %{
       // StoreStore barrier (MemBarStoreStore); emits no code.
6631   match(MemBarStoreStore);
6632   ins_cost(0);
6633 
       // Zero-length instruction with an empty encoding.
6634   size(0);
6635   format %{ "MEMBAR-storestore (empty encoding)" %}
6636   ins_encode( );
6637   ins_pipe(empty);
6638 %}
6639 
6640 //----------Move Instructions--------------------------------------------------
6641 
6642 instruct castX2P(rRegP dst, rRegL src)
6643 %{
       // Reinterpret a long register as a pointer (CastX2P); at most a plain
       // register move.
6644   match(Set dst (CastX2P src));
6645 
6646   format %{ "movq    $dst, $src\t# long->ptr" %}
6647   ins_encode %{
         // Nothing is emitted when source and destination were assigned the same
         // register.
6648     if ($dst$$reg != $src$$reg) {
6649       __ movptr($dst$$Register, $src$$Register);
6650     }
6651   %}
6652   ins_pipe(ialu_reg_reg); // XXX
6653 %}
6654 
6655 instruct castN2X(rRegL dst, rRegN src)
6656 %{
       // CastP2X variant whose source operand is a narrow (rRegN) register; the
       // value is copied with a plain register move.
       // NOTE(review): the format text is identical to castP2X's ("ptr -> long")
       // even though src is a narrow register here, so the two rules cannot be
       // told apart in printed assembly.
6657   match(Set dst (CastP2X src));
6658 
6659   format %{ "movq    $dst, $src\t# ptr -> long" %}
6660   ins_encode %{
         // Nothing is emitted when source and destination were assigned the same
         // register.
6661     if ($dst$$reg != $src$$reg) {
6662       __ movptr($dst$$Register, $src$$Register);
6663     }
6664   %}
6665   ins_pipe(ialu_reg_reg); // XXX
6666 %}
6667 
6668 instruct castP2X(rRegL dst, rRegP src)
6669 %{
       // Reinterpret a pointer register as a long (CastP2X); at most a plain
       // register move.
6670   match(Set dst (CastP2X src));
6671 
6672   format %{ "movq    $dst, $src\t# ptr -> long" %}
6673   ins_encode %{
         // Nothing is emitted when source and destination were assigned the same
         // register.
6674     if ($dst$$reg != $src$$reg) {
6675       __ movptr($dst$$Register, $src$$Register);
6676     }
6677   %}
6678   ins_pipe(ialu_reg_reg); // XXX
6679 %}
6680 
6681 // Convert oop into int for vectors alignment masking
6682 instruct convP2I(rRegI dst, rRegP src)
6683 %{
       // Collapse the two-node pattern ConvL2I(CastP2X(ptr)) into one 32-bit
       // move of the pointer register's low half.
6684   match(Set dst (ConvL2I (CastP2X src)));
6685 
6686   format %{ "movl    $dst, $src\t# ptr -> int" %}
6687   ins_encode %{
         // Unlike the cast rules above, the move is emitted unconditionally.
6688     __ movl($dst$$Register, $src$$Register);
6689   %}
6690   ins_pipe(ialu_reg_reg); // XXX
6691 %}
6692 
6693 // Convert compressed oop into int for vectors alignment masking
6694 // in case of 32bit oops (heap < 4Gb).
6695 instruct convN2I(rRegI dst, rRegN src)
6696 %{
       // Collapse ConvL2I(CastP2X(DecodeN(narrow))) into one 32-bit move of the
       // narrow register, skipping the decode. Only enabled when the narrow oop
       // shift is zero.
6697   predicate(Universe::narrow_oop_shift() == 0);
6698   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6699 
6700   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6701   ins_encode %{
6702     __ movl($dst$$Register, $src$$Register);
6703   %}
6704   ins_pipe(ialu_reg_reg); // XXX
6705 %}
6706 
6707 // Convert oop pointer into compressed form
6708 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compress an oop (EncodeP) whose type is not known to be NotNull; the
       // NotNull case is taken by encodeHeapOop_not_null.
6709   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6710   match(Set dst (EncodeP src));
6711   effect(KILL cr);
6712   format %{ "encode_heap_oop $dst,$src" %}
6713   ins_encode %{
6714     Register s = $src$$Register;
6715     Register d = $dst$$Register;
         // The one-register encode_heap_oop form is used here, so the value is
         // copied into the destination first when the registers differ.
6716     if (s != d) {
6717       __ movq(d, s);
6718     }
6719     __ encode_heap_oop(d);
6720   %}
6721   ins_pipe(ialu_reg_long);
6722 %}
6723 
6724 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compress an oop (EncodeP) whose type is known to be NotNull.
6725   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6726   match(Set dst (EncodeP src));
6727   effect(KILL cr);
6728   format %{ "encode_heap_oop_not_null $dst,$src" %}
6729   ins_encode %{
         // The two-register form takes destination and source directly, so no
         // separate move is needed.
6730     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6731   %}
6732   ins_pipe(ialu_reg_long);
6733 %}
6734 
6735 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompress a narrow oop (DecodeN) whose pointer type is neither NotNull
       // nor Constant; those two cases go to decodeHeapOop_not_null.
6736   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6737             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6738   match(Set dst (DecodeN src));
6739   effect(KILL cr);
6740   format %{ "decode_heap_oop $dst,$src" %}
6741   ins_encode %{
6742     Register s = $src$$Register;
6743     Register d = $dst$$Register;
         // The one-register decode_heap_oop form is used here, so the value is
         // copied into the destination first when the registers differ.
6744     if (s != d) {
6745       __ movq(d, s);
6746     }
6747     __ decode_heap_oop(d);
6748   %}
6749   ins_pipe(ialu_reg_long);
6750 %}
6751 
6752 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompress a narrow oop (DecodeN) whose pointer type is NotNull or
       // Constant.
6753   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6754             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6755   match(Set dst (DecodeN src));
6756   effect(KILL cr);
6757   format %{ "decode_heap_oop_not_null $dst,$src" %}
6758   ins_encode %{
6759     Register s = $src$$Register;
6760     Register d = $dst$$Register;
         // Pick the two-register macro form when the registers differ, otherwise
         // the in-place one-register form.
6761     if (s != d) {
6762       __ decode_heap_oop_not_null(d, s);
6763     } else {
6764       __ decode_heap_oop_not_null(d);
6765     }
6766   %}
6767   ins_pipe(ialu_reg_long);
6768 %}
6769 
6770 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
       // Compress a klass pointer (EncodePKlass). There is no predicate and no
       // nullable sibling rule in this chunk.
6771   match(Set dst (EncodePKlass src));
6772   effect(KILL cr);
6773   format %{ "encode_klass_not_null $dst,$src" %}
6774   ins_encode %{
6775     __ encode_klass_not_null($dst$$Register, $src$$Register);
6776   %}
6777   ins_pipe(ialu_reg_long);
6778 %}
6779 
6780 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
       // Decompress a narrow klass pointer (DecodeNKlass). There is no predicate
       // and no nullable sibling rule in this chunk.
6781   match(Set dst (DecodeNKlass src));
6782   effect(KILL cr);
6783   format %{ "decode_klass_not_null $dst,$src" %}
6784   ins_encode %{
6785     Register s = $src$$Register;
6786     Register d = $dst$$Register;
         // Pick the two-register macro form when the registers differ, otherwise
         // the in-place one-register form.
6787     if (s != d) {
6788       __ decode_klass_not_null(d, s);
6789     } else {
6790       __ decode_klass_not_null(d);
6791     }
6792   %}
6793   ins_pipe(ialu_reg_long);
6794 %}
6795 
6796 
6797 //----------Conditional Move---------------------------------------------------
6798 // Jump
6799 // dummy instruction for generating temp registers
6800 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ // Jump on (switch_val << shift)
6801   match(Jump (LShiftL switch_val shift));
6802   ins_cost(350);
6803   predicate(false); // constant-false predicate: as written, this rule can never be selected
6804   effect(TEMP dest); // dest is a scratch register, not an input or output of the matched tree
6805 
6806   format %{ "leaq    $dest, [$constantaddress]\n\t"
6807             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6808   ins_encode %{
6809     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6810     // to do that and the compiler is using that register as one it can allocate.
6811     // So we build it all by hand.
6812     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6813     // ArrayAddress dispatch(table, index);
6814     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant); // only describes [dest + switch_val << shift]; nothing is emitted yet
6815     __ lea($dest$$Register, $constantaddress); // dest is loaded here, before the jmp that uses it as base
6816     __ jmp(dispatch);
6817   %}
6818   ins_pipe(pipe_jmp);
6819 %}
6820 
6821 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ // Jump on (switch_val << shift) + offset
6822   match(Jump (AddL (LShiftL switch_val shift) offset));
6823   ins_cost(350);
6824   effect(TEMP dest); // dest is a scratch register, not an input or output of the matched tree
6825 
6826   format %{ "leaq    $dest, [$constantaddress]\n\t"
6827             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6828   ins_encode %{
6829     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6830     // to do that and the compiler is using that register as one it can allocate.
6831     // So we build it all by hand.
6832     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6833     // ArrayAddress dispatch(table, index);
6834     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant); // offset is narrowed to int and used as the displacement
6835     __ lea($dest$$Register, $constantaddress); // dest is loaded here, before the jmp that uses it as base
6836     __ jmp(dispatch);
6837   %}
6838   ins_pipe(pipe_jmp);
6839 %}
6840 
6841 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{ // Jump on an unscaled switch_val
6842   match(Jump switch_val);
6843   ins_cost(350);
6844   effect(TEMP dest); // dest is a scratch register, not an input or output of the matched tree
6845 
6846   format %{ "leaq    $dest, [$constantaddress]\n\t"
6847             "jmp     [$dest + $switch_val]\n\t" %}
6848   ins_encode %{
6849     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6850     // to do that and the compiler is using that register as one it can allocate.
6851     // So we build it all by hand.
6852     // Address index(noreg, switch_reg, Address::times_1);
6853     // ArrayAddress dispatch(table, index);
6854     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1); // index is used unscaled
6855     __ lea($dest$$Register, $constantaddress); // dest is loaded here, before the jmp that uses it as base
6856     __ jmp(dispatch);
6857   %}
6858   ins_pipe(pipe_jmp);
6859 %}
6860 
6861 // Conditional move
6862 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) // int register cmov on cmpOp/rFlagsReg ("signed" per the format string)
6863 %{
6864   match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6865 
6866   ins_cost(200); // XXX
6867   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6868   opcode(0x0F, 0x40);
6869   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // non-wide REX form, matching the cmovl mnemonic
6870   ins_pipe(pipe_cmov_reg);
6871 %}
6872 
6873 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{ // int register cmov on cmpOpU/rFlagsRegU ("unsigned" per the format string)
6874   match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6875 
6876   ins_cost(200); // XXX
6877   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6878   opcode(0x0F, 0x40);
6879   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // same encoding sequence as cmovI_reg
6880   ins_pipe(pipe_cmov_reg);
6881 %}
6882 
6883 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
6884   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6885   ins_cost(200); // same cost as cmovI_regU
6886   expand %{
6887     cmovI_regU(cop, cr, dst, src);
6888   %}
6889 %}
6890 
6891 // Conditional move
6892 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{ // int cmov with the LoadI folded into a memory operand
6893   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6894 
6895   ins_cost(250); // XXX
6896   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6897   opcode(0x0F, 0x40);
6898   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src)); // reg,mem counterpart of cmovI_reg's encoding
6899   ins_pipe(pipe_cmov_mem);
6900 %}
6901 
6902 // Conditional move
6903 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) // cmpOpU/rFlagsRegU flavour of cmovI_mem
6904 %{
6905   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6906 
6907   ins_cost(250); // XXX
6908   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6909   opcode(0x0F, 0x40);
6910   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src)); // same encoding sequence as cmovI_mem
6911   ins_pipe(pipe_cmov_mem);
6912 %}
6913 
6914 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
6915   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6916   ins_cost(250); // same cost as cmovI_memU
6917   expand %{
6918     cmovI_memU(cop, cr, dst, src);
6919   %}
6920 %}
6921 
6922 // Conditional move
6923 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) // narrow (compressed) pointer register cmov, cmpOp/rFlagsReg flavour
6924 %{
6925   match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6926 
6927   ins_cost(200); // XXX
6928   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6929   opcode(0x0F, 0x40);
6930   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // non-wide REX form, same sequence as the int cmov
6931   ins_pipe(pipe_cmov_reg);
6932 %}
6933 
6934 // Conditional move
6935 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) // cmpOpU/rFlagsRegU flavour of cmovN_reg
6936 %{
6937   match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6938 
6939   ins_cost(200); // XXX
6940   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6941   opcode(0x0F, 0x40);
6942   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src)); // same encoding sequence as cmovN_reg
6943   ins_pipe(pipe_cmov_reg);
6944 %}
6945 
6946 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
6947   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6948   ins_cost(200); // same cost as cmovN_regU
6949   expand %{
6950     cmovN_regU(cop, cr, dst, src);
6951   %}
6952 %}
6953 
6954 // Conditional move
6955 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) // pointer register cmov, cmpOp/rFlagsReg flavour
6956 %{
6957   match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6958 
6959   ins_cost(200); // XXX
6960   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6961   opcode(0x0F, 0x40);
6962   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX form, matching the cmovq mnemonic
6963   ins_pipe(pipe_cmov_reg);  // XXX
6964 %}
6965 
6966 // Conditional move
6967 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) // cmpOpU/rFlagsRegU flavour of cmovP_reg
6968 %{
6969   match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); // dst is both an input and the result
6970 
6971   ins_cost(200); // XXX
6972   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6973   opcode(0x0F, 0x40);
6974   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // same encoding sequence as cmovP_reg
6975   ins_pipe(pipe_cmov_reg); // XXX
6976 %}
6977 
6978 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
6979   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6980   ins_cost(200); // same cost as cmovP_regU
6981   expand %{
6982     cmovP_regU(cop, cr, dst, src);
6983   %}
6984 %}
6985 
6986 // DISABLED: Requires the ADLC to emit a bottom_type call that
6987 // correctly meets the two pointer arguments; one is an incoming
6988 // register but the other is a memory operand.  ALSO appears to
6989 // be buggy with implicit null checks.
6990 //
6991 //// Conditional move
6992 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6993 //%{
6994 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6995 //  ins_cost(250);
6996 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6997 //  opcode(0x0F,0x40);
6998 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6999 //  ins_pipe( pipe_cmov_mem );
7000 //%}
7001 //
7002 //// Conditional move
7003 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
7004 //%{
7005 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7006 //  ins_cost(250);
7007 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7008 //  opcode(0x0F,0x40);
7009 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
7010 //  ins_pipe( pipe_cmov_mem );
7011 //%}
7012 
7013 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) // long register cmov, cmpOp/rFlagsReg flavour
7014 %{
7015   match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7016 
7017   ins_cost(200); // XXX
7018   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7019   opcode(0x0F, 0x40);
7020   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // wide REX form, matching the cmovq mnemonic
7021   ins_pipe(pipe_cmov_reg);  // XXX
7022 %}
7023 
7024 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) // long cmov with the LoadL folded into a memory operand
7025 %{
7026   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7027 
7028   ins_cost(200); // XXX  NOTE(review): same cost as cmovL_reg, whereas cmovI_mem uses 250 against cmovI_reg's 200
7029   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
7030   opcode(0x0F, 0x40);
7031   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src)); // reg,mem counterpart of cmovL_reg's encoding
7032   ins_pipe(pipe_cmov_mem);  // XXX
7033 %}
7034 
7035 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) // cmpOpU/rFlagsRegU flavour of cmovL_reg
7036 %{
7037   match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7038 
7039   ins_cost(200); // XXX
7040   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7041   opcode(0x0F, 0x40);
7042   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src)); // same encoding sequence as cmovL_reg
7043   ins_pipe(pipe_cmov_reg); // XXX
7044 %}
7045 
7046 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
7047   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7048   ins_cost(200); // same cost as cmovL_regU
7049   expand %{
7050     cmovL_regU(cop, cr, dst, src);
7051   %}
7052 %}
7053 
7054 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) // cmpOpU/rFlagsRegU flavour of cmovL_mem
7055 %{
7056   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7057 
7058   ins_cost(200); // XXX
7059   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
7060   opcode(0x0F, 0x40);
7061   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src)); // same encoding sequence as cmovL_mem
7062   ins_pipe(pipe_cmov_mem); // XXX
7063 %}
7064 
7065 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
7066   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
7067   ins_cost(200); // same cost as cmovL_memU
7068   expand %{
7069     cmovL_memU(cop, cr, dst, src);
7070   %}
7071 %}
7072 
7073 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) // float conditional move, emitted as a short branch around a movss
7074 %{
7075   match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7076 
7077   ins_cost(200); // XXX
7078   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7079             "movss     $dst, $src\n"
7080     "skip:" %}
7081   ins_encode %{
7082     Label Lskip;
7083     // Invert sense of branch from sense of CMOV
7084     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // the inversion is done by flipping the low bit of the condition code
7085     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // reached only when the branch above is not taken
7086     __ bind(Lskip);
7087   %}
7088   ins_pipe(pipe_slow);
7089 %}
7090 
7091 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7092 // %{
7093 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
7094 
7095 //   ins_cost(200); // XXX
7096 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7097 //             "movss     $dst, $src\n"
7098 //     "skip:" %}
7099 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7100 //   ins_pipe(pipe_slow);
7101 // %}
7102 
7103 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) // cmpOpU/rFlagsRegU flavour of cmovF_reg
7104 %{
7105   match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7106 
7107   ins_cost(200); // XXX
7108   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7109             "movss     $dst, $src\n"
7110     "skip:" %}
7111   ins_encode %{
7112     Label Lskip;
7113     // Invert sense of branch from sense of CMOV
7114     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // the inversion is done by flipping the low bit of the condition code
7115     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // reached only when the branch above is not taken
7116     __ bind(Lskip);
7117   %}
7118   ins_pipe(pipe_slow);
7119 %}
7120 
7121 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
7122   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7123   ins_cost(200); // same cost as cmovF_regU
7124   expand %{
7125     cmovF_regU(cop, cr, dst, src);
7126   %}
7127 %}
7128 
7129 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) // double conditional move, emitted as a short branch around a movsd
7130 %{
7131   match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7132 
7133   ins_cost(200); // XXX
7134   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7135             "movsd     $dst, $src\n"
7136     "skip:" %}
7137   ins_encode %{
7138     Label Lskip;
7139     // Invert sense of branch from sense of CMOV
7140     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // the inversion is done by flipping the low bit of the condition code
7141     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // reached only when the branch above is not taken
7142     __ bind(Lskip);
7143   %}
7144   ins_pipe(pipe_slow);
7145 %}
7146 
7147 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) // cmpOpU/rFlagsRegU flavour of cmovD_reg
7148 %{
7149   match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); // dst is both an input and the result
7150 
7151   ins_cost(200); // XXX
7152   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7153             "movsd     $dst, $src\n"
7154     "skip:" %}
7155   ins_encode %{
7156     Label Lskip;
7157     // Invert sense of branch from sense of CMOV
7158     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // the inversion is done by flipping the low bit of the condition code
7159     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // reached only when the branch above is not taken
7160     __ bind(Lskip);
7161   %}
7162   ins_pipe(pipe_slow);
7163 %}
7164 
7165 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ // cmpOpUCF/rFlagsRegUCF flavour; has no encoding of its own
7166   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7167   ins_cost(200); // same cost as cmovD_regU
7168   expand %{
7169     cmovD_regU(cop, cr, dst, src);
7170   %}
7171 %}
7172 
7173 //----------Arithmetic Instructions--------------------------------------------
7174 //----------Addition Instructions----------------------------------------------
7175 
7176 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) // int add, register += register
7177 %{
7178   match(Set dst (AddI dst src)); // two-operand form: dst is both an input and the result
7179   effect(KILL cr); // flags register is declared clobbered
7180 
7181   format %{ "addl    $dst, $src\t# int" %}
7182   opcode(0x03);
7183   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src)); // dst is passed first to reg_reg
7184   ins_pipe(ialu_reg_reg);
7185 %}
7186 
7187 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) // int add, register += immediate
7188 %{
7189   match(Set dst (AddI dst src)); // two-operand form: dst is both an input and the result
7190   effect(KILL cr); // flags register is declared clobbered
7191 
7192   format %{ "addl    $dst, $src\t# int" %}
7193   opcode(0x81, 0x00); /* /0 id */
7194   ins_encode(OpcSErm(dst, src), Con8or32(src)); // presumably picks an 8- or 32-bit immediate form based on src — confirm in the enc_class definitions
7195   ins_pipe( ialu_reg );
7196 %}
7197 
7198 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) // int add, register += memory (LoadI folded in)
7199 %{
7200   match(Set dst (AddI dst (LoadI src)));
7201   effect(KILL cr); // flags register is declared clobbered
7202 
7203   ins_cost(125); // XXX
7204   format %{ "addl    $dst, $src\t# int" %}
7205   opcode(0x03); // same opcode byte as addI_rReg
7206   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7207   ins_pipe(ialu_reg_mem);
7208 %}
7209 
7210 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) // int add, memory += register (load/add/store on the same address)
7211 %{
7212   match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
7213   effect(KILL cr); // flags register is declared clobbered
7214 
7215   ins_cost(150); // XXX
7216   format %{ "addl    $dst, $src\t# int" %}
7217   opcode(0x01); /* Opcode 01 /r */
7218   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // register operand (src) is passed first, memory operand (dst) second
7219   ins_pipe(ialu_mem_reg);
7220 %}
7221 
7222 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) // int add, memory += immediate (load/add/store on the same address)
7223 %{
7224   match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
7225   effect(KILL cr); // flags register is declared clobbered
7226 
7227   ins_cost(125); // XXX
7228   format %{ "addl    $dst, $src\t# int" %}
7229   opcode(0x81); /* Opcode 81 /0 id */
7230   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension from the comment above
7231   ins_pipe(ialu_mem_imm);
7232 %}
7233 
7234 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) // int increment of a register
7235 %{
7236   predicate(UseIncDec); // only selectable when UseIncDec is set
7237   match(Set dst (AddI dst src)); // src is immI1; presumably the constant 1 — confirm against the operand definition
7238   effect(KILL cr); // flags register is declared clobbered
7239 
7240   format %{ "incl    $dst\t# int" %}
7241   opcode(0xFF, 0x00); // FF /0
7242   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded; only dst appears in the instruction
7243   ins_pipe(ialu_reg);
7244 %}
7245 
7246 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr) // int increment of a memory location
7247 %{
7248   predicate(UseIncDec); // only selectable when UseIncDec is set
7249   match(Set dst (StoreI dst (AddI (LoadI dst) src))); // the load and the store must name the same memory operand
7250   effect(KILL cr); // flags register is declared clobbered
7251 
7252   ins_cost(125); // XXX
7253   format %{ "incl    $dst\t# int" %}
7254   opcode(0xFF); /* Opcode FF /0 */
7255   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst)); // src is not encoded; 0x00 is the /0 opcode extension
7256   ins_pipe(ialu_mem_imm);
7257 %}
7258 
7259 // XXX why does that use AddI
7260 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) // int decrement of a register
7261 %{
7262   predicate(UseIncDec); // only selectable when UseIncDec is set
7263   match(Set dst (AddI dst src)); // src is immI_M1; presumably the constant -1, which would make this AddI a decrement — confirm against the operand definition
7264   effect(KILL cr); // flags register is declared clobbered
7265 
7266   format %{ "decl    $dst\t# int" %}
7267   opcode(0xFF, 0x01); // FF /1
7268   ins_encode(REX_reg(dst), OpcP, reg_opc(dst)); // src is not encoded; only dst appears in the instruction
7269   ins_pipe(ialu_reg);
7270 %}
7271 
7272 // XXX why does that use AddI
7273 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) // int decrement of a memory location
7274 %{
7275   predicate(UseIncDec); // only selectable when UseIncDec is set
7276   match(Set dst (StoreI dst (AddI (LoadI dst) src))); // src is immI_M1; presumably the constant -1 — confirm against the operand definition
7277   effect(KILL cr); // flags register is declared clobbered
7278 
7279   ins_cost(125); // XXX
7280   format %{ "decl    $dst\t# int" %}
7281   opcode(0xFF); /* Opcode FF /1 */
7282   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst)); // src is not encoded; 0x01 is the /1 opcode extension
7283   ins_pipe(ialu_mem_imm);
7284 %}
7285 
7286 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) // three-operand int add via lea; no flags operand and no KILL effect declared
7287 %{
7288   match(Set dst (AddI src0 src1)); // dst is not an input here, unlike the addl forms
7289 
7290   ins_cost(110);
7291   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7292   opcode(0x8D); /* 0x8D /r */
7293   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  (the leading 0x67 byte is what the format shows as "addr32")
7294   ins_pipe(ialu_reg_reg);
7295 %}
7296 
7297 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) // long add, register += register
7298 %{
7299   match(Set dst (AddL dst src)); // two-operand form: dst is both an input and the result
7300   effect(KILL cr); // flags register is declared clobbered
7301 
7302   format %{ "addq    $dst, $src\t# long" %}
7303   opcode(0x03);
7304   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // wide REX form, matching the addq mnemonic
7305   ins_pipe(ialu_reg_reg);
7306 %}
7307 
7308 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) // long add, register += immL32 immediate
7309 %{
7310   match(Set dst (AddL dst src)); // two-operand form: dst is both an input and the result
7311   effect(KILL cr); // flags register is declared clobbered
7312 
7313   format %{ "addq    $dst, $src\t# long" %}
7314   opcode(0x81, 0x00); /* /0 id */
7315   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // wide counterpart of addI_rReg_imm's encoding
7316   ins_pipe( ialu_reg );
7317 %}
7318 
7319 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) // long add, register += memory (LoadL folded in)
7320 %{
7321   match(Set dst (AddL dst (LoadL src)));
7322   effect(KILL cr); // flags register is declared clobbered
7323 
7324   ins_cost(125); // XXX
7325   format %{ "addq    $dst, $src\t# long" %}
7326   opcode(0x03); // same opcode byte as addL_rReg
7327   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7328   ins_pipe(ialu_reg_mem);
7329 %}
7330 
7331 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) // long add, memory += register (load/add/store on the same address)
7332 %{
7333   match(Set dst (StoreL dst (AddL (LoadL dst) src))); // the load and the store must name the same memory operand
7334   effect(KILL cr); // flags register is declared clobbered
7335 
7336   ins_cost(150); // XXX
7337   format %{ "addq    $dst, $src\t# long" %}
7338   opcode(0x01); /* Opcode 01 /r */
7339   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // register operand (src) is passed first, memory operand (dst) second
7340   ins_pipe(ialu_mem_reg);
7341 %}
7342 
7343 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) // long add, memory += immL32 immediate
7344 %{
7345   match(Set dst (StoreL dst (AddL (LoadL dst) src))); // the load and the store must name the same memory operand
7346   effect(KILL cr); // flags register is declared clobbered
7347 
7348   ins_cost(125); // XXX
7349   format %{ "addq    $dst, $src\t# long" %}
7350   opcode(0x81); /* Opcode 81 /0 id */
7351   ins_encode(REX_mem_wide(dst),
7352              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 opcode extension from the comment above
7353   ins_pipe(ialu_mem_imm);
7354 %}
7355 
7356 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) // long increment of a register; dst uses the long register class, like addL_rReg and decL_rReg
7357 %{
7358   predicate(UseIncDec); // only selectable when UseIncDec is set
7359   match(Set dst (AddL dst src)); // src is immL1; presumably the constant 1 — confirm against the operand definition
7360   effect(KILL cr); // flags register is declared clobbered
7361 
7362   format %{ "incq    $dst\t# long" %}
7363   opcode(0xFF, 0x00); // FF /0
7364   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // wide REX form, matching the incq mnemonic; src is not encoded
7365   ins_pipe(ialu_reg);
7366 %}
7367 
7368 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) // long increment of a memory location
7369 %{
7370   predicate(UseIncDec); // only selectable when UseIncDec is set
7371   match(Set dst (StoreL dst (AddL (LoadL dst) src))); // src is immL1; presumably the constant 1 — confirm against the operand definition
7372   effect(KILL cr); // flags register is declared clobbered
7373 
7374   ins_cost(125); // XXX
7375   format %{ "incq    $dst\t# long" %}
7376   opcode(0xFF); /* Opcode FF /0 */
7377   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst)); // src is not encoded; 0x00 is the /0 opcode extension
7378   ins_pipe(ialu_mem_imm);
7379 %}
7380 
7381 // XXX why does that use AddL
7382 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) // long decrement of a register
7383 %{
7384   predicate(UseIncDec); // only selectable when UseIncDec is set
7385   match(Set dst (AddL dst src)); // src is immL_M1; presumably the constant -1, which would make this AddL a decrement — confirm against the operand definition
7386   effect(KILL cr); // flags register is declared clobbered
7387 
7388   format %{ "decq    $dst\t# long" %}
7389   opcode(0xFF, 0x01); // FF /1
7390   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst)); // src is not encoded; only dst appears in the instruction
7391   ins_pipe(ialu_reg);
7392 %}
7393 
7394 // XXX why does that use AddL
7395 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) // long decrement of a memory location
7396 %{
7397   predicate(UseIncDec); // only selectable when UseIncDec is set
7398   match(Set dst (StoreL dst (AddL (LoadL dst) src))); // src is immL_M1; presumably the constant -1 — confirm against the operand definition
7399   effect(KILL cr); // flags register is declared clobbered
7400 
7401   ins_cost(125); // XXX
7402   format %{ "decq    $dst\t# long" %}
7403   opcode(0xFF); /* Opcode FF /1 */
7404   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst)); // src is not encoded; 0x01 is the /1 opcode extension
7405   ins_pipe(ialu_mem_imm);
7406 %}
7407 
7408 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) // three-operand long add via lea; no flags operand and no KILL effect declared
7409 %{
7410   match(Set dst (AddL src0 src1)); // dst is not an input here, unlike the addq forms
7411 
7412   ins_cost(110);
7413   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7414   opcode(0x8D); /* 0x8D /r */
7415   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  (no 0x67 prefix byte here, unlike leaI_rReg_immI)
7416   ins_pipe(ialu_reg_reg);
7417 %}
7418 
7419 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) // pointer add, pointer register += long register
7420 %{
7421   match(Set dst (AddP dst src)); // two-operand form: dst is both an input and the result
7422   effect(KILL cr); // flags register is declared clobbered
7423 
7424   format %{ "addq    $dst, $src\t# ptr" %}
7425   opcode(0x03);
7426   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // same encoding sequence as addL_rReg
7427   ins_pipe(ialu_reg_reg);
7428 %}
7429 
7430 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) // pointer add, pointer register += immL32 immediate
7431 %{
7432   match(Set dst (AddP dst src)); // two-operand form: dst is both an input and the result
7433   effect(KILL cr); // flags register is declared clobbered
7434 
7435   format %{ "addq    $dst, $src\t# ptr" %}
7436   opcode(0x81, 0x00); /* /0 id */
7437   ins_encode(OpcSErm_wide(dst, src), Con8or32(src)); // same encoding sequence as addL_rReg_imm
7438   ins_pipe( ialu_reg );
7439 %}
7440 
7441 // XXX addP mem ops ????
7442 
7443 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) // three-operand pointer add via lea; no flags operand and no KILL effect declared
7444 %{
7445   match(Set dst (AddP src0 src1)); // dst is not an input here, unlike the addq forms
7446 
7447   ins_cost(110);
7448   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7449   opcode(0x8D); /* 0x8D /r */
7450   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX  (same encoding sequence as leaL_rReg_immL)
7451   ins_pipe(ialu_reg_reg);
7452 %}
7453 
7454 instruct checkCastPP(rRegP dst) // CheckCastPP on a pointer register; emits no machine code
7455 %{
7456   match(Set dst (CheckCastPP dst)); // input and result are the same register
7457 
7458   size(0); // declared size is zero bytes
7459   format %{ "# checkcastPP of $dst" %}
7460   ins_encode(/* empty encoding */);
7461   ins_pipe(empty);
7462 %}
7463 
7464 instruct castPP(rRegP dst) // CastPP on a pointer register; emits no machine code
7465 %{
7466   match(Set dst (CastPP dst)); // input and result are the same register
7467 
7468   size(0); // declared size is zero bytes
7469   format %{ "# castPP of $dst" %}
7470   ins_encode(/* empty encoding */);
7471   ins_pipe(empty);
7472 %}
7473 
7474 instruct castII(rRegI dst) // CastII on an int register; emits no machine code
7475 %{
7476   match(Set dst (CastII dst)); // input and result are the same register
7477 
7478   size(0); // declared size is zero bytes
7479   format %{ "# castII of $dst" %}
7480   ins_encode(/* empty encoding */);
7481   ins_cost(0); // explicit zero cost; checkCastPP and castPP declare no ins_cost
7482   ins_pipe(empty);
7483 %}
7484 
7485 // LoadP-locked same as a regular LoadP when used with compare-swap
7486 instruct loadPLocked(rRegP dst, memory mem) // LoadPLocked, encoded as a plain 64-bit register load
7487 %{
7488   match(Set dst (LoadPLocked mem));
7489 
7490   ins_cost(125); // XXX
7491   format %{ "movq    $dst, $mem\t# ptr locked" %}
7492   opcode(0x8B);
7493   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // no lock_prefix is emitted here, unlike the conditional-store rules
7494   ins_pipe(ialu_reg_mem); // XXX
7495 %}
7496 
7497 // Conditional-store of the updated heap-top.
7498 // Used during allocation of the shared heap.
7499 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7500 
7501 instruct storePConditional(memory heap_top_ptr,
7502                            rax_RegP oldval, rRegP newval,
7503                            rFlagsReg cr) // pointer conditional store; the matched result is the flags register
7504 %{
7505   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); // oldval is constrained to the rax register class
7506 
7507   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7508             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7509   opcode(0x0F, 0xB1);
7510   ins_encode(lock_prefix,
7511              REX_reg_mem_wide(newval, heap_top_ptr),
7512              OpcP, OpcS,
7513              reg_mem(newval, heap_top_ptr)); // NOTE(review): no effect(KILL oldval) here, unlike storeIConditional/storeLConditional — confirm this is intended
7514   ins_pipe(pipe_cmpxchg);
7515 %}
7516 
7517 // Conditional-store of an int value.
7518 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7519 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) // int conditional store; the matched result is the flags register
7520 %{
7521   match(Set cr (StoreIConditional mem (Binary oldval newval))); // oldval is constrained to the rax register class
7522   effect(KILL oldval); // oldval (rax) is declared clobbered
7523 
7524   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7525   opcode(0x0F, 0xB1);
7526   ins_encode(lock_prefix,
7527              REX_reg_mem(newval, mem),
7528              OpcP, OpcS,
7529              reg_mem(newval, mem)); // non-wide REX form, matching the cmpxchgl mnemonic
7530   ins_pipe(pipe_cmpxchg);
7531 %}
7532 
7533 // Conditional-store of a long value.
7534 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
7535 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) // long conditional store; the matched result is the flags register
7536 %{
7537   match(Set cr (StoreLConditional mem (Binary oldval newval))); // oldval is constrained to the rax register class
7538   effect(KILL oldval); // oldval (rax) is declared clobbered
7539 
7540   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7541   opcode(0x0F, 0xB1);
7542   ins_encode(lock_prefix,
7543              REX_reg_mem_wide(newval, mem),
7544              OpcP, OpcS,
7545              reg_mem(newval, mem)); // wide REX form, matching the cmpxchgq mnemonic
7546   ins_pipe(pipe_cmpxchg);
7547 %}
7548 
7549 
7550 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7551 instruct compareAndSwapP(rRegI res,
7552                          memory mem_ptr,
7553                          rax_RegP oldval, rRegP newval,
7554                          rFlagsReg cr) // pointer compare-and-swap producing an int result in res
7555 %{
7556   predicate(VM_Version::supports_cx8()); // only selectable when VM_Version::supports_cx8() is true
7557   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7558   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7559   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7560 
7561   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7562             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7563             "sete    $res\n\t"
7564             "movzbl  $res, $res" %}
7565   opcode(0x0F, 0xB1);
7566   ins_encode(lock_prefix,
7567              REX_reg_mem_wide(newval, mem_ptr),
7568              OpcP, OpcS,
7569              reg_mem(newval, mem_ptr),
7570              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7571              REX_reg_breg(res, res), // movzbl
7572              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7573   ins_pipe( pipe_cmpxchg );
7574 %}
7575 
7576 instruct compareAndSwapL(rRegI res,
7577                          memory mem_ptr,
7578                          rax_RegL oldval, rRegL newval,
7579                          rFlagsReg cr) // long compare-and-swap producing an int result in res
7580 %{
7581   predicate(VM_Version::supports_cx8()); // only selectable when VM_Version::supports_cx8() is true
7582   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7583   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7584   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7585 
7586   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7587             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7588             "sete    $res\n\t"
7589             "movzbl  $res, $res" %}
7590   opcode(0x0F, 0xB1);
7591   ins_encode(lock_prefix,
7592              REX_reg_mem_wide(newval, mem_ptr),
7593              OpcP, OpcS,
7594              reg_mem(newval, mem_ptr),
7595              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7596              REX_reg_breg(res, res), // movzbl
7597              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7598   ins_pipe( pipe_cmpxchg );
7599 %}
7600 
7601 instruct compareAndSwapI(rRegI res,
7602                          memory mem_ptr,
7603                          rax_RegI oldval, rRegI newval,
7604                          rFlagsReg cr) // int compare-and-swap producing an int result in res; no predicate, unlike the P and L rules
7605 %{
7606   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7607   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7608   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7609 
7610   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7611             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7612             "sete    $res\n\t"
7613             "movzbl  $res, $res" %}
7614   opcode(0x0F, 0xB1);
7615   ins_encode(lock_prefix,
7616              REX_reg_mem(newval, mem_ptr),
7617              OpcP, OpcS,
7618              reg_mem(newval, mem_ptr),
7619              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7620              REX_reg_breg(res, res), // movzbl
7621              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7622   ins_pipe( pipe_cmpxchg );
7623 %}
7624 
7625 instruct compareAndSwapB(rRegI res,
7626                          memory mem_ptr,
7627                          rax_RegI oldval, rRegI newval,
7628                          rFlagsReg cr) // byte compare-and-swap producing an int result in res
7629 %{
7630   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7631   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7632   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7633 
7634   format %{ "cmpxchgb $mem_ptr,$newval\t# "
7635             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7636             "sete    $res\n\t"
7637             "movzbl  $res, $res" %}
7638   opcode(0x0F, 0xB0); // second byte is 0xB0 here; the wider compare-and-swap rules use 0xB1
7639   ins_encode(lock_prefix,
7640              REX_breg_mem(newval, mem_ptr), // byte-register REX variant for newval
7641              OpcP, OpcS,
7642              reg_mem(newval, mem_ptr),
7643              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7644              REX_reg_breg(res, res), // movzbl
7645              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7646   ins_pipe( pipe_cmpxchg );
7647 %}
7648 
7649 instruct compareAndSwapS(rRegI res,
7650                          memory mem_ptr,
7651                          rax_RegI oldval, rRegI newval,
7652                          rFlagsReg cr) // short compare-and-swap producing an int result in res
7653 %{
7654   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7655   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7656   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7657 
7658   format %{ "cmpxchgw $mem_ptr,$newval\t# "
7659             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7660             "sete    $res\n\t"
7661             "movzbl  $res, $res" %}
7662   opcode(0x0F, 0xB1);
7663   ins_encode(lock_prefix,
7664              SizePrefix, // the only difference from compareAndSwapI's encoding; presumably the 16-bit operand-size prefix — confirm in the enc_class definitions
7665              REX_reg_mem(newval, mem_ptr),
7666              OpcP, OpcS,
7667              reg_mem(newval, mem_ptr),
7668              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7669              REX_reg_breg(res, res), // movzbl
7670              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7671   ins_pipe( pipe_cmpxchg );
7672 %}
7673 
7674 instruct compareAndSwapN(rRegI res,
7675                           memory mem_ptr,
7676                           rax_RegN oldval, rRegN newval,
7677                           rFlagsReg cr) %{ // narrow-pointer compare-and-swap producing an int result in res
7678   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7679   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); // the weak form shares this rule and its encoding
7680   effect(KILL cr, KILL oldval); // flags and oldval (rax) are declared clobbered
7681 
7682   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7683             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7684             "sete    $res\n\t"
7685             "movzbl  $res, $res" %}
7686   opcode(0x0F, 0xB1);
7687   ins_encode(lock_prefix,
7688              REX_reg_mem(newval, mem_ptr), // non-wide REX form, same sequence as compareAndSwapI
7689              OpcP, OpcS,
7690              reg_mem(newval, mem_ptr),
7691              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7692              REX_reg_breg(res, res), // movzbl
7693              Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); // 0xF is the same byte value as 0x0F
7694   ins_pipe( pipe_cmpxchg );
7695 %}
7696 
7697 instruct compareAndExchangeB(
7698                          memory mem_ptr,
7699                          rax_RegI oldval, rRegI newval,
7700                          rFlagsReg cr)
7701 %{  // Byte compare-and-exchange; there is no separate result operand
7702   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7703   effect(KILL cr);  // flags are declared clobbered
7704
7705   format %{ "cmpxchgb $mem_ptr,$newval\t# "
7706             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7707   opcode(0x0F, 0xB0);
7708   ins_encode(lock_prefix,
7709              REX_breg_mem(newval, mem_ptr),  // byte-register REX helper (the S/I/N forms use REX_reg_mem)
7710              OpcP, OpcS,
7711              reg_mem(newval, mem_ptr) // lock cmpxchg
7712              );
7713   ins_pipe( pipe_cmpxchg );
7714 %}
7715 
7716 instruct compareAndExchangeS(
7717                          memory mem_ptr,
7718                          rax_RegI oldval, rRegI newval,
7719                          rFlagsReg cr)
7720 %{  // Short (16-bit) compare-and-exchange; there is no separate result operand
7721   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7722   effect(KILL cr);  // flags are declared clobbered
7723
7724   format %{ "cmpxchgw $mem_ptr,$newval\t# "
7725             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7726   opcode(0x0F, 0xB1);
7727   ins_encode(lock_prefix,
7728              SizePrefix,  // presumably the operand-size prefix selecting the 16-bit form -- confirm against its enc_class
7729              REX_reg_mem(newval, mem_ptr),
7730              OpcP, OpcS,
7731              reg_mem(newval, mem_ptr) // lock cmpxchg
7732              );
7733   ins_pipe( pipe_cmpxchg );
7734 %}
7735 
7736 instruct compareAndExchangeI(
7737                          memory mem_ptr,
7738                          rax_RegI oldval, rRegI newval,
7739                          rFlagsReg cr)
7740 %{  // Int (32-bit) compare-and-exchange; there is no separate result operand
7741   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7742   effect(KILL cr);  // flags are declared clobbered
7743
7744   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7745             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7746   opcode(0x0F, 0xB1);
7747   ins_encode(lock_prefix,
7748              REX_reg_mem(newval, mem_ptr),
7749              OpcP, OpcS,
7750              reg_mem(newval, mem_ptr) // lock cmpxchg
7751              );
7752   ins_pipe( pipe_cmpxchg );
7753 %}
7754 
7755 instruct compareAndExchangeL(
7756                          memory mem_ptr,
7757                          rax_RegL oldval, rRegL newval,
7758                          rFlagsReg cr)
7759 %{  // Long (64-bit) compare-and-exchange; there is no separate result operand
7760   predicate(VM_Version::supports_cx8());  // rule is only selectable when the VM reports cx8 support
7761   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7762   effect(KILL cr);  // flags are declared clobbered
7763
7764   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7765             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7766   opcode(0x0F, 0xB1);
7767   ins_encode(lock_prefix,
7768              REX_reg_mem_wide(newval, mem_ptr),  // "_wide" REX helper, matching the cmpxchgq in the format
7769              OpcP, OpcS,
7770              reg_mem(newval, mem_ptr)  // lock cmpxchg
7771             );
7772   ins_pipe( pipe_cmpxchg );
7773 %}
7774 
7775 instruct compareAndExchangeN(
7776                           memory mem_ptr,
7777                           rax_RegN oldval, rRegN newval,
7778                           rFlagsReg cr) %{  // Compare-and-exchange on rRegN/rax_RegN operands; there is no separate result operand
7779   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7780   effect(KILL cr);  // flags are declared clobbered
7781
7782   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7783             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7784   opcode(0x0F, 0xB1);
7785   ins_encode(lock_prefix,
7786              REX_reg_mem(newval, mem_ptr),  // non-wide REX helper, matching the cmpxchgl in the format
7787              OpcP, OpcS,
7788              reg_mem(newval, mem_ptr)  // lock cmpxchg
7789           );
7790   ins_pipe( pipe_cmpxchg );
7791 %}
7792 
7793 instruct compareAndExchangeP(
7794                          memory mem_ptr,
7795                          rax_RegP oldval, rRegP newval,
7796                          rFlagsReg cr)
7797 %{  // Pointer compare-and-exchange; there is no separate result operand
7798   predicate(VM_Version::supports_cx8());  // rule is only selectable when the VM reports cx8 support
7799   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));  // the node's result is defined into $oldval (rax class)
7800   effect(KILL cr);  // flags are declared clobbered
7801
7802   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7803             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7804   opcode(0x0F, 0xB1);
7805   ins_encode(lock_prefix,
7806              REX_reg_mem_wide(newval, mem_ptr),  // "_wide" REX helper, matching the cmpxchgq in the format
7807              OpcP, OpcS,
7808              reg_mem(newval, mem_ptr)  // lock cmpxchg
7809           );
7810   ins_pipe( pipe_cmpxchg );
7811 %}
7812 
7813 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // Byte GetAndAdd whose result is unused: emitted as a locked add of an immediate
7814   predicate(n->as_LoadStore()->result_not_used());  // only when the LoadStore node's result has no uses
7815   match(Set dummy (GetAndAddB mem add));  // $dummy (Universe) stands in for the discarded result
7816   effect(KILL cr);  // flags are declared clobbered
7817   format %{ "ADDB  [$mem],$add" %}
7818   ins_encode %{
7819     __ lock();  // emitted immediately before the add
7820     __ addb($mem$$Address, $add$$constant);
7821   %}
7822   ins_pipe( pipe_cmpxchg );
7823 %}
7824 
7825 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{  // Byte GetAndAdd with a used result
7826   match(Set newval (GetAndAddB mem newval));  // $newval is both the addend and the register receiving the result
7827   effect(KILL cr);  // flags are declared clobbered
7828   format %{ "XADDB  [$mem],$newval" %}
7829   ins_encode %{
7830     __ lock();  // emitted immediately before the xadd
7831     __ xaddb($mem$$Address, $newval$$Register);
7832   %}
7833   ins_pipe( pipe_cmpxchg );
7834 %}
7835 
7836 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // Short GetAndAdd whose result is unused: emitted as a locked add of an immediate
7837   predicate(n->as_LoadStore()->result_not_used());  // only when the LoadStore node's result has no uses
7838   match(Set dummy (GetAndAddS mem add));  // $dummy (Universe) stands in for the discarded result
7839   effect(KILL cr);  // flags are declared clobbered
7840   format %{ "ADDW  [$mem],$add" %}
7841   ins_encode %{
7842     __ lock();  // emitted immediately before the add
7843     __ addw($mem$$Address, $add$$constant);
7844   %}
7845   ins_pipe( pipe_cmpxchg );
7846 %}
7847 
7848 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{  // Short GetAndAdd with a used result
7849   match(Set newval (GetAndAddS mem newval));  // $newval is both the addend and the register receiving the result
7850   effect(KILL cr);  // flags are declared clobbered
7851   format %{ "XADDW  [$mem],$newval" %}
7852   ins_encode %{
7853     __ lock();  // emitted immediately before the xadd
7854     __ xaddw($mem$$Address, $newval$$Register);
7855   %}
7856   ins_pipe( pipe_cmpxchg );
7857 %}
7858 
7859 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // Int GetAndAdd whose result is unused: emitted as a locked add of an immediate
7860   predicate(n->as_LoadStore()->result_not_used());  // only when the LoadStore node's result has no uses
7861   match(Set dummy (GetAndAddI mem add));  // $dummy (Universe) stands in for the discarded result
7862   effect(KILL cr);  // flags are declared clobbered
7863   format %{ "ADDL  [$mem],$add" %}
7864   ins_encode %{
7865     __ lock();  // emitted immediately before the add
7866     __ addl($mem$$Address, $add$$constant);
7867   %}
7868   ins_pipe( pipe_cmpxchg );
7869 %}
7870 
7871 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{  // Int GetAndAdd with a used result
7872   match(Set newval (GetAndAddI mem newval));  // $newval is both the addend and the register receiving the result
7873   effect(KILL cr);  // flags are declared clobbered
7874   format %{ "XADDL  [$mem],$newval" %}
7875   ins_encode %{
7876     __ lock();  // emitted immediately before the xadd
7877     __ xaddl($mem$$Address, $newval$$Register);
7878   %}
7879   ins_pipe( pipe_cmpxchg );
7880 %}
7881 
7882 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{  // Long GetAndAdd whose result is unused: emitted as a locked add of an immL32 immediate
7883   predicate(n->as_LoadStore()->result_not_used());  // only when the LoadStore node's result has no uses
7884   match(Set dummy (GetAndAddL mem add));  // $dummy (Universe) stands in for the discarded result
7885   effect(KILL cr);  // flags are declared clobbered
7886   format %{ "ADDQ  [$mem],$add" %}
7887   ins_encode %{
7888     __ lock();  // emitted immediately before the add
7889     __ addq($mem$$Address, $add$$constant);
7890   %}
7891   ins_pipe( pipe_cmpxchg );
7892 %}
7893 
7894 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{  // Long GetAndAdd with a used result
7895   match(Set newval (GetAndAddL mem newval));  // $newval is both the addend and the register receiving the result
7896   effect(KILL cr);  // flags are declared clobbered
7897   format %{ "XADDQ  [$mem],$newval" %}
7898   ins_encode %{
7899     __ lock();  // emitted immediately before the xadd
7900     __ xaddq($mem$$Address, $newval$$Register);
7901   %}
7902   ins_pipe( pipe_cmpxchg );
7903 %}
7904 
7905 instruct xchgB( memory mem, rRegI newval) %{  // Byte GetAndSet; no flags operand or effect is declared
7906   match(Set newval (GetAndSetB mem newval));  // $newval is both the value to store and the register receiving the result
7907   format %{ "XCHGB  $newval,[$mem]" %}
7908   ins_encode %{
7909     __ xchgb($newval$$Register, $mem$$Address);  // no lock() call, unlike the xadd rules; XCHG with a memory operand is implicitly locked (Intel SDM)
7910   %}
7911   ins_pipe( pipe_cmpxchg );
7912 %}
7913 
7914 instruct xchgS( memory mem, rRegI newval) %{  // Short GetAndSet; no flags operand or effect is declared
7915   match(Set newval (GetAndSetS mem newval));  // $newval is both the value to store and the register receiving the result
7916   format %{ "XCHGW  $newval,[$mem]" %}
7917   ins_encode %{
7918     __ xchgw($newval$$Register, $mem$$Address);  // no lock() call, unlike the xadd rules; XCHG with a memory operand is implicitly locked (Intel SDM)
7919   %}
7920   ins_pipe( pipe_cmpxchg );
7921 %}
7922 
7923 instruct xchgI( memory mem, rRegI newval) %{  // Int GetAndSet; no flags operand or effect is declared
7924   match(Set newval (GetAndSetI mem newval));  // $newval is both the value to store and the register receiving the result
7925   format %{ "XCHGL  $newval,[$mem]" %}
7926   ins_encode %{
7927     __ xchgl($newval$$Register, $mem$$Address);  // no lock() call, unlike the xadd rules; XCHG with a memory operand is implicitly locked (Intel SDM)
7928   %}
7929   ins_pipe( pipe_cmpxchg );
7930 %}
7931 
7932 instruct xchgL( memory mem, rRegL newval) %{  // Long GetAndSet; no flags operand or effect is declared
7933   match(Set newval (GetAndSetL mem newval));  // $newval is both the value to store and the register receiving the result
7934   format %{ "XCHGQ  $newval,[$mem]" %}
7935   ins_encode %{
7936     __ xchgq($newval$$Register, $mem$$Address);  // 64-bit exchange; the format mnemonic above now agrees with it
7937   %}
7938   ins_pipe( pipe_cmpxchg );
7939 %}
7940 
7941 instruct xchgP( memory mem, rRegP newval) %{  // Pointer GetAndSet; no flags operand or effect is declared
7942   match(Set newval (GetAndSetP mem newval));  // $newval is both the value to store and the register receiving the result
7943   format %{ "XCHGQ  $newval,[$mem]" %}
7944   ins_encode %{
7945     __ xchgq($newval$$Register, $mem$$Address);  // no lock() call, unlike the xadd rules; XCHG with a memory operand is implicitly locked (Intel SDM)
7946   %}
7947   ins_pipe( pipe_cmpxchg );
7948 %}
7949 
7950 instruct xchgN( memory mem, rRegN newval) %{  // GetAndSet on an rRegN operand; no flags operand or effect is declared
7951   match(Set newval (GetAndSetN mem newval));  // $newval is both the value to store and the register receiving the result
7952   format %{ "XCHGL  $newval,[$mem]" %}
7953   ins_encode %{
7954     __ xchgl($newval$$Register, $mem$$Address);  // 32-bit exchange, as printed by the format
7955   %}
7956   ins_pipe( pipe_cmpxchg );
7957 %}
7958 
7959 //----------Subtraction Instructions-------------------------------------------
7960 
7961 // Integer Subtraction Instructions
7962 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7963 %{  // Int subtract, register - register
7964   match(Set dst (SubI dst src));  // dst = dst - src, result overwrites $dst
7965   effect(KILL cr);  // flags are declared clobbered
7966
7967   format %{ "subl    $dst, $src\t# int" %}
7968   opcode(0x2B);
7969   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7970   ins_pipe(ialu_reg_reg);
7971 %}
7972 
7973 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7974 %{  // Int subtract, register - immediate
7975   match(Set dst (SubI dst src));  // dst = dst - src, where $src is an immI constant
7976   effect(KILL cr);  // flags are declared clobbered
7977
7978   format %{ "subl    $dst, $src\t# int" %}
7979   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7980   ins_encode(OpcSErm(dst, src), Con8or32(src));  // helper names suggest an 8- or 32-bit immediate form chosen from the constant -- confirm in the enc_class definitions
7981   ins_pipe(ialu_reg);
7982 %}
7983 
7984 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7985 %{  // Int subtract, register - memory
7986   match(Set dst (SubI dst (LoadI src)));  // the LoadI is folded into the instruction: dst = dst - [src]
7987   effect(KILL cr);  // flags are declared clobbered
7988
7989   ins_cost(125);
7990   format %{ "subl    $dst, $src\t# int" %}
7991   opcode(0x2B);
7992   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7993   ins_pipe(ialu_reg_mem);
7994 %}
7995 
7996 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7997 %{  // Int subtract with a memory destination
7998   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load/sub/store on the same address folded into one rule: [dst] = [dst] - src
7999   effect(KILL cr);  // flags are declared clobbered
8000
8001   ins_cost(150);
8002   format %{ "subl    $dst, $src\t# int" %}
8003   opcode(0x29); /* Opcode 29 /r */
8004   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // note the (src, dst) argument order: the register operand comes first
8005   ins_pipe(ialu_mem_reg);
8006 %}
8007 
8008 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
8009 %{  // Int subtract of an immediate from a memory destination
8010   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load/sub/store on the same address folded into one rule: [dst] = [dst] - src
8011   effect(KILL cr);  // flags are declared clobbered
8012
8013   ins_cost(125); // XXX
8014   format %{ "subl    $dst, $src\t# int" %}
8015   opcode(0x81); /* Opcode 81 /5 id */
8016   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // the /5 opcode extension is passed literally here rather than as a secondary opcode
8017   ins_pipe(ialu_mem_imm);
8018 %}
8019 
8020 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8021 %{  // Long subtract, register - register
8022   match(Set dst (SubL dst src));  // dst = dst - src, result overwrites $dst
8023   effect(KILL cr);  // flags are declared clobbered
8024
8025   format %{ "subq    $dst, $src\t# long" %}
8026   opcode(0x2B);
8027   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // same opcode as subI_rReg; only the REX helper is the "_wide" one
8028   ins_pipe(ialu_reg_reg);
8029 %}
8030 
8031 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
8032 %{  // Long subtract, register - immL32 immediate; $dst is a long register like every other SubL rule
8033   match(Set dst (SubL dst src));  // dst = dst - src, result overwrites $dst
8034   effect(KILL cr);  // flags are declared clobbered
8035
8036   format %{ "subq    $dst, $src\t# long" %}
8037   opcode(0x81, 0x05);  /* Opcode 81 /5 */
8038   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));  // "_wide" variant of the helper used by subI_rReg_imm
8039   ins_pipe(ialu_reg);
8040 %}
8041 
8042 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
8043 %{  // Long subtract, register - memory
8044   match(Set dst (SubL dst (LoadL src)));  // the LoadL is folded into the instruction: dst = dst - [src]
8045   effect(KILL cr);  // flags are declared clobbered
8046
8047   ins_cost(125);
8048   format %{ "subq    $dst, $src\t# long" %}
8049   opcode(0x2B);
8050   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
8051   ins_pipe(ialu_reg_mem);
8052 %}
8053 
8054 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
8055 %{  // Long subtract with a memory destination
8056   match(Set dst (StoreL dst (SubL (LoadL dst) src)));  // load/sub/store on the same address folded into one rule: [dst] = [dst] - src
8057   effect(KILL cr);  // flags are declared clobbered
8058
8059   ins_cost(150);
8060   format %{ "subq    $dst, $src\t# long" %}
8061   opcode(0x29); /* Opcode 29 /r */
8062   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // note the (src, dst) argument order: the register operand comes first
8063   ins_pipe(ialu_mem_reg);
8064 %}
8065 
8066 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
8067 %{  // Long subtract of an immL32 immediate from a memory destination
8068   match(Set dst (StoreL dst (SubL (LoadL dst) src)));  // load/sub/store on the same address folded into one rule: [dst] = [dst] - src
8069   effect(KILL cr);  // flags are declared clobbered
8070
8071   ins_cost(125); // XXX
8072   format %{ "subq    $dst, $src\t# long" %}
8073   opcode(0x81); /* Opcode 81 /5 id */
8074   ins_encode(REX_mem_wide(dst),
8075              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // the /5 opcode extension is passed literally here rather than as a secondary opcode
8076   ins_pipe(ialu_mem_imm);
8077 %}
8078 
8079 // Subtract from a pointer
8080 // XXX hmpf???
8081 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
8082 %{  // Pointer minus int, recognised as a pointer add of a negated int
8083   match(Set dst (AddP dst (SubI zero src)));  // dst + (0 - src), folded into a single subtract
8084   effect(KILL cr);  // flags are declared clobbered
8085
8086   format %{ "subq    $dst, $src\t# ptr - int" %}
8087   opcode(0x2B);
8088   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // NOTE(review): "_wide"/subq form applied to an rRegI source -- confirm the upper half of $src is well-defined here
8089   ins_pipe(ialu_reg_reg);
8090 %}
8091 
8092 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8093 %{  // Int negate in a register
8094   match(Set dst (SubI zero dst));  // 0 - dst is recognised as a negate; $zero is an immI0 operand
8095   effect(KILL cr);  // flags are declared clobbered
8096
8097   format %{ "negl    $dst\t# int" %}
8098   opcode(0xF7, 0x03);  // Opcode F7 /3
8099   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $zero is not passed to the encoder
8100   ins_pipe(ialu_reg);
8101 %}
8102 
8103 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8104 %{  // Int negate of a memory operand
8105   match(Set dst (StoreI dst (SubI zero (LoadI dst))));  // load/negate/store on the same address: [dst] = 0 - [dst]
8106   effect(KILL cr);  // flags are declared clobbered
8107
8108   format %{ "negl    $dst\t# int" %}
8109   opcode(0xF7, 0x03);  // Opcode F7 /3
8110   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // "secondary" is the second opcode() value (0x03)
8111   ins_pipe(ialu_reg);  // NOTE(review): register pipe class on a memory-destination rule; the other memory RMW rules nearby use ialu_mem_* -- confirm
8112 %}
8113 
8114 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8115 %{  // Long negate in a register
8116   match(Set dst (SubL zero dst));  // 0 - dst is recognised as a negate; $zero is an immL0 operand
8117   effect(KILL cr);  // flags are declared clobbered
8118
8119   format %{ "negq    $dst\t# long" %}
8120   opcode(0xF7, 0x03);  // Opcode F7 /3
8121   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $zero is not passed to the encoder
8122   ins_pipe(ialu_reg);
8123 %}
8124 
8125 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8126 %{  // Long negate of a memory operand
8127   match(Set dst (StoreL dst (SubL zero (LoadL dst))));  // load/negate/store on the same address: [dst] = 0 - [dst]
8128   effect(KILL cr);  // flags are declared clobbered
8129
8130   format %{ "negq    $dst\t# long" %}
8131   opcode(0xF7, 0x03);  // Opcode F7 /3
8132   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // "secondary" is the second opcode() value (0x03)
8133   ins_pipe(ialu_reg);  // NOTE(review): register pipe class on a memory-destination rule; the other memory RMW rules nearby use ialu_mem_* -- confirm
8134 %}
8135 
8136 //----------Multiplication/Division Instructions-------------------------------
8137 // Integer Multiplication Instructions
8138 // Multiply Register
8139 
8140 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8141 %{  // Int multiply, register * register
8142   match(Set dst (MulI dst src));  // dst = dst * src, result overwrites $dst
8143   effect(KILL cr);  // flags are declared clobbered
8144
8145   ins_cost(300);
8146   format %{ "imull   $dst, $src\t# int" %}
8147   opcode(0x0F, 0xAF);
8148   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8149   ins_pipe(ialu_reg_reg_alu0);
8150 %}
8151 
8152 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8153 %{  // Int multiply, register * immediate, with a separate destination
8154   match(Set dst (MulI src imm));  // three-operand form: dst = src * imm; $dst is not an input
8155   effect(KILL cr);  // flags are declared clobbered
8156
8157   ins_cost(300);
8158   format %{ "imull   $dst, $src, $imm\t# int" %}
8159   opcode(0x69); /* 69 /r id */
8160   ins_encode(REX_reg_reg(dst, src),
8161              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8162   ins_pipe(ialu_reg_reg_alu0);
8163 %}
8164 
8165 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8166 %{  // Int multiply, register * memory
8167   match(Set dst (MulI dst (LoadI src)));  // the LoadI is folded into the instruction: dst = dst * [src]
8168   effect(KILL cr);  // flags are declared clobbered
8169
8170   ins_cost(350);
8171   format %{ "imull   $dst, $src\t# int" %}
8172   opcode(0x0F, 0xAF);
8173   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8174   ins_pipe(ialu_reg_mem_alu0);
8175 %}
8176 
8177 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8178 %{  // Int multiply, memory * immediate, into a register
8179   match(Set dst (MulI (LoadI src) imm));  // three-operand form: dst = [src] * imm; $dst is not an input
8180   effect(KILL cr);  // flags are declared clobbered
8181
8182   ins_cost(300);
8183   format %{ "imull   $dst, $src, $imm\t# int" %}
8184   opcode(0x69); /* 69 /r id */
8185   ins_encode(REX_reg_mem(dst, src),
8186              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8187   ins_pipe(ialu_reg_mem_alu0);
8188 %}
8189 
8190 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8191 %{  // Long multiply, register * register
8192   match(Set dst (MulL dst src));  // dst = dst * src, result overwrites $dst
8193   effect(KILL cr);  // flags are declared clobbered
8194
8195   ins_cost(300);
8196   format %{ "imulq   $dst, $src\t# long" %}
8197   opcode(0x0F, 0xAF);
8198   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));  // same opcode bytes as mulI_rReg; only the REX helper is the "_wide" one
8199   ins_pipe(ialu_reg_reg_alu0);
8200 %}
8201 
8202 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8203 %{  // Long multiply, register * immL32 immediate, with a separate destination
8204   match(Set dst (MulL src imm));  // three-operand form: dst = src * imm; $dst is not an input
8205   effect(KILL cr);  // flags are declared clobbered
8206
8207   ins_cost(300);
8208   format %{ "imulq   $dst, $src, $imm\t# long" %}
8209   opcode(0x69); /* 69 /r id */
8210   ins_encode(REX_reg_reg_wide(dst, src),
8211              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8212   ins_pipe(ialu_reg_reg_alu0);
8213 %}
8214 
8215 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8216 %{  // Long multiply, register * memory
8217   match(Set dst (MulL dst (LoadL src)));  // the LoadL is folded into the instruction: dst = dst * [src]
8218   effect(KILL cr);  // flags are declared clobbered
8219
8220   ins_cost(350);
8221   format %{ "imulq   $dst, $src\t# long" %}
8222   opcode(0x0F, 0xAF);
8223   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8224   ins_pipe(ialu_reg_mem_alu0);
8225 %}
8226 
8227 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8228 %{  // Long multiply, memory * immL32 immediate, into a register
8229   match(Set dst (MulL (LoadL src) imm));  // three-operand form: dst = [src] * imm; $dst is not an input
8230   effect(KILL cr);  // flags are declared clobbered
8231
8232   ins_cost(300);
8233   format %{ "imulq   $dst, $src, $imm\t# long" %}
8234   opcode(0x69); /* 69 /r id */
8235   ins_encode(REX_reg_mem_wide(dst, src),
8236              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8237   ins_pipe(ialu_reg_mem_alu0);
8238 %}
8239 
8240 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8241 %{  // High half of a long multiply; the result operand is pinned to the rdx class
8242   match(Set dst (MulHiL src rax));  // one input is pinned to the rax class, the other excluded from it (no_rax_RegL)
8243   effect(USE_KILL rax, KILL cr);  // rax is read and then overwritten; flags are declared clobbered
8244
8245   ins_cost(300);
8246   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8247   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8248   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // only $src is passed to the encoder
8249   ins_pipe(ialu_reg_reg_alu0);
8250 %}
8251 
8252 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8253                    rFlagsReg cr)
8254 %{  // Int divide; dividend and quotient both live in the rax-class operand
8255   match(Set rax (DivI rax div));  // rax = rax / div; $div is excluded from rax and rdx
8256   effect(KILL rdx, KILL cr);  // rdx and the flags are declared clobbered
8257
8258   ins_cost(30*100+10*100); // XXX
8259   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8260             "jne,s   normal\n\t"
8261             "xorl    rdx, rdx\n\t"
8262             "cmpl    $div, -1\n\t"
8263             "je,s    done\n"
8264     "normal: cdql\n\t"
8265             "idivl   $div\n"
8266     "done:"        %}
8267   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8268   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc presumably emits the guard sequence up to cdql shown in the format (defined elsewhere); the rest encodes idivl $div
8269   ins_pipe(ialu_reg_reg_alu0);
8270 %}
8271 
8272 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8273                    rFlagsReg cr)
8274 %{  // Long divide; dividend and quotient both live in the rax-class operand
8275   match(Set rax (DivL rax div));  // rax = rax / div; $div is excluded from rax and rdx
8276   effect(KILL rdx, KILL cr);  // rdx and the flags are declared clobbered
8277
8278   ins_cost(30*100+10*100); // XXX
8279   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8280             "cmpq    rax, rdx\n\t"
8281             "jne,s   normal\n\t"
8282             "xorl    rdx, rdx\n\t"
8283             "cmpq    $div, -1\n\t"
8284             "je,s    done\n"
8285     "normal: cdqq\n\t"
8286             "idivq   $div\n"
8287     "done:"        %}
8288   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8289   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc presumably emits the guard sequence up to cdqq shown in the format (defined elsewhere); the rest encodes idivq $div
8290   ins_pipe(ialu_reg_reg_alu0);
8291 %}
8292 
8293 // Integer DIVMOD with Register, both quotient and mod results
8294 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8295                              rFlagsReg cr)
8296 %{  // Combined int divide/modulo node; same format and encoding as divI_rReg
8297   match(DivModI rax div);  // no Set: the node has more than one result
8298   effect(KILL cr);  // only the flags are killed; unlike divI_rReg, rdx is not listed as clobbered
8299
8300   ins_cost(30*100+10*100); // XXX
8301   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8302             "jne,s   normal\n\t"
8303             "xorl    rdx, rdx\n\t"
8304             "cmpl    $div, -1\n\t"
8305             "je,s    done\n"
8306     "normal: cdql\n\t"
8307             "idivl   $div\n"
8308     "done:"        %}
8309   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8310   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc presumably emits the guard sequence up to cdql shown in the format (defined elsewhere)
8311   ins_pipe(pipe_slow);
8312 %}
8313 
8314 // Long DIVMOD with Register, both quotient and mod results
8315 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8316                              rFlagsReg cr)
8317 %{  // Combined long divide/modulo node; same format and encoding as divL_rReg
8318   match(DivModL rax div);  // no Set: the node has more than one result
8319   effect(KILL cr);  // only the flags are killed; unlike divL_rReg, rdx is not listed as clobbered
8320
8321   ins_cost(30*100+10*100); // XXX
8322   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8323             "cmpq    rax, rdx\n\t"
8324             "jne,s   normal\n\t"
8325             "xorl    rdx, rdx\n\t"
8326             "cmpq    $div, -1\n\t"
8327             "je,s    done\n"
8328     "normal: cdqq\n\t"
8329             "idivq   $div\n"
8330     "done:"        %}
8331   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8332   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc presumably emits the guard sequence up to cdqq shown in the format (defined elsewhere)
8333   ins_pipe(pipe_slow);
8334 %}
8335 
8336 //----------- DivL-By-Constant-Expansions--------------------------------------
8337 // DivI cases are handled by the compiler
8338 
8339 // Magic constant, reciprocal of 10
8340 instruct loadConL_0x6666666666666667(rRegL dst)
8341 %{  // Loads the fixed constant 0x6666666666666667; no match rule, so it is only instantiated from expand blocks (divL_10)
8342   effect(DEF dst);  // $dst is written only
8343
8344   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8345   ins_encode(load_immL(dst, 0x6666666666666667));  // the format above prints the same 16-digit constant that is encoded here
8346   ins_pipe(ialu_reg);
8347 %}
8348 
8349 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8350 %{  // Multiply-high helper with no match rule; instantiated from the divL_10 expand block
8351   effect(DEF dst, USE src, USE_KILL rax, KILL cr);  // writes $dst (rdx class), reads $src, reads then clobbers rax, clobbers flags
8352
8353   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8354   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8355   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));  // same encoding list as mulHiL_rReg
8356   ins_pipe(ialu_reg_reg_alu0);
8357 %}
8358 
8359 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8360 %{  // Long arithmetic shift right by the fixed count 63; no match rule, instantiated from the divL_10 expand block
8361   effect(USE_DEF dst, KILL cr);  // $dst is read and overwritten in place; flags are declared clobbered
8362
8363   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8364   opcode(0xC1, 0x7); /* C1 /7 ib */
8365   ins_encode(reg_opc_imm_wide(dst, 0x3F));  // 0x3F == 63, the count shown in the format
8366   ins_pipe(ialu_reg);
8367 %}
8368 
8369 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8370 %{  // Long arithmetic shift right by the fixed count 2; no match rule, instantiated from the divL_10 expand block
8371   effect(USE_DEF dst, KILL cr);  // $dst is read and overwritten in place; flags are declared clobbered
8372
8373   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8374   opcode(0xC1, 0x7); /* C1 /7 ib */
8375   ins_encode(reg_opc_imm_wide(dst, 0x2));  // the count shown in the format
8376   ins_pipe(ialu_reg);
8377 %}
8378 
8379 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8380 %{  // Long division by the immL10 constant, expanded into a multiply-high by 0x6666666666666667 plus shifts and a subtract
8381   match(Set dst (DivL src div));  // the result is pinned to the rdx class
8382
8383   ins_cost((5+8)*100);
8384   expand %{
8385     rax_RegL rax;                     // Killed temp
8386     rFlagsReg cr;                     // Killed
8387     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8388     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8389     sarL_rReg_63(src, cr);            // sarq  src, 63 -- NOTE(review): overwrites $src, an input of this rule; confirm that is intended
8390     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8391     subL_rReg(dst, src, cr);          // subq  rdx, src
8392   %}
8393 %}
8394 
8395 //-----------------------------------------------------------------------------
8396 
8397 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8398                    rFlagsReg cr)
8399 %{  // Int remainder; same format and encoding as divI_rReg, but the result is taken from the rdx-class operand
8400   match(Set rdx (ModI rax div));  // rdx = rax % div; $div is excluded from rax and rdx
8401   effect(KILL rax, KILL cr);  // rax and the flags are declared clobbered
8402
8403   ins_cost(300); // XXX
8404   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8405             "jne,s   normal\n\t"
8406             "xorl    rdx, rdx\n\t"
8407             "cmpl    $div, -1\n\t"
8408             "je,s    done\n"
8409     "normal: cdql\n\t"
8410             "idivl   $div\n"
8411     "done:"        %}
8412   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8413   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));  // cdql_enc presumably emits the guard sequence up to cdql shown in the format (defined elsewhere)
8414   ins_pipe(ialu_reg_reg_alu0);
8415 %}
8416 
8417 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8418                    rFlagsReg cr)
8419 %{  // Long remainder; same format and encoding as divL_rReg, but the result is taken from the rdx-class operand
8420   match(Set rdx (ModL rax div));  // rdx = rax % div; $div is excluded from rax and rdx
8421   effect(KILL rax, KILL cr);  // rax and the flags are declared clobbered
8422
8423   ins_cost(300); // XXX
8424   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8425             "cmpq    rax, rdx\n\t"
8426             "jne,s   normal\n\t"
8427             "xorl    rdx, rdx\n\t"
8428             "cmpq    $div, -1\n\t"
8429             "je,s    done\n"
8430     "normal: cdqq\n\t"
8431             "idivq   $div\n"
8432     "done:"        %}
8433   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8434   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));  // cdqq_enc presumably emits the guard sequence up to cdqq shown in the format (defined elsewhere)
8435   ins_pipe(ialu_reg_reg_alu0);
8436 %}
8437 
8438 // Integer Shift Instructions
8439 // Shift Left by one
8440 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8441 %{  // Int shift left of a register by an immI1 count
8442   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8443   effect(KILL cr);  // flags are declared clobbered
8444
8445   format %{ "sall    $dst, $shift" %}
8446   opcode(0xD1, 0x4); /* D1 /4 */
8447   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8448   ins_pipe(ialu_reg);
8449 %}
8450 
8451 // Shift Left by one
8452 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8453 %{  // Int shift left of a memory operand by an immI1 count
8454   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] << shift
8455   effect(KILL cr);  // flags are declared clobbered
8456
8457   format %{ "sall    $dst, $shift\t" %}
8458   opcode(0xD1, 0x4); /* D1 /4 */
8459   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder; "secondary" is the second opcode() value
8460   ins_pipe(ialu_mem_imm);
8461 %}
8462 
8463 // Shift Left by 8-bit immediate
8464 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8465 %{  // Int shift left of a register by an immI8 count
8466   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8467   effect(KILL cr);  // flags are declared clobbered
8468
8469   format %{ "sall    $dst, $shift" %}
8470   opcode(0xC1, 0x4); /* C1 /4 ib */
8471   ins_encode(reg_opc_imm(dst, shift));  // a single helper receives both the register and the count
8472   ins_pipe(ialu_reg);
8473 %}
8474 
8475 // Shift Left by 8-bit immediate
8476 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8477 %{  // Int shift left of a memory operand by an immI8 count
8478   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] << shift
8479   effect(KILL cr);  // flags are declared clobbered
8480
8481   format %{ "sall    $dst, $shift" %}
8482   opcode(0xC1, 0x4); /* C1 /4 ib */
8483   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the count is appended through Con8or32
8484   ins_pipe(ialu_mem_imm);
8485 %}
8486 
8487 // Shift Left by variable
8488 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8489 %{  // Int shift left of a register by a variable count pinned to the rcx class
8490   match(Set dst (LShiftI dst shift));  // dst = dst << shift, in place
8491   effect(KILL cr);  // flags are declared clobbered
8492
8493   format %{ "sall    $dst, $shift" %}
8494   opcode(0xD3, 0x4); /* D3 /4 */
8495   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8496   ins_pipe(ialu_reg_reg);
8497 %}
8498 
8499 // Shift Left by variable
8500 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8501 %{  // Int shift left of a memory operand by a variable count pinned to the rcx class
8502   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] << shift
8503   effect(KILL cr);  // flags are declared clobbered
8504
8505   format %{ "sall    $dst, $shift" %}
8506   opcode(0xD3, 0x4); /* D3 /4 */
8507   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder
8508   ins_pipe(ialu_mem_reg);
8509 %}
8510 
8511 // Arithmetic shift right by one
8512 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8513 %{  // Int arithmetic shift right of a register by an immI1 count
8514   match(Set dst (RShiftI dst shift));  // dst = dst >> shift, in place
8515   effect(KILL cr);  // flags are declared clobbered
8516
8517   format %{ "sarl    $dst, $shift" %}
8518   opcode(0xD1, 0x7); /* D1 /7 */
8519   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8520   ins_pipe(ialu_reg);
8521 %}
8522 
8523 // Arithmetic shift right by one
8524 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8525 %{  // Int arithmetic shift right of a memory operand by an immI1 count
8526   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >> shift
8527   effect(KILL cr);  // flags are declared clobbered
8528
8529   format %{ "sarl    $dst, $shift" %}
8530   opcode(0xD1, 0x7); /* D1 /7 */
8531   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder
8532   ins_pipe(ialu_mem_imm);
8533 %}
8534 
8535 // Arithmetic Shift Right by 8-bit immediate
8536 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8537 %{  // Int arithmetic shift right of a register by an immI8 count
8538   match(Set dst (RShiftI dst shift));  // dst = dst >> shift, in place
8539   effect(KILL cr);  // flags are declared clobbered
8540
8541   format %{ "sarl    $dst, $shift" %}
8542   opcode(0xC1, 0x7); /* C1 /7 ib */
8543   ins_encode(reg_opc_imm(dst, shift));  // a single helper receives both the register and the count
8544   ins_pipe(ialu_reg);  // register pipe class, as in salI_rReg_imm and shrI_rReg_imm; this rule has no memory operand
8545 %}
8546 
8547 // Arithmetic Shift Right by 8-bit immediate
8548 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8549 %{  // Int arithmetic shift right of a memory operand by an immI8 count
8550   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >> shift
8551   effect(KILL cr);  // flags are declared clobbered
8552
8553   format %{ "sarl    $dst, $shift" %}
8554   opcode(0xC1, 0x7); /* C1 /7 ib */
8555   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the count is appended through Con8or32
8556   ins_pipe(ialu_mem_imm);
8557 %}
8558 
8559 // Arithmetic Shift Right by variable
8560 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8561 %{  // Int arithmetic shift right of a register by a variable count pinned to the rcx class
8562   match(Set dst (RShiftI dst shift));  // dst = dst >> shift, in place
8563   effect(KILL cr);  // flags are declared clobbered
8564
8565   format %{ "sarl    $dst, $shift" %}
8566   opcode(0xD3, 0x7); /* D3 /7 */
8567   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8568   ins_pipe(ialu_reg_reg);
8569 %}
8570 
8571 // Arithmetic Shift Right by variable
8572 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8573 %{  // Int arithmetic shift right of a memory operand by a variable count pinned to the rcx class
8574   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >> shift
8575   effect(KILL cr);  // flags are declared clobbered
8576
8577   format %{ "sarl    $dst, $shift" %}
8578   opcode(0xD3, 0x7); /* D3 /7 */
8579   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder
8580   ins_pipe(ialu_mem_reg);
8581 %}
8582 
8583 // Logical shift right by one
8584 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8585 %{  // Int logical (unsigned) shift right of a register by an immI1 count
8586   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift, in place
8587   effect(KILL cr);  // flags are declared clobbered
8588
8589   format %{ "shrl    $dst, $shift" %}
8590   opcode(0xD1, 0x5); /* D1 /5 */
8591   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8592   ins_pipe(ialu_reg);
8593 %}
8594 
8595 // Logical shift right by one
8596 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8597 %{  // Int logical (unsigned) shift right of a memory operand by an immI1 count
8598   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >>> shift
8599   effect(KILL cr);  // flags are declared clobbered
8600
8601   format %{ "shrl    $dst, $shift" %}
8602   opcode(0xD1, 0x5); /* D1 /5 */
8603   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder
8604   ins_pipe(ialu_mem_imm);
8605 %}
8606 
8607 // Logical Shift Right by 8-bit immediate
8608 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8609 %{  // Int logical (unsigned) shift right of a register by an immI8 count
8610   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift, in place
8611   effect(KILL cr);  // flags are declared clobbered
8612
8613   format %{ "shrl    $dst, $shift" %}
8614   opcode(0xC1, 0x5); /* C1 /5 ib */
8615   ins_encode(reg_opc_imm(dst, shift));  // a single helper receives both the register and the count
8616   ins_pipe(ialu_reg);
8617 %}
8618 
8619 // Logical Shift Right by 8-bit immediate
8620 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8621 %{  // Int logical (unsigned) shift right of a memory operand by an immI8 count
8622   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >>> shift
8623   effect(KILL cr);  // flags are declared clobbered
8624
8625   format %{ "shrl    $dst, $shift" %}
8626   opcode(0xC1, 0x5); /* C1 /5 ib */
8627   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));  // the count is appended through Con8or32
8628   ins_pipe(ialu_mem_imm);
8629 %}
8630 
8631 // Logical Shift Right by variable
8632 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8633 %{  // Int logical (unsigned) shift right of a register by a variable count pinned to the rcx class
8634   match(Set dst (URShiftI dst shift));  // dst = dst >>> shift, in place
8635   effect(KILL cr);  // flags are declared clobbered
8636
8637   format %{ "shrl    $dst, $shift" %}
8638   opcode(0xD3, 0x5); /* D3 /5 */
8639   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder
8640   ins_pipe(ialu_reg_reg);
8641 %}
8642 
8643 // Logical Shift Right by variable
8644 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8645 %{  // Int logical (unsigned) shift right of a memory operand by a variable count pinned to the rcx class
8646   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));  // load/shift/store on the same address: [dst] = [dst] >>> shift
8647   effect(KILL cr);  // flags are declared clobbered
8648
8649   format %{ "shrl    $dst, $shift" %}
8650   opcode(0xD3, 0x5); /* D3 /5 */
8651   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // $shift is not passed to the encoder
8652   ins_pipe(ialu_mem_reg);
8653 %}
8654 
8655 // Long Shift Instructions
8656 // Shift Left by one
8657 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8658 %{  // Long shift left of a register by an immI1 count (the count operand is an int immediate)
8659   match(Set dst (LShiftL dst shift));  // dst = dst << shift, in place
8660   effect(KILL cr);  // flags are declared clobbered
8661
8662   format %{ "salq    $dst, $shift" %}
8663   opcode(0xD1, 0x4); /* D1 /4 */
8664   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));  // $shift is not passed to the encoder; "_wide" REX helper for the 64-bit form
8665   ins_pipe(ialu_reg);
8666 %}
8667 
// Shift Left by one
// Memory form: matches a StoreL to $dst of (LShiftL (LoadL $dst) shift) with
// the immI1 count operand. No count byte is emitted (D1 /4). The flags
// register is declared killed.
instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8679 
// Shift Left by 8-bit immediate
// Long register form of LShiftL with an immI8 count. Encoding is delegated to
// the reg_opc_imm_wide enc_class (defined outside this chunk) with C1 /4.
// The flags register is declared killed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
8691 
// Shift Left by 8-bit immediate
// Memory form: matches a StoreL to $dst of (LShiftL (LoadL $dst) shift).
// The count is appended after the ModRM/memory bytes via Con8or32(shift).
// The flags register is declared killed.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8704 
// Shift Left by variable
// Long register form of LShiftL. The count is an int operand restricted to
// the rcx_RegI class and is not referenced by the encoding (D3 /4).
// The flags register is declared killed.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8716 
// Shift Left by variable
// Memory form: matches a StoreL to $dst of (LShiftL (LoadL $dst) shift) with
// the count in an rcx_RegI operand, which the encoding (D3 /4) does not
// reference. The flags register is declared killed.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8728 
// Arithmetic shift right by one
// Long register form of RShiftL with the immI1 count operand; $dst is shifted
// in place using D1 /7 with the wide REX enc_class. The flags register is
// declared killed.
instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8740 
// Arithmetic shift right by one
// Memory form: matches a StoreL to $dst of (RShiftL (LoadL $dst) shift) with
// the immI1 count operand. No count byte is emitted (D1 /7). The flags
// register is declared killed.
instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8752 
// Arithmetic Shift Right by 8-bit immediate
// Long register form of RShiftL with an immI8 count. Encoding is delegated to
// the reg_opc_imm_wide enc_class (defined outside this chunk) with C1 /7.
// The flags register is declared killed.
instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  // Register-only operation: use the same pipeline class as the sibling
  // salL_rReg_imm / shrL_rReg_imm forms rather than the memory class.
  ins_pipe(ialu_reg);
%}
8764 
// Arithmetic Shift Right by 8-bit immediate
// Memory form: matches a StoreL to $dst of (RShiftL (LoadL $dst) shift).
// The count is appended after the ModRM/memory bytes via Con8or32(shift).
// The flags register is declared killed.
instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8777 
// Arithmetic Shift Right by variable
// Long register form of RShiftL. The count is an int operand restricted to
// the rcx_RegI class and is not referenced by the encoding (D3 /7).
// The flags register is declared killed.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8789 
// Arithmetic Shift Right by variable
// Memory form: matches a StoreL to $dst of (RShiftL (LoadL $dst) shift) with
// the count in an rcx_RegI operand, which the encoding (D3 /7) does not
// reference. The flags register is declared killed.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8801 
// Logical shift right by one
// Long register form of URShiftL with the immI1 count operand; $dst is
// shifted in place using D1 /5 with the wide REX enc_class. The flags
// register is declared killed.
instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
  ins_pipe(ialu_reg);
%}
8813 
// Logical shift right by one
// Memory form: matches a StoreL to $dst of (URShiftL (LoadL $dst) shift) with
// the immI1 count operand. No count byte is emitted (D1 /5). The flags
// register is declared killed.
instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_imm);
%}
8825 
// Logical Shift Right by 8-bit immediate
// Long register form of URShiftL with an immI8 count. Encoding is delegated
// to the reg_opc_imm_wide enc_class (defined outside this chunk) with C1 /5.
// The flags register is declared killed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
8837 
8838 
// Logical Shift Right by 8-bit immediate
// Memory form: matches a StoreL to $dst of (URShiftL (LoadL $dst) shift).
// The count is appended after the ModRM/memory bytes via Con8or32(shift).
// The flags register is declared killed.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode(REX_mem_wide(dst), OpcP,
             RM_opc_mem(secondary, dst), Con8or32(shift));
  ins_pipe(ialu_mem_imm);
%}
8851 
// Logical Shift Right by variable
// Long register form of URShiftL. The count is an int operand restricted to
// the rcx_RegI class and is not referenced by the encoding (D3 /5).
// The flags register is declared killed.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8863 
// Logical Shift Right by variable
// Memory form: matches a StoreL to $dst of (URShiftL (LoadL $dst) shift) with
// the count in an rcx_RegI operand, which the encoding (D3 /5) does not
// reference. The flags register is declared killed.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
  ins_pipe(ialu_mem_reg);
%}
8875 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The whole (src << 24) >> 24 pattern is replaced by a single movsbl
// (0F BE) from $src into $dst; no flags effect is declared.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  opcode(0x0F, 0xBE);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8887 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// The whole (src << 16) >> 16 pattern is replaced by a single movswl
// (0F BF) from $src into $dst; no flags effect is declared.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  opcode(0x0F, 0xBF);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
8899 
8900 // ROL/ROR instructions
8901 
// ROL expand
// Helper with no match rule of its own: it is instantiated through the expand
// block of rolI_rReg_i1. Rotates $dst in place (D1 /0, no count byte) and
// declares the flags register killed.
instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
  effect(KILL cr, USE_DEF dst);

  format %{ "roll    $dst" %}
  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8911 
// ROL expand helper (no match rule): instantiated through the expand block of
// rolI_rReg_i8. Rotates $dst in place by the immI8 count; encoding is
// delegated to the reg_opc_imm enc_class with C1 /0. Flags are killed.
instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( reg_opc_imm(dst, shift) );
  ins_pipe(ialu_reg);
%}
8920 
// ROL expand helper (no match rule): instantiated through the expand blocks
// of rolI_rReg_Var_C0 and rolI_rReg_Var_C32. The count lives in an rcx_RegI
// operand that the encoding (D3 /0) does not reference; $dst is taken from
// no_rcx_RegI (presumably so it cannot be the count register).
instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "roll    $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
8930 // end of ROL expand
8931 
// Rotate Left by one
// Recognizes (dst << lshift) | (dst >>> rshift) where lshift is an immI1 and
// rshift an immI_M1 operand, and expands it to rolI_rReg_imm1.
instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_rReg_imm1(dst, cr);
  %}
%}
8941 
// Rotate Left by 8-bit immediate
// Recognizes (dst << lshift) | (dst >>> rshift) with constant counts. The
// predicate only admits the pattern when the two counts sum to a multiple of
// 32 (low five bits of the sum are zero). Expands to rolI_rReg_imm8 using the
// left-shift count as the rotate amount.
instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_rReg_imm8(dst, lshift, cr);
  %}
%}
8952 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (0 - shift)) with the count in an
// rcx_RegI operand, and expands it to rolI_rReg_CL.
instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
8962 
// Rotate Left by variable
// Recognizes (dst << shift) | (dst >>> (32 - shift)) with the count in an
// rcx_RegI operand, and expands it to rolI_rReg_CL.
instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_rReg_CL(dst, shift, cr);
  %}
%}
8972 
// ROR expand
// Helper with no match rule of its own: it is instantiated through the expand
// block of rorI_rReg_i1. Rotates $dst in place (D1 /1, no count byte) and
// declares the flags register killed.
instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorl    $dst" %}
  opcode(0xD1, 0x1); /* D1 /1 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
8983 
// ROR expand helper (no match rule): instantiated through the expand block of
// rorI_rReg_i8. Rotates $dst in place by the immI8 count; encoding is
// delegated to the reg_opc_imm enc_class with C1 /1. Flags are killed.
instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xC1, 0x1); /* C1 /1 ib */
  ins_encode(reg_opc_imm(dst, shift));
  ins_pipe(ialu_reg);
%}
8993 
// ROR expand helper (no match rule): instantiated through the expand blocks
// of rorI_rReg_Var_C0 and rorI_rReg_Var_C32. The count lives in an rcx_RegI
// operand that the encoding (D3 /1) does not reference; $dst is taken from
// no_rcx_RegI (presumably so it cannot be the count register).
instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorl    $dst, $shift" %}
  opcode(0xD3, 0x1); /* D3 /1 */
  ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9003 // end of ROR expand
9004 
// Rotate Right by one
// Recognizes (dst >>> rshift) | (dst << lshift) where rshift is an immI1 and
// lshift an immI_M1 operand, and expands it to rorI_rReg_imm1.
instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_rReg_imm1(dst, cr);
  %}
%}
9014 
// Rotate Right by 8-bit immediate
// Recognizes (dst >>> rshift) | (dst << lshift) with constant counts. The
// predicate only admits the pattern when the two counts sum to a multiple of
// 32 (low five bits of the sum are zero). Expands to rorI_rReg_imm8 using the
// right-shift count as the rotate amount.
instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_rReg_imm8(dst, rshift, cr);
  %}
%}
9025 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (0 - shift)) with the count in an
// rcx_RegI operand, and expands it to rorI_rReg_CL.
instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
9035 
// Rotate Right by variable
// Recognizes (dst >>> shift) | (dst << (32 - shift)) with the count in an
// rcx_RegI operand, and expands it to rorI_rReg_CL.
instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_rReg_CL(dst, shift, cr);
  %}
%}
9045 
9046 // for long rotate
// ROL expand
// Long helper with no match rule of its own: it is instantiated through the
// expand block of rolL_rReg_i1. Rotates $dst in place (D1 /0 with the wide
// REX enc_class) and declares the flags register killed.
instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
  effect(USE_DEF dst, KILL cr);

  format %{ "rolq    $dst" %}
  opcode(0xD1, 0x0); /* Opcode  D1 /0 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9056 
// Long ROL expand helper (no match rule): instantiated through the expand
// block of rolL_rReg_i8. Rotates $dst in place by the immI8 count; encoding
// is delegated to the reg_opc_imm_wide enc_class with C1 /0. Flags are killed.
instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( reg_opc_imm_wide(dst, shift) );
  ins_pipe(ialu_reg);
%}
9065 
// Long ROL expand helper (no match rule): instantiated through the expand
// blocks of rolL_rReg_Var_C0 and rolL_rReg_Var_C64. The count lives in an
// rcx_RegI operand that the encoding (D3 /0) does not reference; $dst is
// taken from no_rcx_RegL (presumably so it cannot be the count register).
instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rolq    $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9075 // end of ROL expand
9076 
// Rotate Left by one
// Recognizes the long pattern (dst << lshift) | (dst >>> rshift) where lshift
// is an immI1 and rshift an immI_M1 operand, and expands it to
// rolL_rReg_imm1.
instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));

  expand %{
    rolL_rReg_imm1(dst, cr);
  %}
%}
9086 
// Rotate Left by 8-bit immediate
// Recognizes the long pattern (dst << lshift) | (dst >>> rshift) with
// constant counts. The predicate only admits it when the two counts sum to a
// multiple of 64 (low six bits of the sum are zero). Expands to
// rolL_rReg_imm8 using the left-shift count as the rotate amount.
instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));

  expand %{
    rolL_rReg_imm8(dst, lshift, cr);
  %}
%}
9097 
// Rotate Left by variable
// Recognizes the long pattern (dst << shift) | (dst >>> (0 - shift)) with the
// int count in an rcx_RegI operand, and expands it to rolL_rReg_CL.
instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9107 
// Rotate Left by variable
// Recognizes the long pattern (dst << shift) | (dst >>> (64 - shift)) with
// the int count in an rcx_RegI operand, and expands it to rolL_rReg_CL.
instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));

  expand %{
    rolL_rReg_CL(dst, shift, cr);
  %}
%}
9117 
// ROR expand
// Long helper with no match rule of its own: it is instantiated through the
// expand block of rorL_rReg_i1. Rotates $dst in place (D1 /1 with the wide
// REX enc_class) and declares the flags register killed.
instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
%{
  effect(USE_DEF dst, KILL cr);

  format %{ "rorq    $dst" %}
  opcode(0xD1, 0x1); /* D1 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg);
%}
9128 
// Long ROR expand helper (no match rule): instantiated through the expand
// block of rorL_rReg_i8. Rotates $dst in place by the immI8 count; encoding
// is delegated to the reg_opc_imm_wide enc_class with C1 /1. Flags are killed.
instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xC1, 0x1); /* C1 /1 ib */
  ins_encode(reg_opc_imm_wide(dst, shift));
  ins_pipe(ialu_reg);
%}
9138 
// Long ROR expand helper (no match rule): instantiated through the expand
// blocks of rorL_rReg_Var_C0 and rorL_rReg_Var_C64. The count lives in an
// rcx_RegI operand that the encoding (D3 /1) does not reference; $dst is
// taken from no_rcx_RegL (presumably so it cannot be the count register).
instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "rorq    $dst, $shift" %}
  opcode(0xD3, 0x1); /* D3 /1 */
  ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
  ins_pipe(ialu_reg_reg);
%}
9148 // end of ROR expand
9149 
// Rotate Right by one
// Recognizes the long pattern (dst >>> rshift) | (dst << lshift) where rshift
// is an immI1 and lshift an immI_M1 operand, and expands it to
// rorL_rReg_imm1.
instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm1(dst, cr);
  %}
%}
9159 
// Rotate Right by 8-bit immediate
// Recognizes the long pattern (dst >>> rshift) | (dst << lshift) with
// constant counts. The predicate only admits it when the two counts sum to a
// multiple of 64 (low six bits of the sum are zero). Expands to
// rorL_rReg_imm8 using the right-shift count as the rotate amount.
instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
%{
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
  match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));

  expand %{
    rorL_rReg_imm8(dst, rshift, cr);
  %}
%}
9170 
// Rotate Right by variable
// Recognizes the long pattern (dst >>> shift) | (dst << (0 - shift)) with the
// int count in an rcx_RegI operand, and expands it to rorL_rReg_CL.
instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9180 
// Rotate Right by variable
// Recognizes the long pattern (dst >>> shift) | (dst << (64 - shift)) with
// the int count in an rcx_RegI operand, and expands it to rorL_rReg_CL.
instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));

  expand %{
    rorL_rReg_CL(dst, shift, cr);
  %}
%}
9190 
9191 // Logical Instructions
9192 
9193 // Integer Logical Instructions
9194 
9195 // And Instructions
// And Register with Register
// Two-operand int AND: $dst = $dst & $src, emitted with opcode 0x23 and a
// reg/reg ModRM. The flags register is declared killed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9207 
// And Register with Immediate 255
// The AND with the immI_255 operand is emitted as movzbl $dst, $dst (0F B6)
// instead of an andl. No flags effect is declared for this form.
instruct andI_rReg_imm255(rRegI dst, immI_255 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9218 
// And Register with Immediate 255 and promote to long
// Matches ConvI2L of (src & immI_255) and emits one movzbl (0F B6) from the
// int $src into the long $dst, covering both the mask and the conversion.
// No flags effect is declared.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9229 
// And Register with Immediate 65535
// The AND with the immI_65535 operand is emitted as movzwl $dst, $dst (0F B7)
// instead of an andl. No flags effect is declared for this form.
instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
%{
  match(Set dst (AndI dst src));

  format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9240 
// And Register with Immediate 65535 and promote to long
// Matches ConvI2L of (src & immI_65535) and emits one movzwl (0F B7) from the
// int $src into the long $dst, covering both the mask and the conversion.
// No flags effect is declared.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
  ins_pipe(ialu_reg);
%}
9251 
// And Register with Immediate
// General int AND with an immI constant (81 /4). The opcode/ModRM bytes come
// from the OpcSErm enc_class and the constant from Con8or32 (both defined
// outside this chunk; presumably they pick an 8- or 32-bit immediate form).
// The flags register is declared killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9263 
// And Register with Memory
// Folds the LoadI of $src into the AND: $dst = $dst & [src], opcode 0x23 with
// a reg/mem ModRM. Given an explicit ins_cost of 125. Flags are killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x23);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9276 
// And Memory with Register
// Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) & $src)
// and emits andb (opcode 0x20) with $src as the register operand. Uses the
// REX_breg_mem prefix enc_class for the byte register. Flags are killed.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  opcode(0x20);
  ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9289 
// And Memory with Register (int)
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) & $src) and
// emits andl (opcode 0x21) with $src as the register operand and $dst as the
// memory operand. Flags are killed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9301 
// And Memory with Immediate
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) & constant).
// Emitted as 81 /4; the opcode byte goes through OpcSE(src) and the constant
// through Con8or32(src) (enc_classes defined outside this chunk).
// Flags are killed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9315 
9316 // BMI1 instructions
// ANDN, int, memory second source. Matches (src1 ^ minus_1) & LoadI(src2),
// i.e. the complement of src1 ANDed with a value loaded from memory, and
// emits a three-operand andnl. Only selected when UseBMI1Instructions is set.
// Flags are killed.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9330 
// ANDN, int, register sources. Matches (src1 ^ minus_1) & src2, i.e. the
// complement of src1 ANDed with src2, and emits a three-operand andnl.
// Only selected when UseBMI1Instructions is set. Flags are killed.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9343 
// BLSI, int, register source. Matches (0 - src) & src and emits blsil.
// Only selected when UseBMI1Instructions is set. Flags are killed.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9356 
// BLSI, int, memory source. Matches (0 - LoadI(src)) & LoadI(src), where
// both loads use the same memory operand, and emits blsil with an address
// operand. Only selected when UseBMI1Instructions is set. Flags are killed.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9370 
// BLSMSK, int, memory source. Matches (LoadI(src) + minus_1) ^ LoadI(src),
// where both loads use the same memory operand, and emits blsmskl with an
// address operand. Only selected when UseBMI1Instructions is set.
// Flags are killed.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9385 
// BLSMSK, int, register source. Matches (src + minus_1) ^ src and emits
// blsmskl. Only selected when UseBMI1Instructions is set. Flags are killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9400 
// BLSR, int, register source. Matches (src + minus_1) & src and emits blsrl.
// Only selected when UseBMI1Instructions is set. Flags are killed.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only operation: use the register pipeline class, matching the
  // other BMI1 reg/reg forms (blsiI_rReg_rReg, blsmskI_rReg_rReg).
  ins_pipe(ialu_reg);
%}
9415 
// BLSR, int, memory source. Matches (LoadI(src) + minus_1) & LoadI(src),
// where both loads use the same memory operand, and emits blsrl with an
// address operand. Only selected when UseBMI1Instructions is set.
// Flags are killed.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source operation: use the reg/mem pipeline class, matching the
  // other BMI1 memory forms (blsiI_rReg_mem, blsmskI_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
9431 
9432 // Or Instructions
// Or Register with Register
// Two-operand int OR: $dst = $dst | $src, emitted with opcode 0x0B and a
// reg/reg ModRM. The flags register is declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9444 
// Or Register with Immediate
// Int OR with an immI constant (81 /1). The opcode/ModRM bytes come from the
// OpcSErm enc_class and the constant from Con8or32 (both defined outside this
// chunk). The flags register is declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x01); /* Opcode 81 /1 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9456 
// Or Register with Memory
// Folds the LoadI of $src into the OR: $dst = $dst | [src], opcode 0x0B with
// a reg/mem ModRM. Given an explicit ins_cost of 125. Flags are killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x0B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9469 
// Or Memory with Register
// Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) | $src)
// and emits orb (opcode 0x08) with $src as the register operand. Uses the
// REX_breg_mem prefix enc_class for the byte register. Flags are killed.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orb    $dst, $src\t# byte" %}
  opcode(0x08);
  ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9482 
// Or Memory with Register (int)
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) | $src) and
// emits orl (opcode 0x09) with $src as the register operand and $dst as the
// memory operand. Flags are killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9494 
// Or Memory with Immediate
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) | constant).
// Emitted as 81 /1; the opcode byte goes through OpcSE(src) and the constant
// through Con8or32(src) (enc_classes defined outside this chunk).
// Flags are killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  opcode(0x81, 0x1); /* Opcode 81 /1 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9508 
9509 // Xor Instructions
// Xor Register with Register
// Two-operand int XOR: $dst = $dst ^ $src, emitted with opcode 0x33 and a
// reg/reg ModRM. The flags register is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9521 
// Xor Register with Immediate -1
// XOR with the immI_M1 operand is emitted through the assembler's notl() on
// $dst. No flags operand or KILL effect is declared for this form.
// NOTE(review): the format string prints "not" while the encoder calls notl().
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "not    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9532 
// Xor Register with Immediate
// Int XOR with an immI constant (81 /6). The opcode/ModRM bytes come from the
// OpcSErm enc_class and the constant from Con8or32 (both defined outside this
// chunk). The flags register is declared killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x06); /* Opcode 81 /6 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9544 
// Xor Register with Memory
// Folds the LoadI of $src into the XOR: $dst = $dst ^ [src], opcode 0x33 with
// a reg/mem ModRM. Given an explicit ins_cost of 125. Flags are killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x33);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9557 
// Xor Memory with Register
// Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) ^ $src)
// and emits xorb (opcode 0x30) with $src as the register operand. Uses the
// REX_breg_mem prefix enc_class for the byte register. Flags are killed.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorb    $dst, $src\t# byte" %}
  opcode(0x30);
  ins_encode(REX_breg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9570 
// Xor Memory with Register (int)
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) ^ $src) and
// emits xorl (opcode 0x31) with $src as the register operand and $dst as the
// memory operand. Flags are killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9582 
// Xor Memory with Immediate
// Read-modify-write: matches a StoreI to $dst of ((LoadI $dst) ^ constant).
// Emitted as 81 /6; the opcode byte goes through OpcSE(src) and the constant
// through Con8or32(src) (enc_classes defined outside this chunk).
// Flags are killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  opcode(0x81, 0x6); /* Opcode 81 /6 id */
  ins_encode(REX_mem(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9596 
9597 
9598 // Long Logical Instructions
9599 
9600 // And Instructions
// And Register with Register
// Two-operand long AND: $dst = $dst & $src, opcode 0x23 with the wide REX
// enc_class and a reg/reg ModRM. The flags register is declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
9612 
// And Register with Immediate 255
// The long AND with the immL_255 operand is emitted as movzbq $dst, $dst
// (0F B6 with the wide REX enc_class) instead of an andq. No flags effect is
// declared for this form.
instruct andL_rReg_imm255(rRegL dst, immL_255 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
  opcode(0x0F, 0xB6);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9623 
// And Register with Immediate 65535
// The long AND with the immL_65535 operand is emitted as movzwq $dst, $dst
// (0F B7 with the wide REX enc_class) instead of an andq. No flags effect is
// declared for this form.
instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
%{
  match(Set dst (AndL dst src));

  format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
  opcode(0x0F, 0xB7);
  ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
  ins_pipe(ialu_reg);
%}
9634 
// And Register with Immediate
// Long AND with an immL32 constant (81 /4). The opcode/ModRM bytes come from
// the OpcSErm_wide enc_class and the constant from Con8or32 (both defined
// outside this chunk). The flags register is declared killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x04); /* Opcode 81 /4 */
  ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}
9646 
// And Register with Memory
// Folds the LoadL of $src into the AND: $dst = $dst & [src], opcode 0x23 with
// the wide REX enc_class and a reg/mem ModRM. Given an explicit ins_cost of
// 125. Flags are killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x23);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
9659 
// And Memory with Register
// Long read-modify-write: matches a StoreL to $dst of ((LoadL $dst) & $src)
// and emits andq (opcode 0x21) with $src as the register operand and $dst as
// the memory operand. Flags are killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
9672 
// And Memory with Immediate
// Long read-modify-write: matches a StoreL to $dst of ((LoadL $dst) &
// constant) with an immL32 operand. Emitted as 81 /4; the opcode byte goes
// through OpcSE(src) and the constant through Con8or32(src) (enc_classes
// defined outside this chunk). Flags are killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  ins_encode(REX_mem_wide(dst), OpcSE(src),
             RM_opc_mem(secondary, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}
9686 
9687 // BMI1 instructions
// ANDN, long, memory second source. Matches (src1 ^ minus_1) & LoadL(src2),
// i.e. the complement of src1 ANDed with a value loaded from memory, and
// emits a three-operand andnq. Only selected when UseBMI1Instructions is set.
// Flags are killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
9701 
// Register-only form of the BMI1 and-not: matches
// (AndL (XorL src1 minus_1) src2) and emits one andnq.  Selected only when
// UseBMI1Instructions is set; flags are declared killed.
// All operands are registers, so the register-register pipeline class is
// used rather than the memory-operand class of the _mem variant.
9702 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
9703   match(Set dst (AndL (XorL src1 minus_1) src2));
9704   predicate(UseBMI1Instructions);
9705   effect(KILL cr);
9706 
9707   format %{ "andnq  $dst, $src1, $src2" %}
9708 
9709   ins_encode %{
9710     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
9711   %}
9712   ins_pipe(ialu_reg_reg);
9713 %}
9714 
// Matches (AndL (SubL imm_zero src) src), i.e. src & (0 - src), which keeps
// only the lowest set bit of src, and emits one blsiq.  Selected only when
// UseBMI1Instructions is set; flags are declared killed.
9715 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
9716   match(Set dst (AndL (SubL imm_zero src) src));
9717   predicate(UseBMI1Instructions);
9718   effect(KILL cr);
9719 
9720   format %{ "blsiq  $dst, $src" %}
9721 
9722   ins_encode %{
9723     __ blsiq($dst$$Register, $src$$Register);
9724   %}
9725   ins_pipe(ialu_reg);
9726 %}
9727 
// Memory form of blsiL: matches x & (0 - x) where both occurrences of x are
// (LoadL src) on the same memory operand, and emits blsiq with an address.
// Selected only when UseBMI1Instructions is set; flags killed; cost is 125.
9728 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
9729   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9730   predicate(UseBMI1Instructions);
9731   effect(KILL cr);
9732 
9733   ins_cost(125);
9734   format %{ "blsiq  $dst, $src" %}
9735 
9736   ins_encode %{
9737     __ blsiq($dst$$Register, $src$$Address);
9738   %}
9739   ins_pipe(ialu_reg_mem);
9740 %}
9741 
// Matches (XorL (AddL x minus_1) x) with x = (LoadL src), i.e. x ^ (x - 1),
// a mask covering the lowest set bit of x and every bit below it, and emits
// blsmskq with a memory operand.  Selected only when UseBMI1Instructions is
// set; flags are declared killed; cost is 125.
9742 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9743 %{
9744   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
9745   predicate(UseBMI1Instructions);
9746   effect(KILL cr);
9747 
9748   ins_cost(125);
9749   format %{ "blsmskq $dst, $src" %}
9750 
9751   ins_encode %{
9752     __ blsmskq($dst$$Register, $src$$Address);
9753   %}
9754   ins_pipe(ialu_reg_mem);
9755 %}
9756 
// Register form of blsmskL: matches (XorL (AddL src minus_1) src), i.e.
// src ^ (src - 1), and emits one blsmskq.  Selected only when
// UseBMI1Instructions is set; flags are declared killed.
9757 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9758 %{
9759   match(Set dst (XorL (AddL src minus_1) src));
9760   predicate(UseBMI1Instructions);
9761   effect(KILL cr);
9762 
9763   format %{ "blsmskq $dst, $src" %}
9764 
9765   ins_encode %{
9766     __ blsmskq($dst$$Register, $src$$Register);
9767   %}
9768 
9769   ins_pipe(ialu_reg);
9770 %}
9771 
// Matches (AndL (AddL src minus_1) src), i.e. src & (src - 1), which clears
// the lowest set bit of src, and emits one blsrq.  Selected only when
// UseBMI1Instructions is set; flags are declared killed.
9772 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9773 %{
9774   match(Set dst (AndL (AddL src minus_1) src) );
9775   predicate(UseBMI1Instructions);
9776   effect(KILL cr);
9777 
9778   format %{ "blsrq  $dst, $src" %}
9779 
9780   ins_encode %{
9781     __ blsrq($dst$$Register, $src$$Register);
9782   %}
9783 
9784   ins_pipe(ialu_reg);
9785 %}
9786 
// Memory form of blsrL: matches x & (x - 1) where both occurrences of x are
// (LoadL src) on the same memory operand, and emits blsrq with an address.
// Selected only when UseBMI1Instructions is set; flags killed; cost is 125.
// Uses the register/memory pipeline class, matching the other BMI1 memory
// forms (blsiL_rReg_mem, blsmskL_rReg_mem).
9787 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9788 %{
9789   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
9790   predicate(UseBMI1Instructions);
9791   effect(KILL cr);
9792 
9793   ins_cost(125);
9794   format %{ "blsrq  $dst, $src" %}
9795 
9796   ins_encode %{
9797     __ blsrq($dst$$Register, $src$$Address);
9798   %}
9799 
9800   ins_pipe(ialu_reg_mem);
9801 %}
9802 
9803 // Or Instructions
9804 // Or Register with Register
// Matches (OrL dst src) on two long registers; the result replaces dst
// (opcode 0x0B, printed as "orq").  Flags are declared killed.
9805 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9806 %{
9807   match(Set dst (OrL dst src));
9808   effect(KILL cr);
9809 
9810   format %{ "orq     $dst, $src\t# long" %}
9811   opcode(0x0B);
9812   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9813   ins_pipe(ialu_reg_reg);
9814 %}
9815 
9816 // Use any_RegP to match R15 (TLS register) without spilling.
// Matches (OrL dst (CastP2X src)): ORs a long with a pointer register taken
// as an integer, folding the CastP2X into the instruction.  Same opcode and
// encoding classes as orL_rReg; src is declared any_RegP.  Flags killed.
9817 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9818   match(Set dst (OrL dst (CastP2X src)));
9819   effect(KILL cr);
9820 
9821   format %{ "orq     $dst, $src\t# long" %}
9822   opcode(0x0B);
9823   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9824   ins_pipe(ialu_reg_reg);
9825 %}
9826 
9827 
9828 // Or Register with Immediate
// Matches (OrL dst src) where src is an immL32 operand; the result replaces
// dst.  Encoded with opcode 0x81 and secondary opcode /1 through the
// OpcSErm_wide and Con8or32 classes.  Flags are declared killed.
9829 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9830 %{
9831   match(Set dst (OrL dst src));
9832   effect(KILL cr);
9833 
9834   format %{ "orq     $dst, $src\t# long" %}
9835   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9836   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9837   ins_pipe(ialu_reg);
9838 %}
9839 
9840 // Or Register with Memory
// Matches (OrL dst (LoadL src)): the long load is folded into the OR so the
// memory operand is consumed directly (opcode 0x0B).  Flags are declared
// killed; cost is 125.
9841 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9842 %{
9843   match(Set dst (OrL dst (LoadL src)));
9844   effect(KILL cr);
9845 
9846   ins_cost(125);
9847   format %{ "orq     $dst, $src\t# long" %}
9848   opcode(0x0B);
9849   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9850   ins_pipe(ialu_reg_mem);
9851 %}
9852 
9853 // Or Memory with Register
// Matches the read-modify-write tree StoreL dst (OrL (LoadL dst) src): an OR
// applied to a long in memory in place (opcode 0x09 /r).  The encoding
// classes are invoked as (src, dst): register first, memory operand second.
// Flags are declared killed; cost is 150.
9854 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9855 %{
9856   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9857   effect(KILL cr);
9858 
9859   ins_cost(150);
9860   format %{ "orq     $dst, $src\t# long" %}
9861   opcode(0x09); /* Opcode 09 /r */
9862   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9863   ins_pipe(ialu_mem_reg);
9864 %}
9865 
9866 // Or Memory with Immediate
// Matches StoreL dst (OrL (LoadL dst) src) with an immL32 source: an
// in-place OR of a long in memory with an immediate (opcode 0x81, secondary
// /1 supplied to RM_opc_mem).  Flags are declared killed; cost is 125.
9867 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9868 %{
9869   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9870   effect(KILL cr);
9871 
9872   ins_cost(125);
9873   format %{ "orq     $dst, $src\t# long" %}
9874   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9875   ins_encode(REX_mem_wide(dst), OpcSE(src),
9876              RM_opc_mem(secondary, dst), Con8or32(src));
9877   ins_pipe(ialu_mem_imm);
9878 %}
9879 
9880 // Xor Instructions
9881 // Xor Register with Register
// Matches (XorL dst src) on two long registers; the result replaces dst
// (opcode 0x33, printed as "xorq").  Flags are declared killed.
9882 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9883 %{
9884   match(Set dst (XorL dst src));
9885   effect(KILL cr);
9886 
9887   format %{ "xorq    $dst, $src\t# long" %}
9888   opcode(0x33);
9889   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9890   ins_pipe(ialu_reg_reg);
9891 %}
9892 
9893 // Xor Register with Immediate -1
// Matches (XorL dst imm) where imm is the immL_M1 constant and emits notq on
// dst instead of an xor with an immediate.  Unlike the neighbouring xor
// rules this one takes no rFlagsReg operand and declares no KILL effect.
9894 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9895   match(Set dst (XorL dst imm));
9896 
9897   format %{ "notq   $dst" %}
9898   ins_encode %{
9899      __ notq($dst$$Register);
9900   %}
9901   ins_pipe(ialu_reg);
9902 %}
9903 
9904 // Xor Register with Immediate
// Matches (XorL dst src) where src is an immL32 operand; the result replaces
// dst.  Encoded with opcode 0x81 and secondary opcode /6 through the
// OpcSErm_wide and Con8or32 classes.  Flags are declared killed.
9905 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9906 %{
9907   match(Set dst (XorL dst src));
9908   effect(KILL cr);
9909 
9910   format %{ "xorq    $dst, $src\t# long" %}
9911   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9912   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9913   ins_pipe(ialu_reg);
9914 %}
9915 
9916 // Xor Register with Memory
// Matches (XorL dst (LoadL src)): the long load is folded into the XOR so
// the memory operand is consumed directly (opcode 0x33).  Flags are declared
// killed; cost is 125.
9917 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9918 %{
9919   match(Set dst (XorL dst (LoadL src)));
9920   effect(KILL cr);
9921 
9922   ins_cost(125);
9923   format %{ "xorq    $dst, $src\t# long" %}
9924   opcode(0x33);
9925   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9926   ins_pipe(ialu_reg_mem);
9927 %}
9928 
9929 // Xor Memory with Register
// Matches the read-modify-write tree StoreL dst (XorL (LoadL dst) src): an
// XOR applied to a long in memory in place (opcode 0x31 /r).  The encoding
// classes are invoked as (src, dst): register first, memory operand second.
// Flags are declared killed; cost is 150.
9930 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9931 %{
9932   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9933   effect(KILL cr);
9934 
9935   ins_cost(150);
9936   format %{ "xorq    $dst, $src\t# long" %}
9937   opcode(0x31); /* Opcode 31 /r */
9938   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9939   ins_pipe(ialu_mem_reg);
9940 %}
9941 
9942 // Xor Memory with Immediate
// Matches StoreL dst (XorL (LoadL dst) src) with an immL32 source: an
// in-place XOR of a long in memory with an immediate (opcode 0x81, secondary
// /6 supplied to RM_opc_mem).  Flags are declared killed; cost is 125.
9943 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9944 %{
9945   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9946   effect(KILL cr);
9947 
9948   ins_cost(125);
9949   format %{ "xorq    $dst, $src\t# long" %}
9950   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9951   ins_encode(REX_mem_wide(dst), OpcSE(src),
9952              RM_opc_mem(secondary, dst), Con8or32(src));
9953   ins_pipe(ialu_mem_imm);
9954 %}
9955 
9956 // Convert Int to Boolean
// Matches (Conv2B src) for an int source.  Three-instruction sequence:
// testl src against itself, setnz into dst's byte, then movzbl to widen that
// byte to the full register -- so dst ends up 0 when src is zero and 1
// otherwise.  Flags are declared killed.
9957 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9958 %{
9959   match(Set dst (Conv2B src));
9960   effect(KILL cr);
9961 
9962   format %{ "testl   $src, $src\t# ci2b\n\t"
9963             "setnz   $dst\n\t"
9964             "movzbl  $dst, $dst" %}
9965   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9966              setNZ_reg(dst),
9967              REX_reg_breg(dst, dst), // movzbl
9968              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9969   ins_pipe(pipe_slow); // XXX
9970 %}
9971 
9972 // Convert Pointer to Boolean
// Matches (Conv2B src) for a pointer source.  Same sequence as convI2B but
// the test uses the wide REX prefix class (printed as "testq"): dst ends up
// 0 when the pointer is zero and 1 otherwise.  Flags are declared killed.
9973 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9974 %{
9975   match(Set dst (Conv2B src));
9976   effect(KILL cr);
9977 
9978   format %{ "testq   $src, $src\t# cp2b\n\t"
9979             "setnz   $dst\n\t"
9980             "movzbl  $dst, $dst" %}
9981   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9982              setNZ_reg(dst),
9983              REX_reg_breg(dst, dst), // movzbl
9984              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9985   ins_pipe(pipe_slow); // XXX
9986 %}
9987 
// Matches (CmpLTMask p q) in its general form.  Sequence: cmpl p,q; setlt
// into dst's byte; movzbl to widen it; negl to turn the 0/1 into 0/-1.
// Flags are declared killed; cost is 400, so cheaper specialised rules that
// match the same node are preferred.
9988 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9989 %{
9990   match(Set dst (CmpLTMask p q));
9991   effect(KILL cr);
9992 
9993   ins_cost(400);
9994   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9995             "setlt   $dst\n\t"
9996             "movzbl  $dst, $dst\n\t"
9997             "negl    $dst" %}
9998   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9999              setLT_reg(dst),
10000              REX_reg_breg(dst, dst), // movzbl
10001              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
10002              neg_reg(dst));
10003   ins_pipe(pipe_slow);
10004 %}
10005 
// Matches (CmpLTMask dst zero) with an immI0 right-hand side and implements
// it in place with a single sarl by 31.  Flags are declared killed; cost is
// 100, well below the general cmpLTMask rule.
10006 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
10007 %{
10008   match(Set dst (CmpLTMask dst zero));
10009   effect(KILL cr);
10010 
10011   ins_cost(100);
10012   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10013   ins_encode %{
10014   __ sarl($dst$$Register, 31);
10015   %}
10016   ins_pipe(ialu_reg);
10017 %}
10018 
10019 /* Better to save a register than avoid a branch */
// Matches p = ((CmpLTMask p q) & y) + (p - q) as one unit.  Instead of
// materialising the mask it subtracts q from p, and adds y only when the
// subtraction left "less than" (the jccb on greaterEqual skips the add).
// Flags are declared killed; cost is 300.
10020 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10021 %{
10022   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10023   effect(KILL cr);
10024   ins_cost(300);
10025   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
10026             "jge    done\n\t"
10027             "addl   $p,$y\n"
10028             "done:  " %}
10029   ins_encode %{
10030     Register Rp = $p$$Register;
10031     Register Rq = $q$$Register;
10032     Register Ry = $y$$Register;
10033     Label done;
10034     __ subl(Rp, Rq);
10035     __ jccb(Assembler::greaterEqual, done);
10036     __ addl(Rp, Ry);
10037     __ bind(done);
10038   %}
10039   ins_pipe(pipe_cmplt);
10040 %}
10041 
10042 /* Better to save a register than avoid a branch */
// Matches y = (CmpLTMask p q) & y as one unit.  Instead of materialising the
// mask it compares p with q and clears y (xorl y,y) unless p < q; when p < q
// the jccb on less skips the clear and y is left untouched.
// Flags are declared killed; cost is 300.
10043 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10044 %{
10045   match(Set y (AndI (CmpLTMask p q) y));
10046   effect(KILL cr);
10047 
10048   ins_cost(300);
10049 
10050   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
10051             "jlt      done\n\t"
10052             "xorl     $y, $y\n"
10053             "done:  " %}
10054   ins_encode %{
10055     Register Rp = $p$$Register;
10056     Register Rq = $q$$Register;
10057     Register Ry = $y$$Register;
10058     Label done;
10059     __ cmpl(Rp, Rq);
10060     __ jccb(Assembler::less, done);
10061     __ xorl(Ry, Ry);
10062     __ bind(done);
10063   %}
10064   ins_pipe(pipe_cmplt);
10065 %}
10066 
10067 
10068 //---------- FP Instructions------------------------------------------------
10069 
// Matches (CmpF src1 src2) producing an rFlagsRegU result: ucomiss followed
// by emit_cmpfp_fixup.  The helper is defined outside this chunk; per the
// format text it skips when parity is clear and otherwise rewrites the saved
// flags via pushfq/andq/popfq ("saw NaN, set CF") -- confirm against its
// definition.  Cost is 145.
10070 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10071 %{
10072   match(Set cr (CmpF src1 src2));
10073 
10074   ins_cost(145);
10075   format %{ "ucomiss $src1, $src2\n\t"
10076             "jnp,s   exit\n\t"
10077             "pushfq\t# saw NaN, set CF\n\t"
10078             "andq    [rsp], #0xffffff2b\n\t"
10079             "popfq\n"
10080     "exit:" %}
10081   ins_encode %{
10082     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10083     emit_cmpfp_fixup(_masm);
10084   %}
10085   ins_pipe(pipe_slow);
10086 %}
10087 
// Matches (CmpF src1 src2) producing an rFlagsRegUCF result: a bare ucomiss
// with no flag fixup afterwards.  Cost is 100, lower than the rFlagsRegU
// variant that appends the fixup.
10088 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10089   match(Set cr (CmpF src1 src2));
10090 
10091   ins_cost(100);
10092   format %{ "ucomiss $src1, $src2" %}
10093   ins_encode %{
10094     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10095   %}
10096   ins_pipe(pipe_slow);
10097 %}
10098 
// Matches (CmpF src1 (LoadF src2)) producing an rFlagsRegU result: ucomiss
// with the float load folded in as a memory operand, followed by
// emit_cmpfp_fixup (defined outside this chunk; the format text shows the
// sequence it is expected to emit).  Cost is 145.
10099 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
10100 %{
10101   match(Set cr (CmpF src1 (LoadF src2)));
10102 
10103   ins_cost(145);
10104   format %{ "ucomiss $src1, $src2\n\t"
10105             "jnp,s   exit\n\t"
10106             "pushfq\t# saw NaN, set CF\n\t"
10107             "andq    [rsp], #0xffffff2b\n\t"
10108             "popfq\n"
10109     "exit:" %}
10110   ins_encode %{
10111     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10112     emit_cmpfp_fixup(_masm);
10113   %}
10114   ins_pipe(pipe_slow);
10115 %}
10116 
// Matches (CmpF src1 (LoadF src2)) producing an rFlagsRegUCF result: a bare
// ucomiss with a memory operand and no flag fixup.  Cost is 100.
10117 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10118   match(Set cr (CmpF src1 (LoadF src2)));
10119 
10120   ins_cost(100);
10121   format %{ "ucomiss $src1, $src2" %}
10122   ins_encode %{
10123     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10124   %}
10125   ins_pipe(pipe_slow);
10126 %}
10127 
// Matches (CmpF src con) with a float constant, producing an rFlagsRegU
// result: ucomiss against the constant's slot ($constantaddress), followed
// by emit_cmpfp_fixup (defined outside this chunk; the format text shows the
// sequence it is expected to emit).  Cost is 145.
10128 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
10129   match(Set cr (CmpF src con));
10130 
10131   ins_cost(145);
10132   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10133             "jnp,s   exit\n\t"
10134             "pushfq\t# saw NaN, set CF\n\t"
10135             "andq    [rsp], #0xffffff2b\n\t"
10136             "popfq\n"
10137     "exit:" %}
10138   ins_encode %{
10139     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10140     emit_cmpfp_fixup(_masm);
10141   %}
10142   ins_pipe(pipe_slow);
10143 %}
10144 
// Matches (CmpF src con) with a float constant, producing an rFlagsRegUCF
// result: a bare ucomiss against the constant's slot with no flag fixup.
// Cost is 100.
10145 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10146   match(Set cr (CmpF src con));
10147   ins_cost(100);
10148   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10149   ins_encode %{
10150     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10151   %}
10152   ins_pipe(pipe_slow);
10153 %}
10154 
// Matches (CmpD src1 src2) producing an rFlagsRegU result: ucomisd followed
// by emit_cmpfp_fixup.  The helper is defined outside this chunk; per the
// format text it skips when parity is clear and otherwise rewrites the saved
// flags via pushfq/andq/popfq ("saw NaN, set CF") -- confirm against its
// definition.  Cost is 145.
10155 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10156 %{
10157   match(Set cr (CmpD src1 src2));
10158 
10159   ins_cost(145);
10160   format %{ "ucomisd $src1, $src2\n\t"
10161             "jnp,s   exit\n\t"
10162             "pushfq\t# saw NaN, set CF\n\t"
10163             "andq    [rsp], #0xffffff2b\n\t"
10164             "popfq\n"
10165     "exit:" %}
10166   ins_encode %{
10167     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10168     emit_cmpfp_fixup(_masm);
10169   %}
10170   ins_pipe(pipe_slow);
10171 %}
10172 
// Matches (CmpD src1 src2) producing an rFlagsRegUCF result: a bare ucomisd
// with no flag fixup afterwards.  Cost is 100, lower than the rFlagsRegU
// variant that appends the fixup.
// The format text mirrors exactly what is emitted, in line with the other
// *_CF compare rules.
10173 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10174   match(Set cr (CmpD src1 src2));
10175 
10176   ins_cost(100);
10177   format %{ "ucomisd $src1, $src2" %}
10178   ins_encode %{
10179     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10180   %}
10181   ins_pipe(pipe_slow);
10182 %}
10183 
// Matches (CmpD src1 (LoadD src2)) producing an rFlagsRegU result: ucomisd
// with the double load folded in as a memory operand, followed by
// emit_cmpfp_fixup (defined outside this chunk; the format text shows the
// sequence it is expected to emit).  Cost is 145.
10184 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10185 %{
10186   match(Set cr (CmpD src1 (LoadD src2)));
10187 
10188   ins_cost(145);
10189   format %{ "ucomisd $src1, $src2\n\t"
10190             "jnp,s   exit\n\t"
10191             "pushfq\t# saw NaN, set CF\n\t"
10192             "andq    [rsp], #0xffffff2b\n\t"
10193             "popfq\n"
10194     "exit:" %}
10195   ins_encode %{
10196     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10197     emit_cmpfp_fixup(_masm);
10198   %}
10199   ins_pipe(pipe_slow);
10200 %}
10201 
// Matches (CmpD src1 (LoadD src2)) producing an rFlagsRegUCF result: a bare
// ucomisd with a memory operand and no flag fixup.  Cost is 100.
10202 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10203   match(Set cr (CmpD src1 (LoadD src2)));
10204 
10205   ins_cost(100);
10206   format %{ "ucomisd $src1, $src2" %}
10207   ins_encode %{
10208     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10209   %}
10210   ins_pipe(pipe_slow);
10211 %}
10212 
// Matches (CmpD src con) with a double constant, producing an rFlagsRegU
// result: ucomisd against the constant's slot ($constantaddress), followed
// by emit_cmpfp_fixup (defined outside this chunk; the format text shows the
// sequence it is expected to emit).  Cost is 145.
10213 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10214   match(Set cr (CmpD src con));
10215 
10216   ins_cost(145);
10217   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10218             "jnp,s   exit\n\t"
10219             "pushfq\t# saw NaN, set CF\n\t"
10220             "andq    [rsp], #0xffffff2b\n\t"
10221             "popfq\n"
10222     "exit:" %}
10223   ins_encode %{
10224     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10225     emit_cmpfp_fixup(_masm);
10226   %}
10227   ins_pipe(pipe_slow);
10228 %}
10229 
// Matches (CmpD src con) with a double constant, producing an rFlagsRegUCF
// result: a bare ucomisd against the constant's slot with no flag fixup.
// Cost is 100.
10230 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10231   match(Set cr (CmpD src con));
10232   ins_cost(100);
10233   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10234   ins_encode %{
10235     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10236   %}
10237   ins_pipe(pipe_slow);
10238 %}
10239 
10240 // Compare into -1,0,1
// Matches (CmpF3 src1 src2), the three-way float compare into an int
// register.  Emits ucomiss and then emit_cmpfp3, which is defined outside
// this chunk; per the format text dst is preset to -1 and kept on parity or
// "below", otherwise setne/movzbl leave 0 or 1 -- confirm against the helper.
// Flags are declared killed; cost is 275.
10241 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10242 %{
10243   match(Set dst (CmpF3 src1 src2));
10244   effect(KILL cr);
10245 
10246   ins_cost(275);
10247   format %{ "ucomiss $src1, $src2\n\t"
10248             "movl    $dst, #-1\n\t"
10249             "jp,s    done\n\t"
10250             "jb,s    done\n\t"
10251             "setne   $dst\n\t"
10252             "movzbl  $dst, $dst\n"
10253     "done:" %}
10254   ins_encode %{
10255     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10256     emit_cmpfp3(_masm, $dst$$Register);
10257   %}
10258   ins_pipe(pipe_slow);
10259 %}
10260 
10261 // Compare into -1,0,1
// Matches (CmpF3 src1 (LoadF src2)): three-way float compare with the second
// operand read straight from memory.  Emits ucomiss with an address and then
// emit_cmpfp3 (defined outside this chunk; the format text shows the
// sequence it is expected to emit).  Flags killed; cost is 275.
10262 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10263 %{
10264   match(Set dst (CmpF3 src1 (LoadF src2)));
10265   effect(KILL cr);
10266 
10267   ins_cost(275);
10268   format %{ "ucomiss $src1, $src2\n\t"
10269             "movl    $dst, #-1\n\t"
10270             "jp,s    done\n\t"
10271             "jb,s    done\n\t"
10272             "setne   $dst\n\t"
10273             "movzbl  $dst, $dst\n"
10274     "done:" %}
10275   ins_encode %{
10276     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10277     emit_cmpfp3(_masm, $dst$$Register);
10278   %}
10279   ins_pipe(pipe_slow);
10280 %}
10281 
10282 // Compare into -1,0,1
// Matches (CmpF3 src con): three-way float compare against a constant read
// from its $constantaddress slot.  Emits ucomiss and then emit_cmpfp3
// (defined outside this chunk; the format text shows the sequence it is
// expected to emit).  Flags killed; cost is 275.
10283 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10284   match(Set dst (CmpF3 src con));
10285   effect(KILL cr);
10286 
10287   ins_cost(275);
10288   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10289             "movl    $dst, #-1\n\t"
10290             "jp,s    done\n\t"
10291             "jb,s    done\n\t"
10292             "setne   $dst\n\t"
10293             "movzbl  $dst, $dst\n"
10294     "done:" %}
10295   ins_encode %{
10296     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10297     emit_cmpfp3(_masm, $dst$$Register);
10298   %}
10299   ins_pipe(pipe_slow);
10300 %}
10301 
10302 // Compare into -1,0,1
// Matches (CmpD3 src1 src2), the three-way double compare into an int
// register.  Emits ucomisd and then emit_cmpfp3, which is defined outside
// this chunk; per the format text dst is preset to -1 and kept on parity or
// "below", otherwise setne/movzbl leave 0 or 1 -- confirm against the helper.
// Flags are declared killed; cost is 275.
10303 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10304 %{
10305   match(Set dst (CmpD3 src1 src2));
10306   effect(KILL cr);
10307 
10308   ins_cost(275);
10309   format %{ "ucomisd $src1, $src2\n\t"
10310             "movl    $dst, #-1\n\t"
10311             "jp,s    done\n\t"
10312             "jb,s    done\n\t"
10313             "setne   $dst\n\t"
10314             "movzbl  $dst, $dst\n"
10315     "done:" %}
10316   ins_encode %{
10317     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10318     emit_cmpfp3(_masm, $dst$$Register);
10319   %}
10320   ins_pipe(pipe_slow);
10321 %}
10322 
10323 // Compare into -1,0,1
// Matches (CmpD3 src1 (LoadD src2)): three-way double compare with the
// second operand read straight from memory.  Emits ucomisd with an address
// and then emit_cmpfp3 (defined outside this chunk; the format text shows
// the sequence it is expected to emit).  Flags killed; cost is 275.
10324 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10325 %{
10326   match(Set dst (CmpD3 src1 (LoadD src2)));
10327   effect(KILL cr);
10328 
10329   ins_cost(275);
10330   format %{ "ucomisd $src1, $src2\n\t"
10331             "movl    $dst, #-1\n\t"
10332             "jp,s    done\n\t"
10333             "jb,s    done\n\t"
10334             "setne   $dst\n\t"
10335             "movzbl  $dst, $dst\n"
10336     "done:" %}
10337   ins_encode %{
10338     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10339     emit_cmpfp3(_masm, $dst$$Register);
10340   %}
10341   ins_pipe(pipe_slow);
10342 %}
10343 
10344 // Compare into -1,0,1
// Matches (CmpD3 src con): three-way double compare against a constant read
// from its $constantaddress slot.  Emits ucomisd and then emit_cmpfp3
// (defined outside this chunk; the format text shows the sequence it is
// expected to emit).  Flags killed; cost is 275.
10345 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10346   match(Set dst (CmpD3 src con));
10347   effect(KILL cr);
10348 
10349   ins_cost(275);
10350   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10351             "movl    $dst, #-1\n\t"
10352             "jp,s    done\n\t"
10353             "jb,s    done\n\t"
10354             "setne   $dst\n\t"
10355             "movzbl  $dst, $dst\n"
10356     "done:" %}
10357   ins_encode %{
10358     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10359     emit_cmpfp3(_masm, $dst$$Register);
10360   %}
10361   ins_pipe(pipe_slow);
10362 %}
10363 
10364 //----------Arithmetic Conversion Instructions---------------------------------
10365 
// Matches (RoundFloat dst) in place and emits nothing: the encoding is empty,
// the cost is 0 and the pipeline class is "empty".
10366 instruct roundFloat_nop(regF dst)
10367 %{
10368   match(Set dst (RoundFloat dst));
10369 
10370   ins_cost(0);
10371   ins_encode();
10372   ins_pipe(empty);
10373 %}
10374 
// Matches (RoundDouble dst) in place and emits nothing: the encoding is
// empty, the cost is 0 and the pipeline class is "empty".
10375 instruct roundDouble_nop(regD dst)
10376 %{
10377   match(Set dst (RoundDouble dst));
10378 
10379   ins_cost(0);
10380   ins_encode();
10381   ins_pipe(empty);
10382 %}
10383 
// Matches (ConvF2D src): float-to-double conversion between XMM registers,
// emitted as a single cvtss2sd.  No flags operand is involved.
10384 instruct convF2D_reg_reg(regD dst, regF src)
10385 %{
10386   match(Set dst (ConvF2D src));
10387 
10388   format %{ "cvtss2sd $dst, $src" %}
10389   ins_encode %{
10390     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10391   %}
10392   ins_pipe(pipe_slow); // XXX
10393 %}
10394 
// Matches (ConvF2D (LoadF src)): float-to-double conversion with the float
// load folded in, emitted as cvtss2sd with a memory operand.
10395 instruct convF2D_reg_mem(regD dst, memory src)
10396 %{
10397   match(Set dst (ConvF2D (LoadF src)));
10398 
10399   format %{ "cvtss2sd $dst, $src" %}
10400   ins_encode %{
10401     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
10402   %}
10403   ins_pipe(pipe_slow); // XXX
10404 %}
10405 
// Matches (ConvD2F src): double-to-float conversion between XMM registers,
// emitted as a single cvtsd2ss.  No flags operand is involved.
10406 instruct convD2F_reg_reg(regF dst, regD src)
10407 %{
10408   match(Set dst (ConvD2F src));
10409 
10410   format %{ "cvtsd2ss $dst, $src" %}
10411   ins_encode %{
10412     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10413   %}
10414   ins_pipe(pipe_slow); // XXX
10415 %}
10416 
// Matches (ConvD2F (LoadD src)): double-to-float conversion with the double
// load folded in, emitted as cvtsd2ss with a memory operand.
10417 instruct convD2F_reg_mem(regF dst, memory src)
10418 %{
10419   match(Set dst (ConvD2F (LoadD src)));
10420 
10421   format %{ "cvtsd2ss $dst, $src" %}
10422   ins_encode %{
10423     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
10424   %}
10425   ins_pipe(pipe_slow); // XXX
10426 %}
10427 
10428 // XXX do mem variants
// Matches (ConvF2I src).  Fast path: cvttss2sil into dst.  If the result
// equals 0x80000000 the slow path runs: 8 bytes are reserved on the stack,
// src is stored there, the f2i_fixup stub is called and the final value is
// popped into dst.  Flags are declared killed (the cmpl sets them).
// NOTE(review): 0x80000000 is presumably the value the hardware conversion
// produces when the input cannot be represented -- confirm against the
// instruction reference and the stub.
10429 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10430 %{
10431   match(Set dst (ConvF2I src));
10432   effect(KILL cr);
10433 
10434   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10435             "cmpl    $dst, #0x80000000\n\t"
10436             "jne,s   done\n\t"
10437             "subq    rsp, #8\n\t"
10438             "movss   [rsp], $src\n\t"
10439             "call    f2i_fixup\n\t"
10440             "popq    $dst\n"
10441     "done:   "%}
10442   ins_encode %{
10443     Label done;
10444     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10445     __ cmpl($dst$$Register, 0x80000000);
10446     __ jccb(Assembler::notEqual, done);
10447     __ subptr(rsp, 8);
10448     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10449     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
10450     __ pop($dst$$Register);
10451     __ bind(done);
10452   %}
10453   ins_pipe(pipe_slow);
10454 %}
10455 
// Matches (ConvF2L src).  Fast path: cvttss2siq into dst.  The result is
// compared (cmp64) with the 64-bit value stored at
// StubRoutines::x86::double_sign_flip() -- printed in the format as
// 0x8000000000000000; on equality the slow path stores src on the stack,
// calls the f2l_fixup stub and pops the final value into dst.
// Flags are declared killed.
10456 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10457 %{
10458   match(Set dst (ConvF2L src));
10459   effect(KILL cr);
10460 
10461   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10462             "cmpq    $dst, [0x8000000000000000]\n\t"
10463             "jne,s   done\n\t"
10464             "subq    rsp, #8\n\t"
10465             "movss   [rsp], $src\n\t"
10466             "call    f2l_fixup\n\t"
10467             "popq    $dst\n"
10468     "done:   "%}
10469   ins_encode %{
10470     Label done;
10471     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
10472     __ cmp64($dst$$Register,
10473              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10474     __ jccb(Assembler::notEqual, done);
10475     __ subptr(rsp, 8);
10476     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10477     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
10478     __ pop($dst$$Register);
10479     __ bind(done);
10480   %}
10481   ins_pipe(pipe_slow);
10482 %}
10483 
// Matches (ConvD2I src).  Fast path: cvttsd2sil into dst.  If the result
// equals 0x80000000 the slow path runs: 8 bytes are reserved on the stack,
// src is stored there with movdbl, the d2i_fixup stub is called and the
// final value is popped into dst.  Flags are declared killed.
// NOTE(review): 0x80000000 is presumably the value the hardware conversion
// produces when the input cannot be represented -- confirm against the
// instruction reference and the stub.
10484 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10485 %{
10486   match(Set dst (ConvD2I src));
10487   effect(KILL cr);
10488 
10489   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10490             "cmpl    $dst, #0x80000000\n\t"
10491             "jne,s   done\n\t"
10492             "subq    rsp, #8\n\t"
10493             "movsd   [rsp], $src\n\t"
10494             "call    d2i_fixup\n\t"
10495             "popq    $dst\n"
10496     "done:   "%}
10497   ins_encode %{
10498     Label done;
10499     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10500     __ cmpl($dst$$Register, 0x80000000);
10501     __ jccb(Assembler::notEqual, done);
10502     __ subptr(rsp, 8);
10503     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10504     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
10505     __ pop($dst$$Register);
10506     __ bind(done);
10507   %}
10508   ins_pipe(pipe_slow);
10509 %}
10510 
// Matches (ConvD2L src).  Fast path: cvttsd2siq into dst.  The result is
// compared (cmp64) with the 64-bit value stored at
// StubRoutines::x86::double_sign_flip() -- printed in the format as
// 0x8000000000000000; on equality the slow path stores src on the stack with
// movdbl, calls the d2l_fixup stub and pops the final value into dst.
// Flags are declared killed.
10511 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10512 %{
10513   match(Set dst (ConvD2L src));
10514   effect(KILL cr);
10515 
10516   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10517             "cmpq    $dst, [0x8000000000000000]\n\t"
10518             "jne,s   done\n\t"
10519             "subq    rsp, #8\n\t"
10520             "movsd   [rsp], $src\n\t"
10521             "call    d2l_fixup\n\t"
10522             "popq    $dst\n"
10523     "done:   "%}
10524   ins_encode %{
10525     Label done;
10526     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10527     __ cmp64($dst$$Register,
10528              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10529     __ jccb(Assembler::notEqual, done);
10530     __ subptr(rsp, 8);
10531     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10532     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10533     __ pop($dst$$Register);
10534     __ bind(done);
10535   %}
10536   ins_pipe(pipe_slow);
10537 %}
10538 
// Matches (ConvI2F src) from an int register, emitted as one cvtsi2ssl.
// Active only when UseXmmI2F is off; convXI2F_reg covers the other case.
10539 instruct convI2F_reg_reg(regF dst, rRegI src)
10540 %{
10541   predicate(!UseXmmI2F);
10542   match(Set dst (ConvI2F src));
10543 
10544   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10545   ins_encode %{
10546     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10547   %}
10548   ins_pipe(pipe_slow); // XXX
10549 %}
10550 
// Matches (ConvI2F (LoadI src)): int-to-float conversion with the int load
// folded in, emitted as cvtsi2ssl with a memory operand.  This rule carries
// no predicate, so it does not depend on UseXmmI2F.
10551 instruct convI2F_reg_mem(regF dst, memory src)
10552 %{
10553   match(Set dst (ConvI2F (LoadI src)));
10554 
10555   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10556   ins_encode %{
10557     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10558   %}
10559   ins_pipe(pipe_slow); // XXX
10560 %}
10561 
// Matches (ConvI2D src) from an int register, emitted as one cvtsi2sdl.
// Active only when UseXmmI2D is off; convXI2D_reg covers the other case.
10562 instruct convI2D_reg_reg(regD dst, rRegI src)
10563 %{
10564   predicate(!UseXmmI2D);
10565   match(Set dst (ConvI2D src));
10566 
10567   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10568   ins_encode %{
10569     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10570   %}
10571   ins_pipe(pipe_slow); // XXX
10572 %}
10573 
// Matches (ConvI2D (LoadI src)): int-to-double conversion with the int load
// folded in, emitted as cvtsi2sdl with a memory operand.  This rule carries
// no predicate, so it does not depend on UseXmmI2D.
10574 instruct convI2D_reg_mem(regD dst, memory src)
10575 %{
10576   match(Set dst (ConvI2D (LoadI src)));
10577 
10578   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10579   ins_encode %{
10580     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10581   %}
10582   ins_pipe(pipe_slow); // XXX
10583 %}
10584 
// Alternative (ConvI2F src) rule, active only when UseXmmI2F is on: the int
// is first moved into the XMM destination with movdl and then converted in
// place with cvtdq2ps (printed as "cvtdq2psl" in the format text).
10585 instruct convXI2F_reg(regF dst, rRegI src)
10586 %{
10587   predicate(UseXmmI2F);
10588   match(Set dst (ConvI2F src));
10589 
10590   format %{ "movdl $dst, $src\n\t"
10591             "cvtdq2psl $dst, $dst\t# i2f" %}
10592   ins_encode %{
10593     __ movdl($dst$$XMMRegister, $src$$Register);
10594     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10595   %}
10596   ins_pipe(pipe_slow); // XXX
10597 %}
10598 
// Alternative (ConvI2D src) rule, active only when UseXmmI2D is on: the int
// is first moved into the XMM destination with movdl and then converted in
// place with cvtdq2pd (printed as "cvtdq2pdl" in the format text).
10599 instruct convXI2D_reg(regD dst, rRegI src)
10600 %{
10601   predicate(UseXmmI2D);
10602   match(Set dst (ConvI2D src));
10603 
10604   format %{ "movdl $dst, $src\n\t"
10605             "cvtdq2pdl $dst, $dst\t# i2d" %}
10606   ins_encode %{
10607     __ movdl($dst$$XMMRegister, $src$$Register);
10608     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10609   %}
10610   ins_pipe(pipe_slow); // XXX
10611 %}
10612 
// Matches (ConvL2F src) from a long register, emitted as one cvtsi2ssq.
10613 instruct convL2F_reg_reg(regF dst, rRegL src)
10614 %{
10615   match(Set dst (ConvL2F src));
10616 
10617   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10618   ins_encode %{
10619     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10620   %}
10621   ins_pipe(pipe_slow); // XXX
10622 %}
10623 
// Matches (ConvL2F (LoadL src)): long-to-float conversion with the long load
// folded in, emitted as cvtsi2ssq with a memory operand.
10624 instruct convL2F_reg_mem(regF dst, memory src)
10625 %{
10626   match(Set dst (ConvL2F (LoadL src)));
10627 
10628   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10629   ins_encode %{
10630     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10631   %}
10632   ins_pipe(pipe_slow); // XXX
10633 %}
10634 
// Matches (ConvL2D src) from a long register, emitted as one cvtsi2sdq.
10635 instruct convL2D_reg_reg(regD dst, rRegL src)
10636 %{
10637   match(Set dst (ConvL2D src));
10638 
10639   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10640   ins_encode %{
10641     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10642   %}
10643   ins_pipe(pipe_slow); // XXX
10644 %}
10645 
// Matches (ConvL2D (LoadL src)): long-to-double conversion with the long
// load folded in, emitted as cvtsi2sdq with a memory operand.
10646 instruct convL2D_reg_mem(regD dst, memory src)
10647 %{
10648   match(Set dst (ConvL2D (LoadL src)));
10649 
10650   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10651   ins_encode %{
10652     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10653   %}
10654   ins_pipe(pipe_slow); // XXX
10655 %}
10656 
// Matches (ConvI2L src): int-to-long conversion emitted as one movslq.
// Cost is 125.
10657 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10658 %{
10659   match(Set dst (ConvI2L src));
10660 
10661   ins_cost(125);
10662   format %{ "movslq  $dst, $src\t# i2l" %}
10663   ins_encode %{
10664     __ movslq($dst$$Register, $src$$Register);
10665   %}
10666   ins_pipe(ialu_reg_reg);
10667 %}
10668 
10669 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10670 // %{
10671 //   match(Set dst (ConvI2L src));
10672 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10673 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10674 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10675 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10676 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10677 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10678 
10679 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10680 //   ins_encode(enc_copy(dst, src));
10681 // //   opcode(0x63); // needs REX.W
10682 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10683 //   ins_pipe(ialu_reg_reg);
10684 // %}
10685 
10686 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) where mask is the immL_32bits constant:
// the sign-extending conversion followed by the mask collapses into one
// 32-bit movl.  The move is skipped entirely when dst and src were assigned
// the same register.
// The format is a single line, so it carries no "\n\t" line separator.
10687 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10688 %{
10689   match(Set dst (AndL (ConvI2L src) mask));
10690 
10691   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10692   ins_encode %{
10693     if ($dst$$reg != $src$$reg) {
10694       __ movl($dst$$Register, $src$$Register);
10695     }
10696   %}
10697   ins_pipe(ialu_reg_reg);
10698 %}
10699 
10700 // Zero-extend convert int to long
// Matches (AndL (ConvI2L (LoadI src)) mask) where mask is the immL_32bits
// constant: load, conversion and mask collapse into one 32-bit movl from
// memory.
// The format is a single line, so it carries no "\n\t" line separator.
10701 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10702 %{
10703   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10704 
10705   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10706   ins_encode %{
10707     __ movl($dst$$Register, $src$$Address);
10708   %}
10709   ins_pipe(ialu_reg_mem);
10710 %}
10711 
// Matches (AndL src mask) on a long register with an immL_32bits mask operand
// and implements the and as a 32-bit register move (format: "zero-extend long").
// Unlike convI2L_reg_reg_zex, the movl is emitted unconditionally.
10712 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10713 %{
10714   match(Set dst (AndL src mask));
10715 
10716   format %{ "movl    $dst, $src\t# zero-extend long" %}
10717   ins_encode %{
10718     __ movl($dst$$Register, $src$$Register);
10719   %}
10720   ins_pipe(ialu_reg_reg);
10721 %}
10722 
// long -> int conversion between general registers: matches ConvL2I and emits
// a 32-bit movl from the long source register into the int destination.
10723 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10724 %{
10725   match(Set dst (ConvL2I src));
10726 
10727   format %{ "movl    $dst, $src\t# l2i" %}
10728   ins_encode %{
10729     __ movl($dst$$Register, $src$$Register);
10730   %}
10731   ins_pipe(ialu_reg_reg);
10732 %}
10733 
10734 
// MoveF2I with the float source in a stack slot: loads the 32-bit slot at
// rsp + disp straight into an int register with movl.
10735 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10736   match(Set dst (MoveF2I src));
10737   effect(DEF dst, USE src);
10738 
10739   ins_cost(125);
10740   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10741   ins_encode %{
10742     __ movl($dst$$Register, Address(rsp, $src$$disp));
10743   %}
10744   ins_pipe(ialu_reg_mem);
10745 %}
10746 
// MoveI2F with the int source in a stack slot: loads the slot at rsp + disp
// into an XMM register through the movflt assembler helper.
10747 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10748   match(Set dst (MoveI2F src));
10749   effect(DEF dst, USE src);
10750 
10751   ins_cost(125);
10752   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10753   ins_encode %{
10754     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
10755   %}
10756   ins_pipe(pipe_slow);
10757 %}
10758 
// MoveD2L with the double source in a stack slot: loads the 64-bit slot at
// rsp + disp straight into a long register with movq.
10759 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10760   match(Set dst (MoveD2L src));
10761   effect(DEF dst, USE src);
10762 
10763   ins_cost(125);
10764   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10765   ins_encode %{
10766     __ movq($dst$$Register, Address(rsp, $src$$disp));
10767   %}
10768   ins_pipe(ialu_reg_mem);
10769 %}
10770 
// MoveL2D with the long source in a stack slot, selected only when
// UseXmmLoadAndClearUpper is off. Loads the slot at rsp + disp into an XMM
// register through the movdbl helper; the format string shows movlpd.
// NOTE(review): the encoding is textually identical to MoveL2D_stack_reg, so
// movdbl presumably picks the instruction from the same flag -- confirm.
10771 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10772   predicate(!UseXmmLoadAndClearUpper);
10773   match(Set dst (MoveL2D src));
10774   effect(DEF dst, USE src);
10775 
10776   ins_cost(125);
10777   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10778   ins_encode %{
10779     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10780   %}
10781   ins_pipe(pipe_slow);
10782 %}
10783 
// MoveL2D with the long source in a stack slot, selected only when
// UseXmmLoadAndClearUpper is on. Loads the slot at rsp + disp into an XMM
// register through the movdbl helper; the format string shows movsd.
10784 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10785   predicate(UseXmmLoadAndClearUpper);
10786   match(Set dst (MoveL2D src));
10787   effect(DEF dst, USE src);
10788 
10789   ins_cost(125);
10790   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10791   ins_encode %{
10792     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10793   %}
10794   ins_pipe(pipe_slow);
10795 %}
10796 
10797 
// MoveF2I with the result in a stack slot: stores the float XMM register to
// the int slot at rsp + disp through the movflt helper.
10798 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10799   match(Set dst (MoveF2I src));
10800   effect(DEF dst, USE src);
10801 
10802   ins_cost(95); // XXX
10803   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10804   ins_encode %{
10805     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
10806   %}
10807   ins_pipe(pipe_slow);
10808 %}
10809 
// MoveI2F with the result in a stack slot: stores the int register to the
// float slot at rsp + disp with a 32-bit movl.
10810 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10811   match(Set dst (MoveI2F src));
10812   effect(DEF dst, USE src);
10813 
10814   ins_cost(100);
10815   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10816   ins_encode %{
10817     __ movl(Address(rsp, $dst$$disp), $src$$Register);
10818   %}
10819   ins_pipe( ialu_mem_reg );
10820 %}
10821 
// MoveD2L with the result in a stack slot: stores the double XMM register to
// the long slot at rsp + disp through the movdbl helper.
// The format tag names this rule (MoveD2L_reg_stack) so printed assembly can
// be told apart from the MoveL2D_reg_stack rule.
10822 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10823   match(Set dst (MoveD2L src));
10824   effect(DEF dst, USE src);
10825 
10826   ins_cost(95); // XXX
10827   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10828   ins_encode %{
10829     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
10830   %}
10831   ins_pipe(pipe_slow);
10832 %}
10833 
// MoveL2D with the result in a stack slot: stores the long register to the
// double slot at rsp + disp with a 64-bit movq.
10834 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10835   match(Set dst (MoveL2D src));
10836   effect(DEF dst, USE src);
10837 
10838   ins_cost(100);
10839   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10840   ins_encode %{
10841     __ movq(Address(rsp, $dst$$disp), $src$$Register);
10842   %}
10843   ins_pipe(ialu_mem_reg);
10844 %}
10845 
// Register-to-register MoveF2I: copies from the float XMM register into an
// int general register with movdl. Its cost (85) is lower than the
// stack-based MoveF2I forms.
10846 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10847   match(Set dst (MoveF2I src));
10848   effect(DEF dst, USE src);
10849   ins_cost(85);
10850   format %{ "movd    $dst,$src\t# MoveF2I" %}
10851   ins_encode %{
10852     __ movdl($dst$$Register, $src$$XMMRegister);
10853   %}
10854   ins_pipe( pipe_slow );
10855 %}
10856 
// Register-to-register MoveD2L: copies from the double XMM register into a
// long general register with movdq (the format string prints it as "movd").
10857 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10858   match(Set dst (MoveD2L src));
10859   effect(DEF dst, USE src);
10860   ins_cost(85);
10861   format %{ "movd    $dst,$src\t# MoveD2L" %}
10862   ins_encode %{
10863     __ movdq($dst$$Register, $src$$XMMRegister);
10864   %}
10865   ins_pipe( pipe_slow );
10866 %}
10867 
// Register-to-register MoveI2F: copies from an int general register into a
// float XMM register with movdl.
10868 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10869   match(Set dst (MoveI2F src));
10870   effect(DEF dst, USE src);
10871   ins_cost(100);
10872   format %{ "movd    $dst,$src\t# MoveI2F" %}
10873   ins_encode %{
10874     __ movdl($dst$$XMMRegister, $src$$Register);
10875   %}
10876   ins_pipe( pipe_slow );
10877 %}
10878 
// Register-to-register MoveL2D: copies from a long general register into a
// double XMM register with movdq (the format string prints it as "movd").
10879 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10880   match(Set dst (MoveL2D src));
10881   effect(DEF dst, USE src);
10882   ins_cost(100);
10883   format %{ "movd    $dst,$src\t# MoveL2D" %}
10884   ins_encode %{
10885      __ movdq($dst$$XMMRegister, $src$$Register);
10886   %}
10887   ins_pipe( pipe_slow );
10888 %}
10889 
10890 
10891 // =======================================================================
10892 // fast clearing of an array
// ClearArray rule for nodes that are neither is_large() nor word_copy_only().
// cnt is pinned to rcx and base to rdi, and both are clobbered (USE_KILL).
// val is pinned to rax, tmp is an XMM temporary, and the flags are killed.
// All code is produced by the clear_mem assembler routine; the format
// template below only describes the expected code shape for printed output.
// NOTE(review): the two trailing booleans given to clear_mem presumably mean
// (is_large, word_copy_only), mirroring the predicate -- confirm against the
// clear_mem declaration.
// NOTE(review): the XMM section of the template prints "(rax)" addresses and a
// literal "xmm0" although base is rdi and the temporary is $tmp -- looks stale.
10893 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10894                   Universe dummy, rFlagsReg cr)
10895 %{
10896   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10897   match(Set dummy (ClearArray (Binary cnt base) val));
10898   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10899 
10900   format %{ $$template
10901     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10902     $$emit$$"jg      LARGE\n\t"
10903     $$emit$$"dec     rcx\n\t"
10904     $$emit$$"js      DONE\t# Zero length\n\t"
10905     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10906     $$emit$$"dec     rcx\n\t"
10907     $$emit$$"jge     LOOP\n\t"
10908     $$emit$$"jmp     DONE\n\t"
10909     $$emit$$"# LARGE:\n\t"
10910     if (UseFastStosb) {
10911        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10912        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10913     } else if (UseXMMForObjInit) {
10914        $$emit$$"movdq   $tmp, $val\n\t"
10915        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10916        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10917        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10918        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10919        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10920        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10921        $$emit$$"add     0x40,rax\n\t"
10922        $$emit$$"# L_zero_64_bytes:\n\t"
10923        $$emit$$"sub     0x8,rcx\n\t"
10924        $$emit$$"jge     L_loop\n\t"
10925        $$emit$$"add     0x4,rcx\n\t"
10926        $$emit$$"jl      L_tail\n\t"
10927        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10928        $$emit$$"add     0x20,rax\n\t"
10929        $$emit$$"sub     0x4,rcx\n\t"
10930        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10931        $$emit$$"add     0x4,rcx\n\t"
10932        $$emit$$"jle     L_end\n\t"
10933        $$emit$$"dec     rcx\n\t"
10934        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10935        $$emit$$"vmovq   xmm0,(rax)\n\t"
10936        $$emit$$"add     0x8,rax\n\t"
10937        $$emit$$"dec     rcx\n\t"
10938        $$emit$$"jge     L_sloop\n\t"
10939        $$emit$$"# L_end:\n\t"
10940     } else {
10941        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10942     }
10943     $$emit$$"# DONE"
10944   %}
10945   ins_encode %{
10946     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10947                  $tmp$$XMMRegister, false, false);
10948   %}
10949   ins_pipe(pipe_slow);
10950 %}
10951 
// ClearArray rule for nodes that are not is_large() but are word_copy_only().
// Register pinning and effects match rep_stos: cnt in rcx and base in rdi are
// clobbered, val is in rax, tmp is an XMM temporary, and flags are killed.
// The format template has no UseFastStosb (byte store) branch, unlike rep_stos.
// NOTE(review): the two trailing booleans given to clear_mem presumably mean
// (is_large, word_copy_only), mirroring the predicate -- confirm against the
// clear_mem declaration.
// NOTE(review): the XMM section of the template prints "(rax)" addresses and a
// literal "xmm0" although base is rdi and the temporary is $tmp -- looks stale.
10952 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10953                   Universe dummy, rFlagsReg cr)
10954 %{
10955   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
10956   match(Set dummy (ClearArray (Binary cnt base) val));
10957   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10958 
10959   format %{ $$template
10960     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10961     $$emit$$"jg      LARGE\n\t"
10962     $$emit$$"dec     rcx\n\t"
10963     $$emit$$"js      DONE\t# Zero length\n\t"
10964     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10965     $$emit$$"dec     rcx\n\t"
10966     $$emit$$"jge     LOOP\n\t"
10967     $$emit$$"jmp     DONE\n\t"
10968     $$emit$$"# LARGE:\n\t"
10969     if (UseXMMForObjInit) {
10970        $$emit$$"movdq   $tmp, $val\n\t"
10971        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10972        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10973        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10974        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10975        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10976        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10977        $$emit$$"add     0x40,rax\n\t"
10978        $$emit$$"# L_zero_64_bytes:\n\t"
10979        $$emit$$"sub     0x8,rcx\n\t"
10980        $$emit$$"jge     L_loop\n\t"
10981        $$emit$$"add     0x4,rcx\n\t"
10982        $$emit$$"jl      L_tail\n\t"
10983        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10984        $$emit$$"add     0x20,rax\n\t"
10985        $$emit$$"sub     0x4,rcx\n\t"
10986        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10987        $$emit$$"add     0x4,rcx\n\t"
10988        $$emit$$"jle     L_end\n\t"
10989        $$emit$$"dec     rcx\n\t"
10990        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10991        $$emit$$"vmovq   xmm0,(rax)\n\t"
10992        $$emit$$"add     0x8,rax\n\t"
10993        $$emit$$"dec     rcx\n\t"
10994        $$emit$$"jge     L_sloop\n\t"
10995        $$emit$$"# L_end:\n\t"
10996     } else {
10997        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10998     }
10999     $$emit$$"# DONE"
11000   %}
11001   ins_encode %{
11002     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11003                  $tmp$$XMMRegister, false, true);
11004   %}
11005   ins_pipe(pipe_slow);
11006 %}
11007 
// ClearArray rule for nodes that are is_large() but not word_copy_only().
// Register pinning and effects match rep_stos: cnt in rcx and base in rdi are
// clobbered, val is in rax, tmp is an XMM temporary, and flags are killed.
// The format template omits the short-array prologue (the InitArrayShortSize
// compare and store loop) that rep_stos prints.
// NOTE(review): the two trailing booleans given to clear_mem presumably mean
// (is_large, word_copy_only), mirroring the predicate -- confirm against the
// clear_mem declaration.
// NOTE(review): the XMM section of the template prints "(rax)" addresses and a
// literal "xmm0" although base is rdi and the temporary is $tmp -- looks stale.
11008 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11009                         Universe dummy, rFlagsReg cr)
11010 %{
11011   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
11012   match(Set dummy (ClearArray (Binary cnt base) val));
11013   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11014 
11015   format %{ $$template
11016     if (UseFastStosb) {
11017        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11018        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11019     } else if (UseXMMForObjInit) {
11020        $$emit$$"movdq   $tmp, $val\n\t"
11021        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11022        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11023        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11024        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11025        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11026        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11027        $$emit$$"add     0x40,rax\n\t"
11028        $$emit$$"# L_zero_64_bytes:\n\t"
11029        $$emit$$"sub     0x8,rcx\n\t"
11030        $$emit$$"jge     L_loop\n\t"
11031        $$emit$$"add     0x4,rcx\n\t"
11032        $$emit$$"jl      L_tail\n\t"
11033        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11034        $$emit$$"add     0x20,rax\n\t"
11035        $$emit$$"sub     0x4,rcx\n\t"
11036        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11037        $$emit$$"add     0x4,rcx\n\t"
11038        $$emit$$"jle     L_end\n\t"
11039        $$emit$$"dec     rcx\n\t"
11040        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11041        $$emit$$"vmovq   xmm0,(rax)\n\t"
11042        $$emit$$"add     0x8,rax\n\t"
11043        $$emit$$"dec     rcx\n\t"
11044        $$emit$$"jge     L_sloop\n\t"
11045        $$emit$$"# L_end:\n\t"
11046     } else {
11047        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11048     }
11049   %}
11050   ins_encode %{
11051     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11052                  $tmp$$XMMRegister, true, false);
11053   %}
11054   ins_pipe(pipe_slow);
11055 %}
11056 
// ClearArray rule for nodes that are both is_large() and word_copy_only().
// Register pinning and effects match rep_stos: cnt in rcx and base in rdi are
// clobbered, val is in rax, tmp is an XMM temporary, and flags are killed.
// The format template has neither the short-array prologue nor a UseFastStosb
// branch.
// NOTE(review): the two trailing booleans given to clear_mem presumably mean
// (is_large, word_copy_only), mirroring the predicate -- confirm against the
// clear_mem declaration.
// NOTE(review): the XMM section of the template prints "(rax)" addresses and a
// literal "xmm0" although base is rdi and the temporary is $tmp -- looks stale.
11057 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val, 
11058                         Universe dummy, rFlagsReg cr)
11059 %{
11060   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
11061   match(Set dummy (ClearArray (Binary cnt base) val));
11062   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11063 
11064   format %{ $$template
11065     if (UseXMMForObjInit) {
11066        $$emit$$"movdq   $tmp, $val\n\t"
11067        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11068        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11069        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11070        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11071        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11072        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11073        $$emit$$"add     0x40,rax\n\t"
11074        $$emit$$"# L_zero_64_bytes:\n\t"
11075        $$emit$$"sub     0x8,rcx\n\t"
11076        $$emit$$"jge     L_loop\n\t"
11077        $$emit$$"add     0x4,rcx\n\t"
11078        $$emit$$"jl      L_tail\n\t"
11079        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11080        $$emit$$"add     0x20,rax\n\t"
11081        $$emit$$"sub     0x4,rcx\n\t"
11082        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11083        $$emit$$"add     0x4,rcx\n\t"
11084        $$emit$$"jle     L_end\n\t"
11085        $$emit$$"dec     rcx\n\t"
11086        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11087        $$emit$$"vmovq   xmm0,(rax)\n\t"
11088        $$emit$$"add     0x8,rax\n\t"
11089        $$emit$$"dec     rcx\n\t"
11090        $$emit$$"jge     L_sloop\n\t"
11091        $$emit$$"# L_end:\n\t"
11092     } else {
11093        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11094     }
11095   %}
11096   ins_encode %{
11097     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register, 
11098                  $tmp$$XMMRegister, true, true);
11099   %}
11100   ins_pipe(pipe_slow);
11101 %}
11102 
// StrComp rule for the LL encoding. str1/cnt1 are pinned to rdi/rcx and
// str2/cnt2 to rsi/rdx; all four are clobbered, as are the flags. The result
// is produced in rax and tmp1 is a vector temporary. The comparison itself is
// generated by the string_compare assembler routine.
11103 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11104                          rax_RegI result, legVecS tmp1, rFlagsReg cr)
11105 %{
11106   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11107   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11108   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11109 
11110   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11111   ins_encode %{
11112     __ string_compare($str1$$Register, $str2$$Register,
11113                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11114                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11115   %}
11116   ins_pipe( pipe_slow );
11117 %}
11118 
// StrComp rule for the UU encoding. Register pinning and effects are the same
// as string_compareL (str1/cnt1 in rdi/rcx, str2/cnt2 in rsi/rdx, result in
// rax); only the encoding passed to string_compare differs.
11119 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11120                          rax_RegI result, legVecS tmp1, rFlagsReg cr)
11121 %{
11122   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11123   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11124   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11125 
11126   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11127   ins_encode %{
11128     __ string_compare($str1$$Register, $str2$$Register,
11129                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11130                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11131   %}
11132   ins_pipe( pipe_slow );
11133 %}
11134 
// StrComp rule for the mixed LU encoding. Register pinning and effects are
// the same as string_compareL; only the encoding passed to string_compare
// differs. The format string still says "byte[]" for this mixed case.
11135 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11136                           rax_RegI result, legVecS tmp1, rFlagsReg cr)
11137 %{
11138   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11139   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11140   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11141 
11142   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11143   ins_encode %{
11144     __ string_compare($str1$$Register, $str2$$Register,
11145                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11146                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11147   %}
11148   ins_pipe( pipe_slow );
11149 %}
11150 
// StrComp rule for the mixed UL encoding. Compared with the other StrComp
// rules the operand registers are exchanged (str1/cnt1 in rsi/rdx, str2/cnt2
// in rdi/rcx) and string_compare is called with the second string first, so
// the routine receives its arguments in the same physical registers
// (rdi, rsi, rcx, rdx) as in the other variants.
11151 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
11152                           rax_RegI result, legVecS tmp1, rFlagsReg cr)
11153 %{
11154   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11155   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11156   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11157 
11158   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11159   ins_encode %{
11160     __ string_compare($str2$$Register, $str1$$Register,
11161                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11162                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11163   %}
11164   ins_pipe( pipe_slow );
11165 %}
11166 
11167 // fast search of substring with known size.
// StrIndexOf rule for the LL encoding when the substring length is a
// compile-time constant (immI int_cnt2); requires UseSSE42Intrinsics.
// cnt2 (rax) and tmp (rcx) are not inputs of the match: they are scratch
// registers that the rule kills.
11168 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11169                              rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11170 %{
11171   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11172   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11173   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11174 
11175   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11176   ins_encode %{
11177     int needle_len = (int)$int_cnt2$$constant;
11178     if (needle_len < 16) {
11179       // Short substrings take the general routine, which loads them
11180       // through the stack if they cross a page boundary.
11181       __ string_indexof($str1$$Register, $str2$$Register,
11182                         $cnt1$$Register, $cnt2$$Register,
11183                         needle_len, $result$$Register,
11184                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11185     } else {
11186       // Constant substrings of 16+ elements need no load through the stack.
11187       __ string_indexofC8($str1$$Register, $str2$$Register,
11188                           $cnt1$$Register, $cnt2$$Register,
11189                           needle_len, $result$$Register,
11190                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11191     }
11192   %}
11193   ins_pipe( pipe_slow );
11194 %}
11195 
11196 // fast search of substring with known size.
// StrIndexOf rule for the UU encoding when the substring length is a
// compile-time constant (immI int_cnt2); requires UseSSE42Intrinsics.
// cnt2 (rax) and tmp (rcx) are not inputs of the match: they are scratch
// registers that the rule kills.
11197 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11198                              rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11199 %{
11200   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11201   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11202   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11203 
11204   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11205   ins_encode %{
11206     int needle_len = (int)$int_cnt2$$constant;
11207     if (needle_len < 8) {
11208       // Short substrings take the general routine, which loads them
11209       // through the stack if they cross a page boundary.
11210       __ string_indexof($str1$$Register, $str2$$Register,
11211                         $cnt1$$Register, $cnt2$$Register,
11212                         needle_len, $result$$Register,
11213                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11214     } else {
11215       // Constant substrings of 8+ elements need no load through the stack.
11216       __ string_indexofC8($str1$$Register, $str2$$Register,
11217                           $cnt1$$Register, $cnt2$$Register,
11218                           needle_len, $result$$Register,
11219                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11220     }
11221   %}
11222   ins_pipe( pipe_slow );
11223 %}
11224 
11225 // fast search of substring with known size.
// StrIndexOf rule for the mixed UL encoding when the substring length is a
// compile-time constant (immI int_cnt2); requires UseSSE42Intrinsics.
// cnt2 (rax) and tmp (rcx) are not inputs of the match: they are scratch
// registers that the rule kills.
11226 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11227                              rbx_RegI result, legVecS vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11228 %{
11229   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11230   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11231   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11232 
11233   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11234   ins_encode %{
11235     int needle_len = (int)$int_cnt2$$constant;
11236     if (needle_len < 8) {
11237       // Short substrings take the general routine, which loads them
11238       // through the stack if they cross a page boundary.
11239       __ string_indexof($str1$$Register, $str2$$Register,
11240                         $cnt1$$Register, $cnt2$$Register,
11241                         needle_len, $result$$Register,
11242                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11243     } else {
11244       // Constant substrings of 8+ elements need no load through the stack.
11245       __ string_indexofC8($str1$$Register, $str2$$Register,
11246                           $cnt1$$Register, $cnt2$$Register,
11247                           needle_len, $result$$Register,
11248                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11249     }
11250   %}
11251   ins_pipe( pipe_slow );
11252 %}
11253 
// StrIndexOf rule for the LL encoding with a substring length only known at
// run time (cnt2 in rax); requires UseSSE42Intrinsics. Both strings and both
// counts are clobbered, tmp (rcx) and the flags are killed, and the result is
// produced in rbx.
11254 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11255                          rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
11256 %{
11257   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11258   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11259   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11260 
11261   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11262   ins_encode %{
11263     // -1 is passed where the constant-length rules pass the known length.
11264     __ string_indexof($str1$$Register, $str2$$Register,
11264                       $cnt1$$Register, $cnt2$$Register,
11265                       (-1), $result$$Register,
11266                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11267   %}
11268   ins_pipe( pipe_slow );
11269 %}
11270 
// StrIndexOf rule for the UU encoding with a substring length only known at
// run time (cnt2 in rax); requires UseSSE42Intrinsics. Register pinning and
// effects are the same as string_indexofL.
11271 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11272                          rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
11273 %{
11274   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11275   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11276   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11277 
11278   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11279   ins_encode %{
11280     __ string_indexof($str1$$Register, $str2$$Register,
11281                       $cnt1$$Register, $cnt2$$Register,
11282                       (-1), $result$$Register,
11283                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11284   %}
11285   ins_pipe( pipe_slow );
11286 %}
11287 
// StrIndexOf rule for the mixed UL encoding with a substring length only
// known at run time (cnt2 in rax); requires UseSSE42Intrinsics. Register
// pinning and effects are the same as string_indexofL.
11288 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11289                          rbx_RegI result, legVecS vec, rcx_RegI tmp, rFlagsReg cr)
11290 %{
11291   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11292   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11293   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11294 
11295   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11296   ins_encode %{
11297     __ string_indexof($str1$$Register, $str2$$Register,
11298                       $cnt1$$Register, $cnt2$$Register,
11299                       (-1), $result$$Register,
11300                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11301   %}
11302   ins_pipe( pipe_slow );
11303 %}
11304 
// StrIndexOfChar rule; requires UseSSE42Intrinsics and has no encoding
// predicate. str1 (rdi), cnt1 (rdx) and ch (rax) are clobbered, three vector
// temporaries plus tmp (rcx) are reserved, flags are killed, and the result
// is produced in rbx by the string_indexof_char assembler routine.
11305 instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11306                               rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
11307 %{
11308   predicate(UseSSE42Intrinsics);
11309   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11310   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11311   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11312   ins_encode %{
11313     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11314                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11315   %}
11316   ins_pipe( pipe_slow );
11317 %}
11318 
11319 // fast string equals
// StrEquals rule: str1 (rdi), str2 (rsi) and cnt (rcx) are clobbered, tmp3
// (rbx) and the flags are killed, and the result is produced in rax. The
// work is delegated to the arrays_equals assembler routine.
// NOTE(review): the leading 'false' presumably selects the non-array (string)
// mode of arrays_equals, since the AryEq rules pass 'true' -- confirm.
11320 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11321                        legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
11322 %{
11323   match(Set result (StrEquals (Binary str1 str2) cnt));
11324   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11325 
11326   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11327   ins_encode %{
11328     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11329                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11330                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11331   %}
11332   ins_pipe( pipe_slow );
11333 %}
11334 
11335 // fast array equals
// AryEq rule for the LL encoding (format: byte[]). ary1 (rdi) and ary2 (rsi)
// are clobbered, tmp3 (rcx), tmp4 (rbx) and the flags are killed, and the
// result is produced in rax. There is no count operand: tmp3 is passed to
// arrays_equals in the position where string_equals passes its cnt.
11336 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11337                        legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11338 %{
11339   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11340   match(Set result (AryEq ary1 ary2));
11341   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11342 
11343   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11344   ins_encode %{
11345     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11346                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11347                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11348   %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
// AryEq rule for the UU encoding (format: char[]). Identical to array_equalsB
// except that the final arrays_equals argument (the "char" flag) is true.
11352 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11353                       legVecS tmp1, legVecS tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11354 %{
11355   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11356   match(Set result (AryEq ary1 ary2));
11357   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11358 
11359   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11360   ins_encode %{
11361     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11362                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11363                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
// HasNegatives rule on a byte[] (per the format string). ary1 (rsi) and len
// (rcx) are clobbered, tmp3 (rbx) and the flags are killed, two vector
// temporaries are reserved, and the result is produced in rax by the
// has_negatives assembler routine.
11368 instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
11369                       legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
11370 %{
11371   match(Set result (HasNegatives ary1 len));
11372   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11373 
11374   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11375   ins_encode %{
11376     __ has_negatives($ary1$$Register, $len$$Register,
11377                      $result$$Register, $tmp3$$Register,
11378                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11379   %}
11380   ins_pipe( pipe_slow );
11381 %}
11382 
11383 // fast char[] to byte[] compression
// StrCompressedCopy rule. src (rsi), dst (rdi) and len (rdx) are clobbered,
// tmp5 (rcx) and the flags are killed, four vector temporaries are reserved,
// and the result is produced in rax by the char_array_compress routine.
11384 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
11385                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
11386   match(Set result (StrCompressedCopy src (Binary dst len)));
11387   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11388 
11389   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11390   ins_encode %{
11391     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11392                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11393                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11394   %}
11395   ins_pipe( pipe_slow );
11396 %}
11397 
11398 // fast byte[] to char[] inflation
// StrInflatedCopy rule. The match sets only the Universe dummy operand, so
// the rule yields no register result. src (rsi), dst (rdi) and len (rdx) are
// clobbered, flags are killed, and tmp1 (vector) and tmp2 (rcx) are TEMPs
// handed to the byte_array_inflate routine.
11399 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11400                         legVecS tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
11401   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11402   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11403 
11404   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11405   ins_encode %{
11406     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11407                           $tmp1$$XMMRegister, $tmp2$$Register);
11408   %}
11409   ins_pipe( pipe_slow );
11410 %}
11411 
11412 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray rule. Operand layout mirrors string_compress: src (rsi),
// dst (rdi) and len (rdx) are clobbered, tmp5 (rcx) and the flags are killed,
// four vector temporaries are reserved, and the result is produced in rax by
// the encode_iso_array assembler routine.
11413 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11414                           legVecS tmp1, legVecS tmp2, legVecS tmp3, legVecS tmp4,
11415                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
11416   match(Set result (EncodeISOArray src (Binary dst len)));
11417   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11418 
11419   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
11420   ins_encode %{
11421     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11422                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11423                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 //----------Overflow Math Instructions-----------------------------------------
11429 
11430 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11431 %{
        // Overflow check for int add (register + register).  The rule's only
        // result is the flags register cr; the addl itself writes op1, which is
        // why op1 is declared USE_KILL.
11432   match(Set cr (OverflowAddI op1 op2));
11433   effect(DEF cr, USE_KILL op1, USE op2);
11434 
11435   format %{ "addl    $op1, $op2\t# overflow check int" %}
11436 
11437   ins_encode %{
11438     __ addl($op1$$Register, $op2$$Register);
11439   %}
11440   ins_pipe(ialu_reg_reg);
11441 %}
11442 
11443 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
11444 %{
        // Overflow check for int add (register + immediate).  Produces only the
        // flags in cr; op1 is overwritten by the addl and therefore USE_KILL.
11445   match(Set cr (OverflowAddI op1 op2));
11446   effect(DEF cr, USE_KILL op1, USE op2);
11447 
11448   format %{ "addl    $op1, $op2\t# overflow check int" %}
11449 
11450   ins_encode %{
11451     __ addl($op1$$Register, $op2$$constant);
11452   %}
11453   ins_pipe(ialu_reg_reg);
11454 %}
11455 
11456 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11457 %{
        // Overflow check for long add (register + register).  Produces only the
        // flags in cr; op1 is overwritten by the addq and therefore USE_KILL.
11458   match(Set cr (OverflowAddL op1 op2));
11459   effect(DEF cr, USE_KILL op1, USE op2);
11460 
11461   format %{ "addq    $op1, $op2\t# overflow check long" %}
11462   ins_encode %{
11463     __ addq($op1$$Register, $op2$$Register);
11464   %}
11465   ins_pipe(ialu_reg_reg);
11466 %}
11467 
11468 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
11469 %{
        // Overflow check for long add (register + immL32 immediate).  Produces
        // only the flags in cr; op1 is overwritten by the addq (USE_KILL).
11470   match(Set cr (OverflowAddL op1 op2));
11471   effect(DEF cr, USE_KILL op1, USE op2);
11472 
11473   format %{ "addq    $op1, $op2\t# overflow check long" %}
11474   ins_encode %{
11475     __ addq($op1$$Register, $op2$$constant);
11476   %}
11477   ins_pipe(ialu_reg_reg);
11478 %}
11479 
11480 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11481 %{
        // Overflow check for int subtract (register - register).  Emitted as a
        // cmpl; no effect() clause is needed because neither operand is declared
        // killed and cr is the matched result.
11482   match(Set cr (OverflowSubI op1 op2));
11483 
11484   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11485   ins_encode %{
11486     __ cmpl($op1$$Register, $op2$$Register);
11487   %}
11488   ins_pipe(ialu_reg_reg);
11489 %}
11490 
11491 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11492 %{
        // Overflow check for int subtract (register - immediate), emitted as a
        // cmpl of op1 against the constant.  Only the flags in cr are produced.
11493   match(Set cr (OverflowSubI op1 op2));
11494 
11495   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11496   ins_encode %{
11497     __ cmpl($op1$$Register, $op2$$constant);
11498   %}
11499   ins_pipe(ialu_reg_reg);
11500 %}
11501 
11502 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11503 %{
        // Overflow check for long subtract (register - register), emitted as a
        // cmpq.  Only the flags in cr are produced.
11504   match(Set cr (OverflowSubL op1 op2));
11505 
11506   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11507   ins_encode %{
11508     __ cmpq($op1$$Register, $op2$$Register);
11509   %}
11510   ins_pipe(ialu_reg_reg);
11511 %}
11512 
11513 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11514 %{
        // Overflow check for long subtract (register - immL32 immediate),
        // emitted as a cmpq.  Only the flags in cr are produced.
11515   match(Set cr (OverflowSubL op1 op2));
11516 
11517   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11518   ins_encode %{
11519     __ cmpq($op1$$Register, $op2$$constant);
11520   %}
11521   ins_pipe(ialu_reg_reg);
11522 %}
11523 
11524 instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
11525 %{
        // Overflow check for int negation: matches OverflowSubI whose left
        // operand is the constant zero (0 - op2) and emits negl on op2.
        // op2 is overwritten by the negl, hence USE_KILL.
11526   match(Set cr (OverflowSubI zero op2));
11527   effect(DEF cr, USE_KILL op2);
11528 
11529   format %{ "negl    $op2\t# overflow check int" %}
11530   ins_encode %{
11531     __ negl($op2$$Register);
11532   %}
11533   ins_pipe(ialu_reg_reg);
11534 %}
11535 
11536 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
11537 %{
        // Overflow check for long negation: matches OverflowSubL whose left
        // operand is the constant zero (0 - op2) and emits negq on op2.
        // op2 is overwritten by the negq, hence USE_KILL.
11538   match(Set cr (OverflowSubL zero op2));
11539   effect(DEF cr, USE_KILL op2);
11540 
11541   format %{ "negq    $op2\t# overflow check long" %}
11542   ins_encode %{
11543     __ negq($op2$$Register);
11544   %}
11545   ins_pipe(ialu_reg_reg);
11546 %}
11547 
11548 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11549 %{
        // Overflow check for int multiply (register * register).  The
        // two-operand imull writes its product into op1, hence USE_KILL; only
        // the flags in cr are the rule's result.
11550   match(Set cr (OverflowMulI op1 op2));
11551   effect(DEF cr, USE_KILL op1, USE op2);
11552 
11553   format %{ "imull    $op1, $op2\t# overflow check int" %}
11554   ins_encode %{
11555     __ imull($op1$$Register, $op2$$Register);
11556   %}
11557   ins_pipe(ialu_reg_reg_alu0);
11558 %}
11559 
11560 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
11561 %{
        // Overflow check for int multiply (register * immediate).  The
        // three-operand imull is given tmp (TEMP) as its destination, so op1 is
        // only read (USE).
11562   match(Set cr (OverflowMulI op1 op2));
11563   effect(DEF cr, TEMP tmp, USE op1, USE op2);
11564 
11565   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
11566   ins_encode %{
11567     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
11568   %}
11569   ins_pipe(ialu_reg_reg_alu0);
11570 %}
11571 
11572 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11573 %{
        // Overflow check for long multiply (register * register).  The
        // two-operand imulq writes its product into op1, hence USE_KILL.
11574   match(Set cr (OverflowMulL op1 op2));
11575   effect(DEF cr, USE_KILL op1, USE op2);
11576 
11577   format %{ "imulq    $op1, $op2\t# overflow check long" %}
11578   ins_encode %{
11579     __ imulq($op1$$Register, $op2$$Register);
11580   %}
11581   ins_pipe(ialu_reg_reg_alu0);
11582 %}
11583 
11584 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
11585 %{
        // Overflow check for long multiply (register * immL32 immediate).  The
        // three-operand imulq is given tmp (TEMP) as its destination, so op1 is
        // only read (USE).
11586   match(Set cr (OverflowMulL op1 op2));
11587   effect(DEF cr, TEMP tmp, USE op1, USE op2);
11588 
11589   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
11590   ins_encode %{
11591     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
11592   %}
11593   ins_pipe(ialu_reg_reg_alu0);
11594 %}
11595 
11596 
11597 //----------Control Flow Instructions------------------------------------------
11598 // Signed compare Instructions
11599 
11600 // XXX more variants!!
11601 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11602 %{
        // Signed int compare of two registers; the result is the flags in cr.
        // Encoded from primary opcode 0x3B through the REX_reg_reg / OpcP /
        // reg_reg enc_classes.  Also instantiated by the expand blocks of
        // minI_rReg and maxI_rReg.
11603   match(Set cr (CmpI op1 op2));
11604   effect(DEF cr, USE op1, USE op2);
11605 
11606   format %{ "cmpl    $op1, $op2" %}
11607   opcode(0x3B);  /* Opcode 3B /r */
11608   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11609   ins_pipe(ialu_cr_reg_reg);
11610 %}
11611 
11612 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11613 %{
        // Signed int compare of a register against an immediate (opcode 0x81 /7).
        // NOTE(review): OpcSErm/Con8or32 presumably pick the short imm8 form
        // when the constant fits -- confirm against the enc_class definitions.
11614   match(Set cr (CmpI op1 op2));
11615 
11616   format %{ "cmpl    $op1, $op2" %}
11617   opcode(0x81, 0x07); /* Opcode 81 /7 */
11618   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11619   ins_pipe(ialu_cr_reg_imm);
11620 %}
11621 
11622 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11623 %{
        // Signed int compare of a register against a value loaded from memory;
        // the LoadI is folded into the compare's memory operand.
11624   match(Set cr (CmpI op1 (LoadI op2)));
11625 
11626   ins_cost(500); // XXX
11627   format %{ "cmpl    $op1, $op2" %}
11628   opcode(0x3B); /* Opcode 3B /r */
11629   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11630   ins_pipe(ialu_cr_reg_mem);
11631 %}
11632 
11633 instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
11634 %{
        // Int compare against the constant zero, emitted as "testl src, src"
        // (opcode 0x85) instead of a compare with an immediate.
11635   match(Set cr (CmpI src zero));
11636 
11637   format %{ "testl   $src, $src" %}
11638   opcode(0x85);
11639   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11640   ins_pipe(ialu_cr_reg_imm);
11641 %}
11642 
11643 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11644 %{
        // Folds "(src & con) compared with zero" into a single testl of src
        // against a 32-bit immediate (opcode 0xF7 /0, Con32); no AndI result is
        // materialized.
11645   match(Set cr (CmpI (AndI src con) zero));
11646 
11647   format %{ "testl   $src, $con" %}
11648   opcode(0xF7, 0x00);
11649   ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11650   ins_pipe(ialu_cr_reg_imm);
11651 %}
11652 
11653 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11654 %{
        // Folds "(src & LoadI(mem)) compared with zero" into a single testl of
        // src against the memory operand (opcode 0x85).
11655   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11656 
11657   format %{ "testl   $src, $mem" %}
11658   opcode(0x85);
11659   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11660   ins_pipe(ialu_cr_reg_mem);
11661 %}
11662 
11663 // Unsigned compare Instructions; really, same as signed except they
11664 // produce an rFlagsRegU instead of rFlagsReg.
11665 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11666 %{
        // Unsigned int compare of two registers.  Same opcode (0x3B) and
        // enc_classes as compI_rReg; the difference is that the result is typed
        // as rFlagsRegU.
11667   match(Set cr (CmpU op1 op2));
11668 
11669   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11670   opcode(0x3B); /* Opcode 3B /r */
11671   ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11672   ins_pipe(ialu_cr_reg_reg);
11673 %}
11674 
11675 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11676 %{
        // Unsigned int compare of a register against an immediate
        // (opcode 0x81 /7); the result is typed as rFlagsRegU.
11677   match(Set cr (CmpU op1 op2));
11678 
11679   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11680   opcode(0x81,0x07); /* Opcode 81 /7 */
11681   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11682   ins_pipe(ialu_cr_reg_imm);
11683 %}
11684 
11685 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11686 %{
        // Unsigned int compare of a register against a value loaded from
        // memory; the LoadI is folded into the compare's memory operand.
11687   match(Set cr (CmpU op1 (LoadI op2)));
11688 
11689   ins_cost(500); // XXX
11690   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11691   opcode(0x3B); /* Opcode 3B /r */
11692   ins_encode(REX_reg_mem(op1, op2), OpcP, reg_mem(op1, op2));
11693   ins_pipe(ialu_cr_reg_mem);
11694 %}
11695 
11696 // // // Cisc-spilled version of cmpU_rReg
11697 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11698 // //%{
11699 // //  match(Set cr (CmpU (LoadI op1) op2));
11700 // //
11701 // //  format %{ "CMPu   $op1,$op2" %}
11702 // //  ins_cost(500);
11703 // //  opcode(0x39);  /* Opcode 39 /r */
11704 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11705 // //%}
11706 
11707 instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
11708 %{
        // Unsigned int compare against the constant zero, emitted as
        // "testl src, src" (opcode 0x85); the result is typed as rFlagsRegU.
11709   match(Set cr (CmpU src zero));
11710 
11711   format %{ "testl  $src, $src\t# unsigned" %}
11712   opcode(0x85);
11713   ins_encode(REX_reg_reg(src, src), OpcP, reg_reg(src, src));
11714   ins_pipe(ialu_cr_reg_imm);
11715 %}
11716 
11717 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11718 %{
        // Pointer compare of two registers, producing unsigned flags
        // (rFlagsRegU).  Uses the *_wide REX enc_class, shown as cmpq in the
        // format string.
11719   match(Set cr (CmpP op1 op2));
11720 
11721   format %{ "cmpq    $op1, $op2\t# ptr" %}
11722   opcode(0x3B); /* Opcode 3B /r */
11723   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11724   ins_pipe(ialu_cr_reg_reg);
11725 %}
11726 
11727 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11728 %{
        // Pointer compare of a register against a pointer loaded from memory;
        // the LoadP is folded into the compare's memory operand.  Has the same
        // match rule as compP_mem_rReg but no predicate and an explicit cost.
11729   match(Set cr (CmpP op1 (LoadP op2)));
11730 
11731   ins_cost(500); // XXX
11732   format %{ "cmpq    $op1, $op2\t# ptr" %}
11733   opcode(0x3B); /* Opcode 3B /r */
11734   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11735   ins_pipe(ialu_cr_reg_mem);
11736 %}
11737 
11738 // // // Cisc-spilled version of cmpP_rReg
11739 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11740 // //%{
11741 // //  match(Set cr (CmpP (LoadP op1) op2));
11742 // //
11743 // //  format %{ "CMPu   $op1,$op2" %}
11744 // //  ins_cost(500);
11745 // //  opcode(0x39);  /* Opcode 39 /r */
11746 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11747 // //%}
11748 
11749 // XXX this is generalized by compP_rReg_mem???
11750 // Compare raw pointer (used in out-of-heap check).
11751 // Only works because non-oop pointers must be raw pointers
11752 // and raw pointers have no anti-dependencies.
11753 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11754 %{
        // Same match rule and encoding as compP_rReg_mem, but without the
        // ins_cost(500) and restricted by the predicate to nodes whose
        // n->in(2)->in(2) type needs no relocation (relocInfo::none).
        // NOTE(review): n->in(2) is presumably the LoadP and its in(2) the
        // address input -- confirm against the ideal graph node layout.
11755   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);
11756   match(Set cr (CmpP op1 (LoadP op2)));
11757 
11758   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11759   opcode(0x3B); /* Opcode 3B /r */
11760   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11761   ins_pipe(ialu_cr_reg_mem);
11762 %}
11763 
11764 // This will generate a signed flags result. This should be OK since
11765 // any compare to a zero should be eq/neq.
11766 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11767 %{
        // Pointer null check on a register, emitted as "testq src, src"
        // (opcode 0x85 with the wide REX enc_class).  Note the result is typed
        // rFlagsReg (signed), unlike the other CmpP rules in this section.
11768   match(Set cr (CmpP src zero));
11769 
11770   format %{ "testq   $src, $src\t# ptr" %}
11771   opcode(0x85);
11772   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11773   ins_pipe(ialu_cr_reg_imm);
11774 %}
11775 
11776 // This will generate a signed flags result. This should be OK since
11777 // any compare to a zero should be eq/neq.
11778 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11779 %{
        // Pointer null check performed directly on memory (LoadP folded in).
        // Selected when compressed oops are off or the narrow-oop base is
        // non-NULL; testP_mem_reg0 covers the zero-base case.
        // The encoding is opcode 0xF7 /0 with a 32-bit immediate 0xFFFFFFFF.
        // NOTE(review): the format string shows the mask as 64-bit all-ones,
        // presumably because the wide form sign-extends the imm32 -- confirm.
11780   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
11781   match(Set cr (CmpP (LoadP op) zero));
11782 
11783   ins_cost(500); // XXX
11784   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11785   opcode(0xF7); /* Opcode F7 /0 */
11786   ins_encode(REX_mem_wide(op),
11787              OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
11788   ins_pipe(ialu_cr_reg_imm);
11789 %}
11790 
11791 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
11792 %{
        // Pointer null check on memory for the configuration where compressed
        // oops are on and both the narrow-oop and narrow-klass bases are NULL.
        // Compares r12 against the memory operand instead of using an
        // immediate; the format string notes that R12_heapbase is zero here.
11793   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
11794   match(Set cr (CmpP (LoadP mem) zero));
11795 
11796   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
11797   ins_encode %{
11798     __ cmpq(r12, $mem$$Address);
11799   %}
11800   ins_pipe(ialu_cr_reg_mem);
11801 %}
11802 
11803 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
11804 %{
        // Compare of two compressed (narrow) pointers held in registers,
        // emitted as a 32-bit cmpl; produces unsigned flags.
11805   match(Set cr (CmpN op1 op2));
11806 
11807   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11808   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
11809   ins_pipe(ialu_cr_reg_reg);
11810 %}
11811 
11812 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
11813 %{
        // Compare of a compressed pointer register against a compressed pointer
        // loaded from memory; the LoadN is folded into the cmpl memory operand.
11814   match(Set cr (CmpN src (LoadN mem)));
11815 
11816   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
11817   ins_encode %{
11818     __ cmpl($src$$Register, $mem$$Address);
11819   %}
11820   ins_pipe(ialu_cr_reg_mem);
11821 %}
11822 
11823 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a compressed pointer register against a compressed-oop
        // constant.  The constant is passed to cmp_narrow_oop as a jobject.
11824   match(Set cr (CmpN op1 op2));
11825 
11826   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
11827   ins_encode %{
11828     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
11829   %}
11830   ins_pipe(ialu_cr_reg_imm);
11831 %}
11832 
11833 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
11834 %{
        // Compare of a compressed-oop constant against a compressed pointer in
        // memory.  The match rule has the constant on the left (CmpN src mem),
        // while the emitted compare has the memory operand first
        // (cmp_narrow_oop(mem, src)), i.e. the operand order is swapped.
        // NOTE(review): that is only safe if consumers test eq/ne -- confirm.
11835   match(Set cr (CmpN src (LoadN mem)));
11836 
11837   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
11838   ins_encode %{
11839     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
11840   %}
11841   ins_pipe(ialu_cr_reg_mem);
11842 %}
11843 
11844 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
        // Compare of a compressed pointer register against a compressed-klass
        // constant.  The constant is passed to cmp_narrow_klass as a Klass*.
11845   match(Set cr (CmpN op1 op2));
11846 
11847   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
11848   ins_encode %{
11849     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
11850   %}
11851   ins_pipe(ialu_cr_reg_imm);
11852 %}
11853 
11854 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
11855 %{
        // Compare of a compressed-klass constant against a compressed klass
        // pointer in memory (LoadNKlass folded in).  As in compN_mem_imm, the
        // emitted compare puts the memory operand first although the match rule
        // has the constant on the left.
        // NOTE(review): only safe if consumers test eq/ne -- confirm.
11856   match(Set cr (CmpN src (LoadNKlass mem)));
11857 
11858   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
11859   ins_encode %{
11860     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
11861   %}
11862   ins_pipe(ialu_cr_reg_mem);
11863 %}
11864 
11865 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a compressed pointer in a register, emitted as
        // "testl src, src".
11866   match(Set cr (CmpN src zero));
11867 
11868   format %{ "testl   $src, $src\t# compressed ptr" %}
11869   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
11870   ins_pipe(ialu_cr_reg_imm);
11871 %}
11872 
11873 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
11874 %{
        // Null check of a compressed pointer performed directly on memory
        // (LoadN folded in); selected only when the narrow-oop base is non-NULL.
        //
        // The flags must report "equal" exactly when the loaded word is zero.
        // Comparing the word against 0xFFFFFFFF would instead report "equal"
        // for an all-ones word, so the compare is made against 0.  A cmpl
        // against 0 leaves the same ZF/SF/CF/OF as "testl mem, 0xffffffff"
        // would, and the format string now shows what is really emitted.
11875   predicate(Universe::narrow_oop_base() != NULL);
11876   match(Set cr (CmpN (LoadN mem) zero));
11877 
11878   ins_cost(500); // XXX
11879   format %{ "cmpl    $mem, 0\t# compressed ptr" %}
11880   ins_encode %{
11881     __ cmpl($mem$$Address, (int)0);
11882   %}
11883   ins_pipe(ialu_cr_reg_mem);
11884 %}
11885 
11886 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
11887 %{
        // Null check of a compressed pointer in memory for the configuration
        // where both the narrow-oop and narrow-klass bases are NULL.  Compares
        // r12 against the memory operand with a 32-bit cmpl; the format string
        // notes that R12_heapbase is zero in this configuration.
11888   predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));
11889   match(Set cr (CmpN (LoadN mem) zero));
11890 
11891   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
11892   ins_encode %{
11893     __ cmpl(r12, $mem$$Address);
11894   %}
11895   ins_pipe(ialu_cr_reg_mem);
11896 %}
11897 
11898 // Yanked all unsigned pointer compare operations.
11899 // Pointer compares are done with CmpP which is already unsigned.
11900 
11901 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11902 %{
        // Signed long compare of two registers (opcode 0x3B with the wide REX
        // enc_class, shown as cmpq).
11903   match(Set cr (CmpL op1 op2));
11904 
11905   format %{ "cmpq    $op1, $op2" %}
11906   opcode(0x3B);  /* Opcode 3B /r */
11907   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11908   ins_pipe(ialu_cr_reg_reg);
11909 %}
11910 
11911 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11912 %{
        // Signed long compare of a register against an immL32 immediate
        // (opcode 0x81 /7, wide form).
11913   match(Set cr (CmpL op1 op2));
11914 
11915   format %{ "cmpq    $op1, $op2" %}
11916   opcode(0x81, 0x07); /* Opcode 81 /7 */
11917   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
11918   ins_pipe(ialu_cr_reg_imm);
11919 %}
11920 
11921 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
11922 %{
        // Signed long compare of a register against a value loaded from memory;
        // the LoadL is folded into the compare's memory operand.
11923   match(Set cr (CmpL op1 (LoadL op2)));
11924 
11925   format %{ "cmpq    $op1, $op2" %}
11926   opcode(0x3B); /* Opcode 3B /r */
11927   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
11928   ins_pipe(ialu_cr_reg_mem);
11929 %}
11930 
11931 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
11932 %{
        // Long compare against the constant zero, emitted as "testq src, src"
        // (opcode 0x85, wide form).
11933   match(Set cr (CmpL src zero));
11934 
11935   format %{ "testq   $src, $src" %}
11936   opcode(0x85);
11937   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
11938   ins_pipe(ialu_cr_reg_imm);
11939 %}
11940 
11941 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
11942 %{
        // Folds "(src & con) compared with zero" into a single testq of src
        // against a 32-bit immediate (opcode 0xF7 /0, wide form, Con32).
11943   match(Set cr (CmpL (AndL src con) zero));
11944 
11945   format %{ "testq   $src, $con\t# long" %}
11946   opcode(0xF7, 0x00);
11947   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src), Con32(con));
11948   ins_pipe(ialu_cr_reg_imm);
11949 %}
11950 
11951 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11952 %{
        // Folds "(src & LoadL(mem)) compared with zero" into a single testq of
        // src against the memory operand (opcode 0x85, wide form).
11953   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11954 
11955   format %{ "testq   $src, $mem" %}
11956   opcode(0x85);
11957   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11958   ins_pipe(ialu_cr_reg_mem);
11959 %}
11960 
11961 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
11962 %{
        // Variant of testL_reg_mem whose register operand is a pointer viewed
        // as a long (CastP2X src); the encoding is identical, and the cast
        // emits no instruction of its own here.
11963   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
11964 
11965   format %{ "testq   $src, $mem" %}
11966   opcode(0x85);
11967   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11968   ins_pipe(ialu_cr_reg_mem);
11969 %}
11970 
11971 // Manifest a CmpL result in an integer register.  Very painful.
11972 // This is the test to avoid.
11973 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11974 %{
        // Three-way long compare that leaves its result in an int register.
        // Per the format string: dst is preset to -1 and kept if src1 < src2;
        // otherwise setne/movzbl turn it into 1 when the operands differ and 0
        // when they are equal.  The actual bytes come from the cmpl3_flag
        // enc_class, and the flags are clobbered (KILL flags).
11975   match(Set dst (CmpL3 src1 src2));
11976   effect(KILL flags);
11977 
11978   ins_cost(275); // XXX
11979   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
11980             "movl    $dst, -1\n\t"
11981             "jl,s    done\n\t"
11982             "setne   $dst\n\t"
11983             "movzbl  $dst, $dst\n\t"
11984     "done:" %}
11985   ins_encode(cmpl3_flag(src1, src2, dst));
11986   ins_pipe(pipe_slow);
11987 %}
11988 
11989 // Unsigned long compare Instructions; really, same as signed long except they
11990 // produce an rFlagsRegU instead of rFlagsReg.
11991 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
11992 %{
        // Unsigned long compare of two registers.  Same encoding as compL_rReg;
        // the result is typed as rFlagsRegU.
11993   match(Set cr (CmpUL op1 op2));
11994 
11995   format %{ "cmpq    $op1, $op2\t# unsigned" %}
11996   opcode(0x3B);  /* Opcode 3B /r */
11997   ins_encode(REX_reg_reg_wide(op1, op2), OpcP, reg_reg(op1, op2));
11998   ins_pipe(ialu_cr_reg_reg);
11999 %}
12000 
12001 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
12002 %{
        // Unsigned long compare of a register against an immL32 immediate
        // (opcode 0x81 /7, wide form); the result is typed as rFlagsRegU.
12003   match(Set cr (CmpUL op1 op2));
12004 
12005   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12006   opcode(0x81, 0x07); /* Opcode 81 /7 */
12007   ins_encode(OpcSErm_wide(op1, op2), Con8or32(op2));
12008   ins_pipe(ialu_cr_reg_imm);
12009 %}
12010 
12011 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
12012 %{
        // Unsigned long compare of a register against a value loaded from
        // memory; the LoadL is folded into the compare's memory operand.
12013   match(Set cr (CmpUL op1 (LoadL op2)));
12014 
12015   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12016   opcode(0x3B); /* Opcode 3B /r */
12017   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
12018   ins_pipe(ialu_cr_reg_mem);
12019 %}
12020 
12021 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
12022 %{
        // Unsigned long compare against the constant zero, emitted as
        // "testq src, src"; the result is typed as rFlagsRegU.
12023   match(Set cr (CmpUL src zero));
12024 
12025   format %{ "testq   $src, $src\t# unsigned" %}
12026   opcode(0x85);
12027   ins_encode(REX_reg_reg_wide(src, src), OpcP, reg_reg(src, src));
12028   ins_pipe(ialu_cr_reg_imm);
12029 %}
12030 
12031 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
12032 %{
        // Compare of a signed byte in memory against an 8-bit immediate.  The
        // LoadB is folded away and a byte-sized cmpb is emitted directly on the
        // memory operand.
12033   match(Set cr (CmpI (LoadB mem) imm));
12034 
12035   ins_cost(125);
12036   format %{ "cmpb    $mem, $imm" %}
12037   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
12038   ins_pipe(ialu_cr_reg_mem);
12039 %}
12040 
12041 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU8 imm, immI0 zero)
12042 %{
        // Folds "(LoadUB(mem) & imm) compared with zero" into a single testb on
        // the memory operand; imm is restricted to the immU8 operand class.
12043   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
12044 
12045   ins_cost(125);
12046   format %{ "testb   $mem, $imm\t# ubyte" %}
12047   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12048   ins_pipe(ialu_cr_reg_mem);
12049 %}
12050 
12051 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero)
12052 %{
        // Folds "(LoadB(mem) & imm) compared with zero" into a single testb on
        // the memory operand; imm is restricted to the immI8 operand class.
12053   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
12054 
12055   ins_cost(125);
12056   format %{ "testb   $mem, $imm\t# byte" %}
12057   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12058   ins_pipe(ialu_cr_reg_mem);
12059 %}
12060 
12061 //----------Max and Min--------------------------------------------------------
12062 // Min Instructions
12063 
12064 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12065 %{
        // Helper with no match rule of its own; minI_rReg instantiates it from
        // its expand block.  Conditional move (opcode 0x0F 0x4F, "cmovlgt" in
        // the format): dst is both read and written (USE_DEF), and cr is
        // consumed.
12066   effect(USE_DEF dst, USE src, USE cr);
12067 
12068   format %{ "cmovlgt $dst, $src\t# min" %}
12069   opcode(0x0F, 0x4F);
12070   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12071   ins_pipe(pipe_cmov_reg);
12072 %}
12073 
12074 
12075 instruct minI_rReg(rRegI dst, rRegI src)
12076 %{
        // Int minimum, computed in place in dst.  Has no encoding of its own:
        // it expands into a compare of dst with src followed by a conditional
        // move of src into dst when dst was greater.
12077   match(Set dst (MinI dst src));
12078 
12079   ins_cost(200);
12080   expand %{
12081     rFlagsReg cr;
12082     compI_rReg(cr, dst, src);
12083     cmovI_reg_g(dst, src, cr);
12084   %}
12085 %}
12086 
12087 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12088 %{
        // Helper with no match rule of its own; maxI_rReg instantiates it from
        // its expand block.  Conditional move (opcode 0x0F 0x4C, "cmovllt" in
        // the format): dst is both read and written (USE_DEF), and cr is
        // consumed.
12089   effect(USE_DEF dst, USE src, USE cr);
12090 
12091   format %{ "cmovllt $dst, $src\t# max" %}
12092   opcode(0x0F, 0x4C);
12093   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
12094   ins_pipe(pipe_cmov_reg);
12095 %}
12096 
12097 
12098 instruct maxI_rReg(rRegI dst, rRegI src)
12099 %{
        // Int maximum, computed in place in dst.  Has no encoding of its own:
        // it expands into a compare of dst with src followed by a conditional
        // move of src into dst when dst was less.
12100   match(Set dst (MaxI dst src));
12101 
12102   ins_cost(200);
12103   expand %{
12104     rFlagsReg cr;
12105     compI_rReg(cr, dst, src);
12106     cmovI_reg_l(dst, src, cr);
12107   %}
12108 %}
12109 
12110 // ============================================================================
12111 // Branch Instructions
12112 
12113 // Jump Direct - Label defines a relative address from JMP+1
12114 instruct jmpDir(label labl)
12115 %{
        // Unconditional branch for Goto.  Always emitted in the long form (the
        // second argument to jmp is false), which is what the declared size of
        // 5 bytes corresponds to; jmpDir_short is the 2-byte replacement.
12116   match(Goto);
12117   effect(USE labl);
12118 
12119   ins_cost(300);
12120   format %{ "jmp     $labl" %}
12121   size(5);
12122   ins_encode %{
12123     Label* L = $labl$$label;
12124     __ jmp(*L, false); // Always long jump
12125   %}
12126   ins_pipe(pipe_jmp);
12127 %}
12128 
12129 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12130 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12131 %{
        // Conditional branch for If on signed flags.  The condition code comes
        // from the cmpOp operand; the jcc is always emitted in the long form
        // (declared size 6).  jmpCon_short is the 2-byte replacement.
12132   match(If cop cr);
12133   effect(USE labl);
12134 
12135   ins_cost(300);
12136   format %{ "j$cop     $labl" %}
12137   size(6);
12138   ins_encode %{
12139     Label* L = $labl$$label;
12140     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12141   %}
12142   ins_pipe(pipe_jcc);
12143 %}
12144 
12145 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12146 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12147 %{
        // Back-branch of a counted loop on signed flags.  Applies only when the
        // node has no vector mask set; jmpLoopEnd_and_restoreMask handles the
        // other case.  Always a long jcc (declared size 6).
12148   predicate(!n->has_vector_mask_set());
12149   match(CountedLoopEnd cop cr);
12150   effect(USE labl);
12151 
12152   ins_cost(300);
12153   format %{ "j$cop     $labl\t# loop end" %}
12154   size(6);
12155   ins_encode %{
12156     Label* L = $labl$$label;
12157     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12158   %}
12159   ins_pipe(pipe_jcc);
12160 %}
12161 
12162 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12163 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Back-branch of a counted loop on unsigned flags (cmpOpU /
        // rFlagsRegU).  Applies only when the node has no vector mask set.
        // Always a long jcc (declared size 6).
12164   predicate(!n->has_vector_mask_set());
12165   match(CountedLoopEnd cop cmp);
12166   effect(USE labl);
12167 
12168   ins_cost(300);
12169   format %{ "j$cop,u   $labl\t# loop end" %}
12170   size(6);
12171   ins_encode %{
12172     Label* L = $labl$$label;
12173     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12174   %}
12175   ins_pipe(pipe_jcc);
12176 %}
12177 
12178 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Back-branch of a counted loop for the cmpOpUCF / rFlagsRegUCF operand
        // pair.  Same encoding as jmpLoopEndU but with a lower cost (200).
        // Applies only when the node has no vector mask set.
12179   predicate(!n->has_vector_mask_set());
12180   match(CountedLoopEnd cop cmp);
12181   effect(USE labl);
12182 
12183   ins_cost(200);
12184   format %{ "j$cop,u   $labl\t# loop end" %}
12185   size(6);
12186   ins_encode %{
12187     Label* L = $labl$$label;
12188     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12189   %}
12190   ins_pipe(pipe_jcc);
12191 %}
12192 
12193 // mask version
12194 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12195 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
12196 %{
        // Counted-loop back-branch (signed flags) for nodes that have a vector
        // mask set: the long jcc is followed by restorevectmask(), emitted on
        // the fall-through path.  Declared size is 10 bytes for the pair.
12197   predicate(n->has_vector_mask_set());
12198   match(CountedLoopEnd cop cr);
12199   effect(USE labl);
12200 
12201   ins_cost(400);
12202   format %{ "j$cop     $labl\t# loop end\n\t"
12203             "restorevectmask \t# vector mask restore for loops" %}
12204   size(10);
12205   ins_encode %{
12206     Label* L = $labl$$label;
12207     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12208     __ restorevectmask();
12209   %}
12210   ins_pipe(pipe_jcc);
12211 %}
12212 
12213 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12214 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Counted-loop back-branch (unsigned flags) for nodes that have a
        // vector mask set: long jcc followed by restorevectmask() on the
        // fall-through path.  Declared size is 10 bytes for the pair.
12215   predicate(n->has_vector_mask_set());
12216   match(CountedLoopEnd cop cmp);
12217   effect(USE labl);
12218 
12219   ins_cost(400);
12220   format %{ "j$cop,u   $labl\t# loop end\n\t"
12221             "restorevectmask \t# vector mask restore for loops" %}
12222   size(10);
12223   ins_encode %{
12224     Label* L = $labl$$label;
12225     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12226     __ restorevectmask();
12227   %}
12228   ins_pipe(pipe_jcc);
12229 %}
12230 
12231 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Counted-loop back-branch for the cmpOpUCF / rFlagsRegUCF operand pair
        // when the node has a vector mask set: long jcc followed by
        // restorevectmask().  Same encoding as the U variant, lower cost (300).
12232   predicate(n->has_vector_mask_set());
12233   match(CountedLoopEnd cop cmp);
12234   effect(USE labl);
12235 
12236   ins_cost(300);
12237   format %{ "j$cop,u   $labl\t# loop end\n\t"
12238             "restorevectmask \t# vector mask restore for loops" %}
12239   size(10);
12240   ins_encode %{
12241     Label* L = $labl$$label;
12242     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12243     __ restorevectmask();
12244   %}
12245   ins_pipe(pipe_jcc);
12246 %}
12247 
12248 // Jump Direct Conditional - using unsigned comparison
12249 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Conditional branch for If on unsigned flags (cmpOpU / rFlagsRegU).
        // Always a long jcc (declared size 6).
12250   match(If cop cmp);
12251   effect(USE labl);
12252 
12253   ins_cost(300);
12254   format %{ "j$cop,u  $labl" %}
12255   size(6);
12256   ins_encode %{
12257     Label* L = $labl$$label;
12258     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12259   %}
12260   ins_pipe(pipe_jcc);
12261 %}
12262 
12263 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for If with the cmpOpUCF / rFlagsRegUCF operand
        // pair.  Same single long jcc as jmpConU, but with a lower cost (200).
12264   match(If cop cmp);
12265   effect(USE labl);
12266 
12267   ins_cost(200);
12268   format %{ "j$cop,u  $labl" %}
12269   size(6);
12270   ins_encode %{
12271     Label* L = $labl$$label;
12272     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12273   %}
12274   ins_pipe(pipe_jcc);
12275 %}
12276 
12277 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for If with a cmpOpUCF2 condition, which needs two
        // branch instructions because the parity flag must be consulted too.
        // Only notEqual and equal are supported; any other condition code hits
        // ShouldNotReachHere().  No size() is declared for this rule.
12278   match(If cop cmp);
12279   effect(USE labl);
12280 
12281   ins_cost(200);
12282   format %{ $$template
12283     if ($cop$$cmpcode == Assembler::notEqual) {
12284       $$emit$$"jp,u   $labl\n\t"
12285       $$emit$$"j$cop,u   $labl"
12286     } else {
12287       $$emit$$"jp,u   done\n\t"
12288       $$emit$$"j$cop,u   $labl\n\t"
12289       $$emit$$"done:"
12290     }
12291   %}
12292   ins_encode %{
12293     Label* l = $labl$$label;
12294     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target if parity is set OR the operands
            // differ, so both jumps go to the same label.
12295       __ jcc(Assembler::parity, *l, false);
12296       __ jcc(Assembler::notEqual, *l, false);
12297     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set means "do not branch", so skip over the
            // equality jump with a short jump to a local label.
12298       Label done;
12299       __ jccb(Assembler::parity, done);
12300       __ jcc(Assembler::equal, *l, false);
12301       __ bind(done);
12302     } else {
12303        ShouldNotReachHere();
12304     }
12305   %}
12306   ins_pipe(pipe_jcc);
12307 %}
12308 
12309 // ============================================================================
12310 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12311 // superklass array for an instance of the superklass.  Set a hidden
12312 // internal cache on a hit (cache is checked with exposed code in
12313 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12314 // encoding ALSO sets flags.
12315 
12316 instruct partialSubtypeCheck(rdi_RegP result,
12317                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12318                              rFlagsReg cr)
12319 %{
        // Slow half of a subtype check that delivers its answer in a register.
        // Per the format string: the secondary-supers array of sub is scanned
        // with "repne scasq" for super; on a hit the secondary-super cache of
        // sub is updated and result (rdi) is zeroed, on a miss rdi is left
        // non-zero.  rcx and the flags are clobbered.  opcode(0x1) asks the
        // shared enc_PartialSubtypeCheck encoding to emit the final XOR of RDI.
12320   match(Set result (PartialSubtypeCheck sub super));
12321   effect(KILL rcx, KILL cr);
12322 
12323   ins_cost(1100);  // slightly larger than the next version
12324   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12325             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12326             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12327             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12328             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12329             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12330             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
12331     "miss:\t" %}
12332 
12333   opcode(0x1); // Force a XOR of RDI
12334   ins_encode(enc_PartialSubtypeCheck());
12335   ins_pipe(pipe_slow);
12336 %}
12337 
12338 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12339                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12340                                      immP0 zero,
12341                                      rdi_RegP result)
12342 %{
        // Variant of partialSubtypeCheck for when the check result is only
        // compared against null: the answer is delivered in the flags (cr)
        // rather than a register.  rdi is still used as the scan pointer and
        // is therefore killed along with rcx.  opcode(0x0) tells the shared
        // enc_PartialSubtypeCheck encoding to omit the XOR of RDI.
12343   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12344   effect(KILL rcx, KILL result);
12345 
12346   ins_cost(1000);
12347   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12348             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12349             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12350             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12351             "jne,s   miss\t\t# Missed: flags nz\n\t"
12352             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12353     "miss:\t" %}
12354 
12355   opcode(0x0); // No need to XOR RDI
12356   ins_encode(enc_PartialSubtypeCheck());
12357   ins_pipe(pipe_slow);
12358 %}
12359 
12360 // ============================================================================
12361 // Branch Instructions -- short offset versions
12362 //
12363 // These instructions are used to replace jumps of a long offset (the default
12364 // match) with jumps of a shorter offset.  These instructions are all tagged
12365 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12366 // match rules in general matching.  Instead, the ADLC generates a conversion
12367 // method in the MachNode which can be used to do in-place replacement of the
12368 // long variant with the shorter variant.  The compiler will determine if a
12369 // branch can be taken by the is_short_branch_offset() predicate in the machine
12370 // specific code section of the file.
12371 
// Jump Direct, short-offset variant - Label defines a relative address from
// JMP+1.  Emitted with jmpb and declared as exactly two bytes.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
12387 
// Jump Direct Conditional, short-offset variant - Label defines a relative
// address from Jcc+1.  The condition code comes from the cop operand and the
// branch is emitted with jccb (declared size: two bytes).
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12403 
// Short-offset conditional branch closing a counted loop (CountedLoopEnd) -
// Label defines a relative address from Jcc+1.  Declared size: two bytes.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12419 
// Short-offset CountedLoopEnd branch for the cmpOpU / rFlagsRegU operand
// pair - Label defines a relative address from Jcc+1.  Declared size: two
// bytes.
instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12435 
// Short-offset CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand
// pair.  Same encoding as jmpLoopEndU_short: one jccb, two bytes.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12450 
// Jump Direct Conditional - using unsigned comparison, short-offset variant.
// A single jccb is emitted; declared size: two bytes.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12466 
// Short-offset conditional branch for the cmpOpUCF / rFlagsRegUCF operand
// pair.  Same encoding as jmpConU_short: one jccb, two bytes.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12481 
// Short-offset conditional branch for cmpOpUCF2, emitted as two short jumps
// (declared size: four bytes).  Only two conditions are accepted:
//   notEqual: branch to the label on parity, then branch to it on notEqual.
//   equal:    skip past the branch on parity, otherwise branch on equal.
// Any other condition hits ShouldNotReachHere().
// NOTE(review): the parity test presumably handles the unordered result of a
// floating-point compare - confirm against the cmpOpUCF2 operand definition.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(4);
  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s   $labl\n\t"
      $$emit$$"j$cop,u,s   $labl"
    } else {
      $$emit$$"jp,u,s   done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jccb(Assembler::equal, *target);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12515 
12516 // ============================================================================
12517 // inlined locking and unlocking
12518 
// Inlined monitor enter, selected when the current compilation uses RTM.
// Produces flags from FastLock.  The box is pinned to RBX and is both used
// and killed; tmp (RAX), scr (RDX), cx1 and cx2 are temporaries.  The RTM
// counters and the MethodData of the method being compiled are handed to
// fast_lock together with the compilation's profile_rtm() setting.
instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
  match(Set cr (FastLock object box));
  predicate(Compile::current()->use_rtm());
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    Register obj      = $object$$Register;
    Register lock_box = $box$$Register;
    Register tmp_reg  = $tmp$$Register;
    Register scr_reg  = $scr$$Register;
    Register cx1_reg  = $cx1$$Register;
    Register cx2_reg  = $cx2$$Register;
    __ fast_lock(obj, lock_box, tmp_reg, scr_reg, cx1_reg, cx2_reg,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
12534 
// Inlined monitor enter, selected when the current compilation does not use
// RTM.  Produces flags from FastLock.  The box is pinned to RBX and is both
// used and killed; tmp (RAX) and scr are temporaries.  The two extra
// register slots are passed as noreg and the RTM arguments as NULL/false.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
  match(Set cr (FastLock object box));
  predicate(!Compile::current()->use_rtm());
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    Register obj      = $object$$Register;
    Register lock_box = $box$$Register;
    Register tmp_reg  = $tmp$$Register;
    Register scr_reg  = $scr$$Register;
    __ fast_lock(obj, lock_box, tmp_reg, scr_reg, noreg, noreg,
                 _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12547 
// Inlined monitor exit.  Produces flags from FastUnlock.  The box is pinned
// to RAX and is both used and killed; tmp is a temporary.  Whether the
// compilation uses RTM is forwarded to fast_unlock.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    Register obj      = $object$$Register;
    Register lock_box = $box$$Register;
    Register tmp_reg  = $tmp$$Register;
    __ fast_unlock(obj, lock_box, tmp_reg, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
12558 
12559 
12560 // ============================================================================
12561 // Safepoint Instructions
// Safepoint poll against the global polling page when that page is not
// "far": a testl of RAX against the page address, tagged with a poll_type
// relocation.  Flags are killed.
instruct safePoint_poll(rFlagsReg cr)
%{
  match(SafePoint);
  predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
  effect(KILL cr);

  ins_cost(125);
  format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
    __ testl(rax, polling_page);
  %}
  ins_pipe(ialu_reg_mem);
%}
12577 
// Safepoint poll against the global polling page when that page is "far":
// the page address arrives in the poll register and is tested through it.
// The poll_type relocation is recorded just before the testl.  Flags are
// killed.
instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
  effect(KILL cr, USE poll);

  ins_cost(125);
  format %{ "testl  rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    Register poll_reg = $poll$$Register;
    __ relocate(relocInfo::poll_type);
    __ testl(rax, Address(poll_reg, 0));
  %}
  ins_pipe(ialu_reg_mem);
%}
12593 
// Safepoint poll used with thread-local polling: tests RAX against the
// address held in the poll register.  The poll_type relocation is recorded
// first, and the emitted instruction is then checked to be recognizable as
// a safepoint poll.  Flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  predicate(SafepointMechanism::uses_thread_local_poll());
  effect(KILL cr, USE poll);

  ins_cost(125);
  size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  format %{ "testl  rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    Register poll_reg = $poll$$Register;
    __ relocate(relocInfo::poll_type);
    address poll_pc = __ pc();
    __ testl(rax, Address(poll_reg, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
12612 
12613 // ============================================================================
12614 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Matches CallStaticJava and uses opcode 0xE8 ("E8 cd").  The encoding chains
// the clear_avx, Java_Static_Call and call_epilog encoding classes (their
// definitions are not shown here) and requests ins_alignment(4).
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
12629 
// Call Java Dynamic Instruction
// Matches CallDynamicJava.  Per the format listing, RAX is loaded with
// Universe::non_oop_word() ahead of the call.  The encoding chains the
// clear_avx, Java_Dynamic_Call and call_epilog encoding classes (their
// definitions are not shown here) and requests ins_alignment(4).
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
12645 
// Call Runtime Instruction
// Matches CallRuntime; encoded through clear_avx followed by Java_To_Runtime
// (encoding classes whose definitions are not shown here).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12657 
// Call runtime without safepoint
// Matches CallLeaf; uses the same clear_avx + Java_To_Runtime encoding as
// CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12669 
// Call runtime without safepoint, indirect form.
// Selected when the call node carries no entry point (entry_point() is
// NULL): the address to call is supplied in the target register and the
// call goes through that register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  match(CallLeafNoFP target);
  predicate(n->as_Call()->entry_point() == NULL);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    Register callee = $target$$Register;
    __ call(callee);
  %}

  ins_pipe(pipe_slow);
%}
12685 
// Call runtime without safepoint (CallLeafNoFP), direct form.
// Selected when the call node has a non-NULL entry point; uses the same
// clear_avx + Java_To_Runtime encoding as the other runtime calls.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != NULL);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12697 
// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching
// NOTE(review): this rule itself only declares opcode 0xC3 encoded via OpcP;
// the nop mentioned above is not visible here - confirm where it is emitted.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}
12711 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// The jump is an indirect "FF /4" through jump_target (drawn from the
// no_rbp_RegP class); the method oop operand is pinned to RBX.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
  ins_pipe(pipe_jmp);
%}
12726 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is popped into RDX (single byte 0x5a) and then an
// indirect "FF /4" jump goes through jump_target; the exception oop operand
// is pinned to RAX.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(Opcode(0x5a), // popq rdx
             REX_reg(jump_target), OpcP, reg_opc(jump_target));
  ins_pipe(pipe_jmp);
%}
12741 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
// The rule only binds the CreateEx result to RAX: its size is 0 and its
// encoding is empty.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
12755 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// The jump itself is produced by the enc_rethrow encoding class, which is
// defined outside this rule.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}
12768 
12769 //
12770 // Execute ZGC load barrier (strong) slow path
12771 //
12772 
// Strong load barrier slow path when running without XMM regs
// (MaxVectorSize < 16).  The address of mem is materialized in dst with lea
// and the slow-path stub selected for that register is called.  dst must
// not be R12, R15 or RSP.  Flags are killed.
instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{

  predicate(MaxVectorSize < 16);
  match(Set dst (LoadBarrierSlowReg mem));

  effect(DEF dst, KILL cr);

  format %{"LoadBarrierSlowRegNoVec $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register dst_reg = $dst$$Register;
    ZBarrierSetAssembler* zbs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(dst_reg != r12, "Can't be R12!");
    assert(dst_reg != r15, "Can't be R15!");
    assert(dst_reg != rsp, "Can't be RSP!");

    __ lea(dst_reg, $mem$$Address);
    __ call(RuntimeAddress(zbs->load_barrier_slow_stub(dst_reg)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12799 
// Strong load barrier slow path for XMM and YMM enabled processors
// (UseSSE > 0, UseAVX <= 2, MaxVectorSize >= 16).  Same code sequence as the
// NoVec rule, but xmm0-xmm15 are additionally declared killed.
instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr,
                                     rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
                                     rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
                                     rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
                                     rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{

  predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16));
  match(Set dst (LoadBarrierSlowReg mem));

  effect(DEF dst, KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15);

  format %{"LoadBarrierSlowRegXmm $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register dst_reg = $dst$$Register;
    ZBarrierSetAssembler* zbs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(dst_reg != r12, "Can't be R12!");
    assert(dst_reg != r15, "Can't be R15!");
    assert(dst_reg != rsp, "Can't be RSP!");

    __ lea(dst_reg, $mem$$Address);
    __ call(RuntimeAddress(zbs->load_barrier_slow_stub(dst_reg)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12834 
// Strong load barrier slow path for ZMM enabled processors (UseAVX == 3,
// MaxVectorSize >= 16).  Same code sequence as the NoVec rule, but
// xmm0-xmm31 are additionally declared killed.
instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr,
                               rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
                               rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
                               rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
                               rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
                               rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
                               rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
                               rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
                               rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{

  predicate((UseAVX == 3) && (MaxVectorSize >= 16));
  match(Set dst (LoadBarrierSlowReg mem));

  effect(DEF dst, KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15,
         KILL x16, KILL x17, KILL x18, KILL x19,
         KILL x20, KILL x21, KILL x22, KILL x23,
         KILL x24, KILL x25, KILL x26, KILL x27,
         KILL x28, KILL x29, KILL x30, KILL x31);

  format %{"LoadBarrierSlowRegZmm $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register dst_reg = $dst$$Register;
    ZBarrierSetAssembler* zbs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(dst_reg != r12, "Can't be R12!");
    assert(dst_reg != r15, "Can't be R15!");
    assert(dst_reg != rsp, "Can't be RSP!");

    __ lea(dst_reg, $mem$$Address);
    __ call(RuntimeAddress(zbs->load_barrier_slow_stub(dst_reg)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12877 
12878 //
12879 // Execute ZGC load barrier (weak) slow path
12880 //
12881 
// Weak load barrier slow path when running without XMM regs
// (MaxVectorSize < 16).  The address of mem is materialized in dst with lea
// and the weak slow-path stub selected for that register is called.  dst
// must not be R12, R15 or RSP.  Flags are killed.
//
// This rule must match LoadBarrierWeakSlowReg, like the XmmAndYmm and Zmm
// weak variants.  Matching the strong LoadBarrierSlowReg node here would
// leave the weak node without any rule when MaxVectorSize < 16 and would
// duplicate the match rule of loadBarrierSlowRegNoVec.
instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{

  match(Set dst (LoadBarrierWeakSlowReg mem));
  predicate(MaxVectorSize < 16);

  effect(DEF dst, KILL cr);

  format %{"LoadBarrierWeakSlowRegNoVec $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register d = $dst$$Register;
    ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(d != r12, "Can't be R12!");
    assert(d != r15, "Can't be R15!");
    assert(d != rsp, "Can't be RSP!");

    __ lea(d, $mem$$Address);
    __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12908 
// Weak load barrier slow path for XMM and YMM enabled processors
// (UseSSE > 0, UseAVX <= 2, MaxVectorSize >= 16).  The address of mem is
// materialized in dst and the weak slow-path stub for that register is
// called; flags and xmm0-xmm15 are declared killed.
instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr,
                                         rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
                                         rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
                                         rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
                                         rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{

  predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16));
  match(Set dst (LoadBarrierWeakSlowReg mem));

  effect(DEF dst, KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15);

  format %{"LoadBarrierWeakSlowRegXmm $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register dst_reg = $dst$$Register;
    ZBarrierSetAssembler* zbs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(dst_reg != r12, "Can't be R12!");
    assert(dst_reg != r15, "Can't be R15!");
    assert(dst_reg != rsp, "Can't be RSP!");

    __ lea(dst_reg, $mem$$Address);
    __ call(RuntimeAddress(zbs->load_barrier_weak_slow_stub(dst_reg)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12943 
// Weak load barrier slow path for ZMM enabled processors (UseAVX == 3,
// MaxVectorSize >= 16).  The address of mem is materialized in dst and the
// weak slow-path stub for that register is called; flags and xmm0-xmm31 are
// declared killed.
instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr,
                                   rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
                                   rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
                                   rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
                                   rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
                                   rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
                                   rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
                                   rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
                                   rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{

  predicate((UseAVX == 3) && (MaxVectorSize >= 16));
  match(Set dst (LoadBarrierWeakSlowReg mem));

  effect(DEF dst, KILL cr,
         KILL x0, KILL x1, KILL x2, KILL x3,
         KILL x4, KILL x5, KILL x6, KILL x7,
         KILL x8, KILL x9, KILL x10, KILL x11,
         KILL x12, KILL x13, KILL x14, KILL x15,
         KILL x16, KILL x17, KILL x18, KILL x19,
         KILL x20, KILL x21, KILL x22, KILL x23,
         KILL x24, KILL x25, KILL x26, KILL x27,
         KILL x28, KILL x29, KILL x30, KILL x31);

  format %{"LoadBarrierWeakSlowRegZmm $dst, $mem" %}
  ins_encode %{
#if INCLUDE_ZGC
    Register dst_reg = $dst$$Register;
    ZBarrierSetAssembler* zbs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();

    assert(dst_reg != r12, "Can't be R12!");
    assert(dst_reg != r15, "Can't be R15!");
    assert(dst_reg != rsp, "Can't be RSP!");

    __ lea(dst_reg, $mem$$Address);
    __ call(RuntimeAddress(zbs->load_barrier_weak_slow_stub(dst_reg)));
#else
    ShouldNotReachHere();
#endif
  %}
  ins_pipe(pipe_slow);
%}
12986 
12987 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Matches ThreadLocal with the result pinned to R15.  The rule has size 0
// and an empty encoding, so no code is emitted for it.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
13000 
13001 
13002 //----------PEEPHOLE RULES-----------------------------------------------------
13003 // These must follow all instruction definitions as they use the names
13004 // defined in the instructions definitions.
13005 //
13006 // peepmatch ( root_instr_name [preceding_instruction]* );
13007 //
13008 // peepconstraint %{
13009 // (instruction_number.operand_name relational_op instruction_number.operand_name
13010 //  [, ...] );
13011 // // instruction numbers are zero-based using left to right order in peepmatch
13012 //
13013 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13014 // // provide an instruction_number.operand_name for each operand that appears
13015 // // in the replacement instruction's match rule
13016 //
13017 // ---------VM FLAGS---------------------------------------------------------
13018 //
13019 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13020 //
13021 // Each peephole rule is given an identifying number starting with zero and
13022 // increasing by one in the order seen by the parser.  An individual peephole
13023 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13024 // on the command-line.
13025 //
13026 // ---------CURRENT LIMITATIONS----------------------------------------------
13027 //
13028 // Only match adjacent instructions in same basic block
13029 // Only equality constraints
13030 // Only constraints between operands, not (0.dest_reg == RAX_enc)
13031 // Only one replacement instruction
13032 //
13033 // ---------EXAMPLE----------------------------------------------------------
13034 //
13035 // // pertinent parts of existing instructions in architecture description
13036 // instruct movI(rRegI dst, rRegI src)
13037 // %{
13038 //   match(Set dst (CopyI src));
13039 // %}
13040 //
13041 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
13042 // %{
13043 //   match(Set dst (AddI dst src));
13044 //   effect(KILL cr);
13045 // %}
13046 //
13047 // // Change (inc mov) to lea
13048 // peephole %{
//   // increment preceded by register-register move
13050 //   peepmatch ( incI_rReg movI );
13051 //   // require that the destination register of the increment
13052 //   // match the destination register of the move
13053 //   peepconstraint ( 0.dst == 1.dst );
13054 //   // construct a replacement instruction that sets
13055 //   // the destination to ( move's source register + one )
13056 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13057 // %}
13058 //
13059 
13060 // Implementation no longer uses movX instructions since
13061 // machine-independent system no longer uses CopyX nodes.
13062 //
13063 // peephole
13064 // %{
13065 //   peepmatch (incI_rReg movI);
13066 //   peepconstraint (0.dst == 1.dst);
13067 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13068 // %}
13069 
13070 // peephole
13071 // %{
13072 //   peepmatch (decI_rReg movI);
13073 //   peepconstraint (0.dst == 1.dst);
13074 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13075 // %}
13076 
13077 // peephole
13078 // %{
13079 //   peepmatch (addI_rReg_imm movI);
13080 //   peepconstraint (0.dst == 1.dst);
13081 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13082 // %}
13083 
13084 // peephole
13085 // %{
13086 //   peepmatch (incL_rReg movL);
13087 //   peepconstraint (0.dst == 1.dst);
13088 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13089 // %}
13090 
13091 // peephole
13092 // %{
13093 //   peepmatch (decL_rReg movL);
13094 //   peepconstraint (0.dst == 1.dst);
13095 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13096 // %}
13097 
13098 // peephole
13099 // %{
13100 //   peepmatch (addL_rReg_imm movL);
13101 //   peepconstraint (0.dst == 1.dst);
13102 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13103 // %}
13104 
13105 // peephole
13106 // %{
13107 //   peepmatch (addP_rReg_imm movP);
13108 //   peepconstraint (0.dst == 1.dst);
13109 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13110 // %}
13111 
13112 // // Change load of spilled value to only a spill
13113 // instruct storeI(memory mem, rRegI src)
13114 // %{
13115 //   match(Set mem (StoreI mem src));
13116 // %}
13117 //
13118 // instruct loadI(rRegI dst, memory mem)
13119 // %{
13120 //   match(Set dst (LoadI mem));
13121 // %}
13122 //
13123 
// Change load of spilled value to only a spill (int flavor).
// Instruction 0 is the root loadI and instruction 1 is the storeI that
// precedes it.  When the load reads back, into the same register, the value
// that was just stored to the same memory operand, the pair is replaced by
// the store alone.
peephole
%{
  peepmatch (loadI storeI);
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  peepreplace (storeI(1.mem 1.mem 1.src));
%}
13130 
// Change load of spilled value to only a spill (long flavor).
// Instruction 0 is the root loadL and instruction 1 is the storeL that
// precedes it.  When the load reads back, into the same register, the value
// that was just stored to the same memory operand, the pair is replaced by
// the store alone.
peephole
%{
  peepmatch (loadL storeL);
  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
  peepreplace (storeL(1.mem 1.mem 1.src));
%}
13137 
13138 //----------SMARTSPILL RULES---------------------------------------------------
13139 // These must follow all instruction definitions as they use the names
13140 // defined in the instructions definitions.