1 //
   2 // Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // AMD64 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
   30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
  64 // used as byte registers)
  65 
  66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
  67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
  69 
  70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
  71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
  72 
  73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
  74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
  75 
  76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
  77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
  78 
  79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
  80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
  81 
  82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
  83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
  84 
  85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
  86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
  87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
  88 
  89 #ifdef _WIN64
  90 
  91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
  92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
  93 
  94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
  95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
  96 
  97 #else
  98 
  99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
 100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
 101 
 102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
 103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
 104 
 105 #endif
 106 
 107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
 108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
 109 
 110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
 111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
 112 
 113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
 114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
 115 
 116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
 117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 118 
 119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
 120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
 121 
 122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
 123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
 124 
 125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
 126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
 127 
 128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
 130 
 131 
 132 // Floating Point Registers
 133 
 134 // Specify priority of register selection within phases of register
 135 // allocation.  Highest priority is first.  A useful heuristic is to
 136 // give registers a low priority when they are required by machine
 137 // instructions, like EAX and EDX on I486, and choose no-save registers
 138 // before save-on-call, & save-on-call before save-on-entry.  Registers
 139 // which participate in fixed calling sequences should come last.
 140 // Registers which are used as pairs must fall on an even boundary.
 141 
 142 alloc_class chunk0(R10,         R10_H,
 143                    R11,         R11_H,
 144                    R8,          R8_H,
 145                    R9,          R9_H,
 146                    R12,         R12_H,
 147                    RCX,         RCX_H,
 148                    RBX,         RBX_H,
 149                    RDI,         RDI_H,
 150                    RDX,         RDX_H,
 151                    RSI,         RSI_H,
 152                    RAX,         RAX_H,
 153                    RBP,         RBP_H,
 154                    R13,         R13_H,
 155                    R14,         R14_H,
 156                    R15,         R15_H,
 157                    RSP,         RSP_H);
 158 
 159 
 160 //----------Architecture Description Register Classes--------------------------
 161 // Several register classes are automatically defined based upon information in
 162 // this architecture description.
 163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 164 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
  165 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
  166 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 167 //
 168 
 169 // Empty register class.
 170 reg_class no_reg();
 171 
 172 // Class for all pointer registers (including RSP and RBP)
 173 reg_class any_reg_with_rbp(RAX, RAX_H,
 174                            RDX, RDX_H,
 175                            RBP, RBP_H,
 176                            RDI, RDI_H,
 177                            RSI, RSI_H,
 178                            RCX, RCX_H,
 179                            RBX, RBX_H,
 180                            RSP, RSP_H,
 181                            R8,  R8_H,
 182                            R9,  R9_H,
 183                            R10, R10_H,
 184                            R11, R11_H,
 185                            R12, R12_H,
 186                            R13, R13_H,
 187                            R14, R14_H,
 188                            R15, R15_H);
 189 
 190 // Class for all pointer registers (including RSP, but excluding RBP)
 191 reg_class any_reg_no_rbp(RAX, RAX_H,
 192                          RDX, RDX_H,
 193                          RDI, RDI_H,
 194                          RSI, RSI_H,
 195                          RCX, RCX_H,
 196                          RBX, RBX_H,
 197                          RSP, RSP_H,
 198                          R8,  R8_H,
 199                          R9,  R9_H,
 200                          R10, R10_H,
 201                          R11, R11_H,
 202                          R12, R12_H,
 203                          R13, R13_H,
 204                          R14, R14_H,
 205                          R15, R15_H);
 206 
 207 // Dynamic register class that selects at runtime between register classes
 208 // any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer).
 209 // Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
 210 reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
 211 
 212 // Class for all pointer registers (excluding RSP)
 213 reg_class ptr_reg_with_rbp(RAX, RAX_H,
 214                            RDX, RDX_H,
 215                            RBP, RBP_H,
 216                            RDI, RDI_H,
 217                            RSI, RSI_H,
 218                            RCX, RCX_H,
 219                            RBX, RBX_H,
 220                            R8,  R8_H,
 221                            R9,  R9_H,
 222                            R10, R10_H,
 223                            R11, R11_H,
 224                            R13, R13_H,
 225                            R14, R14_H);
 226 
 227 // Class for all pointer registers (excluding RSP and RBP)
 228 reg_class ptr_reg_no_rbp(RAX, RAX_H,
 229                          RDX, RDX_H,
 230                          RDI, RDI_H,
 231                          RSI, RSI_H,
 232                          RCX, RCX_H,
 233                          RBX, RBX_H,
 234                          R8,  R8_H,
 235                          R9,  R9_H,
 236                          R10, R10_H,
 237                          R11, R11_H,
 238                          R13, R13_H,
 239                          R14, R14_H);
 240 
 241 // Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
 242 reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
 243 
 244 // Class for all pointer registers (excluding RAX and RSP)
 245 reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
 246                                   RBP, RBP_H,
 247                                   RDI, RDI_H,
 248                                   RSI, RSI_H,
 249                                   RCX, RCX_H,
 250                                   RBX, RBX_H,
 251                                   R8,  R8_H,
 252                                   R9,  R9_H,
 253                                   R10, R10_H,
 254                                   R11, R11_H,
 255                                   R13, R13_H,
 256                                   R14, R14_H);
 257 
 258 // Class for all pointer registers (excluding RAX, RSP, and RBP)
 259 reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
 260                                 RDI, RDI_H,
 261                                 RSI, RSI_H,
 262                                 RCX, RCX_H,
 263                                 RBX, RBX_H,
 264                                 R8,  R8_H,
 265                                 R9,  R9_H,
 266                                 R10, R10_H,
 267                                 R11, R11_H,
 268                                 R13, R13_H,
 269                                 R14, R14_H);
 270 
 271 // Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
 272 reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
 273 
 274 // Class for all pointer registers (excluding RAX, RBX, and RSP)
 275 reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
 276                                       RBP, RBP_H,
 277                                       RDI, RDI_H,
 278                                       RSI, RSI_H,
 279                                       RCX, RCX_H,
 280                                       R8,  R8_H,
 281                                       R9,  R9_H,
 282                                       R10, R10_H,
 283                                       R11, R11_H,
 284                                       R13, R13_H,
 285                                       R14, R14_H);
 286 
 287 // Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
 288 reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
 289                                     RDI, RDI_H,
 290                                     RSI, RSI_H,
 291                                     RCX, RCX_H,
 292                                     R8,  R8_H,
 293                                     R9,  R9_H,
 294                                     R10, R10_H,
 295                                     R11, R11_H,
 296                                     R13, R13_H,
 297                                     R14, R14_H);
 298 
 299 // Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
 300 reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 301 
 302 // Singleton class for RAX pointer register
 303 reg_class ptr_rax_reg(RAX, RAX_H);
 304 
 305 // Singleton class for RBX pointer register
 306 reg_class ptr_rbx_reg(RBX, RBX_H);
 307 
 308 // Singleton class for RSI pointer register
 309 reg_class ptr_rsi_reg(RSI, RSI_H);
 310 
 311 // Singleton class for RDI pointer register
 312 reg_class ptr_rdi_reg(RDI, RDI_H);
 313 
 314 // Singleton class for stack pointer
 315 reg_class ptr_rsp_reg(RSP, RSP_H);
 316 
 317 // Singleton class for TLS pointer
 318 reg_class ptr_r15_reg(R15, R15_H);
 319 
 320 // Class for all long registers (excluding RSP)
 321 reg_class long_reg_with_rbp(RAX, RAX_H,
 322                             RDX, RDX_H,
 323                             RBP, RBP_H,
 324                             RDI, RDI_H,
 325                             RSI, RSI_H,
 326                             RCX, RCX_H,
 327                             RBX, RBX_H,
 328                             R8,  R8_H,
 329                             R9,  R9_H,
 330                             R10, R10_H,
 331                             R11, R11_H,
 332                             R13, R13_H,
 333                             R14, R14_H);
 334 
 335 // Class for all long registers (excluding RSP and RBP)
 336 reg_class long_reg_no_rbp(RAX, RAX_H,
 337                           RDX, RDX_H,
 338                           RDI, RDI_H,
 339                           RSI, RSI_H,
 340                           RCX, RCX_H,
 341                           RBX, RBX_H,
 342                           R8,  R8_H,
 343                           R9,  R9_H,
 344                           R10, R10_H,
 345                           R11, R11_H,
 346                           R13, R13_H,
 347                           R14, R14_H);
 348 
 349 // Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
 350 reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
 351 
 352 // Class for all long registers (excluding RAX, RDX and RSP)
 353 reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
 354                                        RDI, RDI_H,
 355                                        RSI, RSI_H,
 356                                        RCX, RCX_H,
 357                                        RBX, RBX_H,
 358                                        R8,  R8_H,
 359                                        R9,  R9_H,
 360                                        R10, R10_H,
 361                                        R11, R11_H,
 362                                        R13, R13_H,
 363                                        R14, R14_H);
 364 
 365 // Class for all long registers (excluding RAX, RDX, RSP, and RBP)
 366 reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
 367                                      RSI, RSI_H,
 368                                      RCX, RCX_H,
 369                                      RBX, RBX_H,
 370                                      R8,  R8_H,
 371                                      R9,  R9_H,
 372                                      R10, R10_H,
 373                                      R11, R11_H,
 374                                      R13, R13_H,
 375                                      R14, R14_H);
 376 
 377 // Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
 378 reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 379 
 380 // Class for all long registers (excluding RCX and RSP)
 381 reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
 382                                    RDI, RDI_H,
 383                                    RSI, RSI_H,
 384                                    RAX, RAX_H,
 385                                    RDX, RDX_H,
 386                                    RBX, RBX_H,
 387                                    R8,  R8_H,
 388                                    R9,  R9_H,
 389                                    R10, R10_H,
 390                                    R11, R11_H,
 391                                    R13, R13_H,
 392                                    R14, R14_H);
 393 
 394 // Class for all long registers (excluding RCX, RSP, and RBP)
 395 reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
 396                                  RSI, RSI_H,
 397                                  RAX, RAX_H,
 398                                  RDX, RDX_H,
 399                                  RBX, RBX_H,
 400                                  R8,  R8_H,
 401                                  R9,  R9_H,
 402                                  R10, R10_H,
 403                                  R11, R11_H,
 404                                  R13, R13_H,
 405                                  R14, R14_H);
 406 
 407 // Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
 408 reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 409 
 410 // Singleton class for RAX long register
 411 reg_class long_rax_reg(RAX, RAX_H);
 412 
 413 // Singleton class for RCX long register
 414 reg_class long_rcx_reg(RCX, RCX_H);
 415 
 416 // Singleton class for RDX long register
 417 reg_class long_rdx_reg(RDX, RDX_H);
 418 
 419 // Class for all int registers (excluding RSP)
 420 reg_class int_reg_with_rbp(RAX,
 421                            RDX,
 422                            RBP,
 423                            RDI,
 424                            RSI,
 425                            RCX,
 426                            RBX,
 427                            R8,
 428                            R9,
 429                            R10,
 430                            R11,
 431                            R13,
 432                            R14);
 433 
 434 // Class for all int registers (excluding RSP and RBP)
 435 reg_class int_reg_no_rbp(RAX,
 436                          RDX,
 437                          RDI,
 438                          RSI,
 439                          RCX,
 440                          RBX,
 441                          R8,
 442                          R9,
 443                          R10,
 444                          R11,
 445                          R13,
 446                          R14);
 447 
 448 // Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
 449 reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
 450 
 451 // Class for all int registers (excluding RCX and RSP)
 452 reg_class int_no_rcx_reg_with_rbp(RAX,
 453                                   RDX,
 454                                   RBP,
 455                                   RDI,
 456                                   RSI,
 457                                   RBX,
 458                                   R8,
 459                                   R9,
 460                                   R10,
 461                                   R11,
 462                                   R13,
 463                                   R14);
 464 
 465 // Class for all int registers (excluding RCX, RSP, and RBP)
 466 reg_class int_no_rcx_reg_no_rbp(RAX,
 467                                 RDX,
 468                                 RDI,
 469                                 RSI,
 470                                 RBX,
 471                                 R8,
 472                                 R9,
 473                                 R10,
 474                                 R11,
 475                                 R13,
 476                                 R14);
 477 
 478 // Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
 479 reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
 480 
 481 // Class for all int registers (excluding RAX, RDX, and RSP)
 482 reg_class int_no_rax_rdx_reg_with_rbp(RBP,
 483                                       RDI,
 484                                       RSI,
 485                                       RCX,
 486                                       RBX,
 487                                       R8,
 488                                       R9,
 489                                       R10,
 490                                       R11,
 491                                       R13,
 492                                       R14);
 493 
 494 // Class for all int registers (excluding RAX, RDX, RSP, and RBP)
 495 reg_class int_no_rax_rdx_reg_no_rbp(RDI,
 496                                     RSI,
 497                                     RCX,
 498                                     RBX,
 499                                     R8,
 500                                     R9,
 501                                     R10,
 502                                     R11,
 503                                     R13,
 504                                     R14);
 505 
 506 // Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
 507 reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 508 
 509 // Singleton class for RAX int register
 510 reg_class int_rax_reg(RAX);
 511 
 512 // Singleton class for RBX int register
 513 reg_class int_rbx_reg(RBX);
 514 
 515 // Singleton class for RCX int register
 516 reg_class int_rcx_reg(RCX);
 517 
 518 // Singleton class for RCX int register
 519 reg_class int_rdx_reg(RDX);
 520 
 521 // Singleton class for RCX int register
 522 reg_class int_rdi_reg(RDI);
 523 
 524 // Singleton class for instruction pointer
 525 // reg_class ip_reg(RIP);
 526 
 527 %}
 528 
 529 source_hpp %{
 530 #if INCLUDE_ZGC
 531 #include "gc/z/zBarrierSetAssembler.hpp"
 532 #endif
 533 
 534 extern unsigned long followed_by_equals;
 535 extern unsigned long not_followed_by_equals;
 536 
 537 %}
 538 
 539 //----------SOURCE BLOCK-------------------------------------------------------
 540 // This is a block of C++ code which provides values, functions, and
 541 // definitions necessary in the rest of the architecture description
 542 source %{
 543   unsigned long followed_by_equals = 0;
 544   unsigned long not_followed_by_equals = 0;
 545 
 546 #define   RELOC_IMM64    Assembler::imm_operand
 547 #define   RELOC_DISP32   Assembler::disp32_operand
 548 
 549 #define __ _masm.
 550 
 551 static bool generate_vzeroupper(Compile* C) {
 552   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 553 }
 554 
 555 static int clear_avx_size() {
 556   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 557 }
 558 
 559 // !!!!! Special hack to get all types of calls to specify the byte offset
 560 //       from the start of the call to the point where the return address
 561 //       will point.
 562 int MachCallStaticJavaNode::ret_addr_offset()
 563 {
 564   int offset = 5; // 5 bytes from start of call to where return address points
 565   offset += clear_avx_size();
 566   return offset;
 567 }
 568 
 569 int MachCallDynamicJavaNode::ret_addr_offset()
 570 {
 571   int offset = 15; // 15 bytes from start of call to where return address points
 572   offset += clear_avx_size();
 573   return offset;
 574 }
 575 
 576 int MachCallRuntimeNode::ret_addr_offset() {
 577   int offset = 13; // movq r10,#addr; callq (r10)
 578   offset += clear_avx_size();
 579   return offset;
 580 }
 581 
 582 // Indicate if the safepoint node needs the polling page as an input,
 583 // it does if the polling page is more than disp32 away.
 584 bool SafePointNode::needs_polling_address_input()
 585 {
 586   return SafepointMechanism::uses_thread_local_poll() || Assembler::is_polling_page_far();
 587 }
 588 
 589 //
 590 // Compute padding required for nodes which need alignment
 591 //
 592 
 593 // The address of the call instruction needs to be 4-byte aligned to
 594 // ensure that it does not span a cache line so that it can be patched.
 595 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 596 {
 597   current_offset += clear_avx_size(); // skip vzeroupper
 598   current_offset += 1; // skip call opcode byte
 599   return align_up(current_offset, alignment_required()) - current_offset;
 600 }
 601 
 602 // The address of the call instruction needs to be 4-byte aligned to
 603 // ensure that it does not span a cache line so that it can be patched.
 604 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 605 {
 606   current_offset += clear_avx_size(); // skip vzeroupper
 607   current_offset += 11; // skip movq instruction + call opcode byte
 608   return align_up(current_offset, alignment_required()) - current_offset;
 609 }
 610 
 611 // EMIT_RM()
 612 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 613   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
 614   cbuf.insts()->emit_int8(c);
 615 }
 616 
 617 // EMIT_CC()
 618 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 619   unsigned char c = (unsigned char) (f1 | f2);
 620   cbuf.insts()->emit_int8(c);
 621 }
 622 
 623 // EMIT_OPCODE()
 624 void emit_opcode(CodeBuffer &cbuf, int code) {
 625   cbuf.insts()->emit_int8((unsigned char) code);
 626 }
 627 
 628 // EMIT_OPCODE() w/ relocation information
 629 void emit_opcode(CodeBuffer &cbuf,
 630                  int code, relocInfo::relocType reloc, int offset, int format)
 631 {
 632   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
 633   emit_opcode(cbuf, code);
 634 }
 635 
 636 // EMIT_D8()
 637 void emit_d8(CodeBuffer &cbuf, int d8) {
 638   cbuf.insts()->emit_int8((unsigned char) d8);
 639 }
 640 
 641 // EMIT_D16()
 642 void emit_d16(CodeBuffer &cbuf, int d16) {
 643   cbuf.insts()->emit_int16(d16);
 644 }
 645 
 646 // EMIT_D32()
 647 void emit_d32(CodeBuffer &cbuf, int d32) {
 648   cbuf.insts()->emit_int32(d32);
 649 }
 650 
 651 // EMIT_D64()
 652 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
 653   cbuf.insts()->emit_int64(d64);
 654 }
 655 
 656 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 657 void emit_d32_reloc(CodeBuffer& cbuf,
 658                     int d32,
 659                     relocInfo::relocType reloc,
 660                     int format)
 661 {
 662   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
 663   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 664   cbuf.insts()->emit_int32(d32);
 665 }
 666 
 667 // emit 32 bit value and construct relocation entry from RelocationHolder
 668 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
 669 #ifdef ASSERT
 670   if (rspec.reloc()->type() == relocInfo::oop_type &&
 671       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
 672     assert(Universe::heap()->is_in_reserved((address)(intptr_t)d32), "should be real oop");
 673     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop((intptr_t)d32))), "cannot embed scavengable oops in code");
 674   }
 675 #endif
 676   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 677   cbuf.insts()->emit_int32(d32);
 678 }
 679 
 680 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
 681   address next_ip = cbuf.insts_end() + 4;
 682   emit_d32_reloc(cbuf, (int) (addr - next_ip),
 683                  external_word_Relocation::spec(addr),
 684                  RELOC_DISP32);
 685 }
 686 
 687 
 688 // emit 64 bit value and construct relocation entry from relocInfo::relocType
 689 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
 690   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 691   cbuf.insts()->emit_int64(d64);
 692 }
 693 
 694 // emit 64 bit value and construct relocation entry from RelocationHolder
 695 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
 696 #ifdef ASSERT
 697   if (rspec.reloc()->type() == relocInfo::oop_type &&
 698       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
 699     assert(Universe::heap()->is_in_reserved((address)d64), "should be real oop");
 700     assert(oopDesc::is_oop(cast_to_oop(d64)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d64))),
 701            "cannot embed scavengable oops in code");
 702   }
 703 #endif
 704   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 705   cbuf.insts()->emit_int64(d64);
 706 }
 707 
 708 // Access stack slot for load or store
 709 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
 710 {
 711   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
 712   if (-0x80 <= disp && disp < 0x80) {
 713     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
 714     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 715     emit_d8(cbuf, disp);     // Displacement  // R/M byte
 716   } else {
 717     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
 718     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
 719     emit_d32(cbuf, disp);     // Displacement // R/M byte
 720   }
 721 }
 722 
 723    // rRegI ereg, memory mem) %{    // emit_reg_mem
 724 void encode_RegMem(CodeBuffer &cbuf,
 725                    int reg,
 726                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
 727 {
 728   assert(disp_reloc == relocInfo::none, "cannot have disp");
 729   int regenc = reg & 7;
 730   int baseenc = base & 7;
 731   int indexenc = index & 7;
 732 
 733   // There is no index & no scale, use form without SIB byte
 734   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
 735     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 736     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 737       emit_rm(cbuf, 0x0, regenc, baseenc); // *
 738     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 739       // If 8-bit displacement, mode 0x1
 740       emit_rm(cbuf, 0x1, regenc, baseenc); // *
 741       emit_d8(cbuf, disp);
 742     } else {
 743       // If 32-bit displacement
 744       if (base == -1) { // Special flag for absolute address
 745         emit_rm(cbuf, 0x0, regenc, 0x5); // *
 746         if (disp_reloc != relocInfo::none) {
 747           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 748         } else {
 749           emit_d32(cbuf, disp);
 750         }
 751       } else {
 752         // Normal base + offset
 753         emit_rm(cbuf, 0x2, regenc, baseenc); // *
 754         if (disp_reloc != relocInfo::none) {
 755           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 756         } else {
 757           emit_d32(cbuf, disp);
 758         }
 759       }
 760     }
 761   } else {
 762     // Else, encode with the SIB byte
 763     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
 764     if (disp == 0 && base != RBP_enc && base != R13_enc) {
 765       // If no displacement
 766       emit_rm(cbuf, 0x0, regenc, 0x4); // *
 767       emit_rm(cbuf, scale, indexenc, baseenc);
 768     } else {
 769       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
 770         // If 8-bit displacement, mode 0x1
 771         emit_rm(cbuf, 0x1, regenc, 0x4); // *
 772         emit_rm(cbuf, scale, indexenc, baseenc);
 773         emit_d8(cbuf, disp);
 774       } else {
 775         // If 32-bit displacement
 776         if (base == 0x04 ) {
 777           emit_rm(cbuf, 0x2, regenc, 0x4);
 778           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
 779         } else {
 780           emit_rm(cbuf, 0x2, regenc, 0x4);
 781           emit_rm(cbuf, scale, indexenc, baseenc); // *
 782         }
 783         if (disp_reloc != relocInfo::none) {
 784           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
 785         } else {
 786           emit_d32(cbuf, disp);
 787         }
 788       }
 789     }
 790   }
 791 }
 792 
 793 // This could be in MacroAssembler but it's fairly C2 specific
 794 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 795   Label exit;
 796   __ jccb(Assembler::noParity, exit);
 797   __ pushf();
 798   //
 799   // comiss/ucomiss instructions set ZF,PF,CF flags and
 800   // zero OF,AF,SF for NaN values.
 801   // Fixup flags by zeroing ZF,PF so that compare of NaN
 802   // values returns 'less than' result (CF is set).
 803   // Leave the rest of flags unchanged.
 804   //
 805   //    7 6 5 4 3 2 1 0
 806   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 807   //    0 0 1 0 1 0 1 1   (0x2B)
 808   //
 809   __ andq(Address(rsp, 0), 0xffffff2b);
 810   __ popf();
 811   __ bind(exit);
 812 }
 813 
 814 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 815   Label done;
 816   __ movl(dst, -1);
 817   __ jcc(Assembler::parity, done);
 818   __ jcc(Assembler::below, done);
 819   __ setb(Assembler::notEqual, dst);
 820   __ movzbl(dst, dst);
 821   __ bind(done);
 822 }
 823 
 824 
 825 //=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 827 
 828 int Compile::ConstantTable::calculate_table_base_offset() const {
 829   return 0;  // absolute addressing, no offset
 830 }
 831 
 832 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 833 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 834   ShouldNotReachHere();
 835 }
 836 
 837 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 838   // Empty encoding
 839 }
 840 
 841 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 842   return 0;
 843 }
 844 
 845 #ifndef PRODUCT
 846 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 847   st->print("# MachConstantBaseNode (empty encoding)");
 848 }
 849 #endif
 850 
 851 
 852 //=============================================================================
 853 #ifndef PRODUCT
 854 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 855   Compile* C = ra_->C;
 856 
 857   int framesize = C->frame_size_in_bytes();
 858   int bangsize = C->bang_size_in_bytes();
 859   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 860   // Remove wordSize for return addr which is already pushed.
 861   framesize -= wordSize;
 862 
 863   if (C->need_stack_bang(bangsize)) {
 864     framesize -= wordSize;
 865     st->print("# stack bang (%d bytes)", bangsize);
 866     st->print("\n\t");
 867     st->print("pushq   rbp\t# Save rbp");
 868     if (PreserveFramePointer) {
 869         st->print("\n\t");
 870         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 871     }
 872     if (framesize) {
 873       st->print("\n\t");
 874       st->print("subq    rsp, #%d\t# Create frame",framesize);
 875     }
 876   } else {
 877     st->print("subq    rsp, #%d\t# Create frame",framesize);
 878     st->print("\n\t");
 879     framesize -= wordSize;
 880     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 881     if (PreserveFramePointer) {
 882       st->print("\n\t");
 883       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 884       if (framesize > 0) {
 885         st->print("\n\t");
 886         st->print("addq    rbp, #%d", framesize);
 887       }
 888     }
 889   }
 890 
 891   if (VerifyStackAtCalls) {
 892     st->print("\n\t");
 893     framesize -= wordSize;
 894     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 895 #ifdef ASSERT
 896     st->print("\n\t");
 897     st->print("# stack alignment check");
 898 #endif
 899   }
 900   st->cr();
 901 }
 902 #endif
 903 
 904 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 905   Compile* C = ra_->C;
 906   MacroAssembler _masm(&cbuf);
 907 
 908   int framesize = C->frame_size_in_bytes();
 909   int bangsize = C->bang_size_in_bytes();
 910 
 911   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false);
 912 
 913   C->set_frame_complete(cbuf.insts_size());
 914 
 915   if (C->has_mach_constant_base_node()) {
 916     // NOTE: We set the table base offset here because users might be
 917     // emitted before MachConstantBaseNode.
 918     Compile::ConstantTable& constant_table = C->constant_table();
 919     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 920   }
 921 }
 922 
 923 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 924 {
 925   return MachNode::size(ra_); // too many variables; just compute it
 926                               // the hard way
 927 }
 928 
 929 int MachPrologNode::reloc() const
 930 {
 931   return 0; // a large enough number
 932 }
 933 
 934 //=============================================================================
 935 #ifndef PRODUCT
 936 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 937 {
 938   Compile* C = ra_->C;
 939   if (generate_vzeroupper(C)) {
 940     st->print("vzeroupper");
 941     st->cr(); st->print("\t");
 942   }
 943 
 944   int framesize = C->frame_size_in_bytes();
 945   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 946   // Remove word for return adr already pushed
 947   // and RBP
 948   framesize -= 2*wordSize;
 949 
 950   if (framesize) {
 951     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 952     st->print("\t");
 953   }
 954 
 955   st->print_cr("popq   rbp");
 956   if (do_polling() && C->is_method_compilation()) {
 957     st->print("\t");
 958     if (SafepointMechanism::uses_thread_local_poll()) {
 959       st->print_cr("movq   rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
 960                    "testl  rax, [rscratch1]\t"
 961                    "# Safepoint: poll for GC");
 962     } else if (Assembler::is_polling_page_far()) {
 963       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 964                    "testl  rax, [rscratch1]\t"
 965                    "# Safepoint: poll for GC");
 966     } else {
 967       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 968                    "# Safepoint: poll for GC");
 969     }
 970   }
 971 }
 972 #endif
 973 
 974 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 975 {
 976   Compile* C = ra_->C;
 977   MacroAssembler _masm(&cbuf);
 978 
 979   if (generate_vzeroupper(C)) {
 980     // Clear upper bits of YMM registers when current compiled code uses
 981     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 982     __ vzeroupper();
 983   }
 984 
 985   int framesize = C->frame_size_in_bytes();
 986   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 987   // Remove word for return adr already pushed
 988   // and RBP
 989   framesize -= 2*wordSize;
 990 
 991   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 992 
 993   if (framesize) {
 994     emit_opcode(cbuf, Assembler::REX_W);
 995     if (framesize < 0x80) {
 996       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 997       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 998       emit_d8(cbuf, framesize);
 999     } else {
1000       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
1001       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
1002       emit_d32(cbuf, framesize);
1003     }
1004   }
1005 
1006   // popq rbp
1007   emit_opcode(cbuf, 0x58 | RBP_enc);
1008 
1009   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1010     __ reserved_stack_check();
1011   }
1012 
1013   if (do_polling() && C->is_method_compilation()) {
1014     MacroAssembler _masm(&cbuf);
1015     if (SafepointMechanism::uses_thread_local_poll()) {
1016       __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
1017       __ relocate(relocInfo::poll_return_type);
1018       __ testl(rax, Address(rscratch1, 0));
1019     } else {
1020       AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1021       if (Assembler::is_polling_page_far()) {
1022         __ lea(rscratch1, polling_page);
1023         __ relocate(relocInfo::poll_return_type);
1024         __ testl(rax, Address(rscratch1, 0));
1025       } else {
1026         __ testl(rax, polling_page);
1027       }
1028     }
1029   }
1030 }
1031 
1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1033 {
1034   return MachNode::size(ra_); // too many variables; just compute it
1035                               // the hard way
1036 }
1037 
1038 int MachEpilogNode::reloc() const
1039 {
1040   return 2; // a large enough number
1041 }
1042 
1043 const Pipeline* MachEpilogNode::pipeline() const
1044 {
1045   return MachNode::pipeline_class();
1046 }
1047 
1048 int MachEpilogNode::safepoint_offset() const
1049 {
1050   return 0;
1051 }
1052 
1053 //=============================================================================
1054 
// Coarse location classes for an OptoReg, as produced by rc_class() and used
// to pick a spill-copy encoding.  The explicit values equal the implicit
// enumeration order.
enum RC {
  rc_bad   = 0,   // not a valid OptoReg
  rc_int   = 1,   // general-purpose register
  rc_float = 2,   // XMM register
  rc_stack = 3    // stack slot
};
1061 
1062 static enum RC rc_class(OptoReg::Name reg)
1063 {
1064   if( !OptoReg::is_valid(reg)  ) return rc_bad;
1065 
1066   if (OptoReg::is_stack(reg)) return rc_stack;
1067 
1068   VMReg r = OptoReg::as_VMReg(reg);
1069 
1070   if (r->is_Register()) return rc_int;
1071 
1072   assert(r->is_XMMRegister(), "must be");
1073   return rc_float;
1074 }
1075 
1076 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
1077 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1078                           int src_hi, int dst_hi, uint ireg, outputStream* st);
1079 
1080 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1081                             int stack_offset, int reg, uint ireg, outputStream* st);
1082 
1083 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
1084                                       int dst_offset, uint ireg, outputStream* st) {
1085   if (cbuf) {
1086     MacroAssembler _masm(cbuf);
1087     switch (ireg) {
1088     case Op_VecS:
1089       __ movq(Address(rsp, -8), rax);
1090       __ movl(rax, Address(rsp, src_offset));
1091       __ movl(Address(rsp, dst_offset), rax);
1092       __ movq(rax, Address(rsp, -8));
1093       break;
1094     case Op_VecD:
1095       __ pushq(Address(rsp, src_offset));
1096       __ popq (Address(rsp, dst_offset));
1097       break;
1098     case Op_VecX:
1099       __ pushq(Address(rsp, src_offset));
1100       __ popq (Address(rsp, dst_offset));
1101       __ pushq(Address(rsp, src_offset+8));
1102       __ popq (Address(rsp, dst_offset+8));
1103       break;
1104     case Op_VecY:
1105       __ vmovdqu(Address(rsp, -32), xmm0);
1106       __ vmovdqu(xmm0, Address(rsp, src_offset));
1107       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1108       __ vmovdqu(xmm0, Address(rsp, -32));
1109       break;
1110     case Op_VecZ:
1111       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1112       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1113       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1114       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1115       break;
1116     default:
1117       ShouldNotReachHere();
1118     }
1119 #ifndef PRODUCT
1120   } else {
1121     switch (ireg) {
1122     case Op_VecS:
1123       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1124                 "movl    rax, [rsp + #%d]\n\t"
1125                 "movl    [rsp + #%d], rax\n\t"
1126                 "movq    rax, [rsp - #8]",
1127                 src_offset, dst_offset);
1128       break;
1129     case Op_VecD:
1130       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1131                 "popq    [rsp + #%d]",
1132                 src_offset, dst_offset);
1133       break;
1134      case Op_VecX:
1135       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
1136                 "popq    [rsp + #%d]\n\t"
1137                 "pushq   [rsp + #%d]\n\t"
1138                 "popq    [rsp + #%d]",
1139                 src_offset, dst_offset, src_offset+8, dst_offset+8);
1140       break;
1141     case Op_VecY:
1142       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1143                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1144                 "vmovdqu [rsp + #%d], xmm0\n\t"
1145                 "vmovdqu xmm0, [rsp - #32]",
1146                 src_offset, dst_offset);
1147       break;
1148     case Op_VecZ:
1149       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1150                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1151                 "vmovdqu [rsp + #%d], xmm0\n\t"
1152                 "vmovdqu xmm0, [rsp - #64]",
1153                 src_offset, dst_offset);
1154       break;
1155     default:
1156       ShouldNotReachHere();
1157     }
1158 #endif
1159   }
1160 }
1161 
1162 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1163                                        PhaseRegAlloc* ra_,
1164                                        bool do_size,
1165                                        outputStream* st) const {
1166   assert(cbuf != NULL || st  != NULL, "sanity");
1167   // Get registers to move
1168   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1169   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1170   OptoReg::Name dst_second = ra_->get_reg_second(this);
1171   OptoReg::Name dst_first = ra_->get_reg_first(this);
1172 
1173   enum RC src_second_rc = rc_class(src_second);
1174   enum RC src_first_rc = rc_class(src_first);
1175   enum RC dst_second_rc = rc_class(dst_second);
1176   enum RC dst_first_rc = rc_class(dst_first);
1177 
1178   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
1179          "must move at least 1 register" );
1180 
1181   if (src_first == dst_first && src_second == dst_second) {
1182     // Self copy, no move
1183     return 0;
1184   }
1185   if (bottom_type()->isa_vect() != NULL) {
1186     uint ireg = ideal_reg();
1187     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1188     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1189     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1190       // mem -> mem
1191       int src_offset = ra_->reg2offset(src_first);
1192       int dst_offset = ra_->reg2offset(dst_first);
1193       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
1194     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
1195       vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
1196     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1197       int stack_offset = ra_->reg2offset(dst_first);
1198       vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
1199     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
1200       int stack_offset = ra_->reg2offset(src_first);
1201       vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
1202     } else {
1203       ShouldNotReachHere();
1204     }
1205     return 0;
1206   }
1207   if (src_first_rc == rc_stack) {
1208     // mem ->
1209     if (dst_first_rc == rc_stack) {
1210       // mem -> mem
1211       assert(src_second != dst_first, "overlap");
1212       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1213           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1214         // 64-bit
1215         int src_offset = ra_->reg2offset(src_first);
1216         int dst_offset = ra_->reg2offset(dst_first);
1217         if (cbuf) {
1218           MacroAssembler _masm(cbuf);
1219           __ pushq(Address(rsp, src_offset));
1220           __ popq (Address(rsp, dst_offset));
1221 #ifndef PRODUCT
1222         } else {
1223           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1224                     "popq    [rsp + #%d]",
1225                      src_offset, dst_offset);
1226 #endif
1227         }
1228       } else {
1229         // 32-bit
1230         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1231         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1232         // No pushl/popl, so:
1233         int src_offset = ra_->reg2offset(src_first);
1234         int dst_offset = ra_->reg2offset(dst_first);
1235         if (cbuf) {
1236           MacroAssembler _masm(cbuf);
1237           __ movq(Address(rsp, -8), rax);
1238           __ movl(rax, Address(rsp, src_offset));
1239           __ movl(Address(rsp, dst_offset), rax);
1240           __ movq(rax, Address(rsp, -8));
1241 #ifndef PRODUCT
1242         } else {
1243           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1244                     "movl    rax, [rsp + #%d]\n\t"
1245                     "movl    [rsp + #%d], rax\n\t"
1246                     "movq    rax, [rsp - #8]",
1247                      src_offset, dst_offset);
1248 #endif
1249         }
1250       }
1251       return 0;
1252     } else if (dst_first_rc == rc_int) {
1253       // mem -> gpr
1254       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1255           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1256         // 64-bit
1257         int offset = ra_->reg2offset(src_first);
1258         if (cbuf) {
1259           MacroAssembler _masm(cbuf);
1260           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1261 #ifndef PRODUCT
1262         } else {
1263           st->print("movq    %s, [rsp + #%d]\t# spill",
1264                      Matcher::regName[dst_first],
1265                      offset);
1266 #endif
1267         }
1268       } else {
1269         // 32-bit
1270         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1271         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1272         int offset = ra_->reg2offset(src_first);
1273         if (cbuf) {
1274           MacroAssembler _masm(cbuf);
1275           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1276 #ifndef PRODUCT
1277         } else {
1278           st->print("movl    %s, [rsp + #%d]\t# spill",
1279                      Matcher::regName[dst_first],
1280                      offset);
1281 #endif
1282         }
1283       }
1284       return 0;
1285     } else if (dst_first_rc == rc_float) {
1286       // mem-> xmm
1287       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1288           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1289         // 64-bit
1290         int offset = ra_->reg2offset(src_first);
1291         if (cbuf) {
1292           MacroAssembler _masm(cbuf);
1293           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1294 #ifndef PRODUCT
1295         } else {
1296           st->print("%s  %s, [rsp + #%d]\t# spill",
1297                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1298                      Matcher::regName[dst_first],
1299                      offset);
1300 #endif
1301         }
1302       } else {
1303         // 32-bit
1304         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1305         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1306         int offset = ra_->reg2offset(src_first);
1307         if (cbuf) {
1308           MacroAssembler _masm(cbuf);
1309           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1310 #ifndef PRODUCT
1311         } else {
1312           st->print("movss   %s, [rsp + #%d]\t# spill",
1313                      Matcher::regName[dst_first],
1314                      offset);
1315 #endif
1316         }
1317       }
1318       return 0;
1319     }
1320   } else if (src_first_rc == rc_int) {
1321     // gpr ->
1322     if (dst_first_rc == rc_stack) {
1323       // gpr -> mem
1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1326         // 64-bit
1327         int offset = ra_->reg2offset(dst_first);
1328         if (cbuf) {
1329           MacroAssembler _masm(cbuf);
1330           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1331 #ifndef PRODUCT
1332         } else {
1333           st->print("movq    [rsp + #%d], %s\t# spill",
1334                      offset,
1335                      Matcher::regName[src_first]);
1336 #endif
1337         }
1338       } else {
1339         // 32-bit
1340         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1341         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1342         int offset = ra_->reg2offset(dst_first);
1343         if (cbuf) {
1344           MacroAssembler _masm(cbuf);
1345           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1346 #ifndef PRODUCT
1347         } else {
1348           st->print("movl    [rsp + #%d], %s\t# spill",
1349                      offset,
1350                      Matcher::regName[src_first]);
1351 #endif
1352         }
1353       }
1354       return 0;
1355     } else if (dst_first_rc == rc_int) {
1356       // gpr -> gpr
1357       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1358           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1359         // 64-bit
1360         if (cbuf) {
1361           MacroAssembler _masm(cbuf);
1362           __ movq(as_Register(Matcher::_regEncode[dst_first]),
1363                   as_Register(Matcher::_regEncode[src_first]));
1364 #ifndef PRODUCT
1365         } else {
1366           st->print("movq    %s, %s\t# spill",
1367                      Matcher::regName[dst_first],
1368                      Matcher::regName[src_first]);
1369 #endif
1370         }
1371         return 0;
1372       } else {
1373         // 32-bit
1374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1376         if (cbuf) {
1377           MacroAssembler _masm(cbuf);
1378           __ movl(as_Register(Matcher::_regEncode[dst_first]),
1379                   as_Register(Matcher::_regEncode[src_first]));
1380 #ifndef PRODUCT
1381         } else {
1382           st->print("movl    %s, %s\t# spill",
1383                      Matcher::regName[dst_first],
1384                      Matcher::regName[src_first]);
1385 #endif
1386         }
1387         return 0;
1388       }
1389     } else if (dst_first_rc == rc_float) {
1390       // gpr -> xmm
1391       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1392           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1393         // 64-bit
1394         if (cbuf) {
1395           MacroAssembler _masm(cbuf);
1396           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1397 #ifndef PRODUCT
1398         } else {
1399           st->print("movdq   %s, %s\t# spill",
1400                      Matcher::regName[dst_first],
1401                      Matcher::regName[src_first]);
1402 #endif
1403         }
1404       } else {
1405         // 32-bit
1406         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1407         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1408         if (cbuf) {
1409           MacroAssembler _masm(cbuf);
1410           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1411 #ifndef PRODUCT
1412         } else {
1413           st->print("movdl   %s, %s\t# spill",
1414                      Matcher::regName[dst_first],
1415                      Matcher::regName[src_first]);
1416 #endif
1417         }
1418       }
1419       return 0;
1420     }
1421   } else if (src_first_rc == rc_float) {
1422     // xmm ->
1423     if (dst_first_rc == rc_stack) {
1424       // xmm -> mem
1425       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1426           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1427         // 64-bit
1428         int offset = ra_->reg2offset(dst_first);
1429         if (cbuf) {
1430           MacroAssembler _masm(cbuf);
1431           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1432 #ifndef PRODUCT
1433         } else {
1434           st->print("movsd   [rsp + #%d], %s\t# spill",
1435                      offset,
1436                      Matcher::regName[src_first]);
1437 #endif
1438         }
1439       } else {
1440         // 32-bit
1441         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1442         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1443         int offset = ra_->reg2offset(dst_first);
1444         if (cbuf) {
1445           MacroAssembler _masm(cbuf);
1446           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1447 #ifndef PRODUCT
1448         } else {
1449           st->print("movss   [rsp + #%d], %s\t# spill",
1450                      offset,
1451                      Matcher::regName[src_first]);
1452 #endif
1453         }
1454       }
1455       return 0;
1456     } else if (dst_first_rc == rc_int) {
1457       // xmm -> gpr
1458       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1459           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1460         // 64-bit
1461         if (cbuf) {
1462           MacroAssembler _masm(cbuf);
1463           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1464 #ifndef PRODUCT
1465         } else {
1466           st->print("movdq   %s, %s\t# spill",
1467                      Matcher::regName[dst_first],
1468                      Matcher::regName[src_first]);
1469 #endif
1470         }
1471       } else {
1472         // 32-bit
1473         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1474         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1475         if (cbuf) {
1476           MacroAssembler _masm(cbuf);
1477           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1478 #ifndef PRODUCT
1479         } else {
1480           st->print("movdl   %s, %s\t# spill",
1481                      Matcher::regName[dst_first],
1482                      Matcher::regName[src_first]);
1483 #endif
1484         }
1485       }
1486       return 0;
1487     } else if (dst_first_rc == rc_float) {
1488       // xmm -> xmm
1489       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1490           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1491         // 64-bit
1492         if (cbuf) {
1493           MacroAssembler _masm(cbuf);
1494           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1495 #ifndef PRODUCT
1496         } else {
1497           st->print("%s  %s, %s\t# spill",
1498                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1499                      Matcher::regName[dst_first],
1500                      Matcher::regName[src_first]);
1501 #endif
1502         }
1503       } else {
1504         // 32-bit
1505         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1506         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1507         if (cbuf) {
1508           MacroAssembler _masm(cbuf);
1509           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1510 #ifndef PRODUCT
1511         } else {
1512           st->print("%s  %s, %s\t# spill",
1513                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1514                      Matcher::regName[dst_first],
1515                      Matcher::regName[src_first]);
1516 #endif
1517         }
1518       }
1519       return 0;
1520     }
1521   }
1522 
1523   assert(0," foo ");
1524   Unimplemented();
1525   return 0;
1526 }
1527 
1528 #ifndef PRODUCT
1529 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1530   implementation(NULL, ra_, false, st);
1531 }
1532 #endif
1533 
1534 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1535   implementation(&cbuf, ra_, false, NULL);
1536 }
1537 
1538 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1539   return MachNode::size(ra_);
1540 }
1541 
1542 //=============================================================================
1543 #ifndef PRODUCT
1544 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1545 {
1546   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1547   int reg = ra_->get_reg_first(this);
1548   st->print("leaq    %s, [rsp + #%d]\t# box lock",
1549             Matcher::regName[reg], offset);
1550 }
1551 #endif
1552 
1553 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1554 {
1555   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1556   int reg = ra_->get_encode(this);
1557   if (offset >= 0x80) {
1558     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1559     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1560     emit_rm(cbuf, 0x2, reg & 7, 0x04);
1561     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1562     emit_d32(cbuf, offset);
1563   } else {
1564     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1565     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
1566     emit_rm(cbuf, 0x1, reg & 7, 0x04);
1567     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1568     emit_d8(cbuf, offset);
1569   }
1570 }
1571 
1572 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1573 {
1574   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1575   return (offset < 0x80) ? 5 : 8; // REX
1576 }
1577 
1578 //=============================================================================
1579 #ifndef PRODUCT
1580 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1581 {
1582   if (UseCompressedClassPointers) {
1583     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1584     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1585     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
1586   } else {
1587     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1588                  "# Inline cache check");
1589   }
1590   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
1591   st->print_cr("\tnop\t# nops to align entry point");
1592 }
1593 #endif
1594 
1595 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1596 {
1597   MacroAssembler masm(&cbuf);
1598   uint insts_size = cbuf.insts_size();
1599   if (UseCompressedClassPointers) {
1600     masm.load_klass(rscratch1, j_rarg0);
1601     masm.cmpptr(rax, rscratch1);
1602   } else {
1603     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1604   }
1605 
1606   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1607 
1608   /* WARNING these NOPs are critical so that verified entry point is properly
1609      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1610   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1611   if (OptoBreakpoint) {
1612     // Leave space for int3
1613     nops_cnt -= 1;
1614   }
1615   nops_cnt &= 0x3; // Do not add nops if code is aligned.
1616   if (nops_cnt > 0)
1617     masm.nop(nops_cnt);
1618 }
1619 
1620 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1621 {
1622   return MachNode::size(ra_); // too many variables; just compute it
1623                               // the hard way
1624 }
1625 
1626 
1627 //=============================================================================
1628 
1629 int Matcher::regnum_to_fpu_offset(int regnum)
1630 {
1631   return regnum - 32; // The FP registers are in the second chunk
1632 }
1633 
1634 // This is UltraSparc specific, true just means we have fast l2f conversion
1635 const bool Matcher::convL2FSupported(void) {
1636   return true;
1637 }
1638 
1639 // Is this branch offset short enough that a short branch can be used?
1640 //
1641 // NOTE: If the platform does not provide any short branch variants, then
1642 //       this method should return false for offset 0.
1643 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1644   // The passed offset is relative to address of the branch.
1645   // On 86 a branch displacement is calculated relative to address
1646   // of a next instruction.
1647   offset -= br_size;
1648 
1649   // the short version of jmpConUCF2 contains multiple branches,
1650   // making the reach slightly less
1651   if (rule == jmpConUCF2_rule)
1652     return (-126 <= offset && offset <= 125);
1653   return (-128 <= offset && offset <= 127);
1654 }
1655 
1656 const bool Matcher::isSimpleConstant64(jlong value) {
1657   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1658   //return value == (int) value;  // Cf. storeImmL and immL32.
1659 
1660   // Probably always true, even if a temp register is required.
1661   return true;
1662 }
1663 
1664 // The ecx parameter to rep stosq for the ClearArray node is in words.
1665 const bool Matcher::init_array_count_is_in_bytes = false;
1666 
1667 // No additional cost for CMOVL.
1668 const int Matcher::long_cmove_cost() { return 0; }
1669 
1670 // No CMOVF/CMOVD with SSE2
1671 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1672 
1673 // Does the CPU require late expand (see block.cpp for description of late expand)?
1674 const bool Matcher::require_postalloc_expand = false;
1675 
1676 // Do we need to mask the count passed to shift instructions or does
1677 // the cpu only look at the lower 5/6 bits anyway?
1678 const bool Matcher::need_masked_shift_count = false;
1679 
1680 bool Matcher::narrow_oop_use_complex_address() {
1681   assert(UseCompressedOops, "only for compressed oops code");
1682   return (LogMinObjAlignmentInBytes <= 3);
1683 }
1684 
1685 bool Matcher::narrow_klass_use_complex_address() {
1686   assert(UseCompressedClassPointers, "only for compressed klass code");
1687   return (LogKlassAlignmentInBytes <= 3);
1688 }
1689 
1690 bool Matcher::const_oop_prefer_decode() {
1691   // Prefer ConN+DecodeN over ConP.
1692   return true;
1693 }
1694 
1695 bool Matcher::const_klass_prefer_decode() {
1696   // TODO: Either support matching DecodeNKlass (heap-based) in operand
1697   //       or condisider the following:
1698   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
1699   //return Universe::narrow_klass_base() == NULL;
1700   return true;
1701 }
1702 
1703 // Is it better to copy float constants, or load them directly from
1704 // memory?  Intel can load a float constant from a direct address,
1705 // requiring no extra registers.  Most RISCs will have to materialize
1706 // an address into a register first, so they would do better to copy
1707 // the constant from stack.
1708 const bool Matcher::rematerialize_float_constants = true; // XXX
1709 
1710 // If CPU can load and store mis-aligned doubles directly then no
1711 // fixup is needed.  Else we split the double into 2 integer pieces
1712 // and move it piece-by-piece.  Only happens when passing doubles into
1713 // C code as the Java calling convention forces doubles to be aligned.
1714 const bool Matcher::misaligned_doubles_ok = true;
1715 
1716 // No-op on amd64
1717 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
1718 
1719 // Advertise here if the CPU requires explicit rounding operations to
1720 // implement the UseStrictFP mode.
1721 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1722 
1723 // Are floats conerted to double when stored to stack during deoptimization?
1724 // On x64 it is stored without convertion so we can use normal access.
1725 bool Matcher::float_in_double() { return false; }
1726 
1727 // Do ints take an entire long register or just half?
1728 const bool Matcher::int_in_long = true;
1729 
1730 // Return whether or not this register is ever used as an argument.
1731 // This function is used on startup to build the trampoline stubs in
1732 // generateOptoStub.  Registers not mentioned will be killed by the VM
1733 // call in the trampoline, and arguments in those registers not be
1734 // available to the callee.
1735 bool Matcher::can_be_java_arg(int reg)
1736 {
1737   return
1738     reg ==  RDI_num || reg == RDI_H_num ||
1739     reg ==  RSI_num || reg == RSI_H_num ||
1740     reg ==  RDX_num || reg == RDX_H_num ||
1741     reg ==  RCX_num || reg == RCX_H_num ||
1742     reg ==   R8_num || reg ==  R8_H_num ||
1743     reg ==   R9_num || reg ==  R9_H_num ||
1744     reg ==  R12_num || reg == R12_H_num ||
1745     reg == XMM0_num || reg == XMM0b_num ||
1746     reg == XMM1_num || reg == XMM1b_num ||
1747     reg == XMM2_num || reg == XMM2b_num ||
1748     reg == XMM3_num || reg == XMM3b_num ||
1749     reg == XMM4_num || reg == XMM4b_num ||
1750     reg == XMM5_num || reg == XMM5b_num ||
1751     reg == XMM6_num || reg == XMM6b_num ||
1752     reg == XMM7_num || reg == XMM7b_num;
1753 }
1754 
1755 bool Matcher::is_spillable_arg(int reg)
1756 {
1757   return can_be_java_arg(reg);
1758 }
1759 
1760 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1761   // In 64 bit mode a code which use multiply when
1762   // devisor is constant is faster than hardware
1763   // DIV instruction (it uses MulHiL).
1764   return false;
1765 }
1766 
1767 // Register for DIVI projection of divmodI
1768 RegMask Matcher::divI_proj_mask() {
1769   return INT_RAX_REG_mask();
1770 }
1771 
1772 // Register for MODI projection of divmodI
1773 RegMask Matcher::modI_proj_mask() {
1774   return INT_RDX_REG_mask();
1775 }
1776 
1777 // Register for DIVL projection of divmodL
1778 RegMask Matcher::divL_proj_mask() {
1779   return LONG_RAX_REG_mask();
1780 }
1781 
1782 // Register for MODL projection of divmodL
1783 RegMask Matcher::modL_proj_mask() {
1784   return LONG_RDX_REG_mask();
1785 }
1786 
1787 // Register for saving SP into on method handle invokes. Not used on x86_64.
1788 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1789     return NO_REG_mask();
1790 }
1791 
1792 %}
1793 
1794 //----------ENCODING BLOCK-----------------------------------------------------
1795 // This block specifies the encoding classes used by the compiler to
1796 // output byte streams.  Encoding classes are parameterized macros
1797 // used by Machine Instruction Nodes in order to generate the bit
1798 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
1802 // which returns its register number when queried.  CONST_INTER causes
1803 // an operand to generate a function which returns the value of the
1804 // constant when queried.  MEMORY_INTER causes an operand to generate
1805 // four functions which return the Base Register, the Index Register,
1806 // the Scale Value, and the Offset Value of the operand when queried.
1807 // COND_INTER causes an operand to generate six functions which return
1808 // the encoding code (ie - encoding bits for the instruction)
1809 // associated with each basic boolean condition for a conditional
1810 // instruction.
1811 //
1812 // Instructions specify two basic values for encoding.  Again, a
1813 // function is available to check if the constant displacement is an
1814 // oop. They use the ins_encode keyword to specify their encoding
1815 // classes (which must be a sequence of enc_class names, and their
1816 // parameters, specified in the encoding block), and they use the
1817 // opcode keyword to specify, in order, their primary, secondary, and
1818 // tertiary opcode.  Only the opcode sections which a particular
1819 // instruction needs for encoding need to be specified.
1820 encode %{
1821   // Build emit functions for each basic byte or larger field in the
1822   // intel encoding scheme (opcode, rm, sib, immediate), and call them
1823   // from C++ code in the enc_class source block.  Emit functions will
1824   // live in the main source block for now.  In future, we can
1825   // generalize this by adding a syntax that specifies the sizes of
1826   // fields in an order, so that the adlc can build the emit functions
1827   // automagically
1828 
1829   // Emit primary opcode
1830   enc_class OpcP
1831   %{
1832     emit_opcode(cbuf, $primary);
1833   %}
1834 
1835   // Emit secondary opcode
1836   enc_class OpcS
1837   %{
1838     emit_opcode(cbuf, $secondary);
1839   %}
1840 
1841   // Emit tertiary opcode
1842   enc_class OpcT
1843   %{
1844     emit_opcode(cbuf, $tertiary);
1845   %}
1846 
1847   // Emit opcode directly
1848   enc_class Opcode(immI d8)
1849   %{
1850     emit_opcode(cbuf, $d8$$constant);
1851   %}
1852 
1853   // Emit size prefix
1854   enc_class SizePrefix
1855   %{
1856     emit_opcode(cbuf, 0x66);
1857   %}
1858 
1859   enc_class reg(rRegI reg)
1860   %{
1861     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
1862   %}
1863 
1864   enc_class reg_reg(rRegI dst, rRegI src)
1865   %{
1866     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1867   %}
1868 
1869   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
1870   %{
1871     emit_opcode(cbuf, $opcode$$constant);
1872     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
1873   %}
1874 
1875   enc_class cdql_enc(no_rax_rdx_RegI div)
1876   %{
1877     // Full implementation of Java idiv and irem; checks for
1878     // special case as described in JVM spec., p.243 & p.271.
1879     //
1880     //         normal case                           special case
1881     //
1882     // input : rax: dividend                         min_int
1883     //         reg: divisor                          -1
1884     //
1885     // output: rax: quotient  (= rax idiv reg)       min_int
1886     //         rdx: remainder (= rax irem reg)       0
1887     //
1888     //  Code sequnce:
1889     //
1890     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
1891     //    5:   75 07/08                jne    e <normal>
1892     //    7:   33 d2                   xor    %edx,%edx
1893     //  [div >= 8 -> offset + 1]
1894     //  [REX_B]
1895     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
1896     //    c:   74 03/04                je     11 <done>
1897     // 000000000000000e <normal>:
1898     //    e:   99                      cltd
1899     //  [div >= 8 -> offset + 1]
1900     //  [REX_B]
1901     //    f:   f7 f9                   idiv   $div
1902     // 0000000000000011 <done>:
1903 
1904     // cmp    $0x80000000,%eax
1905     emit_opcode(cbuf, 0x3d);
1906     emit_d8(cbuf, 0x00);
1907     emit_d8(cbuf, 0x00);
1908     emit_d8(cbuf, 0x00);
1909     emit_d8(cbuf, 0x80);
1910 
1911     // jne    e <normal>
1912     emit_opcode(cbuf, 0x75);
1913     emit_d8(cbuf, $div$$reg < 8 ? 0x07 : 0x08);
1914 
1915     // xor    %edx,%edx
1916     emit_opcode(cbuf, 0x33);
1917     emit_d8(cbuf, 0xD2);
1918 
1919     // cmp    $0xffffffffffffffff,%ecx
1920     if ($div$$reg >= 8) {
1921       emit_opcode(cbuf, Assembler::REX_B);
1922     }
1923     emit_opcode(cbuf, 0x83);
1924     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1925     emit_d8(cbuf, 0xFF);
1926 
1927     // je     11 <done>
1928     emit_opcode(cbuf, 0x74);
1929     emit_d8(cbuf, $div$$reg < 8 ? 0x03 : 0x04);
1930 
1931     // <normal>
1932     // cltd
1933     emit_opcode(cbuf, 0x99);
1934 
1935     // idivl (note: must be emitted by the user of this rule)
1936     // <done>
1937   %}
1938 
1939   enc_class cdqq_enc(no_rax_rdx_RegL div)
1940   %{
1941     // Full implementation of Java ldiv and lrem; checks for
1942     // special case as described in JVM spec., p.243 & p.271.
1943     //
1944     //         normal case                           special case
1945     //
1946     // input : rax: dividend                         min_long
1947     //         reg: divisor                          -1
1948     //
1949     // output: rax: quotient  (= rax idiv reg)       min_long
1950     //         rdx: remainder (= rax irem reg)       0
1951     //
1952     //  Code sequnce:
1953     //
1954     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
1955     //    7:   00 00 80
1956     //    a:   48 39 d0                cmp    %rdx,%rax
1957     //    d:   75 08                   jne    17 <normal>
1958     //    f:   33 d2                   xor    %edx,%edx
1959     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
1960     //   15:   74 05                   je     1c <done>
1961     // 0000000000000017 <normal>:
1962     //   17:   48 99                   cqto
1963     //   19:   48 f7 f9                idiv   $div
1964     // 000000000000001c <done>:
1965 
1966     // mov    $0x8000000000000000,%rdx
1967     emit_opcode(cbuf, Assembler::REX_W);
1968     emit_opcode(cbuf, 0xBA);
1969     emit_d8(cbuf, 0x00);
1970     emit_d8(cbuf, 0x00);
1971     emit_d8(cbuf, 0x00);
1972     emit_d8(cbuf, 0x00);
1973     emit_d8(cbuf, 0x00);
1974     emit_d8(cbuf, 0x00);
1975     emit_d8(cbuf, 0x00);
1976     emit_d8(cbuf, 0x80);
1977 
1978     // cmp    %rdx,%rax
1979     emit_opcode(cbuf, Assembler::REX_W);
1980     emit_opcode(cbuf, 0x39);
1981     emit_d8(cbuf, 0xD0);
1982 
1983     // jne    17 <normal>
1984     emit_opcode(cbuf, 0x75);
1985     emit_d8(cbuf, 0x08);
1986 
1987     // xor    %edx,%edx
1988     emit_opcode(cbuf, 0x33);
1989     emit_d8(cbuf, 0xD2);
1990 
1991     // cmp    $0xffffffffffffffff,$div
1992     emit_opcode(cbuf, $div$$reg < 8 ? Assembler::REX_W : Assembler::REX_WB);
1993     emit_opcode(cbuf, 0x83);
1994     emit_rm(cbuf, 0x3, 0x7, $div$$reg & 7);
1995     emit_d8(cbuf, 0xFF);
1996 
1997     // je     1e <done>
1998     emit_opcode(cbuf, 0x74);
1999     emit_d8(cbuf, 0x05);
2000 
2001     // <normal>
2002     // cqto
2003     emit_opcode(cbuf, Assembler::REX_W);
2004     emit_opcode(cbuf, 0x99);
2005 
2006     // idivq (note: must be emitted by the user of this rule)
2007     // <done>
2008   %}
2009 
2010   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
2011   enc_class OpcSE(immI imm)
2012   %{
2013     // Emit primary opcode and set sign-extend bit
2014     // Check for 8-bit immediate, and set sign extend bit in opcode
2015     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2016       emit_opcode(cbuf, $primary | 0x02);
2017     } else {
2018       // 32-bit immediate
2019       emit_opcode(cbuf, $primary);
2020     }
2021   %}
2022 
2023   enc_class OpcSErm(rRegI dst, immI imm)
2024   %{
2025     // OpcSEr/m
2026     int dstenc = $dst$$reg;
2027     if (dstenc >= 8) {
2028       emit_opcode(cbuf, Assembler::REX_B);
2029       dstenc -= 8;
2030     }
2031     // Emit primary opcode and set sign-extend bit
2032     // Check for 8-bit immediate, and set sign extend bit in opcode
2033     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2034       emit_opcode(cbuf, $primary | 0x02);
2035     } else {
2036       // 32-bit immediate
2037       emit_opcode(cbuf, $primary);
2038     }
2039     // Emit r/m byte with secondary opcode, after primary opcode.
2040     emit_rm(cbuf, 0x3, $secondary, dstenc);
2041   %}
2042 
2043   enc_class OpcSErm_wide(rRegL dst, immI imm)
2044   %{
2045     // OpcSEr/m
2046     int dstenc = $dst$$reg;
2047     if (dstenc < 8) {
2048       emit_opcode(cbuf, Assembler::REX_W);
2049     } else {
2050       emit_opcode(cbuf, Assembler::REX_WB);
2051       dstenc -= 8;
2052     }
2053     // Emit primary opcode and set sign-extend bit
2054     // Check for 8-bit immediate, and set sign extend bit in opcode
2055     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2056       emit_opcode(cbuf, $primary | 0x02);
2057     } else {
2058       // 32-bit immediate
2059       emit_opcode(cbuf, $primary);
2060     }
2061     // Emit r/m byte with secondary opcode, after primary opcode.
2062     emit_rm(cbuf, 0x3, $secondary, dstenc);
2063   %}
2064 
2065   enc_class Con8or32(immI imm)
2066   %{
2067     // Check for 8-bit immediate, and set sign extend bit in opcode
2068     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
2069       $$$emit8$imm$$constant;
2070     } else {
2071       // 32-bit immediate
2072       $$$emit32$imm$$constant;
2073     }
2074   %}
2075 
2076   enc_class opc2_reg(rRegI dst)
2077   %{
2078     // BSWAP
2079     emit_cc(cbuf, $secondary, $dst$$reg);
2080   %}
2081 
2082   enc_class opc3_reg(rRegI dst)
2083   %{
2084     // BSWAP
2085     emit_cc(cbuf, $tertiary, $dst$$reg);
2086   %}
2087 
2088   enc_class reg_opc(rRegI div)
2089   %{
2090     // INC, DEC, IDIV, IMOD, JMP indirect, ...
2091     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
2092   %}
2093 
2094   enc_class enc_cmov(cmpOp cop)
2095   %{
2096     // CMOV
2097     $$$emit8$primary;
2098     emit_cc(cbuf, $secondary, $cop$$cmpcode);
2099   %}
2100 
2101   enc_class enc_PartialSubtypeCheck()
2102   %{
2103     Register Rrdi = as_Register(RDI_enc); // result register
2104     Register Rrax = as_Register(RAX_enc); // super class
2105     Register Rrcx = as_Register(RCX_enc); // killed
2106     Register Rrsi = as_Register(RSI_enc); // sub class
2107     Label miss;
2108     const bool set_cond_codes = true;
2109 
2110     MacroAssembler _masm(&cbuf);
2111     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
2112                                      NULL, &miss,
2113                                      /*set_cond_codes:*/ true);
2114     if ($primary) {
2115       __ xorptr(Rrdi, Rrdi);
2116     }
2117     __ bind(miss);
2118   %}
2119 
2120   enc_class clear_avx %{
2121     debug_only(int off0 = cbuf.insts_size());
2122     if (generate_vzeroupper(Compile::current())) {
2123       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
2124       // Clear upper bits of YMM registers when current compiled code uses
2125       // wide vectors to avoid AVX <-> SSE transition penalty during call.
2126       MacroAssembler _masm(&cbuf);
2127       __ vzeroupper();
2128     }
2129     debug_only(int off1 = cbuf.insts_size());
2130     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
2131   %}
2132 
2133   enc_class Java_To_Runtime(method meth) %{
2134     // No relocation needed
2135     MacroAssembler _masm(&cbuf);
2136     __ mov64(r10, (int64_t) $meth$$method);
2137     __ call(r10);
2138   %}
2139 
2140   enc_class Java_To_Interpreter(method meth)
2141   %{
2142     // CALL Java_To_Interpreter
2143     // This is the instruction starting address for relocation info.
2144     cbuf.set_insts_mark();
2145     $$$emit8$primary;
2146     // CALL directly to the runtime
2147     emit_d32_reloc(cbuf,
2148                    (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2149                    runtime_call_Relocation::spec(),
2150                    RELOC_DISP32);
2151   %}
2152 
2153   enc_class Java_Static_Call(method meth)
2154   %{
2155     // JAVA STATIC CALL
2156     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
2157     // determine who we intended to call.
2158     cbuf.set_insts_mark();
2159     $$$emit8$primary;
2160 
2161     if (!_method) {
2162       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2163                      runtime_call_Relocation::spec(),
2164                      RELOC_DISP32);
2165     } else {
2166       int method_index = resolved_method_index(cbuf);
2167       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
2168                                                   : static_call_Relocation::spec(method_index);
2169       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
2170                      rspec, RELOC_DISP32);
2171       // Emit stubs for static call.
2172       address mark = cbuf.insts_mark();
2173       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
2174       if (stub == NULL) {
2175         ciEnv::current()->record_failure("CodeCache is full");
2176         return;
2177       }
2178 #if INCLUDE_AOT
2179       CompiledStaticCall::emit_to_aot_stub(cbuf, mark);
2180 #endif
2181     }
2182   %}
2183 
2184   enc_class Java_Dynamic_Call(method meth) %{
2185     MacroAssembler _masm(&cbuf);
2186     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
2187   %}
2188 
2189   enc_class Java_Compiled_Call(method meth)
2190   %{
2191     // JAVA COMPILED CALL
2192     int disp = in_bytes(Method:: from_compiled_offset());
2193 
2194     // XXX XXX offset is 128 is 1.5 NON-PRODUCT !!!
2195     // assert(-0x80 <= disp && disp < 0x80, "compiled_code_offset isn't small");
2196 
2197     // callq *disp(%rax)
2198     cbuf.set_insts_mark();
2199     $$$emit8$primary;
2200     if (disp < 0x80) {
2201       emit_rm(cbuf, 0x01, $secondary, RAX_enc); // R/M byte
2202       emit_d8(cbuf, disp); // Displacement
2203     } else {
2204       emit_rm(cbuf, 0x02, $secondary, RAX_enc); // R/M byte
2205       emit_d32(cbuf, disp); // Displacement
2206     }
2207   %}
2208 
2209   enc_class reg_opc_imm(rRegI dst, immI8 shift)
2210   %{
2211     // SAL, SAR, SHR
2212     int dstenc = $dst$$reg;
2213     if (dstenc >= 8) {
2214       emit_opcode(cbuf, Assembler::REX_B);
2215       dstenc -= 8;
2216     }
2217     $$$emit8$primary;
2218     emit_rm(cbuf, 0x3, $secondary, dstenc);
2219     $$$emit8$shift$$constant;
2220   %}
2221 
2222   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
2223   %{
2224     // SAL, SAR, SHR
2225     int dstenc = $dst$$reg;
2226     if (dstenc < 8) {
2227       emit_opcode(cbuf, Assembler::REX_W);
2228     } else {
2229       emit_opcode(cbuf, Assembler::REX_WB);
2230       dstenc -= 8;
2231     }
2232     $$$emit8$primary;
2233     emit_rm(cbuf, 0x3, $secondary, dstenc);
2234     $$$emit8$shift$$constant;
2235   %}
2236 
2237   enc_class load_immI(rRegI dst, immI src)
2238   %{
2239     int dstenc = $dst$$reg;
2240     if (dstenc >= 8) {
2241       emit_opcode(cbuf, Assembler::REX_B);
2242       dstenc -= 8;
2243     }
2244     emit_opcode(cbuf, 0xB8 | dstenc);
2245     $$$emit32$src$$constant;
2246   %}
2247 
2248   enc_class load_immL(rRegL dst, immL src)
2249   %{
2250     int dstenc = $dst$$reg;
2251     if (dstenc < 8) {
2252       emit_opcode(cbuf, Assembler::REX_W);
2253     } else {
2254       emit_opcode(cbuf, Assembler::REX_WB);
2255       dstenc -= 8;
2256     }
2257     emit_opcode(cbuf, 0xB8 | dstenc);
2258     emit_d64(cbuf, $src$$constant);
2259   %}
2260 
2261   enc_class load_immUL32(rRegL dst, immUL32 src)
2262   %{
2263     // same as load_immI, but this time we care about zeroes in the high word
2264     int dstenc = $dst$$reg;
2265     if (dstenc >= 8) {
2266       emit_opcode(cbuf, Assembler::REX_B);
2267       dstenc -= 8;
2268     }
2269     emit_opcode(cbuf, 0xB8 | dstenc);
2270     $$$emit32$src$$constant;
2271   %}
2272 
2273   enc_class load_immL32(rRegL dst, immL32 src)
2274   %{
2275     int dstenc = $dst$$reg;
2276     if (dstenc < 8) {
2277       emit_opcode(cbuf, Assembler::REX_W);
2278     } else {
2279       emit_opcode(cbuf, Assembler::REX_WB);
2280       dstenc -= 8;
2281     }
2282     emit_opcode(cbuf, 0xC7);
2283     emit_rm(cbuf, 0x03, 0x00, dstenc);
2284     $$$emit32$src$$constant;
2285   %}
2286 
2287   enc_class load_immP31(rRegP dst, immP32 src)
2288   %{
2289     // same as load_immI, but this time we care about zeroes in the high word
2290     int dstenc = $dst$$reg;
2291     if (dstenc >= 8) {
2292       emit_opcode(cbuf, Assembler::REX_B);
2293       dstenc -= 8;
2294     }
2295     emit_opcode(cbuf, 0xB8 | dstenc);
2296     $$$emit32$src$$constant;
2297   %}
2298 
2299   enc_class load_immP(rRegP dst, immP src)
2300   %{
2301     int dstenc = $dst$$reg;
2302     if (dstenc < 8) {
2303       emit_opcode(cbuf, Assembler::REX_W);
2304     } else {
2305       emit_opcode(cbuf, Assembler::REX_WB);
2306       dstenc -= 8;
2307     }
2308     emit_opcode(cbuf, 0xB8 | dstenc);
2309     // This next line should be generated from ADLC
2310     if ($src->constant_reloc() != relocInfo::none) {
2311       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
2312     } else {
2313       emit_d64(cbuf, $src$$constant);
2314     }
2315   %}
2316 
  // Emit the constant operand 'src' as a 32-bit immediate, using the ADLC
  // emit32 form.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
2322 
2323   enc_class Con32F_as_bits(immF src)
2324   %{
2325     // Output Float immediate bits
2326     jfloat jf = $src$$constant;
2327     jint jf_as_bits = jint_cast(jf);
2328     emit_d32(cbuf, jf_as_bits);
2329   %}
2330 
  // Emit the constant operand 'src' as a 16-bit immediate, using the ADLC
  // emit16 form.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
2336 
2337   // How is this different from Con32??? XXX
2338   enc_class Con_d32(immI src)
2339   %{
2340     emit_d32(cbuf,$src$$constant);
2341   %}
2342 
2343   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
2344     // Output immediate memory reference
2345     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2346     emit_d32(cbuf, 0x00);
2347   %}
2348 
2349   enc_class lock_prefix()
2350   %{
2351     if (os::is_MP()) {
2352       emit_opcode(cbuf, 0xF0); // lock
2353     }
2354   %}
2355 
2356   enc_class REX_mem(memory mem)
2357   %{
2358     if ($mem$$base >= 8) {
2359       if ($mem$$index < 8) {
2360         emit_opcode(cbuf, Assembler::REX_B);
2361       } else {
2362         emit_opcode(cbuf, Assembler::REX_XB);
2363       }
2364     } else {
2365       if ($mem$$index >= 8) {
2366         emit_opcode(cbuf, Assembler::REX_X);
2367       }
2368     }
2369   %}
2370 
2371   enc_class REX_mem_wide(memory mem)
2372   %{
2373     if ($mem$$base >= 8) {
2374       if ($mem$$index < 8) {
2375         emit_opcode(cbuf, Assembler::REX_WB);
2376       } else {
2377         emit_opcode(cbuf, Assembler::REX_WXB);
2378       }
2379     } else {
2380       if ($mem$$index < 8) {
2381         emit_opcode(cbuf, Assembler::REX_W);
2382       } else {
2383         emit_opcode(cbuf, Assembler::REX_WX);
2384       }
2385     }
2386   %}
2387 
2388   // for byte regs
2389   enc_class REX_breg(rRegI reg)
2390   %{
2391     if ($reg$$reg >= 4) {
2392       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2393     }
2394   %}
2395 
2396   // for byte regs
2397   enc_class REX_reg_breg(rRegI dst, rRegI src)
2398   %{
2399     if ($dst$$reg < 8) {
2400       if ($src$$reg >= 4) {
2401         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
2402       }
2403     } else {
2404       if ($src$$reg < 8) {
2405         emit_opcode(cbuf, Assembler::REX_R);
2406       } else {
2407         emit_opcode(cbuf, Assembler::REX_RB);
2408       }
2409     }
2410   %}
2411 
2412   // for byte regs
2413   enc_class REX_breg_mem(rRegI reg, memory mem)
2414   %{
2415     if ($reg$$reg < 8) {
2416       if ($mem$$base < 8) {
2417         if ($mem$$index >= 8) {
2418           emit_opcode(cbuf, Assembler::REX_X);
2419         } else if ($reg$$reg >= 4) {
2420           emit_opcode(cbuf, Assembler::REX);
2421         }
2422       } else {
2423         if ($mem$$index < 8) {
2424           emit_opcode(cbuf, Assembler::REX_B);
2425         } else {
2426           emit_opcode(cbuf, Assembler::REX_XB);
2427         }
2428       }
2429     } else {
2430       if ($mem$$base < 8) {
2431         if ($mem$$index < 8) {
2432           emit_opcode(cbuf, Assembler::REX_R);
2433         } else {
2434           emit_opcode(cbuf, Assembler::REX_RX);
2435         }
2436       } else {
2437         if ($mem$$index < 8) {
2438           emit_opcode(cbuf, Assembler::REX_RB);
2439         } else {
2440           emit_opcode(cbuf, Assembler::REX_RXB);
2441         }
2442       }
2443     }
2444   %}
2445 
2446   enc_class REX_reg(rRegI reg)
2447   %{
2448     if ($reg$$reg >= 8) {
2449       emit_opcode(cbuf, Assembler::REX_B);
2450     }
2451   %}
2452 
2453   enc_class REX_reg_wide(rRegI reg)
2454   %{
2455     if ($reg$$reg < 8) {
2456       emit_opcode(cbuf, Assembler::REX_W);
2457     } else {
2458       emit_opcode(cbuf, Assembler::REX_WB);
2459     }
2460   %}
2461 
2462   enc_class REX_reg_reg(rRegI dst, rRegI src)
2463   %{
2464     if ($dst$$reg < 8) {
2465       if ($src$$reg >= 8) {
2466         emit_opcode(cbuf, Assembler::REX_B);
2467       }
2468     } else {
2469       if ($src$$reg < 8) {
2470         emit_opcode(cbuf, Assembler::REX_R);
2471       } else {
2472         emit_opcode(cbuf, Assembler::REX_RB);
2473       }
2474     }
2475   %}
2476 
2477   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
2478   %{
2479     if ($dst$$reg < 8) {
2480       if ($src$$reg < 8) {
2481         emit_opcode(cbuf, Assembler::REX_W);
2482       } else {
2483         emit_opcode(cbuf, Assembler::REX_WB);
2484       }
2485     } else {
2486       if ($src$$reg < 8) {
2487         emit_opcode(cbuf, Assembler::REX_WR);
2488       } else {
2489         emit_opcode(cbuf, Assembler::REX_WRB);
2490       }
2491     }
2492   %}
2493 
2494   enc_class REX_reg_mem(rRegI reg, memory mem)
2495   %{
2496     if ($reg$$reg < 8) {
2497       if ($mem$$base < 8) {
2498         if ($mem$$index >= 8) {
2499           emit_opcode(cbuf, Assembler::REX_X);
2500         }
2501       } else {
2502         if ($mem$$index < 8) {
2503           emit_opcode(cbuf, Assembler::REX_B);
2504         } else {
2505           emit_opcode(cbuf, Assembler::REX_XB);
2506         }
2507       }
2508     } else {
2509       if ($mem$$base < 8) {
2510         if ($mem$$index < 8) {
2511           emit_opcode(cbuf, Assembler::REX_R);
2512         } else {
2513           emit_opcode(cbuf, Assembler::REX_RX);
2514         }
2515       } else {
2516         if ($mem$$index < 8) {
2517           emit_opcode(cbuf, Assembler::REX_RB);
2518         } else {
2519           emit_opcode(cbuf, Assembler::REX_RXB);
2520         }
2521       }
2522     }
2523   %}
2524 
2525   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
2526   %{
2527     if ($reg$$reg < 8) {
2528       if ($mem$$base < 8) {
2529         if ($mem$$index < 8) {
2530           emit_opcode(cbuf, Assembler::REX_W);
2531         } else {
2532           emit_opcode(cbuf, Assembler::REX_WX);
2533         }
2534       } else {
2535         if ($mem$$index < 8) {
2536           emit_opcode(cbuf, Assembler::REX_WB);
2537         } else {
2538           emit_opcode(cbuf, Assembler::REX_WXB);
2539         }
2540       }
2541     } else {
2542       if ($mem$$base < 8) {
2543         if ($mem$$index < 8) {
2544           emit_opcode(cbuf, Assembler::REX_WR);
2545         } else {
2546           emit_opcode(cbuf, Assembler::REX_WRX);
2547         }
2548       } else {
2549         if ($mem$$index < 8) {
2550           emit_opcode(cbuf, Assembler::REX_WRB);
2551         } else {
2552           emit_opcode(cbuf, Assembler::REX_WRXB);
2553         }
2554       }
2555     }
2556   %}
2557 
2558   enc_class reg_mem(rRegI ereg, memory mem)
2559   %{
2560     // High registers handle in encode_RegMem
2561     int reg = $ereg$$reg;
2562     int base = $mem$$base;
2563     int index = $mem$$index;
2564     int scale = $mem$$scale;
2565     int disp = $mem$$disp;
2566     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2567 
2568     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
2569   %}
2570 
2571   enc_class RM_opc_mem(immI rm_opcode, memory mem)
2572   %{
2573     int rm_byte_opcode = $rm_opcode$$constant;
2574 
2575     // High registers handle in encode_RegMem
2576     int base = $mem$$base;
2577     int index = $mem$$index;
2578     int scale = $mem$$scale;
2579     int displace = $mem$$disp;
2580 
2581     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
2582                                             // working with static
2583                                             // globals
2584     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
2585                   disp_reloc);
2586   %}
2587 
2588   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
2589   %{
2590     int reg_encoding = $dst$$reg;
2591     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2592     int index        = 0x04;            // 0x04 indicates no index
2593     int scale        = 0x00;            // 0x00 indicates no scale
2594     int displace     = $src1$$constant; // 0x00 indicates no displacement
2595     relocInfo::relocType disp_reloc = relocInfo::none;
2596     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
2597                   disp_reloc);
2598   %}
2599 
2600   enc_class neg_reg(rRegI dst)
2601   %{
2602     int dstenc = $dst$$reg;
2603     if (dstenc >= 8) {
2604       emit_opcode(cbuf, Assembler::REX_B);
2605       dstenc -= 8;
2606     }
2607     // NEG $dst
2608     emit_opcode(cbuf, 0xF7);
2609     emit_rm(cbuf, 0x3, 0x03, dstenc);
2610   %}
2611 
2612   enc_class neg_reg_wide(rRegI dst)
2613   %{
2614     int dstenc = $dst$$reg;
2615     if (dstenc < 8) {
2616       emit_opcode(cbuf, Assembler::REX_W);
2617     } else {
2618       emit_opcode(cbuf, Assembler::REX_WB);
2619       dstenc -= 8;
2620     }
2621     // NEG $dst
2622     emit_opcode(cbuf, 0xF7);
2623     emit_rm(cbuf, 0x3, 0x03, dstenc);
2624   %}
2625 
2626   enc_class setLT_reg(rRegI dst)
2627   %{
2628     int dstenc = $dst$$reg;
2629     if (dstenc >= 8) {
2630       emit_opcode(cbuf, Assembler::REX_B);
2631       dstenc -= 8;
2632     } else if (dstenc >= 4) {
2633       emit_opcode(cbuf, Assembler::REX);
2634     }
2635     // SETLT $dst
2636     emit_opcode(cbuf, 0x0F);
2637     emit_opcode(cbuf, 0x9C);
2638     emit_rm(cbuf, 0x3, 0x0, dstenc);
2639   %}
2640 
2641   enc_class setNZ_reg(rRegI dst)
2642   %{
2643     int dstenc = $dst$$reg;
2644     if (dstenc >= 8) {
2645       emit_opcode(cbuf, Assembler::REX_B);
2646       dstenc -= 8;
2647     } else if (dstenc >= 4) {
2648       emit_opcode(cbuf, Assembler::REX);
2649     }
2650     // SETNZ $dst
2651     emit_opcode(cbuf, 0x0F);
2652     emit_opcode(cbuf, 0x95);
2653     emit_rm(cbuf, 0x3, 0x0, dstenc);
2654   %}
2655 
2656 
2657   // Compare the lonogs and set -1, 0, or 1 into dst
2658   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
2659   %{
2660     int src1enc = $src1$$reg;
2661     int src2enc = $src2$$reg;
2662     int dstenc = $dst$$reg;
2663 
2664     // cmpq $src1, $src2
2665     if (src1enc < 8) {
2666       if (src2enc < 8) {
2667         emit_opcode(cbuf, Assembler::REX_W);
2668       } else {
2669         emit_opcode(cbuf, Assembler::REX_WB);
2670       }
2671     } else {
2672       if (src2enc < 8) {
2673         emit_opcode(cbuf, Assembler::REX_WR);
2674       } else {
2675         emit_opcode(cbuf, Assembler::REX_WRB);
2676       }
2677     }
2678     emit_opcode(cbuf, 0x3B);
2679     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
2680 
2681     // movl $dst, -1
2682     if (dstenc >= 8) {
2683       emit_opcode(cbuf, Assembler::REX_B);
2684     }
2685     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
2686     emit_d32(cbuf, -1);
2687 
2688     // jl,s done
2689     emit_opcode(cbuf, 0x7C);
2690     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
2691 
2692     // setne $dst
2693     if (dstenc >= 4) {
2694       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
2695     }
2696     emit_opcode(cbuf, 0x0F);
2697     emit_opcode(cbuf, 0x95);
2698     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
2699 
2700     // movzbl $dst, $dst
2701     if (dstenc >= 4) {
2702       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
2703     }
2704     emit_opcode(cbuf, 0x0F);
2705     emit_opcode(cbuf, 0xB6);
2706     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
2707   %}
2708 
2709   enc_class Push_ResultXD(regD dst) %{
2710     MacroAssembler _masm(&cbuf);
2711     __ fstp_d(Address(rsp, 0));
2712     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2713     __ addptr(rsp, 8);
2714   %}
2715 
2716   enc_class Push_SrcXD(regD src) %{
2717     MacroAssembler _masm(&cbuf);
2718     __ subptr(rsp, 8);
2719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2720     __ fld_d(Address(rsp, 0));
2721   %}
2722 
2723 
2724   enc_class enc_rethrow()
2725   %{
2726     cbuf.set_insts_mark();
2727     emit_opcode(cbuf, 0xE9); // jmp entry
2728     emit_d32_reloc(cbuf,
2729                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
2730                    runtime_call_Relocation::spec(),
2731                    RELOC_DISP32);
2732   %}
2733 
2734 %}
2735 
2736 
2737 
2738 //----------FRAME--------------------------------------------------------------
2739 // Definition of frame structure and management information.
2740 //
2741 //  S T A C K   L A Y O U T    Allocators stack-slot number
2742 //                             |   (to get allocators register number
2743 //  G  Owned by    |        |  v    add OptoReg::stack0())
2744 //  r   CALLER     |        |
2745 //  o     |        +--------+      pad to even-align allocators stack-slot
2746 //  w     V        |  pad0  |        numbers; owned by CALLER
2747 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
2748 //  h     ^        |   in   |  5
2749 //        |        |  args  |  4   Holes in incoming args owned by SELF
2750 //  |     |        |        |  3
2751 //  |     |        +--------+
2752 //  V     |        | old out|      Empty on Intel, window on Sparc
2753 //        |    old |preserve|      Must be even aligned.
2754 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
2755 //        |        |   in   |  3   area for Intel ret address
2756 //     Owned by    |preserve|      Empty on Sparc.
2757 //       SELF      +--------+
2758 //        |        |  pad2  |  2   pad to align old SP
2759 //        |        +--------+  1
2760 //        |        | locks  |  0
2761 //        |        +--------+----> OptoReg::stack0(), even aligned
2762 //        |        |  pad1  | 11   pad to align new SP
2763 //        |        +--------+
2764 //        |        |        | 10
2765 //        |        | spills |  9   spills
2766 //        V        |        |  8   (pad0 slot for callee)
2767 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
2768 //        ^        |  out   |  7
2769 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
2770 //     Owned by    +--------+
2771 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
2772 //        |    new |preserve|      Must be even-aligned.
2773 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
2774 //        |        |        |
2775 //
2776 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
2777 //         known from SELF's arguments and the Java calling convention.
2778 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
2786 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
2787 //         even aligned with pad0 as needed.
2788 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
2789 //         region 6-11 is even aligned; it may be padded out more so that
2790 //         the region from SP to FP meets the minimum stack alignment.
2791 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
2792 //         alignment.  Region 11, pad1, may be dynamically extended so that
2793 //         SP meets the minimum alignment.
2794 
// Frame description: stack direction, the registers shared with the
// interpreter, alignment and preserved-slot counts, the return-address
// location, the calling conventions and the return-value registers.
frame
%{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // The following registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register
  interpreter_method_oop_reg(RBX);      // Method Oop Register when
                                        // calling interpreter

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  amd64 needs two slots for
  // return address.
  // NOTE(review): the expression starts at 4 slots, not 2, and adds 2 more
  // when VerifyStackAtCalls is set; what the extra slots hold is not stated
  // here -- confirm and extend the comment above.
  in_preserve_stack_slots(4 + 2 * VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Both tables are indexed by ideal_reg.  lo[] gives the register that
    // holds the value (or its low half); hi[] gives the register for the
    // upper half, or OptoReg::Bad when the type has none.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
2895 
2896 //----------ATTRIBUTES---------------------------------------------------------
2897 //----------Operand Attributes-------------------------------------------------
// Declares the operand attribute op_cost with the value 0; operand
// definitions below set their own value with op_cost(...).
op_attrib op_cost(0);        // Required cost attribute
2899 
2900 //----------Instruction Attributes---------------------------------------------
// Instruction attributes, each declared together with the value given in
// parentheses.
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
2914 //----------OPERANDS-----------------------------------------------------------
2915 // Operand definitions must precede instruction definitions for correct parsing
2916 // in the ADLC because operands constitute user defined types which are used in
2917 // instruction definitions.
2918 
2919 //----------Simple Operands----------------------------------------------------
2920 // Immediate Operands
// Integer Immediate: matches any ConI node, whatever its value.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
2930 
// Constant for test vs zero: a ConI whose value is exactly 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2941 
// Constant for increment: a ConI whose value is exactly 1.
operand immI1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2952 
// Constant for decrement: a ConI whose value is exactly -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
2963 
// Valid scale values for addressing modes: a ConI in the range 0..3
// inclusive.  No op_cost is set here.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
2973 
// Integer immediate that fits in a signed byte: -0x80 <= value < 0x80.
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2983 
// Integer immediate that fits in an unsigned byte: 0 <= value <= 255.
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
2993 
// Integer immediate that fits in a signed 16-bit value:
// -32768 <= value <= 32767.
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3003 
// Int Immediate non-negative: a ConI whose value is >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3014 
// Constant for long shifts: a ConI whose value is exactly 32.
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3025 
// Constant for long shifts: a ConI whose value is exactly 64.
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3036 
// Pointer Immediate: matches any ConP node, whatever its value.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3046 
// NULL Pointer Immediate: a ConP whose pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3057 
// Narrow (compressed) Pointer Immediate: matches any ConN node.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3066 
// Immediate matching any ConNKlass node.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3074 
// Narrow NULL Pointer Immediate: a ConN whose narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3084 
// Pointer immediate that carries no relocation and whose value, shifted
// right by 31, is zero (i.e. it lies in the range 0 .. 2^31 - 1).
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3095 
3096 
// Long Immediate: matches any ConL node, whatever its value.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
3106 
// Long Immediate 8-bit: a ConL with -0x80 <= value < 0x80 (signed byte
// range).
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3117 
// Long Immediate 32-bit unsigned: a ConL whose value is unchanged by a
// round trip through unsigned int.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3128 
// Long Immediate 32-bit signed: a ConL whose value is unchanged by a
// round trip through int.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3139 
// Long Immediate zero: a ConL whose value is exactly 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3150 
// Constant for increment: a ConL whose value is exactly 1.
// No op_cost is set here.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3160 
// Constant for decrement: a ConL whose value is exactly -1.
// No op_cost is set here.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3170 
// Long Immediate: the value 10 (a ConL whose value is exactly 10).
// No op_cost is set here.
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3180 
// Long immediate from 0 to 127 inclusive (0 <= value < 0x80).
// Used for a shorter form of long mul by 10.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
3192 
// Long Immediate: low 32-bit mask (a ConL equal to 0xFFFFFFFF).
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
3203 
// Float Immediate zero: a ConF for which jint_cast of the value is 0.
// NOTE(review): presumably jint_cast exposes the raw bit pattern, in which
// case -0.0f does not match -- confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3214 
// Float Immediate: matches any ConF node, whatever its value.
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3224 
// Double Immediate zero: a ConD for which jlong_cast of the value is 0.
// NOTE(review): presumably jlong_cast exposes the raw bit pattern, in which
// case -0.0 does not match -- confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
3235 
// Double Immediate: matches any ConD node, whatever its value.
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
3245 
3246 // Immediates for special shifts (sign extend)
3247 
// Constant 16: a ConI whose value is exactly 16.  Presumably a shift count,
// going by the "special shifts (sign extend)" heading above -- confirm.
// No op_cost is set here.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3257 
// Constant 24: a ConI whose value is exactly 24.  No op_cost is set here.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3266 
// Constant for byte-wide masking: a ConI whose value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3276 
// Constant for short-wide masking: a ConI whose value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
3286 
// Constant for byte-wide masking: a ConL whose value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3296 
// Constant for short-wide masking: a ConL whose value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
3306 
3307 // Register Operands
// Integer Register: allocated from register class int_reg.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  // Inputs already typed as one of the fixed-register int operands are
  // accepted as well.
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3323 
3324 // Special Registers
// Integer register operand allocated from register class int_rax_reg;
// also matches rRegI.
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3334 
3335 // Special Registers
// Integer register operand allocated from register class int_rbx_reg;
// also matches rRegI.
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
3345 
// Integer register operand allocated from register class int_rcx_reg;
// also matches rRegI.
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
3355 
// Integer register operand allocated from register class int_rdx_reg;
// also matches rRegI.
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
3365 
// Integer register operand allocated from register class int_rdi_reg;
// also matches rRegI.
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
3375 
// Integer register operand allocated from register class int_no_rcx_reg.
// Of the fixed-register int operands, every one except rcx_RegI is matched.
operand no_rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rcx_reg));
  match(RegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3388 
// Integer register operand allocated from register class
// int_no_rax_rdx_reg.  Of the fixed-register int operands, rax_RegI and
// rdx_RegI are the ones not matched.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
3400 
3401 // Pointer Register
// Pointer register operand allocated from register class any_reg.  Matches
// RegP, each fixed-register pointer operand listed below, and rRegP.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3417 
// General pointer register operand allocated from register class ptr_reg.
// Inputs typed as any of the fixed-register pointer operands below are
// accepted too.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}
3432 
// Narrow-pointer (RegN) register operand.  It is allocated from int_reg,
// the same register class that rRegI uses.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
3440 
3441 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
3442 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
3443 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
3444 // The output of an instruction is controlled by the allocator, which respects
3445 // register class masks, not match rules.  Unless an instruction mentions
3446 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
3447 // by the allocator as an input.
3448 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
3449 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
3450 // result, RBP is not included in the output of the instruction either.
3451 
// Pointer register operand allocated from register class ptr_no_rax_reg;
// also matches rbx_RegP, rsi_RegP and rdi_RegP.
operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3463 
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
// Allocated from register class ptr_reg_no_rbp; also matches rbx_RegP,
// rsi_RegP and rdi_RegP.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3477 
// Pointer register operand allocated from register class
// ptr_no_rax_rbx_reg; also matches rsi_RegP and rdi_RegP.
operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
3488 
3489 // Special Registers
// Return a pointer value
// Pointer register operand allocated from register class ptr_rax_reg;
// also matches rRegP.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3500 
3501 // Special Registers
// Return a compressed pointer value
// Narrow-pointer register operand allocated from register class
// int_rax_reg; also matches rRegN.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
3512 
// Used in AtomicAdd
// Pointer register operand allocated from register class ptr_rbx_reg;
// also matches rRegP.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3523 
// Pointer register operand allocated from register class ptr_rsi_reg;
// also matches rRegP.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3533 
// Used in rep stosq
// Pointer register operand allocated from register class ptr_rdi_reg;
// also matches rRegP.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3544 
// Pointer register operand allocated from register class ptr_r15_reg (the
// read-only TLS register, per the Q&A above); also matches rRegP.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
3554 
// General long register operand allocated from register class long_reg;
// also matches rax_RegL and rdx_RegL.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3565 
3566 // Special Registers
// Long register operand allocated from register class
// long_no_rax_rdx_reg; also matches rRegL.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3576 
// Long register operand; also matches rRegL and rdx_RegL.
// NOTE(review): the register class is long_no_rax_rdx_reg, the same one
// no_rax_rdx_RegL uses, although rdx_RegL is matched below -- confirm that
// this class (rather than one excluding only RAX) is intended.
operand no_rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
3587 
// Long register operand allocated from register class long_no_rcx_reg;
// also matches rRegL.
operand no_rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3597 
// Long register operand allocated from register class long_rax_reg;
// also matches rRegL.
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
3607 
// Long register operand allocated from register class long_rcx_reg;
// also matches rRegL.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3617 
// Long register operand allocated from register class long_rdx_reg;
// also matches rRegL.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
3627 
3628 // Flags register, used as output of compare instructions
3629 operand rFlagsReg()
3630 %{
3631   constraint(ALLOC_IN_RC(int_flags)); // allocated from class int_flags
3632   match(RegFlags);
3633   // Printed as "RFLAGS".
3634   format %{ "RFLAGS" %}
3635   interface(REG_INTER);
3636 %}
3637 
3638 // Flags register, used as output of FLOATING POINT compare instructions
3639 operand rFlagsRegU()
3640 %{
3641   constraint(ALLOC_IN_RC(int_flags)); // same register class as rFlagsReg
3642   match(RegFlags);
3643   // Printed as "RFLAGS_U" so it can be told apart from rFlagsReg in listings.
3644   format %{ "RFLAGS_U" %}
3645   interface(REG_INTER);
3646 %}
3647 
3648 operand rFlagsRegUCF() %{
3649   constraint(ALLOC_IN_RC(int_flags)); // same register class as rFlagsReg / rFlagsRegU
3650   match(RegFlags);
3651   predicate(false);
3652   // The predicate above is constant false; presumably this keeps the operand from being selected by a plain RegFlags match - TODO confirm.
3653   format %{ "RFLAGS_U_CF" %}
3654   interface(REG_INTER);
3655 %}
3656 
3657 // Float register operand: matches RegF, allocated from class float_reg
3658 operand regF() %{
3659    constraint(ALLOC_IN_RC(float_reg));
3660    match(RegF);
3661    // Empty format text; exposed through the plain register interface.
3662    format %{ %}
3663    interface(REG_INTER);
3664 %}
3665 
3666 // Double register operand: matches RegD, allocated from class double_reg
3667 operand regD() %{
3668    constraint(ALLOC_IN_RC(double_reg));
3669    match(RegD);
3670    // Empty format text; exposed through the plain register interface.
3671    format %{ %}
3672    interface(REG_INTER);
3673 %}
3674 
3675 // Vectors
3676 operand vecS() %{
3677   constraint(ALLOC_IN_RC(vectors_reg)); // VecS operand, allocated from class vectors_reg
3678   match(VecS);
3679   // Empty format text; exposed through the plain register interface.
3680   format %{ %}
3681   interface(REG_INTER);
3682 %}
3683 
3684 operand vecD() %{
3685   constraint(ALLOC_IN_RC(vectord_reg)); // VecD operand, allocated from class vectord_reg
3686   match(VecD);
3687   // Empty format text; exposed through the plain register interface.
3688   format %{ %}
3689   interface(REG_INTER);
3690 %}
3691 
3692 operand vecX() %{
3693   constraint(ALLOC_IN_RC(vectorx_reg)); // VecX operand, allocated from class vectorx_reg
3694   match(VecX);
3695   // Empty format text; exposed through the plain register interface.
3696   format %{ %}
3697   interface(REG_INTER);
3698 %}
3699 
3700 operand vecY() %{
3701   constraint(ALLOC_IN_RC(vectory_reg)); // VecY operand, allocated from class vectory_reg
3702   match(VecY);
3703   // Empty format text; exposed through the plain register interface.
3704   format %{ %}
3705   interface(REG_INTER);
3706 %}
3707 
3708 //----------Memory Operands----------------------------------------------------
3709 // Direct Memory Operand
3710 // operand direct(immP addr)
3711 // %{
3712 //   match(addr);
3713 
3714 //   format %{ "[$addr]" %}
3715 //   interface(MEMORY_INTER) %{
3716 //     base(0xFFFFFFFF);
3717 //     index(0x4);
3718 //     scale(0x0);
3719 //     disp($addr);
3720 //   %}
3721 // %}
3722 
3723 // Indirect Memory Operand: the address is the pointer register alone
3724 operand indirect(any_RegP reg)
3725 %{
3726   constraint(ALLOC_IN_RC(ptr_reg));
3727   match(reg);
3728   // Memory interface: base register only, no index, zero displacement.
3729   format %{ "[$reg]" %}
3730   interface(MEMORY_INTER) %{
3731     base($reg);
3732     index(0x4);  // no index register
3733     scale(0x0);  // no scale
3734     disp(0x0);
3735   %}
3736 %}
3737 
3738 // Indirect Memory Plus Short Offset Operand
3739 operand indOffset8(any_RegP reg, immL8 off)
3740 %{
3741   constraint(ALLOC_IN_RC(ptr_reg));
3742   match(AddP reg off);
3743   // Memory interface: base register plus the immL8 constant as displacement.
3744   format %{ "[$reg + $off (8-bit)]" %}
3745   interface(MEMORY_INTER) %{
3746     base($reg);
3747     index(0x4);  // no index register
3748     scale(0x0);  // no scale
3749     disp($off);
3750   %}
3751 %}
3752 
3753 // Indirect Memory Plus Long Offset Operand
3754 operand indOffset32(any_RegP reg, immL32 off)
3755 %{
3756   constraint(ALLOC_IN_RC(ptr_reg));
3757   match(AddP reg off);
3758   // Memory interface: base register plus the immL32 constant as displacement.
3759   format %{ "[$reg + $off (32-bit)]" %}
3760   interface(MEMORY_INTER) %{
3761     base($reg);
3762     index(0x4);  // no index register
3763     scale(0x0);  // no scale
3764     disp($off);
3765   %}
3766 %}
3767 
3768 // Indirect Memory Plus Index Register Plus Offset Operand
3769 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
3770 %{
3771   constraint(ALLOC_IN_RC(ptr_reg));
3772   match(AddP (AddP reg lreg) off);
3773   // Unscaled long index register plus an immL32 displacement; operand cost 10.
3774   op_cost(10);
3775   format %{"[$reg + $off + $lreg]" %}
3776   interface(MEMORY_INTER) %{
3777     base($reg);
3778     index($lreg);
3779     scale(0x0);  // no scale
3780     disp($off);
3781   %}
3782 %}
3783 
3784 // Indirect Memory Plus Index Register Operand (no offset)
3785 operand indIndex(any_RegP reg, rRegL lreg)
3786 %{
3787   constraint(ALLOC_IN_RC(ptr_reg));
3788   match(AddP reg lreg);
3789   // Unscaled long index register, zero displacement; operand cost 10.
3790   op_cost(10);
3791   format %{"[$reg + $lreg]" %}
3792   interface(MEMORY_INTER) %{
3793     base($reg);
3794     index($lreg);
3795     scale(0x0);  // no scale
3796     disp(0x0);
3797   %}
3798 %}
3799 
3800 // Indirect Memory Plus Index Register Times Scale
3801 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
3802 %{
3803   constraint(ALLOC_IN_RC(ptr_reg));
3804   match(AddP reg (LShiftL lreg scale));
3805   // The shift amount of the matched LShiftL becomes the addressing scale; operand cost 10.
3806   op_cost(10);
3807   format %{"[$reg + $lreg << $scale]" %}
3808   interface(MEMORY_INTER) %{
3809     base($reg);
3810     index($lreg);
3811     scale($scale);
3812     disp(0x0);
3813   %}
3814 %}
3815 
3816 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
3817 %{
3818   constraint(ALLOC_IN_RC(ptr_reg)); // base + (int index converted to long) << scale, no displacement
3819   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3820   match(AddP reg (LShiftL (ConvI2L idx) scale));
3821   // Predicate requires a non-negative lower bound on the long type at n->in(3)->in(1) (presumably the ConvI2L node - confirm).
3822   op_cost(10);
3823   format %{"[$reg + pos $idx << $scale]" %}
3824   interface(MEMORY_INTER) %{
3825     base($reg);
3826     index($idx);
3827     scale($scale);
3828     disp(0x0);
3829   %}
3830 %}
3831 
3832 // Indirect Memory Plus Index Register Times Scale Plus Offset Operand
3833 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
3834 %{
3835   constraint(ALLOC_IN_RC(ptr_reg));
3836   match(AddP (AddP reg (LShiftL lreg scale)) off);
3837   // Base, scaled long index and immL32 displacement all present; operand cost 10.
3838   op_cost(10);
3839   format %{"[$reg + $off + $lreg << $scale]" %}
3840   interface(MEMORY_INTER) %{
3841     base($reg);
3842     index($lreg);
3843     scale($scale);
3844     disp($off);
3845   %}
3846 %}
3847 
3848 // Indirect Memory Plus Positive Index Register Plus Offset Operand
3849 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
3850 %{
3851   constraint(ALLOC_IN_RC(ptr_reg));
3852   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
3853   match(AddP (AddP reg (ConvI2L idx)) off);
3854   // Predicate requires a non-negative lower bound on the long type at n->in(2)->in(3) (presumably the ConvI2L node - confirm).
3855   op_cost(10);
3856   format %{"[$reg + $off + $idx]" %}
3857   interface(MEMORY_INTER) %{
3858     base($reg);
3859     index($idx);
3860     scale(0x0);  // no scale
3861     disp($off);
3862   %}
3863 %}
3864 
3865 // Indirect Memory Plus Positive Index Register Times Scale Plus Offset Operand
3866 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3867 %{
3868   constraint(ALLOC_IN_RC(ptr_reg));
3869   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3870   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3871   // Predicate requires a non-negative lower bound on the long type at n->in(2)->in(3)->in(1) (presumably the ConvI2L node - confirm).
3872   op_cost(10);
3873   format %{"[$reg + $off + $idx << $scale]" %}
3874   interface(MEMORY_INTER) %{
3875     base($reg);
3876     index($idx);
3877     scale($scale);
3878     disp($off);
3879   %}
3880 %}
3881 
3882 // Indirect Narrow Oop Plus Offset Operand
3883 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
3884 // so we can't free r12 even when Universe::narrow_oop_base() == NULL.
3885 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3886   predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
3887   constraint(ALLOC_IN_RC(ptr_reg));
3888   match(AddP (DecodeN reg) off);
3889   // The narrow register is used directly as the index, scaled by 8, on top of base R12.
3890   op_cost(10);
3891   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3892   interface(MEMORY_INTER) %{
3893     base(0xc); // R12
3894     index($reg);
3895     scale(0x3);
3896     disp($off);
3897   %}
3898 %}
3899 
3900 // Indirect Memory Operand, narrow-oop base (only when the narrow oop shift is 0)
3901 operand indirectNarrow(rRegN reg)
3902 %{
3903   predicate(Universe::narrow_oop_shift() == 0);
3904   constraint(ALLOC_IN_RC(ptr_reg));
3905   match(DecodeN reg);
3906   // Matches the DecodeN itself and uses the narrow register directly as the base.
3907   format %{ "[$reg]" %}
3908   interface(MEMORY_INTER) %{
3909     base($reg);
3910     index(0x4);  // no index register
3911     scale(0x0);  // no scale
3912     disp(0x0);
3913   %}
3914 %}
3915 
3916 // Indirect Memory Plus Short Offset Operand, narrow-oop base (only when the narrow oop shift is 0)
3917 operand indOffset8Narrow(rRegN reg, immL8 off)
3918 %{
3919   predicate(Universe::narrow_oop_shift() == 0);
3920   constraint(ALLOC_IN_RC(ptr_reg));
3921   match(AddP (DecodeN reg) off);
3922   // Narrow register used directly as the base, plus the immL8 displacement.
3923   format %{ "[$reg + $off (8-bit)]" %}
3924   interface(MEMORY_INTER) %{
3925     base($reg);
3926     index(0x4);  // no index register
3927     scale(0x0);  // no scale
3928     disp($off);
3929   %}
3930 %}
3931 
3932 // Indirect Memory Plus Long Offset Operand, narrow-oop base (only when the narrow oop shift is 0)
3933 operand indOffset32Narrow(rRegN reg, immL32 off)
3934 %{
3935   predicate(Universe::narrow_oop_shift() == 0);
3936   constraint(ALLOC_IN_RC(ptr_reg));
3937   match(AddP (DecodeN reg) off);
3938   // Narrow register used directly as the base, plus the immL32 displacement.
3939   format %{ "[$reg + $off (32-bit)]" %}
3940   interface(MEMORY_INTER) %{
3941     base($reg);
3942     index(0x4);  // no index register
3943     scale(0x0);  // no scale
3944     disp($off);
3945   %}
3946 %}
3947 
3948 // Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop base (only when the narrow oop shift is 0)
3949 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
3950 %{
3951   predicate(Universe::narrow_oop_shift() == 0);
3952   constraint(ALLOC_IN_RC(ptr_reg));
3953   match(AddP (AddP (DecodeN reg) lreg) off);
3954   // Narrow base, unscaled long index and immL32 displacement; operand cost 10.
3955   op_cost(10);
3956   format %{"[$reg + $off + $lreg]" %}
3957   interface(MEMORY_INTER) %{
3958     base($reg);
3959     index($lreg);
3960     scale(0x0);  // no scale
3961     disp($off);
3962   %}
3963 %}
3964 
3965 // Indirect Memory Plus Index Register Operand (no offset), narrow-oop base (only when the narrow oop shift is 0)
3966 operand indIndexNarrow(rRegN reg, rRegL lreg)
3967 %{
3968   predicate(Universe::narrow_oop_shift() == 0);
3969   constraint(ALLOC_IN_RC(ptr_reg));
3970   match(AddP (DecodeN reg) lreg);
3971   // Narrow base and unscaled long index, zero displacement; operand cost 10.
3972   op_cost(10);
3973   format %{"[$reg + $lreg]" %}
3974   interface(MEMORY_INTER) %{
3975     base($reg);
3976     index($lreg);
3977     scale(0x0);  // no scale
3978     disp(0x0);
3979   %}
3980 %}
3981 
3982 // Indirect Memory Plus Index Register Times Scale, narrow-oop base (only when the narrow oop shift is 0)
3983 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
3984 %{
3985   predicate(Universe::narrow_oop_shift() == 0);
3986   constraint(ALLOC_IN_RC(ptr_reg));
3987   match(AddP (DecodeN reg) (LShiftL lreg scale));
3988   // Narrow base and scaled long index, zero displacement; operand cost 10.
3989   op_cost(10);
3990   format %{"[$reg + $lreg << $scale]" %}
3991   interface(MEMORY_INTER) %{
3992     base($reg);
3993     index($lreg);
3994     scale($scale);
3995     disp(0x0);
3996   %}
3997 %}
3998 
3999 // Indirect Memory Plus Index Register Times Scale Plus Offset Operand, narrow-oop base (only when the narrow oop shift is 0)
4000 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
4001 %{
4002   predicate(Universe::narrow_oop_shift() == 0);
4003   constraint(ALLOC_IN_RC(ptr_reg));
4004   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
4005   // Narrow base, scaled long index and immL32 displacement; operand cost 10.
4006   op_cost(10);
4007   format %{"[$reg + $off + $lreg << $scale]" %}
4008   interface(MEMORY_INTER) %{
4009     base($reg);
4010     index($lreg);
4011     scale($scale);
4012     disp($off);
4013   %}
4014 %}
4015 
4016 // Indirect Memory Plus Positive Index Register Plus Offset Operand, narrow-oop base
4017 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
4018 %{
4019   constraint(ALLOC_IN_RC(ptr_reg));
4020   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
4021   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
4022   // Predicate: narrow oop shift is 0 and the long type at n->in(2)->in(3) (presumably the ConvI2L - confirm) has a non-negative lower bound.
4023   op_cost(10);
4024   format %{"[$reg + $off + $idx]" %}
4025   interface(MEMORY_INTER) %{
4026     base($reg);
4027     index($idx);
4028     scale(0x0);  // no scale
4029     disp($off);
4030   %}
4031 %}
4032 
4033 // Indirect Memory Plus Positive Index Register Times Scale Plus Offset Operand, narrow-oop base
4034 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
4035 %{
4036   constraint(ALLOC_IN_RC(ptr_reg));
4037   predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
4038   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
4039   // Predicate: narrow oop shift is 0 and the long type at n->in(2)->in(3)->in(1) (presumably the ConvI2L - confirm) has a non-negative lower bound.
4040   op_cost(10);
4041   format %{"[$reg + $off + $idx << $scale]" %}
4042   interface(MEMORY_INTER) %{
4043     base($reg);
4044     index($idx);
4045     scale($scale);
4046     disp($off);
4047   %}
4048 %}
4049 
4050 //----------Special Memory Operands--------------------------------------------
4051 // Stack Slot Operand - This operand is used for loading and storing temporary
4052 //                      values on the stack where a match requires a value to
4053 //                      flow through memory.
4054 operand stackSlotP(sRegP reg)
4055 %{
4056   constraint(ALLOC_IN_RC(stack_slots)); // pointer stack slot, allocated from class stack_slots
4057   // No match rule because this operand is only generated in matching
4058   // Addressed as RSP plus the slot's stack offset, with no index and no scale.
4059   format %{ "[$reg]" %}
4060   interface(MEMORY_INTER) %{
4061     base(0x4);   // RSP
4062     index(0x4);  // No Index
4063     scale(0x0);  // No Scale
4064     disp($reg);  // Stack Offset
4065   %}
4066 %}
4067 
4068 operand stackSlotI(sRegI reg)
4069 %{
4070   constraint(ALLOC_IN_RC(stack_slots)); // int stack slot, allocated from class stack_slots
4071   // No match rule because this operand is only generated in matching
4072   // Addressed as RSP plus the slot's stack offset, with no index and no scale.
4073   format %{ "[$reg]" %}
4074   interface(MEMORY_INTER) %{
4075     base(0x4);   // RSP
4076     index(0x4);  // No Index
4077     scale(0x0);  // No Scale
4078     disp($reg);  // Stack Offset
4079   %}
4080 %}
4081 
4082 operand stackSlotF(sRegF reg)
4083 %{
4084   constraint(ALLOC_IN_RC(stack_slots)); // float stack slot, allocated from class stack_slots
4085   // No match rule because this operand is only generated in matching
4086   // Addressed as RSP plus the slot's stack offset, with no index and no scale.
4087   format %{ "[$reg]" %}
4088   interface(MEMORY_INTER) %{
4089     base(0x4);   // RSP
4090     index(0x4);  // No Index
4091     scale(0x0);  // No Scale
4092     disp($reg);  // Stack Offset
4093   %}
4094 %}
4095 
4096 operand stackSlotD(sRegD reg)
4097 %{
4098   constraint(ALLOC_IN_RC(stack_slots)); // double stack slot, allocated from class stack_slots
4099   // No match rule because this operand is only generated in matching
4100   // Addressed as RSP plus the slot's stack offset, with no index and no scale.
4101   format %{ "[$reg]" %}
4102   interface(MEMORY_INTER) %{
4103     base(0x4);   // RSP
4104     index(0x4);  // No Index
4105     scale(0x0);  // No Scale
4106     disp($reg);  // Stack Offset
4107   %}
4108 %}
4109 operand stackSlotL(sRegL reg)
4110 %{
4111   constraint(ALLOC_IN_RC(stack_slots)); // long stack slot, allocated from class stack_slots
4112   // No match rule because this operand is only generated in matching
4113   // Addressed as RSP plus the slot's stack offset, with no index and no scale.
4114   format %{ "[$reg]" %}
4115   interface(MEMORY_INTER) %{
4116     base(0x4);   // RSP
4117     index(0x4);  // No Index
4118     scale(0x0);  // No Scale
4119     disp($reg);  // Stack Offset
4120   %}
4121 %}
4122 
4123 //----------Conditional Branch Operands----------------------------------------
4124 // Comparison Op  - This is the operation of the comparison, and is limited to
4125 //                  the following set of codes:
4126 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4127 //
4128 // Other attributes of the comparison, such as unsignedness, are specified
4129 // by the comparison instruction that sets a condition code flags register.
4130 // That result is represented by a flags operand whose subtype is appropriate
4131 // to the unsignedness (etc.) of the comparison.
4132 //
4133 // Later, the instruction which matches both the Comparison Op (a Bool) and
4134 // the flags (produced by the Cmp) specifies the coding of the comparison op
4135 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4136 
4137 // Comparison Code
4138 operand cmpOp()
4139 %{
4140   match(Bool);
4141   // Each entry pairs a condition-code encoding with its mnemonic suffix; ordering tests use l/ge/le/g.
4142   format %{ "" %}
4143   interface(COND_INTER) %{
4144     equal(0x4, "e");
4145     not_equal(0x5, "ne");
4146     less(0xC, "l");
4147     greater_equal(0xD, "ge");
4148     less_equal(0xE, "le");
4149     greater(0xF, "g");
4150     overflow(0x0, "o");
4151     no_overflow(0x1, "no");
4152   %}
4153 %}
4154 
4155 // Comparison Code, unsigned compare.  Used by FP also, with
4156 // C2 (unordered) turned into GT or LT already.  The other bits
4157 // C0 and C3 are turned into Carry & Zero flags.
4158 operand cmpOpU()
4159 %{
4160   match(Bool);
4161   // Same shape as cmpOp, but ordering tests use the b/nb/be/nbe encodings.
4162   format %{ "" %}
4163   interface(COND_INTER) %{
4164     equal(0x4, "e");
4165     not_equal(0x5, "ne");
4166     less(0x2, "b");
4167     greater_equal(0x3, "nb");
4168     less_equal(0x6, "be");
4169     greater(0x7, "nbe");
4170     overflow(0x0, "o");
4171     no_overflow(0x1, "no");
4172   %}
4173 %}
4174 
4175 
4176 // Floating comparisons that don't require any fixup for the unordered case
4177 operand cmpOpUCF() %{
4178   match(Bool); // the predicate below restricts this operand to lt, ge, le and gt tests
4179   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4180             n->as_Bool()->_test._test == BoolTest::ge ||
4181             n->as_Bool()->_test._test == BoolTest::le ||
4182             n->as_Bool()->_test._test == BoolTest::gt);
4183   format %{ "" %}
4184   interface(COND_INTER) %{ // same encoding table as cmpOpU
4185     equal(0x4, "e");
4186     not_equal(0x5, "ne");
4187     less(0x2, "b");
4188     greater_equal(0x3, "nb");
4189     less_equal(0x6, "be");
4190     greater(0x7, "nbe");
4191     overflow(0x0, "o");
4192     no_overflow(0x1, "no");
4193   %}
4194 %}
4195 
4196 
4197 // Floating comparisons that can be fixed up with extra conditional jumps
4198 operand cmpOpUCF2() %{
4199   match(Bool); // the predicate below restricts this operand to ne and eq tests
4200   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4201             n->as_Bool()->_test._test == BoolTest::eq);
4202   format %{ "" %}
4203   interface(COND_INTER) %{ // same encoding table as cmpOpU
4204     equal(0x4, "e");
4205     not_equal(0x5, "ne");
4206     less(0x2, "b");
4207     greater_equal(0x3, "nb");
4208     less_equal(0x6, "be");
4209     greater(0x7, "nbe");
4210     overflow(0x0, "o");
4211     no_overflow(0x1, "no");
4212   %}
4213 %}
4214 
4215 // Operands for bound floating point register arguments: each rxmmN matches VecX and is pinned to class xmmN_reg
4216 operand rxmm0() %{
4217   constraint(ALLOC_IN_RC(xmm0_reg));  match(VecX);
4218   predicate((UseSSE > 0) && (UseAVX<= 2));  format%{%}  interface(REG_INTER); // rxmm0..rxmm15 share this predicate: UseSSE > 0 and UseAVX <= 2
4219 %}
4220 operand rxmm1() %{
4221   constraint(ALLOC_IN_RC(xmm1_reg));  match(VecX);
4222   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4223 %}
4224 operand rxmm2() %{
4225   constraint(ALLOC_IN_RC(xmm2_reg));  match(VecX);
4226   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4227 %}
4228 operand rxmm3() %{
4229   constraint(ALLOC_IN_RC(xmm3_reg));  match(VecX);
4230   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4231 %}
4232 operand rxmm4() %{
4233   constraint(ALLOC_IN_RC(xmm4_reg));  match(VecX);
4234   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4235 %}
4236 operand rxmm5() %{
4237   constraint(ALLOC_IN_RC(xmm5_reg));  match(VecX);
4238   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4239 %}
4240 operand rxmm6() %{
4241   constraint(ALLOC_IN_RC(xmm6_reg));  match(VecX);
4242   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4243 %}
4244 operand rxmm7() %{
4245   constraint(ALLOC_IN_RC(xmm7_reg));  match(VecX);
4246   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4247 %}
4248 operand rxmm8() %{
4249   constraint(ALLOC_IN_RC(xmm8_reg));  match(VecX);
4250   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4251 %}
4252 operand rxmm9() %{
4253   constraint(ALLOC_IN_RC(xmm9_reg));  match(VecX);
4254   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4255 %}
4256 operand rxmm10() %{
4257   constraint(ALLOC_IN_RC(xmm10_reg));  match(VecX);
4258   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4259 %}
4260 operand rxmm11() %{
4261   constraint(ALLOC_IN_RC(xmm11_reg));  match(VecX);
4262   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4263 %}
4264 operand rxmm12() %{
4265   constraint(ALLOC_IN_RC(xmm12_reg));  match(VecX);
4266   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4267 %}
4268 operand rxmm13() %{
4269   constraint(ALLOC_IN_RC(xmm13_reg));  match(VecX);
4270   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4271 %}
4272 operand rxmm14() %{
4273   constraint(ALLOC_IN_RC(xmm14_reg));  match(VecX);
4274   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4275 %}
4276 operand rxmm15() %{
4277   constraint(ALLOC_IN_RC(xmm15_reg));  match(VecX);
4278   predicate((UseSSE > 0) && (UseAVX <= 2));  format%{%}  interface(REG_INTER);
4279 %}
4280 operand rxmm16() %{
4281   constraint(ALLOC_IN_RC(xmm16_reg));  match(VecX);
4282   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER); // rxmm16..rxmm31 share this predicate: only when UseAVX == 3
4283 %}
4284 operand rxmm17() %{
4285   constraint(ALLOC_IN_RC(xmm17_reg));  match(VecX);
4286   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4287 %}
4288 operand rxmm18() %{
4289   constraint(ALLOC_IN_RC(xmm18_reg));  match(VecX);
4290   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4291 %}
4292 operand rxmm19() %{
4293   constraint(ALLOC_IN_RC(xmm19_reg));  match(VecX);
4294   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4295 %}
4296 operand rxmm20() %{
4297   constraint(ALLOC_IN_RC(xmm20_reg));  match(VecX);
4298   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4299 %}
4300 operand rxmm21() %{
4301   constraint(ALLOC_IN_RC(xmm21_reg));  match(VecX);
4302   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4303 %}
4304 operand rxmm22() %{
4305   constraint(ALLOC_IN_RC(xmm22_reg));  match(VecX);
4306   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4307 %}
4308 operand rxmm23() %{
4309   constraint(ALLOC_IN_RC(xmm23_reg));  match(VecX);
4310   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4311 %}
4312 operand rxmm24() %{
4313   constraint(ALLOC_IN_RC(xmm24_reg));  match(VecX);
4314   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4315 %}
4316 operand rxmm25() %{
4317   constraint(ALLOC_IN_RC(xmm25_reg));  match(VecX);
4318   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4319 %}
4320 operand rxmm26() %{
4321   constraint(ALLOC_IN_RC(xmm26_reg));  match(VecX);
4322   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4323 %}
4324 operand rxmm27() %{
4325   constraint(ALLOC_IN_RC(xmm27_reg));  match(VecX);
4326   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4327 %}
4328 operand rxmm28() %{
4329   constraint(ALLOC_IN_RC(xmm28_reg));  match(VecX);
4330   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4331 %}
4332 operand rxmm29() %{
4333   constraint(ALLOC_IN_RC(xmm29_reg));  match(VecX);
4334   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4335 %}
4336 operand rxmm30() %{
4337   constraint(ALLOC_IN_RC(xmm30_reg));  match(VecX);
4338   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4339 %}
4340 operand rxmm31() %{
4341   constraint(ALLOC_IN_RC(xmm31_reg));  match(VecX);
4342   predicate(UseAVX == 3);  format%{%}  interface(REG_INTER);
4343 %}
4344 
4345 //----------OPERAND CLASSES----------------------------------------------------
4346 // Operand Classes are groups of operands that are used to simplify
4347 // instruction definitions by not requiring the AD writer to specify separate
4348 // instructions for every form of operand when the instruction accepts
4349 // multiple operand types with the same basic encoding and format.  The classic
4350 // case of this is memory operands.
4351 
4352 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, // plain-pointer base forms
4353                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4354                indCompressedOopOffset,                                      // R12-based compressed oop form
4355                indirectNarrow, indOffset8Narrow, indOffset32Narrow,         // narrow-oop base forms
4356                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4357                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4358 
4359 //----------PIPELINE-----------------------------------------------------------
4360 // Rules which define the behavior of the target architecture's pipeline.
4361 pipeline %{
4362 
4363 //----------ATTRIBUTES---------------------------------------------------------
4364 attributes %{
4365   variable_size_instructions;        // Instructions are variable size
4366   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4367   instruction_unit_size = 1;         // Instruction size unit is 1 byte
4368   instruction_fetch_unit_size = 16;  // The processor fetches one line
4369   instruction_fetch_units = 1;       // of 16 bytes
4370 
4371   // List of nop instructions
4372   nops( MachNop );
4373 %}
4374 
4375 //----------RESOURCES----------------------------------------------------------
4376 // Resources are the functional units available to the machine
4377 
4378 // Generic P2/P3 pipeline
4379 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4380 // 3 instructions decoded per cycle.
4381 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below - confirm), 1 branch, 1 FPU,
4382 // 3 ALU op, only ALU0 handles mul instructions.
4383 resources( D0, D1, D2, DECODE = D0 | D1 | D2,     // DECODE = any of the three decoders
4384            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,  // MEM = any of the three memory units
4385            BR, FPU,
4386            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2); // ALU = any of the three ALUs
4387 
4388 //----------PIPELINE DESCRIPTION-----------------------------------------------
4389 // Pipeline Description specifies the stages in the machine's pipeline
4390 
4391 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes
4392 pipe_desc(S0, S1, S2, S3, S4, S5);
4393 
4394 //----------PIPELINE CLASSES---------------------------------------------------
4395 // Pipeline Classes describe the stages in which input and output are
4396 // referenced by the hardware pipeline.
4397 
4398 // Naming convention: ialu or fpu
4399 // Then: _reg
4400 // Then: _reg if there is a 2nd register
4401 // Then: _long if it's a pair of instructions implementing a long
4402 // Then: _fat if it requires the big decoder
4403 //   Or: _mem if it requires the big decoder and a memory unit.
4404 
4405 // Integer ALU reg operation (dst is both read and written)
4406 pipe_class ialu_reg(rRegI dst)
4407 %{
4408     single_instruction;
4409     dst    : S4(write); // result written in stage S4
4410     dst    : S3(read);  // operand read in stage S3
4411     DECODE : S0;        // any decoder
4412     ALU    : S3;        // any alu
4413 %}
4414 
4415 // Long ALU reg operation, counted as two instructions
4416 pipe_class ialu_reg_long(rRegL dst)
4417 %{
4418     instruction_count(2);
4419     dst    : S4(write); // result written in stage S4
4420     dst    : S3(read);  // operand read in stage S3
4421     DECODE : S0(2);     // any 2 decoders
4422     ALU    : S3(2);     // any 2 alus
4423 %}
4424 
4425 // Integer ALU reg operation using big decoder
4426 pipe_class ialu_reg_fat(rRegI dst)
4427 %{
4428     single_instruction;
4429     dst    : S4(write); // result written in stage S4
4430     dst    : S3(read);  // operand read in stage S3
4431     D0     : S0;        // big decoder only
4432     ALU    : S3;        // any alu
4433 %}
4434 
4435 // Long ALU reg operation using big decoder, counted as two instructions
4436 pipe_class ialu_reg_long_fat(rRegL dst)
4437 %{
4438     instruction_count(2);
4439     dst    : S4(write); // result written in stage S4
4440     dst    : S3(read);  // operand read in stage S3
4441     D0     : S0(2);     // big decoder only; twice
4442     ALU    : S3(2);     // any 2 alus
4443 %}
4444 
4445 // Integer ALU reg-reg operation
4446 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
4447 %{
4448     single_instruction;
4449     dst    : S4(write); // result written in stage S4
4450     src    : S3(read);  // source read in stage S3
4451     DECODE : S0;        // any decoder
4452     ALU    : S3;        // any alu
4453 %}
4454 
4455 // Long ALU reg-reg operation, counted as two instructions
4456 pipe_class ialu_reg_reg_long(rRegL dst, rRegL src)
4457 %{
4458     instruction_count(2);
4459     dst    : S4(write); // result written in stage S4
4460     src    : S3(read);  // source read in stage S3
4461     DECODE : S0(2);     // any 2 decoders
4462     ALU    : S3(2);     // any 2 alus
4463 %}
4464 
4465 // Integer ALU operation using big decoder (NOTE(review): named reg_reg, but src is declared as a memory operand)
4466 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
4467 %{
4468     single_instruction;
4469     dst    : S4(write); // result written in stage S4
4470     src    : S3(read);  // source read in stage S3
4471     D0     : S0;        // big decoder only
4472     ALU    : S3;        // any alu
4473 %}
4474 
4475 // Long ALU reg-reg operation using big decoder, counted as two instructions
4476 pipe_class ialu_reg_reg_long_fat(rRegL dst, rRegL src)
4477 %{
4478     instruction_count(2);
4479     dst    : S4(write); // result written in stage S4
4480     src    : S3(read);  // source read in stage S3
4481     D0     : S0(2);     // big decoder only; twice
4482     ALU    : S3(2);     // any 2 alus
4483 %}
4484 
4485 // Integer ALU reg-mem operation
4486 pipe_class ialu_reg_mem(rRegI dst, memory mem)
4487 %{
4488     single_instruction;
4489     dst    : S5(write); // result written one stage later (S5) than the reg-reg classes
4490     mem    : S3(read);  // memory operand read in stage S3
4491     D0     : S0;        // big decoder only
4492     ALU    : S4;        // any alu
4493     MEM    : S3;        // any mem
4494 %}
4495 
4496 // Integer mem operation (prefetch); uses no ALU and writes no register
4497 pipe_class ialu_mem(memory mem)
4498 %{
4499     single_instruction;
4500     mem    : S3(read);  // memory operand read in stage S3
4501     D0     : S0;        // big decoder only
4502     MEM    : S3;        // any mem
4503 %}
4504 
4505 // Integer Store to Memory (register source)
4506 pipe_class ialu_mem_reg(memory mem, rRegI src)
4507 %{
4508     single_instruction;
4509     mem    : S3(read);  // memory operand read in stage S3
4510     src    : S5(read);  // source register read in stage S5
4511     D0     : S0;        // big decoder only
4512     ALU    : S4;        // any alu
4513     MEM    : S3;        // any mem
4514 %}
4515 
4516 // // Long Store to Memory
4517 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
4518 // %{
4519 //     instruction_count(2);
4520 //     mem    : S3(read);
4521 //     src    : S5(read);
4522 //     D0     : S0(2);          // big decoder only; twice
4523 //     ALU    : S4(2);     // any 2 alus
4524 //     MEM    : S3(2);  // Both mems
4525 // %}
4526 
4527 // Integer Store to Memory with no register source (presumably an immediate, per the class name)
4528 pipe_class ialu_mem_imm(memory mem)
4529 %{
4530     single_instruction;
4531     mem    : S3(read);  // memory operand read in stage S3
4532     D0     : S0;        // big decoder only
4533     ALU    : S4;        // any alu
4534     MEM    : S3;        // any mem
4535 %}
4536 
4537 // Integer ALU0 reg-reg operation
4538 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
4539 %{
4540     single_instruction;
4541     dst    : S4(write); // result written in stage S4
4542     src    : S3(read);  // source read in stage S3
4543     D0     : S0;        // Big decoder only
4544     ALU0   : S3;        // only alu0
4545 %}
4546 
4547 // Integer ALU0 reg-mem operation
4548 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
4549 %{
4550     single_instruction;
4551     dst    : S5(write); // result written in stage S5
4552     mem    : S3(read);  // memory operand read in stage S3
4553     D0     : S0;        // big decoder only
4554     ALU0   : S4;        // ALU0 only
4555     MEM    : S3;        // any mem
4556 %}
4557 
4558 // Integer ALU reg-reg operation that writes the flags register
4559 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
4560 %{
4561     single_instruction;
4562     cr     : S4(write); // flags written in stage S4
4563     src1   : S3(read);
4564     src2   : S3(read);
4565     DECODE : S0;        // any decoder
4566     ALU    : S3;        // any alu
4567 %}
4568 
4569 // Integer ALU reg-imm operation that writes the flags register
4570 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
4571 %{
4572     single_instruction;
4573     cr     : S4(write); // flags written in stage S4
4574     src1   : S3(read);
4575     DECODE : S0;        // any decoder
4576     ALU    : S3;        // any alu
4577 %}
4578 
4579 // Integer ALU reg-mem operation that writes the flags register
4580 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
4581 %{
4582     single_instruction;
4583     cr     : S4(write); // flags written in stage S4
4584     src1   : S3(read);
4585     src2   : S3(read);  // memory operand read in stage S3
4586     D0     : S0;        // big decoder only
4587     ALU    : S4;        // any alu
4588     MEM    : S3;        // any mem
4589 %}
4590 
4591 // Four-instruction sequence over three int registers (presumably a compare-less-than conditional sequence, per the name - confirm)
4592 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
4593 %{
4594     instruction_count(4);
4595     y      : S4(read);  // all three operands are declared as reads; no write is listed
4596     q      : S3(read);
4597     p      : S3(read);
4598     DECODE : S0(4);     // any decoder
4599 %}
4600 
4601 // Conditional move reg-reg (reads the flags register)
4602 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
4603 %{
4604     single_instruction;
4605     dst    : S4(write); // result written in stage S4
4606     src    : S3(read);
4607     cr     : S3(read);  // flags read in stage S3
4608     DECODE : S0;        // any decoder
4609 %}
4610 
4611 // Conditional move reg-mem (reads the flags register)
4612 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
4613 %{
4614     single_instruction;
4615     dst    : S4(write); // result written in stage S4
4616     src    : S3(read);  // memory operand read in stage S3
4617     cr     : S3(read);  // flags read in stage S3
4618     DECODE : S0;        // any decoder
4619     MEM    : S3;        // any mem
4620 %}
4621 
4622 // Conditional move reg-reg long
4623 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
4624 %{
4625     single_instruction; // NOTE(review): declared as a single instruction yet reserves 2 decoders below - confirm
4626     dst    : S4(write);
4627     src    : S3(read);
4628     cr     : S3(read);  // flags read in stage S3
4629     DECODE : S0(2);     // any 2 decoders
4630 %}
4631 
4632 // XXX
4633 // // Conditional move double reg-reg
4634 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
4635 // %{
4636 //     single_instruction;
4637 //     dst    : S4(write);
4638 //     src    : S3(read);
4639 //     cr     : S3(read);
4640 //     DECODE : S0;     // any decoder
4641 // %}
4642 
4643 // Float single-register operation (the only operand, dst, is declared as read)
4644 pipe_class fpu_reg(regD dst)
4645 %{
4646     instruction_count(2);
4647     dst    : S3(read);  // operand is consumed in stage S3
4648     DECODE : S0(2);     // any 2 decoders
4649     FPU    : S3;        // FPU resource is used in stage S3
4650 %}
4651 
4652 // Float reg-reg operation
4653 pipe_class fpu_reg_reg(regD dst, regD src)
4654 %{
4655     instruction_count(2);
4656     dst    : S4(write); // result is produced in stage S4
4657     src    : S3(read);  // source is consumed in stage S3
4658     DECODE : S0(2);     // any 2 decoders
4659     FPU    : S3;        // FPU resource is used in stage S3
4660 %}
4661 
4662 // Float reg-reg operation
4663 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
4664 %{
4665     instruction_count(3);
4666     dst    : S4(write); // result is produced in stage S4
4667     src1   : S3(read);  // both sources are consumed in stage S3
4668     src2   : S3(read);
4669     DECODE : S0(3);     // any 3 decoders
4670     FPU    : S3(2);     // FPU resource is used in stage S3 with a count of 2
4671 %}
4672 
4673 // Float reg-reg operation
4674 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
4675 %{
4676     instruction_count(4);
4677     dst    : S4(write); // result is produced in stage S4
4678     src1   : S3(read);  // all three sources are consumed in stage S3
4679     src2   : S3(read);
4680     src3   : S3(read);
4681     DECODE : S0(4);     // any 4 decoders
4682     FPU    : S3(2);     // FPU resource is used in stage S3 with a count of 2
4683 %}
4684 
4685 // Float reg-mem-reg-reg operation (src1 is a memory operand)
4686 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
4687 %{
4688     instruction_count(4);
4689     dst    : S4(write); // result is produced in stage S4
4690     src1   : S3(read);  // memory source is consumed in stage S3
4691     src2   : S3(read);
4692     src3   : S3(read);
4693     DECODE : S1(3);     // any 3 decoders
4694     D0     : S0;        // Big decoder only
4695     FPU    : S3(2);     // FPU resource is used in stage S3 with a count of 2
4696     MEM    : S3;        // any mem
4697 %}
4698 
4699 // Float reg-mem operation
4700 pipe_class fpu_reg_mem(regD dst, memory mem)
4701 %{
4702     instruction_count(2);
4703     dst    : S5(write); // result is produced in stage S5
4704     mem    : S3(read);  // memory operand is consumed in stage S3
4705     D0     : S0;        // big decoder only
4706     DECODE : S1;        // any decoder for FPU POP
4707     FPU    : S4;        // FPU resource is used in stage S4
4708     MEM    : S3;        // any mem
4709 %}
4710 
4711 // Float reg-mem operation
4712 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
4713 %{
4714     instruction_count(3);
4715     dst    : S5(write); // result is produced in stage S5
4716     src1   : S3(read);  // register source is consumed in stage S3
4717     mem    : S3(read);  // memory operand is consumed in stage S3
4718     D0     : S0;        // big decoder only
4719     DECODE : S1(2);     // any decoder for FPU POP
4720     FPU    : S4;        // FPU resource is used in stage S4
4721     MEM    : S3;        // any mem
4722 %}
4723 
4724 // Float mem-reg operation
4725 pipe_class fpu_mem_reg(memory mem, regD src)
4726 %{
4727     instruction_count(2);
4728     src    : S5(read);  // register source is declared as read in stage S5
4729     mem    : S3(read);  // memory operand is declared as read in stage S3
4730     DECODE : S0;        // any decoder for FPU PUSH
4731     D0     : S1;        // big decoder only
4732     FPU    : S4;        // FPU resource is used in stage S4
4733     MEM    : S3;        // any mem
4734 %}
4735 
4736 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
4737 %{
4738     instruction_count(3); // float mem-reg-reg operation: three instructions
4739     src1   : S3(read);  // every operand is declared as read in stage S3
4740     src2   : S3(read);
4741     mem    : S3(read);
4742     DECODE : S0(2);     // any decoder for FPU PUSH
4743     D0     : S1;        // big decoder only
4744     FPU    : S4;        // FPU resource is used in stage S4
4745     MEM    : S3;        // any mem
4746 %}
4747 
4748 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
4749 %{
4750     instruction_count(3); // float mem-reg-mem operation: three instructions
4751     src1   : S3(read);  // both sources are declared as read in stage S3
4752     src2   : S3(read);
4753     mem    : S4(read);  // first memory operand is declared as read in stage S4
4754     DECODE : S0;        // any decoder for FPU PUSH
4755     D0     : S0(2);     // big decoder only
4756     FPU    : S4;        // FPU resource is used in stage S4
4757     MEM    : S3(2);     // any mem
4758 %}
4759 
4760 pipe_class fpu_mem_mem(memory dst, memory src1)
4761 %{
4762     instruction_count(2); // float mem-mem operation: two instructions, no FPU resource listed
4763     src1   : S3(read);  // source memory operand is declared as read in stage S3
4764     dst    : S4(read);  // destination memory operand is declared as read in stage S4
4765     D0     : S0(2);     // big decoder only
4766     MEM    : S3(2);     // any mem
4767 %}
4768 
4769 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
4770 %{
4771     instruction_count(3); // float mem-mem-mem operation: three instructions
4772     src1   : S3(read);  // both source memory operands are declared as read in stage S3
4773     src2   : S3(read);
4774     dst    : S4(read);  // destination memory operand is declared as read in stage S4
4775     D0     : S0(3);     // big decoder only
4776     FPU    : S4;        // FPU resource is used in stage S4
4777     MEM    : S3(3);     // any mem
4778 %}
4779 
4780 pipe_class fpu_mem_reg_con(memory mem, regD src1)
4781 %{
4782     instruction_count(3); // float mem-reg operation with a constant: three instructions
4783     src1   : S4(read);  // both operands are declared as read in stage S4
4784     mem    : S4(read);
4785     DECODE : S0;        // any decoder for FPU PUSH
4786     D0     : S0(2);     // big decoder only
4787     FPU    : S4;        // FPU resource is used in stage S4
4788     MEM    : S3(2);     // any mem
4789 %}
4790 
4791 // Float load constant
4792 pipe_class fpu_reg_con(regD dst)
4793 %{
4794     instruction_count(2);
4795     dst    : S5(write); // loaded constant is available in stage S5
4796     D0     : S0;        // big decoder only for the load
4797     DECODE : S1;        // any decoder for FPU POP
4798     FPU    : S4;        // FPU resource is used in stage S4
4799     MEM    : S3;        // any mem
4800 %}
4801 
4802 // Float load constant combined with a register source operand
4803 pipe_class fpu_reg_reg_con(regD dst, regD src)
4804 %{
4805     instruction_count(3);
4806     dst    : S5(write); // result is produced in stage S5
4807     src    : S3(read);  // register source is consumed in stage S3
4808     D0     : S0;        // big decoder only for the load
4809     DECODE : S1(2);     // any decoder for FPU POP
4810     FPU    : S4;        // FPU resource is used in stage S4
4811     MEM    : S3;        // any mem
4812 %}
4813 
4814 // UnConditional branch
4815 pipe_class pipe_jmp(label labl)
4816 %{
4817     single_instruction;
4818     BR   : S3;          // branch resource is used in stage S3; no operands are read
4819 %}
4820 
4821 // Conditional branch
4822 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
4823 %{
4824     single_instruction;
4825     cr    : S1(read);   // condition flags are consumed early, in stage S1
4826     BR    : S3;         // branch resource is used in stage S3
4827 %}
4828 
4829 // Allocation idiom
4830 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
4831 %{
4832     instruction_count(1); force_serialization; // one instruction, scheduled as a serialization point
4833     fixed_latency(6);     // latency is pinned to 6 rather than derived from the stages below
4834     heap_ptr : S3(read);  // pointer operand is consumed in stage S3
4835     DECODE   : S0(3);
4836     D0       : S2;
4837     MEM      : S3;
4838     ALU      : S3(2);
4839     dst      : S5(write); // result is produced in stage S5
4840     BR       : S5;        // branch resource is also used in stage S5
4841 %}
4842 
4843 // Generic big/slow expanded idiom
4844 pipe_class pipe_slow()
4845 %{
4846     instruction_count(10); multiple_bundles; force_serialization; // modelled as 10 instructions spanning several bundles
4847     fixed_latency(100);  // deliberately large fixed latency for the generic slow case
4848     D0  : S0(2);
4849     MEM : S3(2);
4850 %}
4851 
4852 // The real do-nothing guy
4853 pipe_class empty()
4854 %{
4855     instruction_count(0); // no instructions and no resources are used
4856 %}
4857 
4858 // Define the class for the Nop node
4859 define
4860 %{
4861    MachNop = empty;      // Nop nodes are scheduled with the zero-instruction 'empty' pipe class
4862 %}
4863 
4864 %}
4865 
4866 //----------INSTRUCTIONS-------------------------------------------------------
4867 //
4868 // match      -- States which machine-independent subtree may be replaced
4869 //               by this instruction.
4870 // ins_cost   -- The estimated cost of this instruction is used by instruction
4871 //               selection to identify a minimum cost tree of machine
4872 //               instructions that matches a tree of machine-independent
4873 //               instructions.
4874 // format     -- A string providing the disassembly for this instruction.
4875 //               The value of an instruction's operand may be inserted
4876 //               by referring to it with a '$' prefix.
4877 // opcode     -- Three instruction opcodes may be provided.  These are referred
4878 //               to within an encode class as $primary, $secondary, and $tertiary
4879 //               respectively.  The primary opcode is commonly used to
4880 //               indicate the type of machine instruction, while secondary
4881 //               and tertiary are often used for prefix options or addressing
4882 //               modes.
4883 // ins_encode -- A list of encode classes with parameters. The encode class
4884 //               name must have been defined in an 'enc_class' specification
4885 //               in the encode section of the architecture description.
4886 
4887 
4888 //----------Load/Store/Move Instructions---------------------------------------
4889 //----------Load Instructions--------------------------------------------------
4890 
4891 // Load Byte (8 bit signed)
4892 instruct loadB(rRegI dst, memory mem)
4893 %{
4894   match(Set dst (LoadB mem)); // ideal LoadB into an int register
4895 
4896   ins_cost(125);
4897   format %{ "movsbl  $dst, $mem\t# byte" %}
4898 
4899   ins_encode %{
4900     __ movsbl($dst$$Register, $mem$$Address); // sign-extending byte load into 32 bits
4901   %}
4902 
4903   ins_pipe(ialu_reg_mem);
4904 %}
4905 
4906 // Load Byte (8 bit signed) into Long Register
4907 instruct loadB2L(rRegL dst, memory mem)
4908 %{
4909   match(Set dst (ConvI2L (LoadB mem))); // folds the int->long conversion into the load
4910 
4911   ins_cost(125);
4912   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
4913 
4914   ins_encode %{
4915     __ movsbq($dst$$Register, $mem$$Address); // sign-extending byte load into 64 bits
4916   %}
4917 
4918   ins_pipe(ialu_reg_mem);
4919 %}
4920 
4921 // Load Unsigned Byte (8 bit UNsigned)
4922 instruct loadUB(rRegI dst, memory mem)
4923 %{
4924   match(Set dst (LoadUB mem)); // ideal LoadUB into an int register
4925 
4926   ins_cost(125);
4927   format %{ "movzbl  $dst, $mem\t# ubyte" %}
4928 
4929   ins_encode %{
4930     __ movzbl($dst$$Register, $mem$$Address); // zero-extending byte load into 32 bits
4931   %}
4932 
4933   ins_pipe(ialu_reg_mem);
4934 %}
4935 
4936 // Load Unsigned Byte (8 bit UNsigned) into Long Register
4937 instruct loadUB2L(rRegL dst, memory mem)
4938 %{
4939   match(Set dst (ConvI2L (LoadUB mem))); // folds the int->long conversion into the load
4940 
4941   ins_cost(125);
4942   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
4943 
4944   ins_encode %{
4945     __ movzbq($dst$$Register, $mem$$Address); // zero-extending byte load into 64 bits
4946   %}
4947 
4948   ins_pipe(ialu_reg_mem);
4949 %}
4950 
4951 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
4952 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
4953   match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); // load, mask and widen in one rule
4954   effect(KILL cr); // the andl below modifies the flags
4955 
4956   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
4957             "andl    $dst, right_n_bits($mask, 8)" %}
4958   ins_encode %{
4959     Register Rdst = $dst$$Register;
4960     __ movzbq(Rdst, $mem$$Address); // zero-extending byte load
4961     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // mask is pre-trimmed with right_n_bits(8); presumably the low 8 bits -- confirm
4962   %}
4963   ins_pipe(ialu_reg_mem);
4964 %}
4965 
4966 // Load Short (16 bit signed)
4967 instruct loadS(rRegI dst, memory mem)
4968 %{
4969   match(Set dst (LoadS mem)); // ideal LoadS into an int register
4970 
4971   ins_cost(125);
4972   format %{ "movswl $dst, $mem\t# short" %}
4973 
4974   ins_encode %{
4975     __ movswl($dst$$Register, $mem$$Address); // sign-extending 16-bit load into 32 bits
4976   %}
4977 
4978   ins_pipe(ialu_reg_mem);
4979 %}
4980 
4981 // Load Short (16 bit signed) to Byte (8 bit signed)
4982 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
4983   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); // (short << 24) >> 24 keeps only the sign-extended low byte
4984 
4985   ins_cost(125);
4986   format %{ "movsbl $dst, $mem\t# short -> byte" %}
4987   ins_encode %{
4988     __ movsbl($dst$$Register, $mem$$Address); // single sign-extending byte load from the same address replaces load + two shifts
4989   %}
4990   ins_pipe(ialu_reg_mem);
4991 %}
4992 
4993 // Load Short (16 bit signed) into Long Register
4994 instruct loadS2L(rRegL dst, memory mem)
4995 %{
4996   match(Set dst (ConvI2L (LoadS mem))); // folds the int->long conversion into the load
4997 
4998   ins_cost(125);
4999   format %{ "movswq $dst, $mem\t# short -> long" %}
5000 
5001   ins_encode %{
5002     __ movswq($dst$$Register, $mem$$Address); // sign-extending 16-bit load into 64 bits
5003   %}
5004 
5005   ins_pipe(ialu_reg_mem);
5006 %}
5007 
5008 // Load Unsigned Short/Char (16 bit UNsigned)
5009 instruct loadUS(rRegI dst, memory mem)
5010 %{
5011   match(Set dst (LoadUS mem)); // ideal LoadUS into an int register
5012 
5013   ins_cost(125);
5014   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
5015 
5016   ins_encode %{
5017     __ movzwl($dst$$Register, $mem$$Address); // zero-extending 16-bit load into 32 bits
5018   %}
5019 
5020   ins_pipe(ialu_reg_mem);
5021 %}
5022 
5023 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5024 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5025   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); // (char << 24) >> 24 keeps only the sign-extended low byte
5026 
5027   ins_cost(125);
5028   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
5029   ins_encode %{
5030     __ movsbl($dst$$Register, $mem$$Address); // single sign-extending byte load from the same address replaces load + two shifts
5031   %}
5032   ins_pipe(ialu_reg_mem);
5033 %}
5034 
5035 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5036 instruct loadUS2L(rRegL dst, memory mem)
5037 %{
5038   match(Set dst (ConvI2L (LoadUS mem))); // folds the int->long conversion into the load
5039 
5040   ins_cost(125);
5041   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
5042 
5043   ins_encode %{
5044     __ movzwq($dst$$Register, $mem$$Address); // zero-extending 16-bit load into 64 bits
5045   %}
5046 
5047   ins_pipe(ialu_reg_mem);
5048 %}
5049 
5050 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5051 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5052   match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); // (char & 0xFF) widened to long; no ins_cost is given here
5053 
5054   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
5055   ins_encode %{
5056     __ movzbq($dst$$Register, $mem$$Address); // a zero-extending byte load stands in for load + and
5057   %}
5058   ins_pipe(ialu_reg_mem);
5059 %}
5060 
5061 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
5062 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
5063   match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); // load, mask and widen in one rule
5064   effect(KILL cr); // the andl below modifies the flags
5065 
5066   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5067             "andl    $dst, right_n_bits($mask, 16)" %}
5068   ins_encode %{
5069     Register Rdst = $dst$$Register;
5070     __ movzwq(Rdst, $mem$$Address); // zero-extending 16-bit load
5071     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // mask is pre-trimmed with right_n_bits(16); presumably the low 16 bits -- confirm
5072   %}
5073   ins_pipe(ialu_reg_mem);
5074 %}
5075 
5076 // Load Integer
5077 instruct loadI(rRegI dst, memory mem)
5078 %{
5079   match(Set dst (LoadI mem)); // ideal LoadI into an int register
5080 
5081   ins_cost(125);
5082   format %{ "movl    $dst, $mem\t# int" %}
5083 
5084   ins_encode %{
5085     __ movl($dst$$Register, $mem$$Address); // plain 32-bit load
5086   %}
5087 
5088   ins_pipe(ialu_reg_mem);
5089 %}
5090 
5091 // Load Integer (32 bit signed) to Byte (8 bit signed)
5092 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5093   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); // (int << 24) >> 24 keeps only the sign-extended low byte
5094 
5095   ins_cost(125);
5096   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
5097   ins_encode %{
5098     __ movsbl($dst$$Register, $mem$$Address); // single sign-extending byte load from the same address replaces load + two shifts
5099   %}
5100   ins_pipe(ialu_reg_mem);
5101 %}
5102 
5103 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5104 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5105   match(Set dst (AndI (LoadI mem) mask)); // int & 0xFF
5106 
5107   ins_cost(125);
5108   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
5109   ins_encode %{
5110     __ movzbl($dst$$Register, $mem$$Address); // a zero-extending byte load stands in for load + and
5111   %}
5112   ins_pipe(ialu_reg_mem);
5113 %}
5114 
5115 // Load Integer (32 bit signed) to Short (16 bit signed)
5116 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5117   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); // (int << 16) >> 16 keeps only the sign-extended low 16 bits
5118 
5119   ins_cost(125);
5120   format %{ "movswl  $dst, $mem\t# int -> short" %}
5121   ins_encode %{
5122     __ movswl($dst$$Register, $mem$$Address); // single sign-extending 16-bit load replaces load + two shifts
5123   %}
5124   ins_pipe(ialu_reg_mem);
5125 %}
5126 
5127 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5128 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5129   match(Set dst (AndI (LoadI mem) mask)); // int & 0xFFFF
5130 
5131   ins_cost(125);
5132   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
5133   ins_encode %{
5134     __ movzwl($dst$$Register, $mem$$Address); // a zero-extending 16-bit load stands in for load + and
5135   %}
5136   ins_pipe(ialu_reg_mem);
5137 %}
5138 
5139 // Load Integer into Long Register
5140 instruct loadI2L(rRegL dst, memory mem)
5141 %{
5142   match(Set dst (ConvI2L (LoadI mem))); // folds the int->long conversion into the load
5143 
5144   ins_cost(125);
5145   format %{ "movslq  $dst, $mem\t# int -> long" %}
5146 
5147   ins_encode %{
5148     __ movslq($dst$$Register, $mem$$Address); // sign-extending 32-bit load into 64 bits
5149   %}
5150 
5151   ins_pipe(ialu_reg_mem);
5152 %}
5153 
5154 // Load Integer with mask 0xFF into Long Register
5155 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
5156   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // (int & 0xFF) widened to long; no ins_cost is given here
5157 
5158   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
5159   ins_encode %{
5160     __ movzbq($dst$$Register, $mem$$Address); // a zero-extending byte load stands in for load + and + widen
5161   %}
5162   ins_pipe(ialu_reg_mem);
5163 %}
5164 
5165 // Load Integer with mask 0xFFFF into Long Register
5166 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
5167   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // (int & 0xFFFF) widened to long; no ins_cost is given here
5168 
5169   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
5170   ins_encode %{
5171     __ movzwq($dst$$Register, $mem$$Address); // a zero-extending 16-bit load stands in for load + and + widen
5172   %}
5173   ins_pipe(ialu_reg_mem);
5174 %}
5175 
5176 // Load Integer with a 31-bit mask into Long Register
5177 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
5178   match(Set dst (ConvI2L (AndI (LoadI mem) mask))); // load, mask and widen in one rule
5179   effect(KILL cr); // the andl below modifies the flags
5180 
5181   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
5182             "andl    $dst, $mask" %}
5183   ins_encode %{
5184     Register Rdst = $dst$$Register;
5185     __ movl(Rdst, $mem$$Address); // 32-bit load
5186     __ andl(Rdst, $mask$$constant); // with a 31-bit mask the result is non-negative, so no sign extension is emitted
5187   %}
5188   ins_pipe(ialu_reg_mem);
5189 %}
5190 
5191 // Load Unsigned Integer into Long Register
5192 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
5193 %{
5194   match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); // int widened to long, then masked with the immL_32bits constant
5195 
5196   ins_cost(125);
5197   format %{ "movl    $dst, $mem\t# uint -> long" %}
5198 
5199   ins_encode %{
5200     __ movl($dst$$Register, $mem$$Address); // only a 32-bit load is emitted; relies on movl clearing the upper half of dst
5201   %}
5202 
5203   ins_pipe(ialu_reg_mem);
5204 %}
5205 
5206 // Load Long
5207 instruct loadL(rRegL dst, memory mem)
5208 %{
5209   match(Set dst (LoadL mem)); // ideal LoadL into a long register
5210 
5211   ins_cost(125);
5212   format %{ "movq    $dst, $mem\t# long" %}
5213 
5214   ins_encode %{
5215     __ movq($dst$$Register, $mem$$Address); // plain 64-bit load
5216   %}
5217 
5218   ins_pipe(ialu_reg_mem); // XXX
5219 %}
5220 
5221 // Load Range
5222 instruct loadRange(rRegI dst, memory mem)
5223 %{
5224   match(Set dst (LoadRange mem)); // ideal LoadRange into an int register
5225 
5226   ins_cost(125); // XXX
5227   format %{ "movl    $dst, $mem\t# range" %}
5228   opcode(0x8B); // primary opcode: MOV r, r/m
5229   ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem)); // REX prefix, primary opcode, then the reg/mem operand bytes
5230   ins_pipe(ialu_reg_mem);
5231 %}
5232 
5233 // Load Pointer
5234 instruct loadP(rRegP dst, memory mem)
5235 %{
5236   match(Set dst (LoadP mem)); // ideal LoadP into a pointer register
5237 
5238   ins_cost(125); // XXX
5239   format %{ "movq    $dst, $mem\t# ptr" %}
5240   opcode(0x8B); // primary opcode: MOV r, r/m
5241   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix variant for the 64-bit form
5242   ins_pipe(ialu_reg_mem); // XXX
5243 %}
5244 
5245 // Load Compressed Pointer
5246 instruct loadN(rRegN dst, memory mem)
5247 %{
5248    match(Set dst (LoadN mem)); // ideal LoadN into a narrow-oop register
5249 
5250    ins_cost(125); // XXX
5251    format %{ "movl    $dst, $mem\t# compressed ptr" %}
5252    ins_encode %{
5253      __ movl($dst$$Register, $mem$$Address); // a compressed pointer is loaded with a 32-bit move
5254    %}
5255    ins_pipe(ialu_reg_mem); // XXX
5256 %}
5257 
5258 
5259 // Load Klass Pointer
5260 instruct loadKlass(rRegP dst, memory mem)
5261 %{
5262   match(Set dst (LoadKlass mem)); // ideal LoadKlass into a pointer register
5263 
5264   ins_cost(125); // XXX
5265   format %{ "movq    $dst, $mem\t# class" %}
5266   opcode(0x8B); // primary opcode: MOV r, r/m
5267   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix variant for the 64-bit form
5268   ins_pipe(ialu_reg_mem); // XXX
5269 %}
5270 
5271 // Load narrow Klass Pointer
5272 instruct loadNKlass(rRegN dst, memory mem)
5273 %{
5274   match(Set dst (LoadNKlass mem)); // ideal LoadNKlass into a narrow register
5275 
5276   ins_cost(125); // XXX
5277   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
5278   ins_encode %{
5279     __ movl($dst$$Register, $mem$$Address); // a compressed klass pointer is loaded with a 32-bit move
5280   %}
5281   ins_pipe(ialu_reg_mem); // XXX
5282 %}
5283 
5284 // Load Float
5285 instruct loadF(regF dst, memory mem)
5286 %{
5287   match(Set dst (LoadF mem)); // ideal LoadF into an XMM register
5288 
5289   ins_cost(145); // XXX
5290   format %{ "movss   $dst, $mem\t# float" %}
5291   ins_encode %{
5292     __ movflt($dst$$XMMRegister, $mem$$Address); // emitted through the movflt macro-assembler helper
5293   %}
5294   ins_pipe(pipe_slow); // XXX
5295 %}
5296 
5297 // Load Double
5298 instruct loadD_partial(regD dst, memory mem)
5299 %{
5300   predicate(!UseXmmLoadAndClearUpper); // selected only when the flag is off; loadD covers the other case
5301   match(Set dst (LoadD mem));
5302 
5303   ins_cost(145); // XXX
5304   format %{ "movlpd  $dst, $mem\t# double" %}
5305   ins_encode %{
5306     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper as loadD; presumably movdbl picks movlpd vs movsd from the flag -- confirm
5307   %}
5308   ins_pipe(pipe_slow); // XXX
5309 %}
5310 
5311 instruct loadD(regD dst, memory mem)
5312 %{
5313   predicate(UseXmmLoadAndClearUpper); // selected only when the flag is on; loadD_partial covers the other case
5314   match(Set dst (LoadD mem));
5315 
5316   ins_cost(145); // XXX
5317   format %{ "movsd   $dst, $mem\t# double" %}
5318   ins_encode %{
5319     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper as loadD_partial; presumably movdbl picks the instruction from the flag -- confirm
5320   %}
5321   ins_pipe(pipe_slow); // XXX
5322 %}
5323 
5324 // Load Effective Address
5325 instruct leaP8(rRegP dst, indOffset8 mem)
5326 %{
5327   match(Set dst mem); // matches the address expression described by operand indOffset8
5328 
5329   ins_cost(110); // XXX
5330   format %{ "leaq    $dst, $mem\t# ptr 8" %}
5331   opcode(0x8D); // primary opcode: LEA
5332   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5333   ins_pipe(ialu_reg_reg_fat);
5334 %}
5335 
5336 instruct leaP32(rRegP dst, indOffset32 mem)
5337 %{
5338   match(Set dst mem); // matches the address expression described by operand indOffset32
5339 
5340   ins_cost(110);
5341   format %{ "leaq    $dst, $mem\t# ptr 32" %}
5342   opcode(0x8D); // primary opcode: LEA
5343   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5344   ins_pipe(ialu_reg_reg_fat);
5345 %}
5346 
5347 // instruct leaPIdx(rRegP dst, indIndex mem)
5348 // %{
5349 //   match(Set dst mem);
5350 
5351 //   ins_cost(110);
5352 //   format %{ "leaq    $dst, $mem\t# ptr idx" %}
5353 //   opcode(0x8D);
5354 //   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
5355 //   ins_pipe(ialu_reg_reg_fat);
5356 // %}
5357 
5358 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
5359 %{
5360   match(Set dst mem); // matches the address expression described by operand indIndexOffset
5361 
5362   ins_cost(110);
5363   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
5364   opcode(0x8D); // primary opcode: LEA
5365   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5366   ins_pipe(ialu_reg_reg_fat);
5367 %}
5368 
5369 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
5370 %{
5371   match(Set dst mem); // matches the address expression described by operand indIndexScale
5372 
5373   ins_cost(110);
5374   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
5375   opcode(0x8D); // primary opcode: LEA
5376   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5377   ins_pipe(ialu_reg_reg_fat);
5378 %}
5379 
5380 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
5381 %{
5382   match(Set dst mem); // matches the address expression described by operand indPosIndexScale
5383 
5384   ins_cost(110);
5385   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
5386   opcode(0x8D); // primary opcode: LEA
5387   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5388   ins_pipe(ialu_reg_reg_fat);
5389 %}
5390 
5391 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
5392 %{
5393   match(Set dst mem); // matches the address expression described by operand indIndexScaleOffset
5394 
5395   ins_cost(110);
5396   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
5397   opcode(0x8D); // primary opcode: LEA
5398   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5399   ins_pipe(ialu_reg_reg_fat);
5400 %}
5401 
5402 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
5403 %{
5404   match(Set dst mem); // matches the address expression described by operand indPosIndexOffset
5405 
5406   ins_cost(110);
5407   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
5408   opcode(0x8D); // primary opcode: LEA
5409   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5410   ins_pipe(ialu_reg_reg_fat);
5411 %}
5412 
5413 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
5414 %{
5415   match(Set dst mem); // matches the address expression described by operand indPosIndexScaleOffset
5416 
5417   ins_cost(110);
5418   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
5419   opcode(0x8D); // primary opcode: LEA
5420   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5421   ins_pipe(ialu_reg_reg_fat);
5422 %}
5423 
5424 // Load Effective Address which uses Narrow (32-bits) oop
5425 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
5426 %{
5427   predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); // only with compressed oops and a non-zero shift
5428   match(Set dst mem); // matches the address expression described by operand indCompressedOopOffset
5429 
5430   ins_cost(110);
5431   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
5432   opcode(0x8D); // primary opcode: LEA
5433   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5434   ins_pipe(ialu_reg_reg_fat);
5435 %}
5436 
5437 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
5438 %{
5439   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5440   match(Set dst mem); // matches the address expression described by operand indOffset8Narrow
5441 
5442   ins_cost(110); // XXX
5443   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
5444   opcode(0x8D); // primary opcode: LEA
5445   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5446   ins_pipe(ialu_reg_reg_fat);
5447 %}
5448 
5449 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
5450 %{
5451   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5452   match(Set dst mem); // matches the address expression described by operand indOffset32Narrow
5453 
5454   ins_cost(110);
5455   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
5456   opcode(0x8D); // primary opcode: LEA
5457   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5458   ins_pipe(ialu_reg_reg_fat);
5459 %}
5460 
5461 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
5462 %{
5463   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5464   match(Set dst mem); // matches the address expression described by operand indIndexOffsetNarrow
5465 
5466   ins_cost(110);
5467   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
5468   opcode(0x8D); // primary opcode: LEA
5469   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5470   ins_pipe(ialu_reg_reg_fat);
5471 %}
5472 
5473 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
5474 %{
5475   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5476   match(Set dst mem); // matches the address expression described by operand indIndexScaleNarrow
5477 
5478   ins_cost(110);
5479   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
5480   opcode(0x8D); // primary opcode: LEA
5481   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5482   ins_pipe(ialu_reg_reg_fat);
5483 %}
5484 
5485 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
5486 %{
5487   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5488   match(Set dst mem); // matches the address expression described by operand indIndexScaleOffsetNarrow
5489 
5490   ins_cost(110);
5491   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
5492   opcode(0x8D); // primary opcode: LEA
5493   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5494   ins_pipe(ialu_reg_reg_fat);
5495 %}
5496 
5497 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
5498 %{
5499   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5500   match(Set dst mem); // matches the address expression described by operand indPosIndexOffsetNarrow
5501 
5502   ins_cost(110);
5503   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
5504   opcode(0x8D); // primary opcode: LEA
5505   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5506   ins_pipe(ialu_reg_reg_fat);
5507 %}
5508 
5509 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
5510 %{
5511   predicate(Universe::narrow_oop_shift() == 0); // only when narrow oops need no shift
5512   match(Set dst mem); // matches the address expression described by operand indPosIndexScaleOffsetNarrow
5513 
5514   ins_cost(110);
5515   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
5516   opcode(0x8D); // primary opcode: LEA
5517   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); // wide REX prefix, opcode, then reg/mem operand bytes
5518   ins_pipe(ialu_reg_reg_fat);
5519 %}
5520 
5521 instruct loadConI(rRegI dst, immI src)
5522 %{
5523   match(Set dst src); // materialize an arbitrary int constant; no ins_cost is given here
5524 
5525   format %{ "movl    $dst, $src\t# int" %}
5526   ins_encode(load_immI(dst, src)); // encoding is delegated to the load_immI enc_class
5527   ins_pipe(ialu_reg_fat); // XXX
5528 %}
5529 
5530 instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
5531 %{
5532   match(Set dst src); // int constant zero
5533   effect(KILL cr);    // the xor modifies the flags
5534 
5535   ins_cost(50);       // lower cost than the rules that load a general constant
5536   format %{ "xorl    $dst, $dst\t# int" %}
5537   opcode(0x33); /* + rd */
5538   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // dst is both operands: xor the register with itself
5539   ins_pipe(ialu_reg);
5540 %}
5541 
5542 instruct loadConL(rRegL dst, immL src)
5543 %{
5544   match(Set dst src); // materialize an arbitrary long constant
5545 
5546   ins_cost(150);      // higher cost than the 32-bit long-constant rules (60 and 70)
5547   format %{ "movq    $dst, $src\t# long" %}
5548   ins_encode(load_immL(dst, src)); // encoding is delegated to the load_immL enc_class
5549   ins_pipe(ialu_reg);
5550 %}
5551 
5552 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
5553 %{
5554   match(Set dst src); // long constant zero
5555   effect(KILL cr);    // the xor modifies the flags
5556 
5557   ins_cost(50);       // lower cost than the rules that load a general constant
5558   format %{ "xorl    $dst, $dst\t# long" %}
5559   opcode(0x33); /* + rd */
5560   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor, relying on it clearing the upper half of dst
5561   ins_pipe(ialu_reg); // XXX
5562 %}
5563 
5564 instruct loadConUL32(rRegL dst, immUL32 src)
5565 %{
5566   match(Set dst src); // long constant matching operand immUL32
5567 
5568   ins_cost(60);       // lower cost than loadConL32 (70) and loadConL (150)
5569   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
5570   ins_encode(load_immUL32(dst, src)); // encoding is delegated to the load_immUL32 enc_class
5571   ins_pipe(ialu_reg);
5572 %}
5573 
5574 instruct loadConL32(rRegL dst, immL32 src)
5575 %{
5576   match(Set dst src); // long constant matching operand immL32
5577 
5578   ins_cost(70);       // between loadConUL32 (60) and loadConL (150)
5579   format %{ "movq    $dst, $src\t# long (32-bit)" %}
5580   ins_encode(load_immL32(dst, src)); // encoding is delegated to the load_immL32 enc_class
5581   ins_pipe(ialu_reg);
5582 %}
5583 
5584 instruct loadConP(rRegP dst, immP con) %{
5585   match(Set dst con); // materialize an arbitrary pointer constant; no ins_cost is given here
5586 
5587   format %{ "movq    $dst, $con\t# ptr" %}
5588   ins_encode(load_immP(dst, con)); // encoding is delegated to the load_immP enc_class
5589   ins_pipe(ialu_reg_fat); // XXX
5590 %}
5591 
5592 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
5593 %{
5594   match(Set dst src); // NULL pointer constant
5595   effect(KILL cr);    // the xor modifies the flags
5596 
5597   ins_cost(50);       // lower cost than the rules that load a general constant
5598   format %{ "xorl    $dst, $dst\t# ptr" %}
5599   opcode(0x33); /* + rd */
5600   ins_encode(REX_reg_reg(dst, dst), OpcP, reg_reg(dst, dst)); // non-wide REX: a 32-bit xor, relying on it clearing the upper half of dst
5601   ins_pipe(ialu_reg);
5602 %}
5603 
5604 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
5605 %{
5606   match(Set dst src); // pointer constant matching operand immP31
5607   effect(KILL cr);    // NOTE(review): flags are declared killed although the format shows a movl -- confirm load_immP31 needs this
5608 
5609   ins_cost(60);
5610   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
5611   ins_encode(load_immP31(dst, src)); // encoding is delegated to the load_immP31 enc_class
5612   ins_pipe(ialu_reg);
5613 %}
5614 
5615 instruct loadConF(regF dst, immF con) %{
5616   match(Set dst con); // materialize a float constant
5617   ins_cost(125);
5618   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
5619   ins_encode %{
5620     __ movflt($dst$$XMMRegister, $constantaddress($con)); // the value is read from its constant-table slot
5621   %}
5622   ins_pipe(pipe_slow);
5623 %}
5624 
5625 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
5626   match(Set dst src); // compressed NULL constant
5627   effect(KILL cr);    // the xor modifies the flags
5628   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
5629   ins_encode %{
5630     __ xorq($dst$$Register, $dst$$Register); // zero dst by xor-ing it with itself; src is only used for matching
5631   %}
5632   ins_pipe(ialu_reg);
5633 %}
5634 
5635 instruct loadConN(rRegN dst, immN src) %{
5636   match(Set dst src); // materialize a compressed oop constant
5637 
5638   ins_cost(125);
5639   format %{ "movl    $dst, $src\t# compressed ptr" %}
5640   ins_encode %{
5641     address con = (address)$src$$constant;
5642     if (con == NULL) {
5643       ShouldNotReachHere(); // a NULL constant is not expected in this rule (loadConN0 matches immN0)
5644     } else {
5645       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); // emitted through the set_narrow_oop helper
5646     }
5647   %}
5648   ins_pipe(ialu_reg_fat); // XXX
5649 %}
5650 
5651 instruct loadConNKlass(rRegN dst, immNKlass src) %{
5652   match(Set dst src); // materialize a compressed klass constant
5653 
5654   ins_cost(125);
5655   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
5656   ins_encode %{
5657     address con = (address)$src$$constant;
5658     if (con == NULL) {
5659       ShouldNotReachHere(); // a NULL klass constant is treated as a fatal error
5660     } else {
5661       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant); // emitted through the set_narrow_klass helper
5662     }
5663   %}
5664   ins_pipe(ialu_reg_fat); // XXX
5665 %}
5666 
5667 instruct loadConF0(regF dst, immF0 src)
5668 %{
5669   match(Set dst src); // float constant matching operand immF0
5670   ins_cost(100);      // lower cost than loadConF (125), which reads the constant table
5671 
5672   format %{ "xorps   $dst, $dst\t# float 0.0" %}
5673   ins_encode %{
5674     __ xorps($dst$$XMMRegister, $dst$$XMMRegister); // zero the register by xor-ing it with itself; no memory access
5675   %}
5676   ins_pipe(pipe_slow);
5677 %}
5678 
5679 // Use the same format since predicate() can not be used here.
5680 instruct loadConD(regD dst, immD con) %{
5681   match(Set dst con); // materialize a double constant
5682   ins_cost(125);
5683   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
5684   ins_encode %{
5685     __ movdbl($dst$$XMMRegister, $constantaddress($con)); // the value is read from its constant-table slot
5686   %}
5687   ins_pipe(pipe_slow);
5688 %}
5689 
5690 instruct loadConD0(regD dst, immD0 src)
5691 %{
5692   match(Set dst src); // double constant matching operand immD0
5693   ins_cost(100);      // lower cost than loadConD (125), which reads the constant table
5694 
5695   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
5696   ins_encode %{
5697     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); // zero the register by xor-ing it with itself; no memory access
5698   %}
5699   ins_pipe(pipe_slow);
5700 %}
5701 
5702 instruct loadSSI(rRegI dst, stackSlotI src)
5703 %{
5704   match(Set dst src); // reload an int from a stack slot into a register
5705 
5706   ins_cost(125);
5707   format %{ "movl    $dst, $src\t# int stk" %}
5708   opcode(0x8B); // primary opcode: MOV r, r/m
5709   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); // the stack slot is encoded as the memory operand
5710   ins_pipe(ialu_reg_mem);
5711 %}
5712 
// Load a long from a stack slot into a general register.
// Same opcode (0x8B) as the int form, but with the wide REX prefix encoder.
5713 instruct loadSSL(rRegL dst, stackSlotL src)
5714 %{
5715   match(Set dst src);
5716 
5717   ins_cost(125);
5718   format %{ "movq    $dst, $src\t# long stk" %}
5719   opcode(0x8B);
5720   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5721   ins_pipe(ialu_reg_mem);
5722 %}
5723 
// Load a pointer from a stack slot into a general register.
// Uses the same wide-REX 0x8B encoding as the long stack-slot load.
5724 instruct loadSSP(rRegP dst, stackSlotP src)
5725 %{
5726   match(Set dst src);
5727 
5728   ins_cost(125);
5729   format %{ "movq    $dst, $src\t# ptr stk" %}
5730   opcode(0x8B);
5731   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
5732   ins_pipe(ialu_reg_mem);
5733 %}
5734 
// Load a float from a stack slot into an XMM register.
// The slot is addressed as rsp + the operand's displacement.
5735 instruct loadSSF(regF dst, stackSlotF src)
5736 %{
5737   match(Set dst src);
5738 
5739   ins_cost(125);
5740   format %{ "movss   $dst, $src\t# float stk" %}
5741   ins_encode %{
5742     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
5743   %}
5744   ins_pipe(pipe_slow); // XXX
5745 %}
5746 
5747 // Use the same format since predicate() can not be used here.
// Load a double from a stack slot into an XMM register via movdbl.
// The slot is addressed as rsp + the operand's displacement.
5748 instruct loadSSD(regD dst, stackSlotD src)
5749 %{
5750   match(Set dst src);
5751 
5752   ins_cost(125);
5753   format %{ "movsd   $dst, $src\t# double stk" %}
5754   ins_encode  %{
5755     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
5756   %}
5757   ins_pipe(pipe_slow); // XXX
5758 %}
5759 
5760 // Prefetch instructions for allocation.
5761 // Must be safe to execute with invalid address (cannot fault).
5762 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 3:
// emits PREFETCHW on the given address.
5763 instruct prefetchAlloc( memory mem ) %{
5764   predicate(AllocatePrefetchInstr==3);
5765   match(PrefetchAllocation mem);
5766   ins_cost(125);
5767 
5768   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
5769   ins_encode %{
5770     __ prefetchw($mem$$Address);
5771   %}
5772   ins_pipe(ialu_mem);
5773 %}
5774 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 0:
// emits PREFETCHNTA on the given address.
5775 instruct prefetchAllocNTA( memory mem ) %{
5776   predicate(AllocatePrefetchInstr==0);
5777   match(PrefetchAllocation mem);
5778   ins_cost(125);
5779 
5780   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
5781   ins_encode %{
5782     __ prefetchnta($mem$$Address);
5783   %}
5784   ins_pipe(ialu_mem);
5785 %}
5786 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 1:
// emits PREFETCHT0 on the given address.
5787 instruct prefetchAllocT0( memory mem ) %{
5788   predicate(AllocatePrefetchInstr==1);
5789   match(PrefetchAllocation mem);
5790   ins_cost(125);
5791 
5792   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
5793   ins_encode %{
5794     __ prefetcht0($mem$$Address);
5795   %}
5796   ins_pipe(ialu_mem);
5797 %}
5798 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 2:
// emits PREFETCHT2 on the given address.
5799 instruct prefetchAllocT2( memory mem ) %{
5800   predicate(AllocatePrefetchInstr==2);
5801   match(PrefetchAllocation mem);
5802   ins_cost(125);
5803 
5804   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
5805   ins_encode %{
5806     __ prefetcht2($mem$$Address);
5807   %}
5808   ins_pipe(ialu_mem);
5809 %}
5810 
5811 //----------Store Instructions-------------------------------------------------
5812 
5813 // Store Byte
// Matches StoreB of an int register to memory.
// Encoded as opcode 0x88 with the byte-register REX encoder (REX_breg_mem).
5814 instruct storeB(memory mem, rRegI src)
5815 %{
5816   match(Set mem (StoreB mem src));
5817 
5818   ins_cost(125); // XXX
5819   format %{ "movb    $mem, $src\t# byte" %}
5820   opcode(0x88);
5821   ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem));
5822   ins_pipe(ialu_mem_reg);
5823 %}
5824 
5825 // Store Char/Short
// Matches StoreC of an int register to memory.
// Same opcode (0x89) as the int store, preceded by SizePrefix to select the
// 16-bit operand size.
5826 instruct storeC(memory mem, rRegI src)
5827 %{
5828   match(Set mem (StoreC mem src));
5829 
5830   ins_cost(125); // XXX
5831   format %{ "movw    $mem, $src\t# char/short" %}
5832   opcode(0x89);
5833   ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5834   ins_pipe(ialu_mem_reg);
5835 %}
5836 
5837 // Store Integer
// Matches StoreI of an int register to memory (opcode 0x89, non-wide REX).
5838 instruct storeI(memory mem, rRegI src)
5839 %{
5840   match(Set mem (StoreI mem src));
5841 
5842   ins_cost(125); // XXX
5843   format %{ "movl    $mem, $src\t# int" %}
5844   opcode(0x89);
5845   ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
5846   ins_pipe(ialu_mem_reg);
5847 %}
5848 
5849 // Store Long
// Matches StoreL of a long register to memory (opcode 0x89, wide REX).
5850 instruct storeL(memory mem, rRegL src)
5851 %{
5852   match(Set mem (StoreL mem src));
5853 
5854   ins_cost(125); // XXX
5855   format %{ "movq    $mem, $src\t# long" %}
5856   opcode(0x89);
5857   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5858   ins_pipe(ialu_mem_reg); // XXX
5859 %}
5860 
5861 // Store Pointer
// Matches StoreP of a pointer register to memory (opcode 0x89, wide REX).
// The source operand class is any_RegP rather than rRegP.
5862 instruct storeP(memory mem, any_RegP src)
5863 %{
5864   match(Set mem (StoreP mem src));
5865 
5866   ins_cost(125); // XXX
5867   format %{ "movq    $mem, $src\t# ptr" %}
5868   opcode(0x89);
5869   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
5870   ins_pipe(ialu_mem_reg);
5871 %}
5872 
// Store a NULL pointer by writing R12 to memory instead of an immediate.
// Only enabled when compressed oops are in use and both the narrow oop base
// and the narrow klass base are NULL; the format string records the
// assumption that R12 (heap base) is then 0.
5873 instruct storeImmP0(memory mem, immP0 zero)
5874 %{
5875   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5876   match(Set mem (StoreP mem zero));
5877 
5878   ins_cost(125); // XXX
5879   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
5880   ins_encode %{
5881     __ movq($mem$$Address, r12);
5882   %}
5883   ins_pipe(ialu_mem_reg);
5884 %}
5885 
5886 // Store NULL Pointer, mark word, or other simple pointer constant.
// Stores an immP31 pointer constant to memory as a 32-bit immediate (Con32)
// using the wide-REX C7 /0 form.
5887 instruct storeImmP(memory mem, immP31 src)
5888 %{
5889   match(Set mem (StoreP mem src));
5890 
5891   ins_cost(150); // XXX
5892   format %{ "movq    $mem, $src\t# ptr" %}
5893   opcode(0xC7); /* C7 /0 */
5894   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5895   ins_pipe(ialu_mem_imm);
5896 %}
5897 
5898 // Store Compressed Pointer
// Matches StoreN: writes a compressed oop from a narrow register to memory
// with a 32-bit movl.
5899 instruct storeN(memory mem, rRegN src)
5900 %{
5901   match(Set mem (StoreN mem src));
5902 
5903   ins_cost(125); // XXX
5904   format %{ "movl    $mem, $src\t# compressed ptr" %}
5905   ins_encode %{
5906     __ movl($mem$$Address, $src$$Register);
5907   %}
5908   ins_pipe(ialu_mem_reg);
5909 %}
5910 
// Matches StoreNKlass: writes a compressed klass pointer from a narrow
// register to memory with a 32-bit movl.
5911 instruct storeNKlass(memory mem, rRegN src)
5912 %{
5913   match(Set mem (StoreNKlass mem src));
5914 
5915   ins_cost(125); // XXX
5916   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5917   ins_encode %{
5918     __ movl($mem$$Address, $src$$Register);
5919   %}
5920   ins_pipe(ialu_mem_reg);
5921 %}
5922 
// Store a NULL compressed oop by writing the low 32 bits of R12 to memory.
// Enabled only when both the narrow oop base and the narrow klass base are
// NULL; the format string records the assumption that R12 is then 0.
// Unlike the other R12-based zero stores here, the predicate does not test
// UseCompressedOops explicitly.
5923 instruct storeImmN0(memory mem, immN0 zero)
5924 %{
5925   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL);
5926   match(Set mem (StoreN mem zero));
5927 
5928   ins_cost(125); // XXX
5929   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
5930   ins_encode %{
5931     __ movl($mem$$Address, r12);
5932   %}
5933   ins_pipe(ialu_mem_reg);
5934 %}
5935 
// Store a compressed oop constant to memory.
// A NULL constant is written as a plain 32-bit zero; any other constant is
// emitted through set_narrow_oop.
5936 instruct storeImmN(memory mem, immN src)
5937 %{
5938   match(Set mem (StoreN mem src));
5939 
5940   ins_cost(150); // XXX
5941   format %{ "movl    $mem, $src\t# compressed ptr" %}
5942   ins_encode %{
5943     address con = (address)$src$$constant;
5944     if (con == NULL) {
5945       __ movl($mem$$Address, (int32_t)0);
5946     } else {
5947       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
5948     }
5949   %}
5950   ins_pipe(ialu_mem_imm);
5951 %}
5952 
// Store a compressed klass pointer constant to memory via set_narrow_klass.
// No NULL check is made on the constant here.
5953 instruct storeImmNKlass(memory mem, immNKlass src)
5954 %{
5955   match(Set mem (StoreNKlass mem src));
5956 
5957   ins_cost(150); // XXX
5958   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
5959   ins_encode %{
5960     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
5961   %}
5962   ins_pipe(ialu_mem_imm);
5963 %}
5964 
5965 // Store Integer Immediate
// Store int 0 by writing the low 32 bits of R12 to memory.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0.
5966 instruct storeImmI0(memory mem, immI0 zero)
5967 %{
5968   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5969   match(Set mem (StoreI mem zero));
5970 
5971   ins_cost(125); // XXX
5972   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
5973   ins_encode %{
5974     __ movl($mem$$Address, r12);
5975   %}
5976   ins_pipe(ialu_mem_reg);
5977 %}
5978 
// Store an int immediate to memory using C7 /0 with a 32-bit immediate.
5979 instruct storeImmI(memory mem, immI src)
5980 %{
5981   match(Set mem (StoreI mem src));
5982 
5983   ins_cost(150);
5984   format %{ "movl    $mem, $src\t# int" %}
5985   opcode(0xC7); /* C7 /0 */
5986   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
5987   ins_pipe(ialu_mem_imm);
5988 %}
5989 
5990 // Store Long Immediate
// Store long 0 by writing R12 to memory with a 64-bit movq.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0.
5991 instruct storeImmL0(memory mem, immL0 zero)
5992 %{
5993   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
5994   match(Set mem (StoreL mem zero));
5995 
5996   ins_cost(125); // XXX
5997   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
5998   ins_encode %{
5999     __ movq($mem$$Address, r12);
6000   %}
6001   ins_pipe(ialu_mem_reg);
6002 %}
6003 
// Stores an immL32 long constant to memory as a 32-bit immediate (Con32)
// using the wide-REX C7 /0 form.
6004 instruct storeImmL(memory mem, immL32 src)
6005 %{
6006   match(Set mem (StoreL mem src));
6007 
6008   ins_cost(150);
6009   format %{ "movq    $mem, $src\t# long" %}
6010   opcode(0xC7); /* C7 /0 */
6011   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
6012   ins_pipe(ialu_mem_imm);
6013 %}
6014 
6015 // Store Short/Char Immediate
// Store short/char 0 by writing the low 16 bits of R12 to memory.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0.
6016 instruct storeImmC0(memory mem, immI0 zero)
6017 %{
6018   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6019   match(Set mem (StoreC mem zero));
6020 
6021   ins_cost(125); // XXX
6022   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
6023   ins_encode %{
6024     __ movw($mem$$Address, r12);
6025   %}
6026   ins_pipe(ialu_mem_reg);
6027 %}
6028 
// Store a 16-bit immediate for StoreC, gated by the UseStoreImmI16 flag.
// Encoded as the 32-bit immediate store (C7 /0) with SizePrefix and a
// 16-bit immediate (Con16).
6029 instruct storeImmI16(memory mem, immI16 src)
6030 %{
6031   predicate(UseStoreImmI16);
6032   match(Set mem (StoreC mem src));
6033 
6034   ins_cost(150);
6035   format %{ "movw    $mem, $src\t# short/char" %}
6036   opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6037   ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src));
6038   ins_pipe(ialu_mem_imm);
6039 %}
6040 
6041 // Store Byte Immediate
// Store byte 0 by writing the low 8 bits of R12 to memory.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0.
// The disassembly comment says "byte": this rule matches StoreB and emits
// movb, so the "short/char" label carried over from storeImmC0 was wrong.
6042 instruct storeImmB0(memory mem, immI0 zero)
6043 %{
6044   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6045   match(Set mem (StoreB mem zero));
6046 
6047   ins_cost(125); // XXX
6048   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
6049   ins_encode %{
6050     __ movb($mem$$Address, r12);
6051   %}
6052   ins_pipe(ialu_mem_reg);
6053 %}
6054 
// Store an 8-bit immediate (immI8) to memory for StoreB using C6 /0.
6055 instruct storeImmB(memory mem, immI8 src)
6056 %{
6057   match(Set mem (StoreB mem src));
6058 
6059   ins_cost(150); // XXX
6060   format %{ "movb    $mem, $src\t# byte" %}
6061   opcode(0xC6); /* C6 /0 */
6062   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6063   ins_pipe(ialu_mem_imm);
6064 %}
6065 
6066 // Store CMS card-mark Immediate
// Card-mark store of 0 (StoreCM) done by writing the low byte of R12.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0. Cost 125 is lower than
// the immediate form (150).
6067 instruct storeImmCM0_reg(memory mem, immI0 zero)
6068 %{
6069   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6070   match(Set mem (StoreCM mem zero));
6071 
6072   ins_cost(125); // XXX
6073   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
6074   ins_encode %{
6075     __ movb($mem$$Address, r12);
6076   %}
6077   ins_pipe(ialu_mem_reg);
6078 %}
6079 
// Card-mark store of 0 (StoreCM) with no predicate, using the C6 /0
// byte-immediate form.
6080 instruct storeImmCM0(memory mem, immI0 src)
6081 %{
6082   match(Set mem (StoreCM mem src));
6083 
6084   ins_cost(150); // XXX
6085   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
6086   opcode(0xC6); /* C6 /0 */
6087   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6088   ins_pipe(ialu_mem_imm);
6089 %}
6090 
6091 // Store Float
// Matches StoreF: writes a float from an XMM register to memory via movflt.
6092 instruct storeF(memory mem, regF src)
6093 %{
6094   match(Set mem (StoreF mem src));
6095 
6096   ins_cost(95); // XXX
6097   format %{ "movss   $mem, $src\t# float" %}
6098   ins_encode %{
6099     __ movflt($mem$$Address, $src$$XMMRegister);
6100   %}
6101   ins_pipe(pipe_slow); // XXX
6102 %}
6103 
6104 // Store immediate Float value (it is faster than store from XMM register)
// Store float 0.0 by writing the low 32 bits of R12 (integer movl).
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0. Cost 25 is lower than both
// the immediate form (50) and the XMM register store (95).
6105 instruct storeF0(memory mem, immF0 zero)
6106 %{
6107   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6108   match(Set mem (StoreF mem zero));
6109 
6110   ins_cost(25); // XXX
6111   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
6112   ins_encode %{
6113     __ movl($mem$$Address, r12);
6114   %}
6115   ins_pipe(ialu_mem_reg);
6116 %}
6117 
// Store a float constant to memory as a 32-bit integer immediate: C7 /0
// followed by the constant's bit pattern (Con32F_as_bits).
6118 instruct storeF_imm(memory mem, immF src)
6119 %{
6120   match(Set mem (StoreF mem src));
6121 
6122   ins_cost(50);
6123   format %{ "movl    $mem, $src\t# float" %}
6124   opcode(0xC7); /* C7 /0 */
6125   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6126   ins_pipe(ialu_mem_imm);
6127 %}
6128 
6129 // Store Double
// Matches StoreD: writes a double from an XMM register to memory via movdbl.
6130 instruct storeD(memory mem, regD src)
6131 %{
6132   match(Set mem (StoreD mem src));
6133 
6134   ins_cost(95); // XXX
6135   format %{ "movsd   $mem, $src\t# double" %}
6136   ins_encode %{
6137     __ movdbl($mem$$Address, $src$$XMMRegister);
6138   %}
6139   ins_pipe(pipe_slow); // XXX
6140 %}
6141 
6142 // Store immediate double 0.0 (it is faster than store from XMM register)
// Store double 0.0 as an integer immediate: wide-REX C7 /0 followed by
// Con32F_as_bits(src). Used when compressed oops are off or the narrow oop
// base is non-NULL.
// NOTE(review): only a 32-bit immediate is emitted for an 8-byte store;
// presumably this relies on the immediate being sign-extended to 64 bits so
// that all eight bytes become zero -- confirm against the encoder and ISA.
// NOTE(review): this predicate does not mention narrow_klass_base(), so it is
// not the exact complement of the R12-based storeD0 predicate.
6143 instruct storeD0_imm(memory mem, immD0 src)
6144 %{
6145   predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
6146   match(Set mem (StoreD mem src));
6147 
6148   ins_cost(50);
6149   format %{ "movq    $mem, $src\t# double 0." %}
6150   opcode(0xC7); /* C7 /0 */
6151   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src));
6152   ins_pipe(ialu_mem_imm);
6153 %}
6154 
// Store double 0.0 by writing R12 to memory with a 64-bit movq.
// Enabled only with compressed oops and NULL narrow oop/klass bases, where
// the format string assumes R12 (heap base) is 0.
6155 instruct storeD0(memory mem, immD0 zero)
6156 %{
6157   predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));
6158   match(Set mem (StoreD mem zero));
6159 
6160   ins_cost(25); // XXX
6161   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
6162   ins_encode %{
6163     __ movq($mem$$Address, r12);
6164   %}
6165   ins_pipe(ialu_mem_reg);
6166 %}
6167 
// Store an int register to a stack slot (opcode 0x89, non-wide REX).
6168 instruct storeSSI(stackSlotI dst, rRegI src)
6169 %{
6170   match(Set dst src);
6171 
6172   ins_cost(100);
6173   format %{ "movl    $dst, $src\t# int stk" %}
6174   opcode(0x89);
6175   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
6176   ins_pipe( ialu_mem_reg );
6177 %}
6178 
// Store a long register to a stack slot (opcode 0x89, wide REX).
6179 instruct storeSSL(stackSlotL dst, rRegL src)
6180 %{
6181   match(Set dst src);
6182 
6183   ins_cost(100);
6184   format %{ "movq    $dst, $src\t# long stk" %}
6185   opcode(0x89);
6186   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6187   ins_pipe(ialu_mem_reg);
6188 %}
6189 
// Store a pointer register to a stack slot (opcode 0x89, wide REX).
6190 instruct storeSSP(stackSlotP dst, rRegP src)
6191 %{
6192   match(Set dst src);
6193 
6194   ins_cost(100);
6195   format %{ "movq    $dst, $src\t# ptr stk" %}
6196   opcode(0x89);
6197   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
6198   ins_pipe(ialu_mem_reg);
6199 %}
6200 
// Store a float from an XMM register to a stack slot via movflt.
// The slot is addressed as rsp + the operand's displacement.
6201 instruct storeSSF(stackSlotF dst, regF src)
6202 %{
6203   match(Set dst src);
6204 
6205   ins_cost(95); // XXX
6206   format %{ "movss   $dst, $src\t# float stk" %}
6207   ins_encode %{
6208     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
6209   %}
6210   ins_pipe(pipe_slow); // XXX
6211 %}
6212 
// Store a double from an XMM register to a stack slot via movdbl.
// The slot is addressed as rsp + the operand's displacement.
6213 instruct storeSSD(stackSlotD dst, regD src)
6214 %{
6215   match(Set dst src);
6216 
6217   ins_cost(95); // XXX
6218   format %{ "movsd   $dst, $src\t# double stk" %}
6219   ins_encode %{
6220     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
6221   %}
6222   ins_pipe(pipe_slow); // XXX
6223 %}
6224 
6225 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI in place: byte-swaps the 32-bit register with bswapl
// (0F C8+reg, non-wide REX). Source and destination are the same register.
6226 instruct bytes_reverse_int(rRegI dst) %{
6227   match(Set dst (ReverseBytesI dst));
6228 
6229   format %{ "bswapl  $dst" %}
6230   opcode(0x0F, 0xC8);  /*Opcode 0F /C8 */
6231   ins_encode( REX_reg(dst), OpcP, opc2_reg(dst) );
6232   ins_pipe( ialu_reg );
6233 %}
6234 
// ReverseBytesL in place: byte-swaps the 64-bit register with bswapq
// (same opcode bytes as the int form, with the wide REX prefix encoder).
6235 instruct bytes_reverse_long(rRegL dst) %{
6236   match(Set dst (ReverseBytesL dst));
6237 
6238   format %{ "bswapq  $dst" %}
6239   opcode(0x0F, 0xC8); /* Opcode 0F /C8 */
6240   ins_encode( REX_reg_wide(dst), OpcP, opc2_reg(dst) );
6241   ins_pipe( ialu_reg);
6242 %}
6243 
// ReverseBytesUS in place: a 32-bit byte swap moves the swapped low half
// into the upper 16 bits, then a logical right shift by 16 brings it back
// down with zero fill. The shift modifies flags, hence KILL cr.
6244 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
6245   match(Set dst (ReverseBytesUS dst));
6246   effect(KILL cr);
6247 
6248   format %{ "bswapl  $dst\n\t"
6249             "shrl    $dst,16\n\t" %}
6250   ins_encode %{
6251     __ bswapl($dst$$Register);
6252     __ shrl($dst$$Register, 16);
6253   %}
6254   ins_pipe( ialu_reg );
6255 %}
6256 
// ReverseBytesS in place: a 32-bit byte swap moves the swapped low half
// into the upper 16 bits, then an arithmetic right shift by 16 brings it
// back down with sign extension. The shift modifies flags, hence KILL cr.
// The format prints "sarl" to match the instruction the encoder emits and
// the "shrl" spelling used by the unsigned variant.
6257 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
6258   match(Set dst (ReverseBytesS dst));
6259   effect(KILL cr);
6260 
6261   format %{ "bswapl  $dst\n\t"
6262             "sarl    $dst,16\n\t" %}
6263   ins_encode %{
6264     __ bswapl($dst$$Register);
6265     __ sarl($dst$$Register, 16);
6266   %}
6267   ins_pipe( ialu_reg );
6268 %}
6269 
6270 //---------- Zeros Count Instructions ------------------------------------------
6271 
// CountLeadingZerosI using a single lzcntl; selected when
// UseCountLeadingZerosInstruction is set. Flags are killed.
6272 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6273   predicate(UseCountLeadingZerosInstruction);
6274   match(Set dst (CountLeadingZerosI src));
6275   effect(KILL cr);
6276 
6277   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
6278   ins_encode %{
6279     __ lzcntl($dst$$Register, $src$$Register);
6280   %}
6281   ins_pipe(ialu_reg);
6282 %}
6283 
// CountLeadingZerosI fallback when UseCountLeadingZerosInstruction is off.
// bsrl yields the index of the highest set bit; the result is computed as
// (BitsPerInt - 1) - index via negl + addl. When the jnz is not taken (the
// zero-source case), dst is forced to -1 so the same arithmetic gives
// 1 + (BitsPerInt - 1) = BitsPerInt.
6284 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
6285   predicate(!UseCountLeadingZerosInstruction);
6286   match(Set dst (CountLeadingZerosI src));
6287   effect(KILL cr);
6288 
6289   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
6290             "jnz     skip\n\t"
6291             "movl    $dst, -1\n"
6292       "skip:\n\t"
6293             "negl    $dst\n\t"
6294             "addl    $dst, 31" %}
6295   ins_encode %{
6296     Register Rdst = $dst$$Register;
6297     Register Rsrc = $src$$Register;
6298     Label skip;
6299     __ bsrl(Rdst, Rsrc);
6300     __ jccb(Assembler::notZero, skip);
6301     __ movl(Rdst, -1);
6302     __ bind(skip);
6303     __ negl(Rdst);
6304     __ addl(Rdst, BitsPerInt - 1);
6305   %}
6306   ins_pipe(ialu_reg);
6307 %}
6308 
// CountLeadingZerosL using a single lzcntq; selected when
// UseCountLeadingZerosInstruction is set. The result is an int register
// even though the source is a long. Flags are killed.
6309 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6310   predicate(UseCountLeadingZerosInstruction);
6311   match(Set dst (CountLeadingZerosL src));
6312   effect(KILL cr);
6313 
6314   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
6315   ins_encode %{
6316     __ lzcntq($dst$$Register, $src$$Register);
6317   %}
6318   ins_pipe(ialu_reg);
6319 %}
6320 
// CountLeadingZerosL fallback when UseCountLeadingZerosInstruction is off.
// bsrq yields the index of the highest set bit; the result is computed as
// (BitsPerLong - 1) - index via negl + addl. When the jnz is not taken (the
// zero-source case), dst is forced to -1 so the same arithmetic gives
// 1 + (BitsPerLong - 1) = BitsPerLong.
6321 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
6322   predicate(!UseCountLeadingZerosInstruction);
6323   match(Set dst (CountLeadingZerosL src));
6324   effect(KILL cr);
6325 
6326   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
6327             "jnz     skip\n\t"
6328             "movl    $dst, -1\n"
6329       "skip:\n\t"
6330             "negl    $dst\n\t"
6331             "addl    $dst, 63" %}
6332   ins_encode %{
6333     Register Rdst = $dst$$Register;
6334     Register Rsrc = $src$$Register;
6335     Label skip;
6336     __ bsrq(Rdst, Rsrc);
6337     __ jccb(Assembler::notZero, skip);
6338     __ movl(Rdst, -1);
6339     __ bind(skip);
6340     __ negl(Rdst);
6341     __ addl(Rdst, BitsPerLong - 1);
6342   %}
6343   ins_pipe(ialu_reg);
6344 %}
6345 
// CountTrailingZerosI using a single tzcntl; selected when
// UseCountTrailingZerosInstruction is set. Flags are killed.
6346 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
6347   predicate(UseCountTrailingZerosInstruction);
6348   match(Set dst (CountTrailingZerosI src));
6349   effect(KILL cr);
6350 
6351   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
6352   ins_encode %{
6353     __ tzcntl($dst$$Register, $src$$Register);
6354   %}
6355   ins_pipe(ialu_reg);
6356 %}
6357 
// CountTrailingZerosI fallback when UseCountTrailingZerosInstruction is off.
// bsfl yields the index of the lowest set bit, which is the answer directly.
// When the jnz is not taken (the zero-source case), dst is set to BitsPerInt.
6358 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
6359   predicate(!UseCountTrailingZerosInstruction);
6360   match(Set dst (CountTrailingZerosI src));
6361   effect(KILL cr);
6362 
6363   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
6364             "jnz     done\n\t"
6365             "movl    $dst, 32\n"
6366       "done:" %}
6367   ins_encode %{
6368     Register Rdst = $dst$$Register;
6369     Label done;
6370     __ bsfl(Rdst, $src$$Register);
6371     __ jccb(Assembler::notZero, done);
6372     __ movl(Rdst, BitsPerInt);
6373     __ bind(done);
6374   %}
6375   ins_pipe(ialu_reg);
6376 %}
6377 
// CountTrailingZerosL using a single tzcntq; selected when
// UseCountTrailingZerosInstruction is set. The result is an int register
// even though the source is a long. Flags are killed.
6378 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
6379   predicate(UseCountTrailingZerosInstruction);
6380   match(Set dst (CountTrailingZerosL src));
6381   effect(KILL cr);
6382 
6383   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
6384   ins_encode %{
6385     __ tzcntq($dst$$Register, $src$$Register);
6386   %}
6387   ins_pipe(ialu_reg);
6388 %}
6389 
// CountTrailingZerosL fallback when UseCountTrailingZerosInstruction is off.
// bsfq yields the index of the lowest set bit, which is the answer directly.
// When the jnz is not taken (the zero-source case), dst is set to BitsPerLong.
6390 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
6391   predicate(!UseCountTrailingZerosInstruction);
6392   match(Set dst (CountTrailingZerosL src));
6393   effect(KILL cr);
6394 
6395   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
6396             "jnz     done\n\t"
6397             "movl    $dst, 64\n"
6398       "done:" %}
6399   ins_encode %{
6400     Register Rdst = $dst$$Register;
6401     Label done;
6402     __ bsfq(Rdst, $src$$Register);
6403     __ jccb(Assembler::notZero, done);
6404     __ movl(Rdst, BitsPerLong);
6405     __ bind(done);
6406   %}
6407   ins_pipe(ialu_reg);
6408 %}
6409 
6410 
6411 //---------- Population Count Instructions -------------------------------------
6412 
// PopCountI of a register via popcntl; selected when UsePopCountInstruction
// is set. Flags are killed.
6413 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
6414   predicate(UsePopCountInstruction);
6415   match(Set dst (PopCountI src));
6416   effect(KILL cr);
6417 
6418   format %{ "popcnt  $dst, $src" %}
6419   ins_encode %{
6420     __ popcntl($dst$$Register, $src$$Register);
6421   %}
6422   ins_pipe(ialu_reg);
6423 %}
6424 
// PopCountI with the LoadI folded in: popcntl reads its operand directly
// from memory. Selected when UsePopCountInstruction is set.
6425 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6426   predicate(UsePopCountInstruction);
6427   match(Set dst (PopCountI (LoadI mem)));
6428   effect(KILL cr);
6429 
6430   format %{ "popcnt  $dst, $mem" %}
6431   ins_encode %{
6432     __ popcntl($dst$$Register, $mem$$Address);
6433   %}
6434   ins_pipe(ialu_reg);
6435 %}
6436 
6437 // Note: Long.bitCount(long) returns an int.
// PopCountL of a long register via popcntq, producing an int-register
// result. Selected when UsePopCountInstruction is set. Flags are killed.
6438 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
6439   predicate(UsePopCountInstruction);
6440   match(Set dst (PopCountL src));
6441   effect(KILL cr);
6442 
6443   format %{ "popcnt  $dst, $src" %}
6444   ins_encode %{
6445     __ popcntq($dst$$Register, $src$$Register);
6446   %}
6447   ins_pipe(ialu_reg);
6448 %}
6449 
6450 // Note: Long.bitCount(long) returns an int.
// PopCountL with the LoadL folded in: popcntq reads its operand directly
// from memory. Selected when UsePopCountInstruction is set.
6451 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
6452   predicate(UsePopCountInstruction);
6453   match(Set dst (PopCountL (LoadL mem)));
6454   effect(KILL cr);
6455 
6456   format %{ "popcnt  $dst, $mem" %}
6457   ins_encode %{
6458     __ popcntq($dst$$Register, $mem$$Address);
6459   %}
6460   ins_pipe(ialu_reg);
6461 %}
6462 
6463 
6464 //----------MemBar Instructions-----------------------------------------------
6465 // Memory barrier flavors
6466 
// Matches both MemBarAcquire and LoadFence. Emits no code: size(0), empty
// ins_encode, cost 0.
6467 instruct membar_acquire()
6468 %{
6469   match(MemBarAcquire);
6470   match(LoadFence);
6471   ins_cost(0);
6472 
6473   size(0);
6474   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6475   ins_encode();
6476   ins_pipe(empty);
6477 %}
6478 
// Matches MemBarAcquireLock and emits no code. Per the format string, the
// CMPXCHG in the preceding FastLock already provides the ordering.
6479 instruct membar_acquire_lock()
6480 %{
6481   match(MemBarAcquireLock);
6482   ins_cost(0);
6483 
6484   size(0);
6485   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6486   ins_encode();
6487   ins_pipe(empty);
6488 %}
6489 
// Matches both MemBarRelease and StoreFence. Emits no code: size(0), empty
// ins_encode, cost 0.
6490 instruct membar_release()
6491 %{
6492   match(MemBarRelease);
6493   match(StoreFence);
6494   ins_cost(0);
6495 
6496   size(0);
6497   format %{ "MEMBAR-release ! (empty encoding)" %}
6498   ins_encode();
6499   ins_pipe(empty);
6500 %}
6501 
// Matches MemBarReleaseLock and emits no code. Per the format string, the
// FastUnlock that follows makes an explicit barrier unnecessary.
6502 instruct membar_release_lock()
6503 %{
6504   match(MemBarReleaseLock);
6505   ins_cost(0);
6506 
6507   size(0);
6508   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6509   ins_encode();
6510   ins_pipe(empty);
6511 %}
6512 
// Matches MemBarVolatile and emits a StoreLoad barrier through
// MacroAssembler::membar. Flags are killed and the cost is high (400).
// The format template prints a "lock addl" on rsp when os::is_MP() is true
// and an empty-encoding note otherwise; this only affects the printed text.
6513 instruct membar_volatile(rFlagsReg cr) %{
6514   match(MemBarVolatile);
6515   effect(KILL cr);
6516   ins_cost(400);
6517 
6518   format %{
6519     $$template
6520     if (os::is_MP()) {
6521       $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
6522     } else {
6523       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6524     }
6525   %}
6526   ins_encode %{
6527     __ membar(Assembler::StoreLoad);
6528   %}
6529   ins_pipe(pipe_slow);
6530 %}
6531 
// Zero-cost, zero-size alternative for MemBarVolatile, chosen when
// Matcher::post_store_load_barrier(n) reports that the barrier is redundant.
6532 instruct unnecessary_membar_volatile()
6533 %{
6534   match(MemBarVolatile);
6535   predicate(Matcher::post_store_load_barrier(n));
6536   ins_cost(0);
6537 
6538   size(0);
6539   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6540   ins_encode();
6541   ins_pipe(empty);
6542 %}
6543 
// Matches MemBarStoreStore. Emits no code: size(0), empty ins_encode,
// cost 0.
6544 instruct membar_storestore() %{
6545   match(MemBarStoreStore);
6546   ins_cost(0);
6547 
6548   size(0);
6549   format %{ "MEMBAR-storestore (empty encoding)" %}
6550   ins_encode( );
6551   ins_pipe(empty);
6552 %}
6553 
6554 //----------Move Instructions--------------------------------------------------
6555 
// CastX2P: reinterpret a long as a pointer. A register move is emitted only
// when the allocator assigned different registers to dst and src.
6556 instruct castX2P(rRegP dst, rRegL src)
6557 %{
6558   match(Set dst (CastX2P src));
6559 
6560   format %{ "movq    $dst, $src\t# long->ptr" %}
6561   ins_encode %{
6562     if ($dst$$reg != $src$$reg) {
6563       __ movptr($dst$$Register, $src$$Register);
6564     }
6565   %}
6566   ins_pipe(ialu_reg_reg); // XXX
6567 %}
6568 
// CastP2X whose input lives in a narrow (rRegN) register. A register move
// is emitted only when dst and src were assigned different registers.
// NOTE(review): the format text says "ptr -> long" although the source
// operand is a compressed-pointer register -- confirm whether the wording is
// intentional.
6569 instruct castN2X(rRegL dst, rRegN src)
6570 %{
6571   match(Set dst (CastP2X src));
6572 
6573   format %{ "movq    $dst, $src\t# ptr -> long" %}
6574   ins_encode %{
6575     if ($dst$$reg != $src$$reg) {
6576       __ movptr($dst$$Register, $src$$Register);
6577     }
6578   %}
6579   ins_pipe(ialu_reg_reg); // XXX
6580 %}
6581 
// CastP2X: reinterpret a pointer as a long. A register move is emitted only
// when the allocator assigned different registers to dst and src.
6582 instruct castP2X(rRegL dst, rRegP src)
6583 %{
6584   match(Set dst (CastP2X src));
6585 
6586   format %{ "movq    $dst, $src\t# ptr -> long" %}
6587   ins_encode %{
6588     if ($dst$$reg != $src$$reg) {
6589       __ movptr($dst$$Register, $src$$Register);
6590     }
6591   %}
6592   ins_pipe(ialu_reg_reg); // XXX
6593 %}
6594 
6595 // Convert oop into int for vectors alignment masking
// Folds ConvL2I(CastP2X ptr) into a single 32-bit movl, keeping only the
// low 32 bits of the pointer.
6596 instruct convP2I(rRegI dst, rRegP src)
6597 %{
6598   match(Set dst (ConvL2I (CastP2X src)));
6599 
6600   format %{ "movl    $dst, $src\t# ptr -> int" %}
6601   ins_encode %{
6602     __ movl($dst$$Register, $src$$Register);
6603   %}
6604   ins_pipe(ialu_reg_reg); // XXX
6605 %}
6606 
6607 // Convert compressed oop into int for vectors alignment masking
6608 // in case of 32bit oops (heap < 4Gb).
// Folds ConvL2I(CastP2X(DecodeN n)) into a single movl of the compressed
// value, skipping the decode. Only enabled when the narrow oop shift is 0.
6609 instruct convN2I(rRegI dst, rRegN src)
6610 %{
6611   predicate(Universe::narrow_oop_shift() == 0);
6612   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6613 
6614   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
6615   ins_encode %{
6616     __ movl($dst$$Register, $src$$Register);
6617   %}
6618   ins_pipe(ialu_reg_reg); // XXX
6619 %}
6620 
6621 // Convert oop pointer into compressed form
// EncodeP for values whose type is not known to be non-null (pointer kind
// != TypePtr::NotNull). The source is first copied into dst when the
// registers differ, then encoded in place with encode_heap_oop. Flags are
// killed.
6622 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
6623   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
6624   match(Set dst (EncodeP src));
6625   effect(KILL cr);
6626   format %{ "encode_heap_oop $dst,$src" %}
6627   ins_encode %{
6628     Register s = $src$$Register;
6629     Register d = $dst$$Register;
6630     if (s != d) {
6631       __ movq(d, s);
6632     }
6633     __ encode_heap_oop(d);
6634   %}
6635   ins_pipe(ialu_reg_long);
6636 %}
6637 
// EncodeP for values typed as TypePtr::NotNull: uses the two-register
// encode_heap_oop_not_null form directly. Flags are killed.
6638 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6639   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
6640   match(Set dst (EncodeP src));
6641   effect(KILL cr);
6642   format %{ "encode_heap_oop_not_null $dst,$src" %}
6643   ins_encode %{
6644     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
6645   %}
6646   ins_pipe(ialu_reg_long);
6647 %}
6648 
// DecodeN for results that are neither TypePtr::NotNull nor
// TypePtr::Constant. The source is first copied into dst when the registers
// differ, then decoded in place with decode_heap_oop. Flags are killed.
6649 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
6650   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
6651             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
6652   match(Set dst (DecodeN src));
6653   effect(KILL cr);
6654   format %{ "decode_heap_oop $dst,$src" %}
6655   ins_encode %{
6656     Register s = $src$$Register;
6657     Register d = $dst$$Register;
6658     if (s != d) {
6659       __ movq(d, s);
6660     }
6661     __ decode_heap_oop(d);
6662   %}
6663   ins_pipe(ialu_reg_long);
6664 %}
6665 
// DecodeN for results typed TypePtr::NotNull or TypePtr::Constant.
// Picks the two-register decode_heap_oop_not_null form when dst and src
// differ, and the in-place single-register form otherwise. Flags are killed.
6666 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6667   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
6668             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
6669   match(Set dst (DecodeN src));
6670   effect(KILL cr);
6671   format %{ "decode_heap_oop_not_null $dst,$src" %}
6672   ins_encode %{
6673     Register s = $src$$Register;
6674     Register d = $dst$$Register;
6675     if (s != d) {
6676       __ decode_heap_oop_not_null(d, s);
6677     } else {
6678       __ decode_heap_oop_not_null(d);
6679     }
6680   %}
6681   ins_pipe(ialu_reg_long);
6682 %}
6683 
// EncodePKlass: compresses a klass pointer with the two-register
// encode_klass_not_null form. No predicate; flags are killed.
6684 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
6685   match(Set dst (EncodePKlass src));
6686   effect(KILL cr);
6687   format %{ "encode_klass_not_null $dst,$src" %}
6688   ins_encode %{
6689     __ encode_klass_not_null($dst$$Register, $src$$Register);
6690   %}
6691   ins_pipe(ialu_reg_long);
6692 %}
6693 
// DecodeNKlass: decompresses a klass pointer. Picks the two-register
// decode_klass_not_null form when dst and src differ, and the in-place
// single-register form otherwise. No predicate; flags are killed.
6694 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
6695   match(Set dst (DecodeNKlass src));
6696   effect(KILL cr);
6697   format %{ "decode_klass_not_null $dst,$src" %}
6698   ins_encode %{
6699     Register s = $src$$Register;
6700     Register d = $dst$$Register;
6701     if (s != d) {
6702       __ decode_klass_not_null(d, s);
6703     } else {
6704       __ decode_klass_not_null(d);
6705     }
6706   %}
6707   ins_pipe(ialu_reg_long);
6708 %}
6709 
6710 
6711 //----------Conditional Move---------------------------------------------------
6712 // Jump
6713 // dummy instruction for generating temp registers
// Jump-table dispatch for Jump(LShiftL switch_val shift).
// predicate(false) means the matcher never selects this rule. The encoding
// loads the table address ($constantaddress) into the TEMP register dest and
// jumps through [dest + switch_val << shift].
6714 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
6715   match(Jump (LShiftL switch_val shift));
6716   ins_cost(350);
6717   predicate(false);
6718   effect(TEMP dest);
6719 
6720   format %{ "leaq    $dest, [$constantaddress]\n\t"
6721             "jmp     [$dest + $switch_val << $shift]\n\t" %}
6722   ins_encode %{
6723     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6724     // to do that and the compiler is using that register as one it can allocate.
6725     // So we build it all by hand.
6726     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
6727     // ArrayAddress dispatch(table, index);
6728     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
6729     __ lea($dest$$Register, $constantaddress);
6730     __ jmp(dispatch);
6731   %}
6732   ins_pipe(pipe_jmp);
6733 %}
6734 
// Jump-table dispatch for Jump(AddL (LShiftL switch_val shift) offset).
// Loads the table address ($constantaddress) into the TEMP register dest and
// jumps through [dest + switch_val << shift + offset]; the immL32 offset is
// narrowed to int for the Address displacement.
6735 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
6736   match(Jump (AddL (LShiftL switch_val shift) offset));
6737   ins_cost(350);
6738   effect(TEMP dest);
6739 
6740   format %{ "leaq    $dest, [$constantaddress]\n\t"
6741             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
6742   ins_encode %{
6743     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6744     // to do that and the compiler is using that register as one it can allocate.
6745     // So we build it all by hand.
6746     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6747     // ArrayAddress dispatch(table, index);
6748     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
6749     __ lea($dest$$Register, $constantaddress);
6750     __ jmp(dispatch);
6751   %}
6752   ins_pipe(pipe_jmp);
6753 %}
6754 
// Table jump on an unscaled switch_val: the index is used with
// Address::times_1 and no displacement.
6755 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
6756   match(Jump switch_val);
6757   ins_cost(350);
6758   effect(TEMP dest); // dest is a scratch register that receives the table address
6759
6760   format %{ "leaq    $dest, [$constantaddress]\n\t"
6761             "jmp     [$dest + $switch_val]\n\t" %}
6762   ins_encode %{
6763     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
6764     // to do that and the compiler is using that register as one it can allocate.
6765     // So we build it all by hand.
6766     // Address index(noreg, switch_reg, Address::times_1);
6767     // ArrayAddress dispatch(table, index);
6768     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1); // base = dest, index = switch_val, scale 1
6769     __ lea($dest$$Register, $constantaddress); // dest = $constantaddress (the table, per the note above)
6770     __ jmp(dispatch); // jump through [dest + switch_val]
6771   %}
6772   ins_pipe(pipe_jmp);
6773 %}
6774 
6775 // Conditional move: int register into int register, flags from a signed compare (cmpOp / rFlagsReg).
// dst is both an input and the result (it appears in the match tree and as the Set target).
6776 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
6777 %{
6778   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6779
6780   ins_cost(200); // XXX
6781   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6782   opcode(0x0F, 0x40); // enc_cmov(cop) is defined elsewhere; presumably it merges the condition into these bytes
6783   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6784   ins_pipe(pipe_cmov_reg);
6785 %}
6786 
// Conditional move: int register into int register, flags from an unsigned
// compare (cmpOpU / rFlagsRegU). Same encoding classes as cmovI_reg.
6787 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
6788   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6789
6790   ins_cost(200); // XXX
6791   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6792   opcode(0x0F, 0x40);
6793   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6794   ins_pipe(pipe_cmov_reg);
6795 %}
6796 
// Int conditional move for the cmpOpUCF / rFlagsRegUCF flavor of the flags.
// Has no encoding of its own: it expands to cmovI_regU with the same operands.
6797 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
6798   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6799   ins_cost(200);
6800   expand %{
6801     cmovI_regU(cop, cr, dst, src);
6802   %}
6803 %}
6804 
6805 // Conditional move: int loaded from memory (LoadI src) into an int register, signed-compare flags.
// Costed at 250, above the 200 of the register form cmovI_reg.
6806 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
6807   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6808
6809   ins_cost(250); // XXX
6810   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
6811   opcode(0x0F, 0x40);
6812   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6813   ins_pipe(pipe_cmov_mem);
6814 %}
6815 
6816 // Conditional move: int loaded from memory (LoadI src) into an int register, unsigned-compare flags.
// Same encoding classes as cmovI_mem; only the flag/condition operand types differ.
6817 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
6818 %{
6819   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6820
6821   ins_cost(250); // XXX
6822   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
6823   opcode(0x0F, 0x40);
6824   ins_encode(REX_reg_mem(dst, src), enc_cmov(cop), reg_mem(dst, src));
6825   ins_pipe(pipe_cmov_mem);
6826 %}
6827 
// Memory-source int conditional move for the cmpOpUCF / rFlagsRegUCF flavor
// of the flags. Expands to cmovI_memU with the same operands.
6828 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
6829   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6830   ins_cost(250);
6831   expand %{
6832     cmovI_memU(cop, cr, dst, src);
6833   %}
6834 %}
6835 
6836 // Conditional move: compressed pointer (rRegN) register to register, signed-compare flags.
// Uses the same non-wide encoding classes and cmovl mnemonic as cmovI_reg.
6837 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
6838 %{
6839   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6840
6841   ins_cost(200); // XXX
6842   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
6843   opcode(0x0F, 0x40);
6844   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6845   ins_pipe(pipe_cmov_reg);
6846 %}
6847 
6848 // Conditional move: compressed pointer (rRegN) register to register, unsigned-compare flags.
// Same encoding classes as cmovN_reg; only the flag/condition operand types differ.
6849 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
6850 %{
6851   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6852
6853   ins_cost(200); // XXX
6854   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
6855   opcode(0x0F, 0x40);
6856   ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
6857   ins_pipe(pipe_cmov_reg);
6858 %}
6859 
// Compressed-pointer conditional move for the cmpOpUCF / rFlagsRegUCF flavor
// of the flags. Expands to cmovN_regU with the same operands.
6860 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
6861   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   expand %{
6864     cmovN_regU(cop, cr, dst, src);
6865   %}
6866 %}
6867 
6868 // Conditional move: pointer register to pointer register, signed-compare flags.
// Differs from the int form by using REX_reg_reg_wide and the cmovq mnemonic.
6869 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
6870 %{
6871   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6872
6873   ins_cost(200); // XXX
6874   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
6875   opcode(0x0F, 0x40);
6876   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6877   ins_pipe(pipe_cmov_reg);  // XXX
6878 %}
6879 
6880 // Conditional move: pointer register to pointer register, unsigned-compare flags.
// Same encoding classes as cmovP_reg; only the flag/condition operand types differ.
6881 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
6882 %{
6883   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6884
6885   ins_cost(200); // XXX
6886   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
6887   opcode(0x0F, 0x40);
6888   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6889   ins_pipe(pipe_cmov_reg); // XXX
6890 %}
6891 
// Pointer conditional move for the cmpOpUCF / rFlagsRegUCF flavor of the
// flags. Expands to cmovP_regU with the same operands.
6892 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
6893   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6894   ins_cost(200);
6895   expand %{
6896     cmovP_regU(cop, cr, dst, src);
6897   %}
6898 %}
6899 
6900 // DISABLED: Requires the ADLC to emit a bottom_type call that
6901 // correctly meets the two pointer arguments; one is an incoming
6902 // register but the other is a memory operand.  ALSO appears to
6903 // be buggy with implicit null checks.
6904 //
6905 //// Conditional move
6906 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
6907 //%{
6908 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6909 //  ins_cost(250);
6910 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6911 //  opcode(0x0F,0x40);
6912 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6913 //  ins_pipe( pipe_cmov_mem );
6914 //%}
6915 //
6916 //// Conditional move
6917 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
6918 //%{
6919 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6920 //  ins_cost(250);
6921 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6922 //  opcode(0x0F,0x40);
6923 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
6924 //  ins_pipe( pipe_cmov_mem );
6925 //%}
6926 
// Conditional move: long register to long register, signed-compare flags.
// Uses the wide REX encoding class and the cmovq mnemonic.
6927 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
6928 %{
6929   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6930
6931   ins_cost(200); // XXX
6932   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6933   opcode(0x0F, 0x40);
6934   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6935   ins_pipe(pipe_cmov_reg);  // XXX
6936 %}
6937 
// Conditional move: long loaded from memory (LoadL src) into a long register,
// signed-compare flags.
// NOTE(review): the int memory form (cmovI_mem) uses ins_cost(250); here the
// memory form costs the same 200 as cmovL_reg -- confirm this is intended.
6938 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
6939 %{
6940   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6941
6942   ins_cost(200); // XXX
6943   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
6944   opcode(0x0F, 0x40);
6945   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6946   ins_pipe(pipe_cmov_mem);  // XXX
6947 %}
6948 
// Conditional move: long register to long register, unsigned-compare flags.
// Same encoding classes as cmovL_reg; only the flag/condition operand types differ.
6949 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
6950 %{
6951   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6952
6953   ins_cost(200); // XXX
6954   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6955   opcode(0x0F, 0x40);
6956   ins_encode(REX_reg_reg_wide(dst, src), enc_cmov(cop), reg_reg(dst, src));
6957   ins_pipe(pipe_cmov_reg); // XXX
6958 %}
6959 
// Long conditional move for the cmpOpUCF / rFlagsRegUCF flavor of the flags.
// Expands to cmovL_regU with the same operands.
6960 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
6961   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6962   ins_cost(200);
6963   expand %{
6964     cmovL_regU(cop, cr, dst, src);
6965   %}
6966 %}
6967 
// Conditional move: long loaded from memory (LoadL src) into a long register,
// unsigned-compare flags. Same encoding classes as cmovL_mem.
6968 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
6969 %{
6970   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6971
6972   ins_cost(200); // XXX
6973   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
6974   opcode(0x0F, 0x40);
6975   ins_encode(REX_reg_mem_wide(dst, src), enc_cmov(cop), reg_mem(dst, src));
6976   ins_pipe(pipe_cmov_mem); // XXX
6977 %}
6978 
// Memory-source long conditional move for the cmpOpUCF / rFlagsRegUCF flavor
// of the flags. Expands to cmovL_memU with the same operands.
6979 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
6980   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
6981   ins_cost(200);
6982   expand %{
6983     cmovL_memU(cop, cr, dst, src);
6984   %}
6985 %}
6986 
// Conditional move of a float register, signed-compare flags. Implemented as
// a short conditional branch around a register move, not as a cmov.
6987 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
6988 %{
6989   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6990
6991   ins_cost(200); // XXX
6992   format %{ "jn$cop    skip\t# signed cmove float\n\t"
6993             "movss     $dst, $src\n"
6994     "skip:" %}
6995   ins_encode %{
6996     Label Lskip;
6997     // Invert sense of branch from sense of CMOV
6998     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // flipping the low bit of the condition code skips the move when cop does not hold
6999     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // reached only when cop holds
7000     __ bind(Lskip);
7001   %}
7002   ins_pipe(pipe_slow);
7003 %}
7004 
7005 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
7006 // %{
7007 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
7008 
7009 //   ins_cost(200); // XXX
7010 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
7011 //             "movss     $dst, $src\n"
7012 //     "skip:" %}
7013 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
7014 //   ins_pipe(pipe_slow);
7015 // %}
7016 
// Conditional move of a float register, unsigned-compare flags. Same
// branch-around-a-move sequence as cmovF_reg.
7017 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
7018 %{
7019   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7020
7021   ins_cost(200); // XXX
7022   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
7023             "movss     $dst, $src\n"
7024     "skip:" %}
7025   ins_encode %{
7026     Label Lskip;
7027     // Invert sense of branch from sense of CMOV
7028     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // skip the move when cop does not hold
7029     __ movflt($dst$$XMMRegister, $src$$XMMRegister); // reached only when cop holds
7030     __ bind(Lskip);
7031   %}
7032   ins_pipe(pipe_slow);
7033 %}
7034 
// Float conditional move for the cmpOpUCF / rFlagsRegUCF flavor of the flags.
// Expands to cmovF_regU with the same operands.
7035 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
7036   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7037   ins_cost(200);
7038   expand %{
7039     cmovF_regU(cop, cr, dst, src);
7040   %}
7041 %}
7042 
// Conditional move of a double register, signed-compare flags. Implemented as
// a short conditional branch around a register move, not as a cmov.
7043 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
7044 %{
7045   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7046
7047   ins_cost(200); // XXX
7048   format %{ "jn$cop    skip\t# signed cmove double\n\t"
7049             "movsd     $dst, $src\n"
7050     "skip:" %}
7051   ins_encode %{
7052     Label Lskip;
7053     // Invert sense of branch from sense of CMOV
7054     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // skip the move when cop does not hold
7055     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // reached only when cop holds
7056     __ bind(Lskip);
7057   %}
7058   ins_pipe(pipe_slow);
7059 %}
7060 
// Conditional move of a double register, unsigned-compare flags. Same
// branch-around-a-move sequence as cmovD_reg.
7061 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
7062 %{
7063   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7064
7065   ins_cost(200); // XXX
7066   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
7067             "movsd     $dst, $src\n"
7068     "skip:" %}
7069   ins_encode %{
7070     Label Lskip;
7071     // Invert sense of branch from sense of CMOV
7072     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); // skip the move when cop does not hold
7073     __ movdbl($dst$$XMMRegister, $src$$XMMRegister); // reached only when cop holds
7074     __ bind(Lskip);
7075   %}
7076   ins_pipe(pipe_slow);
7077 %}
7078 
// Double conditional move for the cmpOpUCF / rFlagsRegUCF flavor of the flags.
// Expands to cmovD_regU with the same operands.
7079 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
7080   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7081   ins_cost(200);
7082   expand %{
7083     cmovD_regU(cop, cr, dst, src);
7084   %}
7085 %}
7086 
7087 //----------Arithmetic Instructions--------------------------------------------
7088 //----------Addition Instructions----------------------------------------------
7089 
// Int add, register += register (addl). dst is both an input and the result.
7090 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7091 %{
7092   match(Set dst (AddI dst src));
7093   effect(KILL cr); // the flags register is declared clobbered
7094
7095   format %{ "addl    $dst, $src\t# int" %}
7096   opcode(0x03);
7097   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7098   ins_pipe(ialu_reg_reg);
7099 %}
7100 
// Int add, register += immediate (addl). OpcSErm and Con8or32 are defined
// elsewhere; their names suggest an 8- vs 32-bit immediate choice -- confirm there.
7101 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7102 %{
7103   match(Set dst (AddI dst src));
7104   effect(KILL cr); // the flags register is declared clobbered
7105
7106   format %{ "addl    $dst, $src\t# int" %}
7107   opcode(0x81, 0x00); /* /0 id */
7108   ins_encode(OpcSErm(dst, src), Con8or32(src));
7109   ins_pipe( ialu_reg );
7110 %}
7111 
// Int add, register += value loaded from memory (addl with a memory source).
// Folds the LoadI into the add; costed at 125.
7112 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7113 %{
7114   match(Set dst (AddI dst (LoadI src)));
7115   effect(KILL cr); // the flags register is declared clobbered
7116
7117   ins_cost(125); // XXX
7118   format %{ "addl    $dst, $src\t# int" %}
7119   opcode(0x03);
7120   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7121   ins_pipe(ialu_reg_mem);
7122 %}
7123 
// Int add to memory, [dst] += register. Matches a load/add/store of the same
// memory operand and folds it into a single addl with a memory destination.
7124 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7125 %{
7126   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7127   effect(KILL cr); // the flags register is declared clobbered
7128
7129   ins_cost(150); // XXX
7130   format %{ "addl    $dst, $src\t# int" %}
7131   opcode(0x01); /* Opcode 01 /r */
7132   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); // the register operand is passed first, the memory operand second
7133   ins_pipe(ialu_mem_reg);
7134 %}
7135 
// Int add to memory, [dst] += immediate. Matches a load/add/store of the same
// memory operand and folds it into a single addl.
7136 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
7137 %{
7138   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7139   effect(KILL cr); // the flags register is declared clobbered
7140
7141   ins_cost(125); // XXX
7142   format %{ "addl    $dst, $src\t# int" %}
7143   opcode(0x81); /* Opcode 81 /0 id */
7144   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 extension named in the opcode comment
7145   ins_pipe(ialu_mem_imm);
7146 %}
7147 
// Int increment of a register (incl). Selected only when UseIncDec is set.
// src is an immI1 operand (defined elsewhere, presumably the constant 1); it
// is matched but not used by the encoding.
7148 instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
7149 %{
7150   predicate(UseIncDec);
7151   match(Set dst (AddI dst src));
7152   effect(KILL cr); // the flags register is declared clobbered
7153
7154   format %{ "incl    $dst\t# int" %}
7155   opcode(0xFF, 0x00); // FF /0
7156   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7157   ins_pipe(ialu_reg);
7158 %}
7159 
// Int increment of a memory location (incl). Selected only when UseIncDec is
// set. Matches a load/add/store of the same memory operand.
7160 instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
7161 %{
7162   predicate(UseIncDec);
7163   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7164   effect(KILL cr); // the flags register is declared clobbered
7165
7166   ins_cost(125); // XXX
7167   format %{ "incl    $dst\t# int" %}
7168   opcode(0xFF); /* Opcode FF /0 */
7169   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x00, dst));
7170   ins_pipe(ialu_mem_imm);
7171 %}
7172 
7173 // XXX why does that use AddI
// Int decrement of a register (decl). Selected only when UseIncDec is set.
// The rule matches AddI with an immI_M1 operand (defined elsewhere,
// presumably the constant -1), i.e. the decrement is expressed as "add -1".
7174 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
7175 %{
7176   predicate(UseIncDec);
7177   match(Set dst (AddI dst src));
7178   effect(KILL cr); // the flags register is declared clobbered
7179
7180   format %{ "decl    $dst\t# int" %}
7181   opcode(0xFF, 0x01); // FF /1
7182   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
7183   ins_pipe(ialu_reg);
7184 %}
7185 
7186 // XXX why does that use AddI
// Int decrement of a memory location (decl). Selected only when UseIncDec is
// set. Matches a load/add/store of the same memory operand with an immI_M1
// addend (defined elsewhere, presumably the constant -1).
7187 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
7188 %{
7189   predicate(UseIncDec);
7190   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7191   effect(KILL cr); // the flags register is declared clobbered
7192
7193   ins_cost(125); // XXX
7194   format %{ "decl    $dst\t# int" %}
7195   opcode(0xFF); /* Opcode FF /1 */
7196   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(0x01, dst));
7197   ins_pipe(ialu_mem_imm);
7198 %}
7199 
// Int add of register and immediate into a separate destination, via leal.
// Unlike the addl rules, dst is not an input and no flags effect is declared.
// Costed at 110.
7200 instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1)
7201 %{
7202   match(Set dst (AddI src0 src1));
7203
7204   ins_cost(110);
7205   format %{ "addr32 leal $dst, [$src0 + $src1]\t# int" %}
7206   opcode(0x8D); /* 0x8D /r */
7207   ins_encode(Opcode(0x67), REX_reg_reg(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX  (0x67 is emitted first; it is the "addr32" shown in the format)
7208   ins_pipe(ialu_reg_reg);
7209 %}
7210 
// Long add, register += register (addq). Same shape as addI_rReg but with
// the wide REX encoding class.
7211 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7212 %{
7213   match(Set dst (AddL dst src));
7214   effect(KILL cr); // the flags register is declared clobbered
7215
7216   format %{ "addq    $dst, $src\t# long" %}
7217   opcode(0x03);
7218   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7219   ins_pipe(ialu_reg_reg);
7220 %}
7221 
// Long add, register += immediate (addq). The immediate operand is immL32
// (defined elsewhere; presumably a long constant that fits in 32 bits).
7222 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7223 %{
7224   match(Set dst (AddL dst src));
7225   effect(KILL cr); // the flags register is declared clobbered
7226
7227   format %{ "addq    $dst, $src\t# long" %}
7228   opcode(0x81, 0x00); /* /0 id */
7229   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7230   ins_pipe( ialu_reg );
7231 %}
7232 
// Long add, register += value loaded from memory (addq with a memory source).
// Folds the LoadL into the add; costed at 125.
7233 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7234 %{
7235   match(Set dst (AddL dst (LoadL src)));
7236   effect(KILL cr); // the flags register is declared clobbered
7237
7238   ins_cost(125); // XXX
7239   format %{ "addq    $dst, $src\t# long" %}
7240   opcode(0x03);
7241   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7242   ins_pipe(ialu_reg_mem);
7243 %}
7244 
// Long add to memory, [dst] += register. Matches a load/add/store of the same
// memory operand and folds it into a single addq with a memory destination.
7245 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7246 %{
7247   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7248   effect(KILL cr); // the flags register is declared clobbered
7249
7250   ins_cost(150); // XXX
7251   format %{ "addq    $dst, $src\t# long" %}
7252   opcode(0x01); /* Opcode 01 /r */
7253   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); // the register operand is passed first, the memory operand second
7254   ins_pipe(ialu_mem_reg);
7255 %}
7256 
// Long add to memory, [dst] += immediate. Matches a load/add/store of the
// same memory operand and folds it into a single addq.
7257 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7258 %{
7259   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7260   effect(KILL cr); // the flags register is declared clobbered
7261
7262   ins_cost(125); // XXX
7263   format %{ "addq    $dst, $src\t# long" %}
7264   opcode(0x81); /* Opcode 81 /0 id */
7265   ins_encode(REX_mem_wide(dst),
7266              OpcSE(src), RM_opc_mem(0x00, dst), Con8or32(src)); // 0x00 is the /0 extension named in the opcode comment
7267   ins_pipe(ialu_mem_imm);
7268 %}
7269 
// Long increment of a register (incq). Selected only when UseIncDec is set.
// dst is declared rRegL so that its operand class agrees with the AddL match
// rule, the wide (REX_reg_wide) encoding, and the sibling decL_rReg rule.
// src is an immL1 operand (defined elsewhere, presumably the constant 1); it
// is matched but not used by the encoding.
7270 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
7271 %{
7272   predicate(UseIncDec);
7273   match(Set dst (AddL dst src));
7274   effect(KILL cr); // the flags register is declared clobbered
7275
7276   format %{ "incq    $dst\t# long" %}
7277   opcode(0xFF, 0x00); // FF /0
7278   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7279   ins_pipe(ialu_reg);
7280 %}
7281 
// Long increment of a memory location (incq). Selected only when UseIncDec is
// set. Matches a load/add/store of the same memory operand.
7282 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
7283 %{
7284   predicate(UseIncDec);
7285   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7286   effect(KILL cr); // the flags register is declared clobbered
7287
7288   ins_cost(125); // XXX
7289   format %{ "incq    $dst\t# long" %}
7290   opcode(0xFF); /* Opcode FF /0 */
7291   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x00, dst));
7292   ins_pipe(ialu_mem_imm);
7293 %}
7294 
7295 // XXX why does that use AddL
// Long decrement of a register (decq). Selected only when UseIncDec is set.
// The rule matches AddL with an immL_M1 operand (defined elsewhere,
// presumably the constant -1), i.e. the decrement is expressed as "add -1".
7296 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
7297 %{
7298   predicate(UseIncDec);
7299   match(Set dst (AddL dst src));
7300   effect(KILL cr); // the flags register is declared clobbered
7301
7302   format %{ "decq    $dst\t# long" %}
7303   opcode(0xFF, 0x01); // FF /1
7304   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
7305   ins_pipe(ialu_reg);
7306 %}
7307 
7308 // XXX why does that use AddL
// Long decrement of a memory location (decq). Selected only when UseIncDec is
// set. Matches a load/add/store of the same memory operand with an immL_M1
// addend (defined elsewhere, presumably the constant -1).
7309 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
7310 %{
7311   predicate(UseIncDec);
7312   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
7313   effect(KILL cr); // the flags register is declared clobbered
7314
7315   ins_cost(125); // XXX
7316   format %{ "decq    $dst\t# long" %}
7317   opcode(0xFF); /* Opcode FF /1 */
7318   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(0x01, dst));
7319   ins_pipe(ialu_mem_imm);
7320 %}
7321 
// Long add of register and immediate into a separate destination, via leaq.
// Unlike the addq rules, dst is not an input and no flags effect is declared.
// Costed at 110.
7322 instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1)
7323 %{
7324   match(Set dst (AddL src0 src1));
7325
7326   ins_cost(110);
7327   format %{ "leaq    $dst, [$src0 + $src1]\t# long" %}
7328   opcode(0x8D); /* 0x8D /r */
7329   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1)); // XXX
7330   ins_pipe(ialu_reg_reg);
7331 %}
7332 
// Pointer add, pointer register += long register (addq). Same encoding
// classes as addL_rReg.
7333 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
7334 %{
7335   match(Set dst (AddP dst src));
7336   effect(KILL cr); // the flags register is declared clobbered
7337
7338   format %{ "addq    $dst, $src\t# ptr" %}
7339   opcode(0x03);
7340   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7341   ins_pipe(ialu_reg_reg);
7342 %}
7343 
// Pointer add, pointer register += immediate (addq). Same encoding classes
// as addL_rReg_imm.
7344 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
7345 %{
7346   match(Set dst (AddP dst src));
7347   effect(KILL cr); // the flags register is declared clobbered
7348
7349   format %{ "addq    $dst, $src\t# ptr" %}
7350   opcode(0x81, 0x00); /* /0 id */
7351   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7352   ins_pipe( ialu_reg );
7353 %}
7354 
7355 // XXX addP mem ops ????
7356 
// Pointer add of register and immediate into a separate destination, via
// leaq. dst is not an input and no flags effect is declared. Costed at 110.
7357 instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1)
7358 %{
7359   match(Set dst (AddP src0 src1));
7360
7361   ins_cost(110);
7362   format %{ "leaq    $dst, [$src0 + $src1]\t# ptr" %}
7363   opcode(0x8D); /* 0x8D /r */
7364   ins_encode(REX_reg_reg_wide(dst, src0), OpcP, reg_lea(dst, src0, src1));// XXX
7365   ins_pipe(ialu_reg_reg);
7366 %}
7367 
// CheckCastPP on a pointer register: the rule declares size(0) and an empty
// encoding, so no machine code is emitted for it.
7368 instruct checkCastPP(rRegP dst)
7369 %{
7370   match(Set dst (CheckCastPP dst));
7371
7372   size(0);
7373   format %{ "# checkcastPP of $dst" %}
7374   ins_encode(/* empty encoding */);
7375   ins_pipe(empty);
7376 %}
7377 
// CastPP on a pointer register: the rule declares size(0) and an empty
// encoding, so no machine code is emitted for it.
7378 instruct castPP(rRegP dst)
7379 %{
7380   match(Set dst (CastPP dst));
7381
7382   size(0);
7383   format %{ "# castPP of $dst" %}
7384   ins_encode(/* empty encoding */);
7385   ins_pipe(empty);
7386 %}
7387 
// CastII on an int register: size(0) and an empty encoding, so no machine
// code is emitted. Unlike checkCastPP/castPP it also declares ins_cost(0).
7388 instruct castII(rRegI dst)
7389 %{
7390   match(Set dst (CastII dst));
7391
7392   size(0);
7393   format %{ "# castII of $dst" %}
7394   ins_encode(/* empty encoding */);
7395   ins_cost(0);
7396   ins_pipe(empty);
7397 %}
7398 
7399 // LoadP-locked same as a regular LoadP when used with compare-swap
// Emitted as a plain 64-bit movq load from $mem into $dst (see the format string).
7400 instruct loadPLocked(rRegP dst, memory mem)
7401 %{
7402   match(Set dst (LoadPLocked mem));
7403
7404   ins_cost(125); // XXX
7405   format %{ "movq    $dst, $mem\t# ptr locked" %}
7406   opcode(0x8B);
7407   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
7408   ins_pipe(ialu_reg_mem); // XXX
7409 %}
7410 
7411 // Conditional-store of the updated heap-top.
7412 // Used during allocation of the shared heap.
7413 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// The result of the rule is the flags register (Set cr ...); oldval is pinned
// to rax by its operand class (rax_RegP).
// NOTE(review): storeIConditional and storeLConditional declare
// effect(KILL oldval); this rule declares no effect on oldval -- confirm that
// the omission is intentional.
7414
7415 instruct storePConditional(memory heap_top_ptr,
7416                            rax_RegP oldval, rRegP newval,
7417                            rFlagsReg cr)
7418 %{
7419   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7420
7421   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
7422             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
7423   opcode(0x0F, 0xB1);
7424   ins_encode(lock_prefix,
7425              REX_reg_mem_wide(newval, heap_top_ptr),
7426              OpcP, OpcS,
7427              reg_mem(newval, heap_top_ptr));
7428   ins_pipe(pipe_cmpxchg);
7429 %}
7430 
7431 // Conditional-store of an int value.
7432 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register (Set cr ...); oldval is pinned
// to rax by its operand class (rax_RegI) and is declared killed.
7433 instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
7434 %{
7435   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7436   effect(KILL oldval);
7437
7438   format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7439   opcode(0x0F, 0xB1);
7440   ins_encode(lock_prefix,
7441              REX_reg_mem(newval, mem),
7442              OpcP, OpcS,
7443              reg_mem(newval, mem));
7444   ins_pipe(pipe_cmpxchg);
7445 %}
7446 
7447 // Conditional-store of a long value.
7448 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
// The result of the rule is the flags register (Set cr ...); oldval is pinned
// to rax by its operand class (rax_RegL) and is declared killed. Uses the
// wide REX encoding class (cmpxchgq).
7449 instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
7450 %{
7451   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7452   effect(KILL oldval);
7453
7454   format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
7455   opcode(0x0F, 0xB1);
7456   ins_encode(lock_prefix,
7457              REX_reg_mem_wide(newval, mem),
7458              OpcP, OpcS,
7459              reg_mem(newval, mem));
7460   ins_pipe(pipe_cmpxchg);
7461 %}
7462 
7463 
7464 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int result in res. Handles both
// CompareAndSwapP and WeakCompareAndSwapP with the same code: a locked
// cmpxchgq, then sete + movzbl to turn the outcome into a value in res.
// Flags and oldval (rax) are declared killed.
7465 instruct compareAndSwapP(rRegI res,
7466                          memory mem_ptr,
7467                          rax_RegP oldval, rRegP newval,
7468                          rFlagsReg cr)
7469 %{
7470   predicate(VM_Version::supports_cx8());
7471   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7472   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7473   effect(KILL cr, KILL oldval);
7474
7475   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7476             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7477             "sete    $res\n\t"
7478             "movzbl  $res, $res" %}
7479   opcode(0x0F, 0xB1);
7480   ins_encode(lock_prefix,
7481              REX_reg_mem_wide(newval, mem_ptr),
7482              OpcP, OpcS,
7483              reg_mem(newval, mem_ptr),
7484              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7485              REX_reg_breg(res, res), // movzbl
7486              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7487   ins_pipe( pipe_cmpxchg );
7488 %}
7489 
// Long compare-and-swap producing an int result in res. Handles both
// CompareAndSwapL and WeakCompareAndSwapL with the same code: a locked
// cmpxchgq, then sete + movzbl to turn the outcome into a value in res.
// Flags and oldval (rax) are declared killed.
7490 instruct compareAndSwapL(rRegI res,
7491                          memory mem_ptr,
7492                          rax_RegL oldval, rRegL newval,
7493                          rFlagsReg cr)
7494 %{
7495   predicate(VM_Version::supports_cx8());
7496   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7497   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7498   effect(KILL cr, KILL oldval);
7499
7500   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7501             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7502             "sete    $res\n\t"
7503             "movzbl  $res, $res" %}
7504   opcode(0x0F, 0xB1);
7505   ins_encode(lock_prefix,
7506              REX_reg_mem_wide(newval, mem_ptr),
7507              OpcP, OpcS,
7508              reg_mem(newval, mem_ptr),
7509              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7510              REX_reg_breg(res, res), // movzbl
7511              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7512   ins_pipe( pipe_cmpxchg );
7513 %}
7514 
// Int compare-and-swap producing an int result in res. Handles both
// CompareAndSwapI and WeakCompareAndSwapI with the same code: a locked
// cmpxchgl (non-wide REX class), then sete + movzbl into res.
// Flags and oldval (rax) are declared killed. No predicate is attached.
7515 instruct compareAndSwapI(rRegI res,
7516                          memory mem_ptr,
7517                          rax_RegI oldval, rRegI newval,
7518                          rFlagsReg cr)
7519 %{
7520   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7521   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7522   effect(KILL cr, KILL oldval);
7523
7524   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7525             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7526             "sete    $res\n\t"
7527             "movzbl  $res, $res" %}
7528   opcode(0x0F, 0xB1);
7529   ins_encode(lock_prefix,
7530              REX_reg_mem(newval, mem_ptr),
7531              OpcP, OpcS,
7532              reg_mem(newval, mem_ptr),
7533              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7534              REX_reg_breg(res, res), // movzbl
7535              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7536   ins_pipe( pipe_cmpxchg );
7537 %}
7538 
// Byte compare-and-swap producing an int result in res. Handles both
// CompareAndSwapB and WeakCompareAndSwapB. Differs from the int form by the
// secondary opcode 0xB0 and the REX_breg_mem encoding class (cmpxchgb).
// Flags and oldval (rax) are declared killed.
7539 instruct compareAndSwapB(rRegI res,
7540                          memory mem_ptr,
7541                          rax_RegI oldval, rRegI newval,
7542                          rFlagsReg cr)
7543 %{
7544   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7545   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7546   effect(KILL cr, KILL oldval);
7547
7548   format %{ "cmpxchgb $mem_ptr,$newval\t# "
7549             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7550             "sete    $res\n\t"
7551             "movzbl  $res, $res" %}
7552   opcode(0x0F, 0xB0);
7553   ins_encode(lock_prefix,
7554              REX_breg_mem(newval, mem_ptr),
7555              OpcP, OpcS,
7556              reg_mem(newval, mem_ptr),
7557              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7558              REX_reg_breg(res, res), // movzbl
7559              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7560   ins_pipe( pipe_cmpxchg );
7561 %}
7562 
// Short compare-and-swap producing an int result in res. Handles both
// CompareAndSwapS and WeakCompareAndSwapS. Same opcode bytes as the int form,
// with SizePrefix emitted after the lock prefix (SizePrefix is defined
// elsewhere; presumably the 16-bit operand-size prefix, matching cmpxchgw).
// Flags and oldval (rax) are declared killed.
7563 instruct compareAndSwapS(rRegI res,
7564                          memory mem_ptr,
7565                          rax_RegI oldval, rRegI newval,
7566                          rFlagsReg cr)
7567 %{
7568   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7569   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7570   effect(KILL cr, KILL oldval);
7571
7572   format %{ "cmpxchgw $mem_ptr,$newval\t# "
7573             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7574             "sete    $res\n\t"
7575             "movzbl  $res, $res" %}
7576   opcode(0x0F, 0xB1);
7577   ins_encode(lock_prefix,
7578              SizePrefix,
7579              REX_reg_mem(newval, mem_ptr),
7580              OpcP, OpcS,
7581              reg_mem(newval, mem_ptr),
7582              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7583              REX_reg_breg(res, res), // movzbl
7584              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7585   ins_pipe( pipe_cmpxchg );
7586 %}
7587 
// Compressed-pointer compare-and-swap producing an int result in res. Handles
// both CompareAndSwapN and WeakCompareAndSwapN. Uses the same non-wide
// cmpxchgl encoding as compareAndSwapI, then sete + movzbl into res.
// Flags and oldval (rax) are declared killed.
7588 instruct compareAndSwapN(rRegI res,
7589                           memory mem_ptr,
7590                           rax_RegN oldval, rRegN newval,
7591                           rFlagsReg cr) %{
7592   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
7593   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
7594   effect(KILL cr, KILL oldval);
7595
7596   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7597             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
7598             "sete    $res\n\t"
7599             "movzbl  $res, $res" %}
7600   opcode(0x0F, 0xB1);
7601   ins_encode(lock_prefix,
7602              REX_reg_mem(newval, mem_ptr),
7603              OpcP, OpcS,
7604              reg_mem(newval, mem_ptr),
7605              REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
7606              REX_reg_breg(res, res), // movzbl
7607              Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
7608   ins_pipe( pipe_cmpxchg );
7609 %}
7610 
// Byte compare-and-exchange. The result of the rule is oldval itself
// (Set oldval ...), which is pinned to rax by its operand class; only the
// flags are declared killed. Emits a locked cmpxchgb (secondary opcode 0xB0).
7611 instruct compareAndExchangeB(
7612                          memory mem_ptr,
7613                          rax_RegI oldval, rRegI newval,
7614                          rFlagsReg cr)
7615 %{
7616   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7617   effect(KILL cr);
7618
7619   format %{ "cmpxchgb $mem_ptr,$newval\t# "
7620             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7621   opcode(0x0F, 0xB0);
7622   ins_encode(lock_prefix,
7623              REX_breg_mem(newval, mem_ptr),
7624              OpcP, OpcS,
7625              reg_mem(newval, mem_ptr) // lock cmpxchg
7626              );
7627   ins_pipe( pipe_cmpxchg );
7628 %}
7629 
// Short compare-and-exchange. The result of the rule is oldval itself
// (Set oldval ...), pinned to rax by its operand class; only the flags are
// declared killed. Emits a locked cmpxchgw: SizePrefix (defined elsewhere,
// presumably the 16-bit operand-size prefix) follows the lock prefix.
7630 instruct compareAndExchangeS(
7631                          memory mem_ptr,
7632                          rax_RegI oldval, rRegI newval,
7633                          rFlagsReg cr)
7634 %{
7635   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7636   effect(KILL cr);
7637
7638   format %{ "cmpxchgw $mem_ptr,$newval\t# "
7639             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7640   opcode(0x0F, 0xB1);
7641   ins_encode(lock_prefix,
7642              SizePrefix,
7643              REX_reg_mem(newval, mem_ptr),
7644              OpcP, OpcS,
7645              reg_mem(newval, mem_ptr) // lock cmpxchg
7646              );
7647   ins_pipe( pipe_cmpxchg );
7648 %}
7649 
// Int compare-and-exchange. The result of the rule is oldval itself
// (Set oldval ...), pinned to rax by its operand class; only the flags are
// declared killed. Emits a locked cmpxchgl.
7650 instruct compareAndExchangeI(
7651                          memory mem_ptr,
7652                          rax_RegI oldval, rRegI newval,
7653                          rFlagsReg cr)
7654 %{
7655   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7656   effect(KILL cr);
7657
7658   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7659             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7660   opcode(0x0F, 0xB1);
7661   ins_encode(lock_prefix,
7662              REX_reg_mem(newval, mem_ptr),
7663              OpcP, OpcS,
7664              reg_mem(newval, mem_ptr) // lock cmpxchg
7665              );
7666   ins_pipe( pipe_cmpxchg );
7667 %}
7668 
// 64-bit compare-and-exchange.  Matches CompareAndExchangeL on mem_ptr; the
// expected value is constrained to rax_RegL and the result is delivered in the
// same operand.  Only selected when VM_Version::supports_cx8() holds.
7669 instruct compareAndExchangeL(
7670                          memory mem_ptr,
7671                          rax_RegL oldval, rRegL newval,
7672                          rFlagsReg cr)
7673 %{
7674   predicate(VM_Version::supports_cx8());
7675   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7676   effect(KILL cr);
7677 
7678   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7679             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
7680   opcode(0x0F, 0xB1);
7681   ins_encode(lock_prefix,
7682              REX_reg_mem_wide(newval, mem_ptr),  // "wide" REX variant is used for the 64-bit form
7683              OpcP, OpcS,
7684              reg_mem(newval, mem_ptr)  // lock cmpxchg
7685             );
7686   ins_pipe( pipe_cmpxchg );
7687 %}
7688 
// Narrow-oop compare-and-exchange.  Matches CompareAndExchangeN on mem_ptr; the
// expected value is constrained to rax_RegN and the result is delivered in the
// same operand.  Uses the same non-wide encoding as the int variant.
7689 instruct compareAndExchangeN(
7690                           memory mem_ptr,
7691                           rax_RegN oldval, rRegN newval,
7692                           rFlagsReg cr) %{
7693   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
7694   effect(KILL cr);
7695 
7696   format %{ "cmpxchgl $mem_ptr,$newval\t# "
7697             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7698   opcode(0x0F, 0xB1);
7699   ins_encode(lock_prefix,
7700              REX_reg_mem(newval, mem_ptr),
7701              OpcP, OpcS,
7702              reg_mem(newval, mem_ptr)  // lock cmpxchg
7703           );
7704   ins_pipe( pipe_cmpxchg );
7705 %}
7706 
// Pointer compare-and-exchange.  Matches CompareAndExchangeP on mem_ptr; the
// expected value is constrained to rax_RegP and the result is delivered in the
// same operand.  Only selected when VM_Version::supports_cx8() holds.
7707 instruct compareAndExchangeP(
7708                          memory mem_ptr,
7709                          rax_RegP oldval, rRegP newval,
7710                          rFlagsReg cr)
7711 %{
7712   predicate(VM_Version::supports_cx8());
7713   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7714   effect(KILL cr);
7715 
7716   format %{ "cmpxchgq $mem_ptr,$newval\t# "
7717             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
7718   opcode(0x0F, 0xB1);
7719   ins_encode(lock_prefix,
7720              REX_reg_mem_wide(newval, mem_ptr),  // same wide encoding as the long variant
7721              OpcP, OpcS,
7722              reg_mem(newval, mem_ptr)  // lock cmpxchg
7723           );
7724   ins_pipe( pipe_cmpxchg );
7725 %}
7726 
// Byte get-and-add whose result is unused: selected only when the LoadStore
// node reports result_not_used(), so a plain ADDB of an immediate to memory is
// emitted instead of XADD.  Flags are declared killed.
7727 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7728   predicate(n->as_LoadStore()->result_not_used());
7729   match(Set dummy (GetAndAddB mem add));
7730   effect(KILL cr);
7731   format %{ "ADDB  [$mem],$add" %}
7732   ins_encode %{
7733     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7734     __ addb($mem$$Address, $add$$constant);
7735   %}
7736   ins_pipe( pipe_cmpxchg );
7737 %}
7738 
// Byte get-and-add: matches GetAndAddB with the addend in newval, which is also
// the rule's result operand.  Emits XADDB (lock-prefixed when os::is_MP()).
7739 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
7740   match(Set newval (GetAndAddB mem newval));
7741   effect(KILL cr);
7742   format %{ "XADDB  [$mem],$newval" %}
7743   ins_encode %{
7744     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7745     __ xaddb($mem$$Address, $newval$$Register);
7746   %}
7747   ins_pipe( pipe_cmpxchg );
7748 %}
7749 
// 16-bit get-and-add whose result is unused: selected only when the LoadStore
// node reports result_not_used(), so a plain ADDW of an immediate to memory is
// emitted instead of XADD.  Flags are declared killed.
7750 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7751   predicate(n->as_LoadStore()->result_not_used());
7752   match(Set dummy (GetAndAddS mem add));
7753   effect(KILL cr);
7754   format %{ "ADDW  [$mem],$add" %}
7755   ins_encode %{
7756     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7757     __ addw($mem$$Address, $add$$constant);
7758   %}
7759   ins_pipe( pipe_cmpxchg );
7760 %}
7761 
// 16-bit get-and-add: matches GetAndAddS with the addend in newval, which is
// also the rule's result operand.  Emits XADDW (lock-prefixed when os::is_MP()).
7762 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
7763   match(Set newval (GetAndAddS mem newval));
7764   effect(KILL cr);
7765   format %{ "XADDW  [$mem],$newval" %}
7766   ins_encode %{
7767     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7768     __ xaddw($mem$$Address, $newval$$Register);
7769   %}
7770   ins_pipe( pipe_cmpxchg );
7771 %}
7772 
// 32-bit get-and-add whose result is unused: selected only when the LoadStore
// node reports result_not_used(), so a plain ADDL of an immediate to memory is
// emitted instead of XADD.  Flags are declared killed.
7773 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
7774   predicate(n->as_LoadStore()->result_not_used());
7775   match(Set dummy (GetAndAddI mem add));
7776   effect(KILL cr);
7777   format %{ "ADDL  [$mem],$add" %}
7778   ins_encode %{
7779     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7780     __ addl($mem$$Address, $add$$constant);
7781   %}
7782   ins_pipe( pipe_cmpxchg );
7783 %}
7784 
// 32-bit get-and-add: matches GetAndAddI with the addend in newval, which is
// also the rule's result operand.  Emits XADDL (lock-prefixed when os::is_MP()).
7785 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
7786   match(Set newval (GetAndAddI mem newval));
7787   effect(KILL cr);
7788   format %{ "XADDL  [$mem],$newval" %}
7789   ins_encode %{
7790     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7791     __ xaddl($mem$$Address, $newval$$Register);
7792   %}
7793   ins_pipe( pipe_cmpxchg );
7794 %}
7795 
// 64-bit get-and-add whose result is unused: selected only when the LoadStore
// node reports result_not_used(), so a plain ADDQ of an immL32 immediate to
// memory is emitted instead of XADD.  Flags are declared killed.
7796 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
7797   predicate(n->as_LoadStore()->result_not_used());
7798   match(Set dummy (GetAndAddL mem add));
7799   effect(KILL cr);
7800   format %{ "ADDQ  [$mem],$add" %}
7801   ins_encode %{
7802     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7803     __ addq($mem$$Address, $add$$constant);
7804   %}
7805   ins_pipe( pipe_cmpxchg );
7806 %}
7807 
// 64-bit get-and-add: matches GetAndAddL with the addend in newval, which is
// also the rule's result operand.  Emits XADDQ (lock-prefixed when os::is_MP()).
7808 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
7809   match(Set newval (GetAndAddL mem newval));
7810   effect(KILL cr);
7811   format %{ "XADDQ  [$mem],$newval" %}
7812   ins_encode %{
7813     if (os::is_MP()) { __ lock(); }  // lock prefix only emitted when os::is_MP()
7814     __ xaddq($mem$$Address, $newval$$Register);
7815   %}
7816   ins_pipe( pipe_cmpxchg );
7817 %}
7818 
// Byte get-and-set: matches GetAndSetB with the new value in newval, which is
// also the rule's result operand.  No explicit lock prefix is emitted and no
// flags effect is declared.
7819 instruct xchgB( memory mem, rRegI newval) %{
7820   match(Set newval (GetAndSetB mem newval));
7821   format %{ "XCHGB  $newval,[$mem]" %}
7822   ins_encode %{
7823     __ xchgb($newval$$Register, $mem$$Address);
7824   %}
7825   ins_pipe( pipe_cmpxchg );
7826 %}
7827 
// 16-bit get-and-set: matches GetAndSetS with the new value in newval, which is
// also the rule's result operand.  No explicit lock prefix is emitted and no
// flags effect is declared.
7828 instruct xchgS( memory mem, rRegI newval) %{
7829   match(Set newval (GetAndSetS mem newval));
7830   format %{ "XCHGW  $newval,[$mem]" %}
7831   ins_encode %{
7832     __ xchgw($newval$$Register, $mem$$Address);
7833   %}
7834   ins_pipe( pipe_cmpxchg );
7835 %}
7836 
// 32-bit get-and-set: matches GetAndSetI with the new value in newval, which is
// also the rule's result operand.  No explicit lock prefix is emitted and no
// flags effect is declared.
7837 instruct xchgI( memory mem, rRegI newval) %{
7838   match(Set newval (GetAndSetI mem newval));
7839   format %{ "XCHGL  $newval,[$mem]" %}
7840   ins_encode %{
7841     __ xchgl($newval$$Register, $mem$$Address);
7842   %}
7843   ins_pipe( pipe_cmpxchg );
7844 %}
7845 
// 64-bit get-and-set: matches GetAndSetL with the new value in newval, which is
// also the rule's result operand.  No explicit lock prefix is emitted and no
// flags effect is declared.  The format mnemonic is XCHGQ to agree with the
// xchgq instruction that the encoder actually emits.
7846 instruct xchgL( memory mem, rRegL newval) %{
7847   match(Set newval (GetAndSetL mem newval));
7848   format %{ "XCHGQ  $newval,[$mem]" %}
7849   ins_encode %{
7850     __ xchgq($newval$$Register, $mem$$Address);
7851   %}
7852   ins_pipe( pipe_cmpxchg );
7853 %}
7854 
// Pointer get-and-set: matches GetAndSetP with the new value in newval, which
// is also the rule's result operand.  Emits the 64-bit xchgq; no explicit lock
// prefix is emitted and no flags effect is declared.
7855 instruct xchgP( memory mem, rRegP newval) %{
7856   match(Set newval (GetAndSetP mem newval));
7857   format %{ "XCHGQ  $newval,[$mem]" %}
7858   ins_encode %{
7859     __ xchgq($newval$$Register, $mem$$Address);
7860   %}
7861   ins_pipe( pipe_cmpxchg );
7862 %}
7863 
// Narrow-oop get-and-set: matches GetAndSetN with the new value in newval,
// which is also the rule's result operand.  Emits the 32-bit xchgl; no explicit
// lock prefix is emitted and no flags effect is declared.  The format string
// brackets the memory operand like the other xchg rules.
7864 instruct xchgN( memory mem, rRegN newval) %{
7865   match(Set newval (GetAndSetN mem newval));
7866   format %{ "XCHGL  $newval,[$mem]" %}
7867   ins_encode %{
7868     __ xchgl($newval$$Register, $mem$$Address);
7869   %}
7870   ins_pipe( pipe_cmpxchg );
7871 %}
7872 
7873 //----------Subtraction Instructions-------------------------------------------
7874 
7875 // Integer Subtraction Instructions
// Int subtract, register - register: dst = dst - src.  Flags are declared killed.
7876 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
7877 %{
7878   match(Set dst (SubI dst src));
7879   effect(KILL cr);
7880 
7881   format %{ "subl    $dst, $src\t# int" %}
7882   opcode(0x2B);
7883   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
7884   ins_pipe(ialu_reg_reg);
7885 %}
7886 
// Int subtract, register - immediate: dst = dst - src with src an immI constant.
// Flags are declared killed.
7887 instruct subI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
7888 %{
7889   match(Set dst (SubI dst src));
7890   effect(KILL cr);
7891 
7892   format %{ "subl    $dst, $src\t# int" %}
7893   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7894   ins_encode(OpcSErm(dst, src), Con8or32(src));  // presumably picks an 8- or 32-bit immediate form; enc classes defined elsewhere
7895   ins_pipe(ialu_reg);
7896 %}
7897 
// Int subtract, register - memory: folds the LoadI of src into the subtract,
// dst = dst - [src].  Flags are declared killed.
7898 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
7899 %{
7900   match(Set dst (SubI dst (LoadI src)));
7901   effect(KILL cr);
7902 
7903   ins_cost(125);
7904   format %{ "subl    $dst, $src\t# int" %}
7905   opcode(0x2B);
7906   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
7907   ins_pipe(ialu_reg_mem);
7908 %}
7909 
// Int read-modify-write subtract: matches a StoreI to dst of (LoadI dst) - src,
// i.e. [dst] = [dst] - src in a single instruction.  Flags are declared killed.
7910 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
7911 %{
7912   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7913   effect(KILL cr);
7914 
7915   ins_cost(150);
7916   format %{ "subl    $dst, $src\t# int" %}
7917   opcode(0x29); /* Opcode 29 /r */
7918   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7919   ins_pipe(ialu_mem_reg);
7920 %}
7921 
// Int read-modify-write subtract of an immediate: matches a StoreI to dst of
// (LoadI dst) - src with src an immI constant.  Flags are declared killed.
7922 instruct subI_mem_imm(memory dst, immI src, rFlagsReg cr)
7923 %{
7924   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7925   effect(KILL cr);
7926 
7927   ins_cost(125); // XXX
7928   format %{ "subl    $dst, $src\t# int" %}
7929   opcode(0x81); /* Opcode 81 /5 id */
7930   ins_encode(REX_mem(dst), OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // the /5 extension is passed literally here
7931   ins_pipe(ialu_mem_imm);
7932 %}
7933 
// Long subtract, register - register: dst = dst - src, using the wide REX
// encoding.  Flags are declared killed.  Also used by the divL_10 expansion.
7934 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
7935 %{
7936   match(Set dst (SubL dst src));
7937   effect(KILL cr);
7938 
7939   format %{ "subq    $dst, $src\t# long" %}
7940   opcode(0x2B);
7941   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
7942   ins_pipe(ialu_reg_reg);
7943 %}
7944 
// Long subtract, register - immediate: dst = dst - src with src an immL32
// constant, using the wide encoding.  Flags are declared killed.
// dst is a long register operand (rRegL), consistent with the SubL match rule
// and with the other long subtract rules in this section.
7945 instruct subL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
7946 %{
7947   match(Set dst (SubL dst src));
7948   effect(KILL cr);
7949 
7950   format %{ "subq    $dst, $src\t# long" %}
7951   opcode(0x81, 0x05);  /* Opcode 81 /5 */
7952   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
7953   ins_pipe(ialu_reg);
7954 %}
7955 
// Long subtract, register - memory: folds the LoadL of src into the subtract,
// dst = dst - [src].  Flags are declared killed.
7956 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
7957 %{
7958   match(Set dst (SubL dst (LoadL src)));
7959   effect(KILL cr);
7960 
7961   ins_cost(125);
7962   format %{ "subq    $dst, $src\t# long" %}
7963   opcode(0x2B);
7964   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
7965   ins_pipe(ialu_reg_mem);
7966 %}
7967 
// Long read-modify-write subtract: matches a StoreL to dst of (LoadL dst) - src,
// i.e. [dst] = [dst] - src in a single instruction.  Flags are declared killed.
7968 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
7969 %{
7970   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7971   effect(KILL cr);
7972 
7973   ins_cost(150);
7974   format %{ "subq    $dst, $src\t# long" %}
7975   opcode(0x29); /* Opcode 29 /r */
7976   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));  // register operand is src, memory operand is dst
7977   ins_pipe(ialu_mem_reg);
7978 %}
7979 
// Long read-modify-write subtract of an immediate: matches a StoreL to dst of
// (LoadL dst) - src with src an immL32 constant.  Flags are declared killed.
7980 instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
7981 %{
7982   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
7983   effect(KILL cr);
7984 
7985   ins_cost(125); // XXX
7986   format %{ "subq    $dst, $src\t# long" %}
7987   opcode(0x81); /* Opcode 81 /5 id */
7988   ins_encode(REX_mem_wide(dst),
7989              OpcSE(src), RM_opc_mem(0x05, dst), Con8or32(src));  // the /5 extension is passed literally here
7990   ins_pipe(ialu_mem_imm);
7991 %}
7992 
7993 // Subtract from a pointer: matches AddP of dst and a negated int (0 - src)
7994 // and emits a wide subtract instead.  XXX original author flagged this rule as questionable.
7995 instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
7996 %{
7997   match(Set dst (AddP dst (SubI zero src)));
7998   effect(KILL cr);
7999 
8000   format %{ "subq    $dst, $src\t# ptr - int" %}
8001   opcode(0x2B);
8002   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));  // zero is matched but not encoded
8003   ins_pipe(ialu_reg_reg);
8004 %}
8005 
// Int negate in a register: matches (0 - dst) and emits negl.  The zero operand
// is only part of the match; it is not encoded.  Flags are declared killed.
8006 instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
8007 %{
8008   match(Set dst (SubI zero dst));
8009   effect(KILL cr);
8010 
8011   format %{ "negl    $dst\t# int" %}
8012   opcode(0xF7, 0x03);  // Opcode F7 /3
8013   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8014   ins_pipe(ialu_reg);
8015 %}
8016 
// Int negate in memory: matches a StoreI to dst of (0 - LoadI dst) and emits a
// single negl on the memory operand.  Flags are declared killed.
// NOTE(review): uses the register pipe class ialu_reg although the operand is
// memory; other one-operand memory rules here use ialu_mem_imm -- confirm.
8017 instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
8018 %{
8019   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
8020   effect(KILL cr);
8021 
8022   format %{ "negl    $dst\t# int" %}
8023   opcode(0xF7, 0x03);  // Opcode F7 /3
8024   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary = the /3 extension from opcode()
8025   ins_pipe(ialu_reg);
8026 %}
8027 
// Long negate in a register: matches (0 - dst) and emits negq using the wide
// REX encoding.  The zero operand is not encoded.  Flags are declared killed.
8028 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
8029 %{
8030   match(Set dst (SubL zero dst));
8031   effect(KILL cr);
8032 
8033   format %{ "negq    $dst\t# long" %}
8034   opcode(0xF7, 0x03);  // Opcode F7 /3
8035   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8036   ins_pipe(ialu_reg);
8037 %}
8038 
// Long negate in memory: matches a StoreL to dst of (0 - LoadL dst) and emits a
// single negq on the memory operand.  Flags are declared killed.
// NOTE(review): uses the register pipe class ialu_reg although the operand is
// memory; other one-operand memory rules here use ialu_mem_imm -- confirm.
8039 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
8040 %{
8041   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
8042   effect(KILL cr);
8043 
8044   format %{ "negq    $dst\t# long" %}
8045   opcode(0xF7, 0x03);  // Opcode F7 /3
8046   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));  // secondary = the /3 extension from opcode()
8047   ins_pipe(ialu_reg);
8048 %}
8049 
8050 //----------Multiplication/Division Instructions-------------------------------
8051 // Integer Multiplication Instructions
8052 // Multiply Register
8053 
// Int multiply, register * register: dst = dst * src (two-operand imull).
// Flags are declared killed.
8054 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
8055 %{
8056   match(Set dst (MulI dst src));
8057   effect(KILL cr);
8058 
8059   ins_cost(300);
8060   format %{ "imull   $dst, $src\t# int" %}
8061   opcode(0x0F, 0xAF);
8062   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8063   ins_pipe(ialu_reg_reg_alu0);
8064 %}
8065 
// Int multiply, register * immediate: dst = src * imm (three-operand imull), so
// dst need not be the same register as src.  Flags are declared killed.
8066 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
8067 %{
8068   match(Set dst (MulI src imm));
8069   effect(KILL cr);
8070 
8071   ins_cost(300);
8072   format %{ "imull   $dst, $src, $imm\t# int" %}
8073   opcode(0x69); /* 69 /r id */
8074   ins_encode(REX_reg_reg(dst, src),
8075              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8076   ins_pipe(ialu_reg_reg_alu0);
8077 %}
8078 
// Int multiply, register * memory: folds the LoadI of src into the multiply,
// dst = dst * [src].  Flags are declared killed.
8079 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
8080 %{
8081   match(Set dst (MulI dst (LoadI src)));
8082   effect(KILL cr);
8083 
8084   ins_cost(350);
8085   format %{ "imull   $dst, $src\t# int" %}
8086   opcode(0x0F, 0xAF);
8087   ins_encode(REX_reg_mem(dst, src), OpcP, OpcS, reg_mem(dst, src));
8088   ins_pipe(ialu_reg_mem_alu0);
8089 %}
8090 
// Int multiply, memory * immediate: dst = [src] * imm, folding the LoadI into a
// three-operand imull.  Flags are declared killed.
8091 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
8092 %{
8093   match(Set dst (MulI (LoadI src) imm));
8094   effect(KILL cr);
8095 
8096   ins_cost(300);
8097   format %{ "imull   $dst, $src, $imm\t# int" %}
8098   opcode(0x69); /* 69 /r id */
8099   ins_encode(REX_reg_mem(dst, src),
8100              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8101   ins_pipe(ialu_reg_mem_alu0);
8102 %}
8103 
// Long multiply, register * register: dst = dst * src (two-operand imulq, wide
// REX encoding).  Flags are declared killed.
8104 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8105 %{
8106   match(Set dst (MulL dst src));
8107   effect(KILL cr);
8108 
8109   ins_cost(300);
8110   format %{ "imulq   $dst, $src\t# long" %}
8111   opcode(0x0F, 0xAF);
8112   ins_encode(REX_reg_reg_wide(dst, src), OpcP, OpcS, reg_reg(dst, src));
8113   ins_pipe(ialu_reg_reg_alu0);
8114 %}
8115 
// Long multiply, register * immediate: dst = src * imm with imm an immL32
// constant (three-operand imulq).  Flags are declared killed.
8116 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
8117 %{
8118   match(Set dst (MulL src imm));
8119   effect(KILL cr);
8120 
8121   ins_cost(300);
8122   format %{ "imulq   $dst, $src, $imm\t# long" %}
8123   opcode(0x69); /* 69 /r id */
8124   ins_encode(REX_reg_reg_wide(dst, src),
8125              OpcSE(imm), reg_reg(dst, src), Con8or32(imm));
8126   ins_pipe(ialu_reg_reg_alu0);
8127 %}
8128 
// Long multiply, register * memory: folds the LoadL of src into the multiply,
// dst = dst * [src].  Flags are declared killed.
8129 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
8130 %{
8131   match(Set dst (MulL dst (LoadL src)));
8132   effect(KILL cr);
8133 
8134   ins_cost(350);
8135   format %{ "imulq   $dst, $src\t# long" %}
8136   opcode(0x0F, 0xAF);
8137   ins_encode(REX_reg_mem_wide(dst, src), OpcP, OpcS, reg_mem(dst, src));
8138   ins_pipe(ialu_reg_mem_alu0);
8139 %}
8140 
// Long multiply, memory * immediate: dst = [src] * imm with imm an immL32
// constant, folding the LoadL into a three-operand imulq.  Flags are killed.
8141 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
8142 %{
8143   match(Set dst (MulL (LoadL src) imm));
8144   effect(KILL cr);
8145 
8146   ins_cost(300);
8147   format %{ "imulq   $dst, $src, $imm\t# long" %}
8148   opcode(0x69); /* 69 /r id */
8149   ins_encode(REX_reg_mem_wide(dst, src),
8150              OpcSE(imm), reg_mem(dst, src), Con8or32(imm));
8151   ins_pipe(ialu_reg_mem_alu0);
8152 %}
8153 
// High half of a long multiply: matches MulHiL with one input constrained to
// the rax class and the result constrained to the rdx class.  rax is both used
// and killed; only src is encoded explicitly (one-operand F7 /5 form).
8154 instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8155 %{
8156   match(Set dst (MulHiL src rax));
8157   effect(USE_KILL rax, KILL cr);
8158 
8159   ins_cost(300);
8160   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
8161   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8162   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8163   ins_pipe(ialu_reg_reg_alu0);
8164 %}
8165 
// Int divide: dividend and quotient are constrained to the rax class, the
// divisor to a register that is neither rax nor rdx; rdx and flags are killed.
// The format string documents a guard for the 0x80000000 / -1 case that skips
// the idivl (sequence presumably emitted by cdql_enc -- defined elsewhere).
8166 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8167                    rFlagsReg cr)
8168 %{
8169   match(Set rax (DivI rax div));
8170   effect(KILL rdx, KILL cr);
8171 
8172   ins_cost(30*100+10*100); // XXX
8173   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8174             "jne,s   normal\n\t"
8175             "xorl    rdx, rdx\n\t"
8176             "cmpl    $div, -1\n\t"
8177             "je,s    done\n"
8178     "normal: cdql\n\t"
8179             "idivl   $div\n"
8180     "done:"        %}
8181   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8182   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8183   ins_pipe(ialu_reg_reg_alu0);
8184 %}
8185 
// Long divide: dividend and quotient are constrained to the rax class, the
// divisor to a register that is neither rax nor rdx; rdx and flags are killed.
// The format string documents a guard for the 0x8000000000000000 / -1 case that
// skips the idivq (sequence presumably emitted by cdqq_enc -- defined elsewhere).
8186 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8187                    rFlagsReg cr)
8188 %{
8189   match(Set rax (DivL rax div));
8190   effect(KILL rdx, KILL cr);
8191 
8192   ins_cost(30*100+10*100); // XXX
8193   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8194             "cmpq    rax, rdx\n\t"
8195             "jne,s   normal\n\t"
8196             "xorl    rdx, rdx\n\t"
8197             "cmpq    $div, -1\n\t"
8198             "je,s    done\n"
8199     "normal: cdqq\n\t"
8200             "idivq   $div\n"
8201     "done:"        %}
8202   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8203   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8204   ins_pipe(ialu_reg_reg_alu0);
8205 %}
8206 
8207 // Integer DIVMOD with register divisor: produces both quotient and remainder.
// Matches DivModI directly (no Set); unlike divI_rReg/modI_rReg neither rax nor
// rdx is declared killed, only the flags.  Same encoding as divI_rReg.
8208 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
8209                              rFlagsReg cr)
8210 %{
8211   match(DivModI rax div);
8212   effect(KILL cr);
8213 
8214   ins_cost(30*100+10*100); // XXX
8215   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
8216             "jne,s   normal\n\t"
8217             "xorl    rdx, rdx\n\t"
8218             "cmpl    $div, -1\n\t"
8219             "je,s    done\n"
8220     "normal: cdql\n\t"
8221             "idivl   $div\n"
8222     "done:"        %}
8223   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8224   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8225   ins_pipe(pipe_slow);
8226 %}
8227 
8228 // Long DIVMOD with register divisor: produces both quotient and remainder.
// Matches DivModL directly (no Set); unlike divL_rReg/modL_rReg neither rax nor
// rdx is declared killed, only the flags.  Same encoding as divL_rReg.
8229 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
8230                              rFlagsReg cr)
8231 %{
8232   match(DivModL rax div);
8233   effect(KILL cr);
8234 
8235   ins_cost(30*100+10*100); // XXX
8236   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
8237             "cmpq    rax, rdx\n\t"
8238             "jne,s   normal\n\t"
8239             "xorl    rdx, rdx\n\t"
8240             "cmpq    $div, -1\n\t"
8241             "je,s    done\n"
8242     "normal: cdqq\n\t"
8243             "idivq   $div\n"
8244     "done:"        %}
8245   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8246   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8247   ins_pipe(pipe_slow);
8248 %}
8249 
8250 //----------- DivL-By-Constant-Expansions--------------------------------------
8251 // DivI cases are handled by the compiler
8252 
8253 // Magic constant, reciprocal of 10
// Helper with no match rule: it only defines dst and is instantiated from the
// divL_10 expansion.  The format string prints the full 16-digit constant that
// the encoder actually loads.
8254 instruct loadConL_0x6666666666666667(rRegL dst)
8255 %{
8256   effect(DEF dst);
8257 
8258   format %{ "movq    $dst, #0x6666666666666667\t# Used in div-by-10" %}
8259   ins_encode(load_immL(dst, 0x6666666666666667));
8260   ins_pipe(ialu_reg);
8261 %}
8262 
// Helper with no match rule, instantiated from the divL_10 expansion: one-operand
// imulq of src with rax; the result operand is constrained to the rdx class.
// rax is used and killed, flags are killed.  Same encoding as mulHiL_rReg.
8263 instruct mul_hi(rdx_RegL dst, no_rax_RegL src, rax_RegL rax, rFlagsReg cr)
8264 %{
8265   effect(DEF dst, USE src, USE_KILL rax, KILL cr);
8266 
8267   format %{ "imulq   rdx:rax, rax, $src\t# Used in div-by-10" %}
8268   opcode(0xF7, 0x5); /* Opcode F7 /5 */
8269   ins_encode(REX_reg_wide(src), OpcP, reg_opc(src));
8270   ins_pipe(ialu_reg_reg_alu0);
8271 %}
8272 
// Helper with no match rule, instantiated from the divL_10 expansion: arithmetic
// right shift of dst by the fixed count 0x3F (63), in place.  Flags are killed.
8273 instruct sarL_rReg_63(rRegL dst, rFlagsReg cr)
8274 %{
8275   effect(USE_DEF dst, KILL cr);
8276 
8277   format %{ "sarq    $dst, #63\t# Used in div-by-10" %}
8278   opcode(0xC1, 0x7); /* C1 /7 ib */
8279   ins_encode(reg_opc_imm_wide(dst, 0x3F));
8280   ins_pipe(ialu_reg);
8281 %}
8282 
// Helper with no match rule, instantiated from the divL_10 expansion: arithmetic
// right shift of dst by the fixed count 2, in place.  Flags are killed.
8283 instruct sarL_rReg_2(rRegL dst, rFlagsReg cr)
8284 %{
8285   effect(USE_DEF dst, KILL cr);
8286 
8287   format %{ "sarq    $dst, #2\t# Used in div-by-10" %}
8288   opcode(0xC1, 0x7); /* C1 /7 ib */
8289   ins_encode(reg_opc_imm_wide(dst, 0x2));
8290   ins_pipe(ialu_reg);
8291 %}
8292 
// Long divide by the constant matched by immL10, expanded into a multiply-high
// by the magic constant followed by shifts and a subtract, avoiding idivq.
// The result is constrained to the rdx class; rax is a killed temporary.
8293 instruct divL_10(rdx_RegL dst, no_rax_RegL src, immL10 div)
8294 %{
8295   match(Set dst (DivL src div));
8296 
8297   ins_cost((5+8)*100);
8298   expand %{
8299     rax_RegL rax;                     // Killed temp
8300     rFlagsReg cr;                     // Killed
8301     loadConL_0x6666666666666667(rax); // movq  rax, 0x6666666666666667
8302     mul_hi(dst, src, rax, cr);        // imulq rdx:rax <= rax * $src
8303     sarL_rReg_63(src, cr);            // sarq  src, 63 (shifts src in place)
8304     sarL_rReg_2(dst, cr);             // sarq  rdx, 2
8305     subL_rReg(dst, src, cr);          // subq  rdx, src
8306   %}
8307 %}
8308 
8309 //-----------------------------------------------------------------------------
8310 
// Int remainder: dividend is constrained to the rax class, the result to the
// rdx class, the divisor to a register that is neither; rax and flags are
// killed.  Same encoding and guard sequence as divI_rReg.
8311 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
8312                    rFlagsReg cr)
8313 %{
8314   match(Set rdx (ModI rax div));
8315   effect(KILL rax, KILL cr);
8316 
8317   ins_cost(300); // XXX
8318   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
8319             "jne,s   normal\n\t"
8320             "xorl    rdx, rdx\n\t"
8321             "cmpl    $div, -1\n\t"
8322             "je,s    done\n"
8323     "normal: cdql\n\t"
8324             "idivl   $div\n"
8325     "done:"        %}
8326   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8327   ins_encode(cdql_enc(div), REX_reg(div), OpcP, reg_opc(div));
8328   ins_pipe(ialu_reg_reg_alu0);
8329 %}
8330 
// Long remainder: dividend is constrained to the rax class, the result to the
// rdx class, the divisor to a register that is neither; rax and flags are
// killed.  Same encoding and guard sequence as divL_rReg.
8331 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
8332                    rFlagsReg cr)
8333 %{
8334   match(Set rdx (ModL rax div));
8335   effect(KILL rax, KILL cr);
8336 
8337   ins_cost(300); // XXX
8338   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
8339             "cmpq    rax, rdx\n\t"
8340             "jne,s   normal\n\t"
8341             "xorl    rdx, rdx\n\t"
8342             "cmpq    $div, -1\n\t"
8343             "je,s    done\n"
8344     "normal: cdqq\n\t"
8345             "idivq   $div\n"
8346     "done:"        %}
8347   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8348   ins_encode(cdqq_enc(div), REX_reg_wide(div), OpcP, reg_opc(div));
8349   ins_pipe(ialu_reg_reg_alu0);
8350 %}
8351 
8352 // Integer Shift Instructions
8353 // Int shift left by one, register form.  The immI1 shift operand is matched
// but not encoded: the D1 /4 form carries no immediate.  Flags are killed.
8354 instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8355 %{
8356   match(Set dst (LShiftI dst shift));
8357   effect(KILL cr);
8358 
8359   format %{ "sall    $dst, $shift" %}
8360   opcode(0xD1, 0x4); /* D1 /4 */
8361   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8362   ins_pipe(ialu_reg);
8363 %}
8364 
8365 // Int shift left by one, read-modify-write on memory: matches a StoreI to dst
// of (LoadI dst) << shift.  The shift operand is not encoded.  Flags are killed.
8366 instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8367 %{
8368   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8369   effect(KILL cr);
8370 
8371   format %{ "sall    $dst, $shift\t" %}
8372   opcode(0xD1, 0x4); /* D1 /4 */
8373   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8374   ins_pipe(ialu_mem_imm);
8375 %}
8376 
8377 // Int shift left by 8-bit immediate, register form (C1 /4 ib).
// The whole encoding is produced by reg_opc_imm.  Flags are killed.
8378 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8379 %{
8380   match(Set dst (LShiftI dst shift));
8381   effect(KILL cr);
8382 
8383   format %{ "sall    $dst, $shift" %}
8384   opcode(0xC1, 0x4); /* C1 /4 ib */
8385   ins_encode(reg_opc_imm(dst, shift));
8386   ins_pipe(ialu_reg);
8387 %}
8388 
8389 // Int shift left by 8-bit immediate, read-modify-write on memory: matches a
// StoreI to dst of (LoadI dst) << shift.  Flags are killed.
8390 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8391 %{
8392   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8393   effect(KILL cr);
8394 
8395   format %{ "sall    $dst, $shift" %}
8396   opcode(0xC1, 0x4); /* C1 /4 ib */
8397   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8398   ins_pipe(ialu_mem_imm);
8399 %}
8400 
8401 // Int shift left by variable count, register form.  The count is constrained
// to the rcx register class and is implicit in the D3 /4 encoding.  Flags are killed.
8402 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8403 %{
8404   match(Set dst (LShiftI dst shift));
8405   effect(KILL cr);
8406 
8407   format %{ "sall    $dst, $shift" %}
8408   opcode(0xD3, 0x4); /* D3 /4 */
8409   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8410   ins_pipe(ialu_reg_reg);
8411 %}
8412 
8413 // Int shift left by variable count, read-modify-write on memory: matches a
// StoreI to dst of (LoadI dst) << shift with the count in the rcx class.  Flags are killed.
8414 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8415 %{
8416   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
8417   effect(KILL cr);
8418 
8419   format %{ "sall    $dst, $shift" %}
8420   opcode(0xD3, 0x4); /* D3 /4 */
8421   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8422   ins_pipe(ialu_mem_reg);
8423 %}
8424 
8425 // Int arithmetic shift right by one, register form.  The immI1 shift operand
// is matched but not encoded (D1 /7 carries no immediate).  Flags are killed.
8426 instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8427 %{
8428   match(Set dst (RShiftI dst shift));
8429   effect(KILL cr);
8430 
8431   format %{ "sarl    $dst, $shift" %}
8432   opcode(0xD1, 0x7); /* D1 /7 */
8433   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8434   ins_pipe(ialu_reg);
8435 %}
8436 
8437 // Int arithmetic shift right by one, read-modify-write on memory: matches a
// StoreI to dst of (LoadI dst) >> shift.  The shift operand is not encoded.  Flags are killed.
8438 instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8439 %{
8440   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8441   effect(KILL cr);
8442 
8443   format %{ "sarl    $dst, $shift" %}
8444   opcode(0xD1, 0x7); /* D1 /7 */
8445   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8446   ins_pipe(ialu_mem_imm);
8447 %}
8448 
8449 // Int arithmetic shift right by 8-bit immediate, register form (C1 /7 ib).
// The whole encoding is produced by reg_opc_imm.  Flags are killed.
// Uses the register pipe class ialu_reg, like salI_rReg_imm and shrI_rReg_imm;
// no memory operand is involved in this rule.
8450 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8451 %{
8452   match(Set dst (RShiftI dst shift));
8453   effect(KILL cr);
8454 
8455   format %{ "sarl    $dst, $shift" %}
8456   opcode(0xC1, 0x7); /* C1 /7 ib */
8457   ins_encode(reg_opc_imm(dst, shift));
8458   ins_pipe(ialu_reg);
8459 %}
8460 
8461 // Int arithmetic shift right by 8-bit immediate, read-modify-write on memory:
// matches a StoreI to dst of (LoadI dst) >> shift.  Flags are killed.
8462 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8463 %{
8464   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8465   effect(KILL cr);
8466 
8467   format %{ "sarl    $dst, $shift" %}
8468   opcode(0xC1, 0x7); /* C1 /7 ib */
8469   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8470   ins_pipe(ialu_mem_imm);
8471 %}
8472 
8473 // Int arithmetic shift right by variable count, register form.  The count is
// constrained to the rcx register class and is implicit in the D3 /7 encoding.  Flags are killed.
8474 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8475 %{
8476   match(Set dst (RShiftI dst shift));
8477   effect(KILL cr);
8478 
8479   format %{ "sarl    $dst, $shift" %}
8480   opcode(0xD3, 0x7); /* D3 /7 */
8481   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8482   ins_pipe(ialu_reg_reg);
8483 %}
8484 
8485 // Int arithmetic shift right by variable count, read-modify-write on memory:
// matches a StoreI to dst of (LoadI dst) >> shift with the count in the rcx class.  Flags are killed.
8486 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8487 %{
8488   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8489   effect(KILL cr);
8490 
8491   format %{ "sarl    $dst, $shift" %}
8492   opcode(0xD3, 0x7); /* D3 /7 */
8493   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8494   ins_pipe(ialu_mem_reg);
8495 %}
8496 
8497 // Int logical (unsigned) shift right by one, register form.  The immI1 shift
// operand is matched but not encoded (D1 /5 carries no immediate).  Flags are killed.
8498 instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
8499 %{
8500   match(Set dst (URShiftI dst shift));
8501   effect(KILL cr);
8502 
8503   format %{ "shrl    $dst, $shift" %}
8504   opcode(0xD1, 0x5); /* D1 /5 */
8505   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8506   ins_pipe(ialu_reg);
8507 %}
8508 
8509 // Int logical shift right by one, read-modify-write on memory: matches a
// StoreI to dst of (LoadI dst) >>> shift.  The shift operand is not encoded.  Flags are killed.
8510 instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8511 %{
8512   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8513   effect(KILL cr);
8514 
8515   format %{ "shrl    $dst, $shift" %}
8516   opcode(0xD1, 0x5); /* D1 /5 */
8517   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8518   ins_pipe(ialu_mem_imm);
8519 %}
8520 
8521 // Int logical shift right by 8-bit immediate, register form (C1 /5 ib).
// The whole encoding is produced by reg_opc_imm.  Flags are killed.
8522 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
8523 %{
8524   match(Set dst (URShiftI dst shift));
8525   effect(KILL cr);
8526 
8527   format %{ "shrl    $dst, $shift" %}
8528   opcode(0xC1, 0x5); /* C1 /5 ib */
8529   ins_encode(reg_opc_imm(dst, shift));
8530   ins_pipe(ialu_reg);
8531 %}
8532 
8533 // Int logical shift right by 8-bit immediate, read-modify-write on memory:
// matches a StoreI to dst of (LoadI dst) >>> shift.  Flags are killed.
8534 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8535 %{
8536   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8537   effect(KILL cr);
8538 
8539   format %{ "shrl    $dst, $shift" %}
8540   opcode(0xC1, 0x5); /* C1 /5 ib */
8541   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst), Con8or32(shift));
8542   ins_pipe(ialu_mem_imm);
8543 %}
8544 
8545 // Int logical shift right by variable count, register form.  The count is
// constrained to the rcx register class and is implicit in the D3 /5 encoding.  Flags are killed.
8546 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
8547 %{
8548   match(Set dst (URShiftI dst shift));
8549   effect(KILL cr);
8550 
8551   format %{ "shrl    $dst, $shift" %}
8552   opcode(0xD3, 0x5); /* D3 /5 */
8553   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8554   ins_pipe(ialu_reg_reg);
8555 %}
8556 
8557 // Int logical shift right by variable count, read-modify-write on memory:
// matches a StoreI to dst of (LoadI dst) >>> shift with the count in the rcx class.  Flags are killed.
8558 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8559 %{
8560   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
8561   effect(KILL cr);
8562 
8563   format %{ "shrl    $dst, $shift" %}
8564   opcode(0xD3, 0x5); /* D3 /5 */
8565   ins_encode(REX_mem(dst), OpcP, RM_opc_mem(secondary, dst));
8566   ins_pipe(ialu_mem_reg);
8567 %}
8568 
8569 // Long Shift Instructions
8570 // Long shift left by one, register form (wide REX encoding).  The immI1 shift
// operand is matched but not encoded (D1 /4 carries no immediate).  Flags are killed.
8571 instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8572 %{
8573   match(Set dst (LShiftL dst shift));
8574   effect(KILL cr);
8575 
8576   format %{ "salq    $dst, $shift" %}
8577   opcode(0xD1, 0x4); /* D1 /4 */
8578   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8579   ins_pipe(ialu_reg);
8580 %}
8581 
8582 // Long shift left by one, read-modify-write on memory: matches a StoreL to dst
// of (LoadL dst) << shift.  The shift operand is not encoded.  Flags are killed.
8583 instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8584 %{
8585   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8586   effect(KILL cr);
8587 
8588   format %{ "salq    $dst, $shift" %}
8589   opcode(0xD1, 0x4); /* D1 /4 */
8590   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8591   ins_pipe(ialu_mem_imm);
8592 %}
8593 
8594 // Long shift left by 8-bit immediate, register form (C1 /4 ib).
// The whole encoding is produced by reg_opc_imm_wide.  Flags are killed.
8595 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8596 %{
8597   match(Set dst (LShiftL dst shift));
8598   effect(KILL cr);
8599 
8600   format %{ "salq    $dst, $shift" %}
8601   opcode(0xC1, 0x4); /* C1 /4 ib */
8602   ins_encode(reg_opc_imm_wide(dst, shift));
8603   ins_pipe(ialu_reg);
8604 %}
8605 
8606 // Long shift left by 8-bit immediate, read-modify-write on memory: matches a
// StoreL to dst of (LoadL dst) << shift.  Flags are killed.
8607 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8608 %{
8609   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8610   effect(KILL cr);
8611 
8612   format %{ "salq    $dst, $shift" %}
8613   opcode(0xC1, 0x4); /* C1 /4 ib */
8614   ins_encode(REX_mem_wide(dst), OpcP,
8615              RM_opc_mem(secondary, dst), Con8or32(shift));
8616   ins_pipe(ialu_mem_imm);
8617 %}
8618 
8619 // Long shift left by variable count, register form.  The count is an int
// operand constrained to the rcx class and is implicit in the D3 /4 encoding.  Flags are killed.
8620 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8621 %{
8622   match(Set dst (LShiftL dst shift));
8623   effect(KILL cr);
8624 
8625   format %{ "salq    $dst, $shift" %}
8626   opcode(0xD3, 0x4); /* D3 /4 */
8627   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8628   ins_pipe(ialu_reg_reg);
8629 %}
8630 
8631 // Long shift left by variable count, read-modify-write on memory: matches a
// StoreL to dst of (LoadL dst) << shift with the count in the rcx class.  Flags are killed.
8632 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8633 %{
8634   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
8635   effect(KILL cr);
8636 
8637   format %{ "salq    $dst, $shift" %}
8638   opcode(0xD3, 0x4); /* D3 /4 */
8639   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8640   ins_pipe(ialu_mem_reg);
8641 %}
8642 
8643 // Arithmetic shift right by one (long, register operand)
     // In-place form: dst is both source and result of RShiftL; the count is
     // an immI1 constant. Flags are killed.
8644 instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8645 %{
8646   match(Set dst (RShiftL dst shift));
8647   effect(KILL cr);
8648 
8649   format %{ "sarq    $dst, $shift" %}
8650   opcode(0xD1, 0x7); /* D1 /7 */
8651   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8652   ins_pipe(ialu_reg);
8653 %}
8654 
8655 // Arithmetic shift right by one (long, memory operand)
     // Read-modify-write form: matches a StoreL to dst whose value is the
     // RShiftL of a LoadL from the same memory operand. Flags are killed.
8656 instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8657 %{
8658   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8659   effect(KILL cr);
8660 
8661   format %{ "sarq    $dst, $shift" %}
8662   opcode(0xD1, 0x7); /* D1 /7 */
8663   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8664   ins_pipe(ialu_mem_imm);
8665 %}
8666 
8667 // Arithmetic Shift Right by 8-bit immediate (long, register operand)
     // In-place form: dst is both source and result of RShiftL; the count is
     // an immI8 constant. Flags are killed.
8668 instruct sarL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8669 %{
8670   match(Set dst (RShiftL dst shift));
8671   effect(KILL cr);
8672 
8673   format %{ "sarq    $dst, $shift" %}
8674   opcode(0xC1, 0x7); /* C1 /7 ib */
8675   ins_encode(reg_opc_imm_wide(dst, shift));
       // Register-only instruction: use the register ALU pipeline class, the
       // same one salL_rReg_imm and shrL_rReg_imm use (was ialu_mem_imm).
8676   ins_pipe(ialu_reg);
8677 %}
8678 
8679 // Arithmetic Shift Right by 8-bit immediate (long, memory operand)
     // Read-modify-write form: load from dst, arithmetic-shift right by the
     // immI8 constant, store back to the same dst. Flags are killed.
8680 instruct sarL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8681 %{
8682   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8683   effect(KILL cr);
8684 
8685   format %{ "sarq    $dst, $shift" %}
8686   opcode(0xC1, 0x7); /* C1 /7 ib */
8687   ins_encode(REX_mem_wide(dst), OpcP,
8688              RM_opc_mem(secondary, dst), Con8or32(shift));
8689   ins_pipe(ialu_mem_imm);
8690 %}
8691 
8692 // Arithmetic Shift Right by variable (long, register operand)
     // The count is an rcx_RegI operand; dst is shifted in place and the
     // flags are killed.
8693 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8694 %{
8695   match(Set dst (RShiftL dst shift));
8696   effect(KILL cr);
8697 
8698   format %{ "sarq    $dst, $shift" %}
8699   opcode(0xD3, 0x7); /* D3 /7 */
8700   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8701   ins_pipe(ialu_reg_reg);
8702 %}
8703 
8704 // Arithmetic Shift Right by variable (long, memory operand)
     // Read-modify-write form: load from dst, arithmetic-shift right by the
     // rcx_RegI count, store back to the same dst. Flags are killed.
8705 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8706 %{
8707   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
8708   effect(KILL cr);
8709 
8710   format %{ "sarq    $dst, $shift" %}
8711   opcode(0xD3, 0x7); /* D3 /7 */
8712   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8713   ins_pipe(ialu_mem_reg);
8714 %}
8715 
8716 // Logical shift right by one (long, register operand)
     // In-place form: dst is both source and result of URShiftL; the count
     // is an immI1 constant. Flags are killed.
8717 instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
8718 %{
8719   match(Set dst (URShiftL dst shift));
8720   effect(KILL cr);
8721 
8722   format %{ "shrq    $dst, $shift" %}
8723   opcode(0xD1, 0x5); /* D1 /5 */
8724   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst ));
8725   ins_pipe(ialu_reg);
8726 %}
8727 
8728 // Logical shift right by one (long, memory operand)
     // Read-modify-write form: matches a StoreL to dst whose value is the
     // URShiftL of a LoadL from the same memory operand. Flags are killed.
8729 instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
8730 %{
8731   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8732   effect(KILL cr);
8733 
8734   format %{ "shrq    $dst, $shift" %}
8735   opcode(0xD1, 0x5); /* D1 /5 */
8736   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8737   ins_pipe(ialu_mem_imm);
8738 %}
8739 
8740 // Logical Shift Right by 8-bit immediate (long, register operand)
     // In-place form: dst is both source and result of URShiftL; the count
     // is an immI8 constant. Flags are killed.
8741 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
8742 %{
8743   match(Set dst (URShiftL dst shift));
8744   effect(KILL cr);
8745 
8746   format %{ "shrq    $dst, $shift" %}
8747   opcode(0xC1, 0x5); /* C1 /5 ib */
8748   ins_encode(reg_opc_imm_wide(dst, shift));
8749   ins_pipe(ialu_reg);
8750 %}
8751 
8752 
8753 // Logical Shift Right by 8-bit immediate (long, memory operand)
     // Read-modify-write form: load from dst, logical-shift right by the
     // immI8 constant, store back to the same dst. Flags are killed.
8754 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
8755 %{
8756   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8757   effect(KILL cr);
8758 
8759   format %{ "shrq    $dst, $shift" %}
8760   opcode(0xC1, 0x5); /* C1 /5 ib */
8761   ins_encode(REX_mem_wide(dst), OpcP,
8762              RM_opc_mem(secondary, dst), Con8or32(shift));
8763   ins_pipe(ialu_mem_imm);
8764 %}
8765 
8766 // Logical Shift Right by variable (long, register operand)
     // The count is an rcx_RegI operand; dst is shifted in place and the
     // flags are killed.
8767 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
8768 %{
8769   match(Set dst (URShiftL dst shift));
8770   effect(KILL cr);
8771 
8772   format %{ "shrq    $dst, $shift" %}
8773   opcode(0xD3, 0x5); /* D3 /5 */
8774   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8775   ins_pipe(ialu_reg_reg);
8776 %}
8777 
8778 // Logical Shift Right by variable (long, memory operand)
     // Read-modify-write form: load from dst, logical-shift right by the
     // rcx_RegI count, store back to the same dst. Flags are killed.
8779 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
8780 %{
8781   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
8782   effect(KILL cr);
8783 
8784   format %{ "shrq    $dst, $shift" %}
8785   opcode(0xD3, 0x5); /* D3 /5 */
8786   ins_encode(REX_mem_wide(dst), OpcP, RM_opc_mem(secondary, dst));
8787   ins_pipe(ialu_mem_reg);
8788 %}
8789 
8790 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8791 // This idiom is used by the compiler for the i2b bytecode.
     // The match rule is (RShiftI (LShiftI src 24) 24), which is replaced by a
     // single movsbl (opcode bytes 0F BE). No flags effect is declared.
8792 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
8793 %{
8794   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8795 
8796   format %{ "movsbl  $dst, $src\t# i2b" %}
8797   opcode(0x0F, 0xBE);
8798   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8799   ins_pipe(ialu_reg_reg);
8800 %}
8801 
8802 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8803 // This idiom is used by the compiler for the i2s bytecode.
     // The match rule is (RShiftI (LShiftI src 16) 16), which is replaced by a
     // single movswl (opcode bytes 0F BF). No flags effect is declared.
8804 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
8805 %{
8806   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8807 
8808   format %{ "movswl  $dst, $src\t# i2s" %}
8809   opcode(0x0F, 0xBF);
8810   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
8811   ins_pipe(ialu_reg_reg);
8812 %}
8813 
8814 // ROL/ROR instructions
8815 
8816 // ROL expand
8817 instruct rolI_rReg_imm1(rRegI dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate-left of dst in place
       // using the D1 /0 opcode form. Referenced from the expand block of
       // rolI_rReg_i1. dst is USE_DEF and the flags are killed.
8818   effect(KILL cr, USE_DEF dst);
8819 
8820   format %{ "roll    $dst" %}
8821   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8822   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8823   ins_pipe(ialu_reg);
8824 %}
8825 
8826 instruct rolI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): int rotate-left of dst in place
       // by an immI8 constant. Referenced from the expand block of
       // rolI_rReg_i8. dst is USE_DEF, shift is USE, flags are killed.
8827   effect(USE_DEF dst, USE shift, KILL cr);
8828 
8829   format %{ "roll    $dst, $shift" %}
8830   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8831   ins_encode( reg_opc_imm(dst, shift) );
8832   ins_pipe(ialu_reg);
8833 %}
8834 
8835 instruct rolI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8836 %{
       // Expand-only helper (no match rule): int rotate-left of dst in place
       // by the rcx_RegI count. Referenced from rolI_rReg_Var_C0 and
       // rolI_rReg_Var_C32. dst uses the no_rcx_RegI class (presumably so it
       // cannot share a register with the count - confirm).
8837   effect(USE_DEF dst, USE shift, KILL cr);
8838 
8839   format %{ "roll    $dst, $shift" %}
8840   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8841   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8842   ins_pipe(ialu_reg_reg);
8843 %}
8844 // end of ROL expand
8845 
8846 // Rotate Left by one (int)
     // Matches (dst << lshift) | (dst >>> rshift) where lshift is an immI1 and
     // rshift an immI_M1 constant, and expands to rolI_rReg_imm1.
8847 instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8848 %{
8849   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8850 
8851   expand %{
8852     rolI_rReg_imm1(dst, cr);
8853   %}
8854 %}
8855 
8856 // Rotate Left by 8-bit immediate (int)
     // Matches (dst << lshift) | (dst >>> rshift) with two immI8 constants and
     // expands to rolI_rReg_imm8 using lshift as the rotate count.
8857 instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
8858 %{
       // Only a rotate when the two shift constants sum to 0 modulo 32.
8859   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8860   match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8861 
8862   expand %{
8863     rolI_rReg_imm8(dst, lshift, cr);
8864   %}
8865 %}
8866 
8867 // Rotate Left by variable (int), right-shift count written as (0 - shift)
     // Matches (dst << shift) | (dst >>> (zero - shift)) and expands to
     // rolI_rReg_CL.
8868 instruct rolI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8869 %{
8870   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8871 
8872   expand %{
8873     rolI_rReg_CL(dst, shift, cr);
8874   %}
8875 %}
8876 
8877 // Rotate Left by variable (int), right-shift count written as (32 - shift)
     // Matches (dst << shift) | (dst >>> (c32 - shift)) and expands to
     // rolI_rReg_CL.
8878 instruct rolI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8879 %{
8880   match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8881 
8882   expand %{
8883     rolI_rReg_CL(dst, shift, cr);
8884   %}
8885 %}
8886 
8887 // ROR expand
8888 instruct rorI_rReg_imm1(rRegI dst, rFlagsReg cr)
8889 %{
       // Expand-only helper (no match rule): int rotate-right of dst in place
       // using the D1 /1 opcode form. Referenced from the expand block of
       // rorI_rReg_i1. dst is USE_DEF and the flags are killed.
8890   effect(USE_DEF dst, KILL cr);
8891 
8892   format %{ "rorl    $dst" %}
8893   opcode(0xD1, 0x1); /* D1 /1 */
8894   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8895   ins_pipe(ialu_reg);
8896 %}
8897 
8898 instruct rorI_rReg_imm8(rRegI dst, immI8 shift, rFlagsReg cr)
8899 %{
       // Expand-only helper (no match rule): int rotate-right of dst in place
       // by an immI8 constant. Referenced from the expand block of
       // rorI_rReg_i8. dst is USE_DEF, shift is USE, flags are killed.
8900   effect(USE_DEF dst, USE shift, KILL cr);
8901 
8902   format %{ "rorl    $dst, $shift" %}
8903   opcode(0xC1, 0x1); /* C1 /1 ib */
8904   ins_encode(reg_opc_imm(dst, shift));
8905   ins_pipe(ialu_reg);
8906 %}
8907 
8908 instruct rorI_rReg_CL(no_rcx_RegI dst, rcx_RegI shift, rFlagsReg cr)
8909 %{
       // Expand-only helper (no match rule): int rotate-right of dst in place
       // by the rcx_RegI count. Referenced from rorI_rReg_Var_C0 and
       // rorI_rReg_Var_C32.
8910   effect(USE_DEF dst, USE shift, KILL cr);
8911 
8912   format %{ "rorl    $dst, $shift" %}
8913   opcode(0xD3, 0x1); /* D3 /1 */
8914   ins_encode(REX_reg(dst), OpcP, reg_opc(dst));
8915   ins_pipe(ialu_reg_reg);
8916 %}
8917 // end of ROR expand
8918 
8919 // Rotate Right by one (int)
     // Matches (dst >>> rshift) | (dst << lshift) where rshift is an immI1 and
     // lshift an immI_M1 constant, and expands to rorI_rReg_imm1.
8920 instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
8921 %{
8922   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8923 
8924   expand %{
8925     rorI_rReg_imm1(dst, cr);
8926   %}
8927 %}
8928 
8929 // Rotate Right by 8-bit immediate (int)
     // Matches (dst >>> rshift) | (dst << lshift) with two immI8 constants and
     // expands to rorI_rReg_imm8 using rshift as the rotate count.
8930 instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
8931 %{
       // Only a rotate when the two shift constants sum to 0 modulo 32.
8932   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8933   match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8934 
8935   expand %{
8936     rorI_rReg_imm8(dst, rshift, cr);
8937   %}
8938 %}
8939 
8940 // Rotate Right by variable (int), left-shift count written as (0 - shift)
     // Matches (dst >>> shift) | (dst << (zero - shift)) and expands to
     // rorI_rReg_CL.
8941 instruct rorI_rReg_Var_C0(no_rcx_RegI dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
8942 %{
8943   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8944 
8945   expand %{
8946     rorI_rReg_CL(dst, shift, cr);
8947   %}
8948 %}
8949 
8950 // Rotate Right by variable (int), left-shift count written as (32 - shift)
     // Matches (dst >>> shift) | (dst << (c32 - shift)) and expands to
     // rorI_rReg_CL.
8951 instruct rorI_rReg_Var_C32(no_rcx_RegI dst, rcx_RegI shift, immI_32 c32, rFlagsReg cr)
8952 %{
8953   match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8954 
8955   expand %{
8956     rorI_rReg_CL(dst, shift, cr);
8957   %}
8958 %}
8959 
8960 // for long rotate
8961 // ROL expand
8962 instruct rolL_rReg_imm1(rRegL dst, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate-left of dst in place
       // using the D1 /0 opcode form. Referenced from the expand block of
       // rolL_rReg_i1. dst is USE_DEF and the flags are killed.
8963   effect(USE_DEF dst, KILL cr);
8964 
8965   format %{ "rolq    $dst" %}
8966   opcode(0xD1, 0x0); /* Opcode  D1 /0 */
8967   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8968   ins_pipe(ialu_reg);
8969 %}
8970 
8971 instruct rolL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr) %{
       // Expand-only helper (no match rule): long rotate-left of dst in place
       // by an immI8 constant. Referenced from the expand block of
       // rolL_rReg_i8. dst is USE_DEF, shift is USE, flags are killed.
8972   effect(USE_DEF dst, USE shift, KILL cr);
8973 
8974   format %{ "rolq    $dst, $shift" %}
8975   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8976   ins_encode( reg_opc_imm_wide(dst, shift) );
8977   ins_pipe(ialu_reg);
8978 %}
8979 
8980 instruct rolL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
8981 %{
       // Expand-only helper (no match rule): long rotate-left of dst in place
       // by the rcx_RegI count. Referenced from rolL_rReg_Var_C0 and
       // rolL_rReg_Var_C64.
8982   effect(USE_DEF dst, USE shift, KILL cr);
8983 
8984   format %{ "rolq    $dst, $shift" %}
8985   opcode(0xD3, 0x0); /* Opcode D3 /0 */
8986   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
8987   ins_pipe(ialu_reg_reg);
8988 %}
8989 // end of ROL expand
8990 
8991 // Rotate Left by one (long)
     // Matches (dst << lshift) | (dst >>> rshift) where lshift is an immI1 and
     // rshift an immI_M1 constant, and expands to rolL_rReg_imm1.
8992 instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift, rFlagsReg cr)
8993 %{
8994   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
8995 
8996   expand %{
8997     rolL_rReg_imm1(dst, cr);
8998   %}
8999 %}
9000 
9001 // Rotate Left by 8-bit immediate (long)
     // Matches (dst << lshift) | (dst >>> rshift) with two immI8 constants and
     // expands to rolL_rReg_imm8 using lshift as the rotate count.
9002 instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift, rFlagsReg cr)
9003 %{
       // Only a rotate when the two shift constants sum to 0 modulo 64.
9004   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9005   match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift)));
9006 
9007   expand %{
9008     rolL_rReg_imm8(dst, lshift, cr);
9009   %}
9010 %}
9011 
9012 // Rotate Left by variable (long), right-shift count written as (0 - shift)
     // Matches (dst << shift) | (dst >>> (zero - shift)) and expands to
     // rolL_rReg_CL.
9013 instruct rolL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9014 %{
9015   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift))));
9016 
9017   expand %{
9018     rolL_rReg_CL(dst, shift, cr);
9019   %}
9020 %}
9021 
9022 // Rotate Left by variable (long), right-shift count written as (64 - shift)
     // Matches (dst << shift) | (dst >>> (c64 - shift)) and expands to
     // rolL_rReg_CL.
9023 instruct rolL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9024 %{
9025   match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift))));
9026 
9027   expand %{
9028     rolL_rReg_CL(dst, shift, cr);
9029   %}
9030 %}
9031 
9032 // ROR expand
9033 instruct rorL_rReg_imm1(rRegL dst, rFlagsReg cr)
9034 %{
       // Expand-only helper (no match rule): long rotate-right of dst in place
       // using the D1 /1 opcode form. Referenced from the expand block of
       // rorL_rReg_i1. dst is USE_DEF and the flags are killed.
9035   effect(USE_DEF dst, KILL cr);
9036 
9037   format %{ "rorq    $dst" %}
9038   opcode(0xD1, 0x1); /* D1 /1 */
9039   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9040   ins_pipe(ialu_reg);
9041 %}
9042 
9043 instruct rorL_rReg_imm8(rRegL dst, immI8 shift, rFlagsReg cr)
9044 %{
       // Expand-only helper (no match rule): long rotate-right of dst in place
       // by an immI8 constant. Referenced from the expand block of
       // rorL_rReg_i8. dst is USE_DEF, shift is USE, flags are killed.
9045   effect(USE_DEF dst, USE shift, KILL cr);
9046 
9047   format %{ "rorq    $dst, $shift" %}
9048   opcode(0xC1, 0x1); /* C1 /1 ib */
9049   ins_encode(reg_opc_imm_wide(dst, shift));
9050   ins_pipe(ialu_reg);
9051 %}
9052 
9053 instruct rorL_rReg_CL(no_rcx_RegL dst, rcx_RegI shift, rFlagsReg cr)
9054 %{
       // Expand-only helper (no match rule): long rotate-right of dst in place
       // by the rcx_RegI count. Referenced from rorL_rReg_Var_C0 and
       // rorL_rReg_Var_C64.
9055   effect(USE_DEF dst, USE shift, KILL cr);
9056 
9057   format %{ "rorq    $dst, $shift" %}
9058   opcode(0xD3, 0x1); /* D3 /1 */
9059   ins_encode(REX_reg_wide(dst), OpcP, reg_opc(dst));
9060   ins_pipe(ialu_reg_reg);
9061 %}
9062 // end of ROR expand
9063 
9064 // Rotate Right by one (long)
     // Matches (dst >>> rshift) | (dst << lshift) where rshift is an immI1 and
     // lshift an immI_M1 constant, and expands to rorL_rReg_imm1.
9065 instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift, rFlagsReg cr)
9066 %{
9067   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9068 
9069   expand %{
9070     rorL_rReg_imm1(dst, cr);
9071   %}
9072 %}
9073 
9074 // Rotate Right by 8-bit immediate (long)
     // Matches (dst >>> rshift) | (dst << lshift) with two immI8 constants and
     // expands to rorL_rReg_imm8 using rshift as the rotate count.
9075 instruct rorL_rReg_i8(rRegL dst, immI8 rshift, immI8 lshift, rFlagsReg cr)
9076 %{
       // Only a rotate when the two shift constants sum to 0 modulo 64.
9077   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
9078   match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift)));
9079 
9080   expand %{
9081     rorL_rReg_imm8(dst, rshift, cr);
9082   %}
9083 %}
9084 
9085 // Rotate Right by variable (long), left-shift count written as (0 - shift)
     // Matches (dst >>> shift) | (dst << (zero - shift)) and expands to
     // rorL_rReg_CL.
9086 instruct rorL_rReg_Var_C0(no_rcx_RegL dst, rcx_RegI shift, immI0 zero, rFlagsReg cr)
9087 %{
9088   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift))));
9089 
9090   expand %{
9091     rorL_rReg_CL(dst, shift, cr);
9092   %}
9093 %}
9094 
9095 // Rotate Right by variable (long), left-shift count written as (64 - shift)
     // Matches (dst >>> shift) | (dst << (c64 - shift)) and expands to
     // rorL_rReg_CL.
9096 instruct rorL_rReg_Var_C64(no_rcx_RegL dst, rcx_RegI shift, immI_64 c64, rFlagsReg cr)
9097 %{
9098   match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift))));
9099 
9100   expand %{
9101     rorL_rReg_CL(dst, shift, cr);
9102   %}
9103 %}
9104 
9105 // Logical Instructions
9106 
9107 // Integer Logical Instructions
9108 
9109 // And Instructions
9110 // And Register with Register (int)
     // Two-operand form: dst is both the first input and the result of AndI.
     // Flags are killed.
9111 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9112 %{
9113   match(Set dst (AndI dst src));
9114   effect(KILL cr);
9115 
9116   format %{ "andl    $dst, $src\t# int" %}
9117   opcode(0x23);
9118   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9119   ins_pipe(ialu_reg_reg);
9120 %}
9121 
9122 // And Register with Immediate 255
     // Implemented as a movzbl of dst onto itself (opcode bytes 0F B6) rather
     // than an and instruction; no flags effect is declared.
9123 instruct andI_rReg_imm255(rRegI dst, immI_255 src)
9124 %{
9125   match(Set dst (AndI dst src));
9126 
9127   format %{ "movzbl  $dst, $dst\t# int & 0xFF" %}
9128   opcode(0x0F, 0xB6);
9129   ins_encode(REX_reg_breg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9130   ins_pipe(ialu_reg);
9131 %}
9132 
9133 // And Register with Immediate 255 and promote to long
     // Folds the ConvI2L of (src & mask) into a single movzbl from src into
     // the long register dst; no flags effect is declared.
9134 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
9135 %{
9136   match(Set dst (ConvI2L (AndI src mask)));
9137 
9138   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
9139   opcode(0x0F, 0xB6);
9140   ins_encode(REX_reg_breg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9141   ins_pipe(ialu_reg);
9142 %}
9143 
9144 // And Register with Immediate 65535
     // Implemented as a movzwl of dst onto itself (opcode bytes 0F B7) rather
     // than an and instruction; no flags effect is declared.
9145 instruct andI_rReg_imm65535(rRegI dst, immI_65535 src)
9146 %{
9147   match(Set dst (AndI dst src));
9148 
9149   format %{ "movzwl  $dst, $dst\t# int & 0xFFFF" %}
9150   opcode(0x0F, 0xB7);
9151   ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9152   ins_pipe(ialu_reg);
9153 %}
9154 
9155 // And Register with Immediate 65535 and promote to long
     // Folds the ConvI2L of (src & mask) into a single movzwl from src into
     // the long register dst; no flags effect is declared.
9156 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
9157 %{
9158   match(Set dst (ConvI2L (AndI src mask)));
9159 
9160   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
9161   opcode(0x0F, 0xB7);
9162   ins_encode(REX_reg_reg(dst, src), OpcP, OpcS, reg_reg(dst, src));
9163   ins_pipe(ialu_reg);
9164 %}
9165 
9166 // And Register with Immediate (int)
     // General immI constant form; dst is both input and result. Flags are
     // killed. The OpcSErm/Con8or32 enc_classes are defined elsewhere
     // (presumably they pick an 8-bit or 32-bit immediate form - confirm).
9167 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9168 %{
9169   match(Set dst (AndI dst src));
9170   effect(KILL cr);
9171 
9172   format %{ "andl    $dst, $src\t# int" %}
9173   opcode(0x81, 0x04); /* Opcode 81 /4 */
9174   ins_encode(OpcSErm(dst, src), Con8or32(src));
9175   ins_pipe(ialu_reg);
9176 %}
9177 
9178 // And Register with Memory (int)
     // Folds the LoadI of src into the and; dst is both input and result.
     // Flags are killed; the instruction cost is set to 125.
9179 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9180 %{
9181   match(Set dst (AndI dst (LoadI src)));
9182   effect(KILL cr);
9183 
9184   ins_cost(125);
9185   format %{ "andl    $dst, $src\t# int" %}
9186   opcode(0x23);
9187   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9188   ins_pipe(ialu_reg_mem);
9189 %}
9190 
9191 // And Memory with Register (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) & src.
     // Flags are killed; the instruction cost is set to 150.
9192 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9193 %{
9194   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9195   effect(KILL cr);
9196 
9197   ins_cost(150);
9198   format %{ "andl    $dst, $src\t# int" %}
9199   opcode(0x21); /* Opcode 21 /r */
       // The register src is passed first to the enc_classes and the memory
       // dst second.
9200   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9201   ins_pipe(ialu_mem_reg);
9202 %}
9203 
9204 // And Memory with Immediate (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) & src
     // with an immI constant. Flags are killed; the cost is set to 125.
9205 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
9206 %{
9207   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9208   effect(KILL cr);
9209 
9210   ins_cost(125);
9211   format %{ "andl    $dst, $src\t# int" %}
9212   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9213   ins_encode(REX_mem(dst), OpcSE(src),
9214              RM_opc_mem(secondary, dst), Con8or32(src));
9215   ins_pipe(ialu_mem_imm);
9216 %}
9217 
9218 // BMI1 instructions
9219 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
       // BMI1 and-not with a memory operand: matches (src1 ^ minus_1) & LoadI(src2)
       // and emits andnl. Only selected when UseBMI1Instructions is set.
       // Flags are killed; the instruction cost is set to 125.
9220   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
9221   predicate(UseBMI1Instructions);
9222   effect(KILL cr);
9223 
9224   ins_cost(125);
9225   format %{ "andnl  $dst, $src1, $src2" %}
9226 
9227   ins_encode %{
9228     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
9229   %}
9230   ins_pipe(ialu_reg_mem);
9231 %}
9232 
9233 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
       // BMI1 and-not, register form: matches (src1 ^ minus_1) & src2 and
       // emits andnl. Only selected when UseBMI1Instructions is set.
       // Flags are killed.
9234   match(Set dst (AndI (XorI src1 minus_1) src2));
9235   predicate(UseBMI1Instructions);
9236   effect(KILL cr);
9237 
9238   format %{ "andnl  $dst, $src1, $src2" %}
9239 
9240   ins_encode %{
9241     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
9242   %}
9243   ins_pipe(ialu_reg);
9244 %}
9245 
9246 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
       // BMI1 blsil, register form: matches (imm_zero - src) & src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9247   match(Set dst (AndI (SubI imm_zero src) src));
9248   predicate(UseBMI1Instructions);
9249   effect(KILL cr);
9250 
9251   format %{ "blsil  $dst, $src" %}
9252 
9253   ins_encode %{
9254     __ blsil($dst$$Register, $src$$Register);
9255   %}
9256   ins_pipe(ialu_reg);
9257 %}
9258 
9259 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
       // BMI1 blsil, memory form: matches (imm_zero - LoadI(src)) & LoadI(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9260   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
9261   predicate(UseBMI1Instructions);
9262   effect(KILL cr);
9263 
9264   ins_cost(125);
9265   format %{ "blsil  $dst, $src" %}
9266 
9267   ins_encode %{
9268     __ blsil($dst$$Register, $src$$Address);
9269   %}
9270   ins_pipe(ialu_reg_mem);
9271 %}
9272 
9273 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9274 %{
       // BMI1 blsmskl, memory form: matches (LoadI(src) + minus_1) ^ LoadI(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9275   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
9276   predicate(UseBMI1Instructions);
9277   effect(KILL cr);
9278 
9279   ins_cost(125);
9280   format %{ "blsmskl $dst, $src" %}
9281 
9282   ins_encode %{
9283     __ blsmskl($dst$$Register, $src$$Address);
9284   %}
9285   ins_pipe(ialu_reg_mem);
9286 %}
9287 
9288 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9289 %{
       // BMI1 blsmskl, register form: matches (src + minus_1) ^ src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9290   match(Set dst (XorI (AddI src minus_1) src));
9291   predicate(UseBMI1Instructions);
9292   effect(KILL cr);
9293 
9294   format %{ "blsmskl $dst, $src" %}
9295 
9296   ins_encode %{
9297     __ blsmskl($dst$$Register, $src$$Register);
9298   %}
9299 
9300   ins_pipe(ialu_reg);
9301 %}
9302 
9303 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
9304 %{
       // BMI1 blsrl, register form: matches (src + minus_1) & src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9305   match(Set dst (AndI (AddI src minus_1) src) );
9306   predicate(UseBMI1Instructions);
9307   effect(KILL cr);
9308 
9309   format %{ "blsrl  $dst, $src" %}
9310 
9311   ins_encode %{
9312     __ blsrl($dst$$Register, $src$$Register);
9313   %}
9314 
       // Register-only form: use the register ALU pipeline class, as the
       // blsiI/blsmskI register forms do (was ialu_reg_mem).
9315   ins_pipe(ialu_reg);
9316 %}
9317 
9318 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
9319 %{
       // BMI1 blsrl, memory form: matches (LoadI(src) + minus_1) & LoadI(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9320   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
9321   predicate(UseBMI1Instructions);
9322   effect(KILL cr);
9323 
9324   ins_cost(125);
9325   format %{ "blsrl  $dst, $src" %}
9326 
9327   ins_encode %{
9328     __ blsrl($dst$$Register, $src$$Address);
9329   %}
9330 
       // Memory-source form: use the register-with-memory pipeline class, as
       // the blsiI/blsmskI memory forms do (was ialu_reg).
9331   ins_pipe(ialu_reg_mem);
9332 %}
9333 
9334 // Or Instructions
9335 // Or Register with Register (int)
     // Two-operand form: dst is both the first input and the result of OrI.
     // Flags are killed.
9336 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9337 %{
9338   match(Set dst (OrI dst src));
9339   effect(KILL cr);
9340 
9341   format %{ "orl     $dst, $src\t# int" %}
9342   opcode(0x0B);
9343   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9344   ins_pipe(ialu_reg_reg);
9345 %}
9346 
9347 // Or Register with Immediate (int)
     // General immI constant form; dst is both input and result.
     // Flags are killed.
9348 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9349 %{
9350   match(Set dst (OrI dst src));
9351   effect(KILL cr);
9352 
9353   format %{ "orl     $dst, $src\t# int" %}
9354   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9355   ins_encode(OpcSErm(dst, src), Con8or32(src));
9356   ins_pipe(ialu_reg);
9357 %}
9358 
9359 // Or Register with Memory (int)
     // Folds the LoadI of src into the or; dst is both input and result.
     // Flags are killed; the instruction cost is set to 125.
9360 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9361 %{
9362   match(Set dst (OrI dst (LoadI src)));
9363   effect(KILL cr);
9364 
9365   ins_cost(125);
9366   format %{ "orl     $dst, $src\t# int" %}
9367   opcode(0x0B);
9368   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9369   ins_pipe(ialu_reg_mem);
9370 %}
9371 
9372 // Or Memory with Register (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) | src.
     // Flags are killed; the instruction cost is set to 150.
9373 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9374 %{
9375   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9376   effect(KILL cr);
9377 
9378   ins_cost(150);
9379   format %{ "orl     $dst, $src\t# int" %}
9380   opcode(0x09); /* Opcode 09 /r */
9381   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9382   ins_pipe(ialu_mem_reg);
9383 %}
9384 
9385 // Or Memory with Immediate (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) | src
     // with an immI constant. Flags are killed; the cost is set to 125.
9386 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
9387 %{
9388   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9389   effect(KILL cr);
9390 
9391   ins_cost(125);
9392   format %{ "orl     $dst, $src\t# int" %}
9393   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9394   ins_encode(REX_mem(dst), OpcSE(src),
9395              RM_opc_mem(secondary, dst), Con8or32(src));
9396   ins_pipe(ialu_mem_imm);
9397 %}
9398 
9399 // Xor Instructions
9400 // Xor Register with Register (int)
     // Two-operand form: dst is both the first input and the result of XorI.
     // Flags are killed.
9401 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9402 %{
9403   match(Set dst (XorI dst src));
9404   effect(KILL cr);
9405 
9406   format %{ "xorl    $dst, $src\t# int" %}
9407   opcode(0x33);
9408   ins_encode(REX_reg_reg(dst, src), OpcP, reg_reg(dst, src));
9409   ins_pipe(ialu_reg_reg);
9410 %}
9411 
9412 // Xor Register with Immediate -1
     // Matches dst ^ imm for an immI_M1 constant and emits notl on dst
     // instead of an xor. No flags effect is declared.
     // NOTE(review): the format string prints "not" while the encoder emits
     // notl; the mnemonic in the debug listing is therefore not exact.
9413 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
9414   match(Set dst (XorI dst imm));
9415 
9416   format %{ "not    $dst" %}
9417   ins_encode %{
9418      __ notl($dst$$Register);
9419   %}
9420   ins_pipe(ialu_reg);
9421 %}
9422 
9423 // Xor Register with Immediate (int)
     // General immI constant form; dst is both input and result.
     // Flags are killed.
9424 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9425 %{
9426   match(Set dst (XorI dst src));
9427   effect(KILL cr);
9428 
9429   format %{ "xorl    $dst, $src\t# int" %}
9430   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9431   ins_encode(OpcSErm(dst, src), Con8or32(src));
9432   ins_pipe(ialu_reg);
9433 %}
9434 
9435 // Xor Register with Memory (int)
     // Folds the LoadI of src into the xor; dst is both input and result.
     // Flags are killed; the instruction cost is set to 125.
9436 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9437 %{
9438   match(Set dst (XorI dst (LoadI src)));
9439   effect(KILL cr);
9440 
9441   ins_cost(125);
9442   format %{ "xorl    $dst, $src\t# int" %}
9443   opcode(0x33);
9444   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
9445   ins_pipe(ialu_reg_mem);
9446 %}
9447 
9448 // Xor Memory with Register (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) ^ src.
     // Flags are killed; the instruction cost is set to 150.
9449 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9450 %{
9451   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9452   effect(KILL cr);
9453 
9454   ins_cost(150);
9455   format %{ "xorl    $dst, $src\t# int" %}
9456   opcode(0x31); /* Opcode 31 /r */
9457   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
9458   ins_pipe(ialu_mem_reg);
9459 %}
9460 
9461 // Xor Memory with Immediate (int)
     // Read-modify-write form: matches a StoreI to dst of (LoadI dst) ^ src
     // with an immI constant. Flags are killed; the cost is set to 125.
9462 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
9463 %{
9464   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9465   effect(KILL cr);
9466 
9467   ins_cost(125);
9468   format %{ "xorl    $dst, $src\t# int" %}
9469   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9470   ins_encode(REX_mem(dst), OpcSE(src),
9471              RM_opc_mem(secondary, dst), Con8or32(src));
9472   ins_pipe(ialu_mem_imm);
9473 %}
9474 
9475 
9476 // Long Logical Instructions
9477 
9478 // And Instructions
9479 // And Register with Register (long)
     // Two-operand form: dst is both the first input and the result of AndL.
     // Flags are killed.
9480 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9481 %{
9482   match(Set dst (AndL dst src));
9483   effect(KILL cr);
9484 
9485   format %{ "andq    $dst, $src\t# long" %}
9486   opcode(0x23);
9487   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9488   ins_pipe(ialu_reg_reg);
9489 %}
9490 
9491 // And Register with Immediate 255 (long)
     // Implemented as a movzbq of dst onto itself (opcode bytes 0F B6) rather
     // than an and instruction; no flags effect is declared.
9492 instruct andL_rReg_imm255(rRegL dst, immL_255 src)
9493 %{
9494   match(Set dst (AndL dst src));
9495 
9496   format %{ "movzbq  $dst, $dst\t# long & 0xFF" %}
9497   opcode(0x0F, 0xB6);
9498   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9499   ins_pipe(ialu_reg);
9500 %}
9501 
9502 // And Register with Immediate 65535 (long)
     // Implemented as a movzwq of dst onto itself (opcode bytes 0F B7) rather
     // than an and instruction; no flags effect is declared.
9503 instruct andL_rReg_imm65535(rRegL dst, immL_65535 src)
9504 %{
9505   match(Set dst (AndL dst src));
9506 
9507   format %{ "movzwq  $dst, $dst\t# long & 0xFFFF" %}
9508   opcode(0x0F, 0xB7);
9509   ins_encode(REX_reg_reg_wide(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
9510   ins_pipe(ialu_reg);
9511 %}
9512 
9513 // And Register with Immediate (long)
     // The constant is an immL32 operand (operand class defined elsewhere;
     // presumably a long constant representable in 32 bits - confirm).
     // dst is both input and result. Flags are killed.
9514 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9515 %{
9516   match(Set dst (AndL dst src));
9517   effect(KILL cr);
9518 
9519   format %{ "andq    $dst, $src\t# long" %}
9520   opcode(0x81, 0x04); /* Opcode 81 /4 */
9521   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9522   ins_pipe(ialu_reg);
9523 %}
9524 
9525 // And Register with Memory (long)
     // Folds the LoadL of src into the and; dst is both input and result.
     // Flags are killed; the instruction cost is set to 125.
9526 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9527 %{
9528   match(Set dst (AndL dst (LoadL src)));
9529   effect(KILL cr);
9530 
9531   ins_cost(125);
9532   format %{ "andq    $dst, $src\t# long" %}
9533   opcode(0x23);
9534   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9535   ins_pipe(ialu_reg_mem);
9536 %}
9537 
9538 // And Memory with Register (long)
     // Read-modify-write form: matches a StoreL to dst of (LoadL dst) & src.
     // Flags are killed; the instruction cost is set to 150.
9539 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9540 %{
9541   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9542   effect(KILL cr);
9543 
9544   ins_cost(150);
9545   format %{ "andq    $dst, $src\t# long" %}
9546   opcode(0x21); /* Opcode 21 /r */
9547   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9548   ins_pipe(ialu_mem_reg);
9549 %}
9550 
9551 // And Memory with Immediate (long)
     // Read-modify-write form: matches a StoreL to dst of (LoadL dst) & src
     // with an immL32 constant. Flags are killed; the cost is set to 125.
9552 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9553 %{
9554   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
9555   effect(KILL cr);
9556 
9557   ins_cost(125);
9558   format %{ "andq    $dst, $src\t# long" %}
9559   opcode(0x81, 0x4); /* Opcode 81 /4 id */
9560   ins_encode(REX_mem_wide(dst), OpcSE(src),
9561              RM_opc_mem(secondary, dst), Con8or32(src));
9562   ins_pipe(ialu_mem_imm);
9563 %}
9564 
9565 // BMI1 instructions
9566 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
       // BMI1 and-not with a memory operand (long): matches
       // (src1 ^ minus_1) & LoadL(src2) and emits andnq. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9567   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
9568   predicate(UseBMI1Instructions);
9569   effect(KILL cr);
9570 
9571   ins_cost(125);
9572   format %{ "andnq  $dst, $src1, $src2" %}
9573 
9574   ins_encode %{
9575     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
9576   %}
9577   ins_pipe(ialu_reg_mem);
9578 %}
9579 
9580 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
       // BMI1 and-not, register form (long): matches (src1 ^ minus_1) & src2
       // and emits andnq. Only selected when UseBMI1Instructions is set.
       // Flags are killed.
9581   match(Set dst (AndL (XorL src1 minus_1) src2));
9582   predicate(UseBMI1Instructions);
9583   effect(KILL cr);
9584 
9585   format %{ "andnq  $dst, $src1, $src2" %}
9586 
9587   ins_encode %{
9588     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
9589   %}
       // Register-only form: use the register ALU pipeline class, matching
       // andnI_rReg_rReg_rReg (was ialu_reg_mem).
9590   ins_pipe(ialu_reg);
9591 %}
9592 
9593 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
       // BMI1 blsiq, register form: matches (imm_zero - src) & src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9594   match(Set dst (AndL (SubL imm_zero src) src));
9595   predicate(UseBMI1Instructions);
9596   effect(KILL cr);
9597 
9598   format %{ "blsiq  $dst, $src" %}
9599 
9600   ins_encode %{
9601     __ blsiq($dst$$Register, $src$$Register);
9602   %}
9603   ins_pipe(ialu_reg);
9604 %}
9605 
9606 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
       // BMI1 blsiq, memory form: matches (imm_zero - LoadL(src)) & LoadL(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9607   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9608   predicate(UseBMI1Instructions);
9609   effect(KILL cr);
9610 
9611   ins_cost(125);
9612   format %{ "blsiq  $dst, $src" %}
9613 
9614   ins_encode %{
9615     __ blsiq($dst$$Register, $src$$Address);
9616   %}
9617   ins_pipe(ialu_reg_mem);
9618 %}
9619 
9620 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9621 %{
       // BMI1 blsmskq, memory form: matches (LoadL(src) + minus_1) ^ LoadL(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9622   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
9623   predicate(UseBMI1Instructions);
9624   effect(KILL cr);
9625 
9626   ins_cost(125);
9627   format %{ "blsmskq $dst, $src" %}
9628 
9629   ins_encode %{
9630     __ blsmskq($dst$$Register, $src$$Address);
9631   %}
9632   ins_pipe(ialu_reg_mem);
9633 %}
9634 
9635 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9636 %{
       // BMI1 blsmskq, register form: matches (src + minus_1) ^ src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9637   match(Set dst (XorL (AddL src minus_1) src));
9638   predicate(UseBMI1Instructions);
9639   effect(KILL cr);
9640 
9641   format %{ "blsmskq $dst, $src" %}
9642 
9643   ins_encode %{
9644     __ blsmskq($dst$$Register, $src$$Register);
9645   %}
9646 
9647   ins_pipe(ialu_reg);
9648 %}
9649 
9650 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
9651 %{
       // BMI1 blsrq, register form: matches (src + minus_1) & src.
       // Only selected when UseBMI1Instructions is set. Flags are killed.
9652   match(Set dst (AndL (AddL src minus_1) src) );
9653   predicate(UseBMI1Instructions);
9654   effect(KILL cr);
9655 
9656   format %{ "blsrq  $dst, $src" %}
9657 
9658   ins_encode %{
9659     __ blsrq($dst$$Register, $src$$Register);
9660   %}
9661 
9662   ins_pipe(ialu_reg);
9663 %}
9664 
9665 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
9666 %{
       // BMI1 blsrq, memory form: matches (LoadL(src) + minus_1) & LoadL(src),
       // both loads naming the same memory operand. Only selected when
       // UseBMI1Instructions is set. Flags are killed; cost is set to 125.
9667   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
9668   predicate(UseBMI1Instructions);
9669   effect(KILL cr);
9670 
9671   ins_cost(125);
9672   format %{ "blsrq  $dst, $src" %}
9673 
9674   ins_encode %{
9675     __ blsrq($dst$$Register, $src$$Address);
9676   %}
9677 
       // Memory-source form: use the register-with-memory pipeline class, as
       // blsiL_rReg_mem and blsmskL_rReg_mem do (was ialu_reg).
9678   ins_pipe(ialu_reg_mem);
9679 %}
9680 
9681 // Or Instructions
9682 // Or Register with Register
// 64-bit OR, register with register: dst = dst | src (dst is both an input
// and the result). Emitted through the REX_reg_reg_wide, OpcP (opcode 0x0B)
// and reg_reg encoding classes. The flags register is declared killed.
9683 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9684 %{
9685   match(Set dst (OrL dst src));
9686   effect(KILL cr);
9687 
9688   format %{ "orq     $dst, $src\t# long" %}
9689   opcode(0x0B);
9690   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9691   ins_pipe(ialu_reg_reg);
9692 %}
9693 
9694 // Use any_RegP to match R15 (TLS register) without spilling.
// 64-bit OR of a long register with a pointer reinterpreted as an integer:
// dst = dst | CastP2X(src). Same encoding as orL_rReg (opcode 0x0B); the
// source operand class is any_RegP rather than rRegL.
// The flags register is declared killed.
9695 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
9696   match(Set dst (OrL dst (CastP2X src)));
9697   effect(KILL cr);
9698 
9699   format %{ "orq     $dst, $src\t# long" %}
9700   opcode(0x0B);
9701   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9702   ins_pipe(ialu_reg_reg);
9703 %}
9704 
9705 
9706 // Or Register with Immediate
// 64-bit OR, register with an immL32 constant: dst = dst | src.
// Uses the 0x81 /1 opcode group.
// NOTE(review): OpcSErm_wide / Con8or32 presumably select the shorter
// sign-extended 8-bit immediate form when the constant fits -- confirm
// against the enc_class definitions.
// The flags register is declared killed.
9707 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9708 %{
9709   match(Set dst (OrL dst src));
9710   effect(KILL cr);
9711 
9712   format %{ "orq     $dst, $src\t# long" %}
9713   opcode(0x81, 0x01); /* Opcode 81 /1 id */
9714   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9715   ins_pipe(ialu_reg);
9716 %}
9717 
9718 // Or Register with Memory
// 64-bit OR, register with memory: dst = dst | (long loaded from src).
// Folds the LoadL into the orq (opcode 0x0B with a memory operand).
// Cost 125; the flags register is declared killed.
9719 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9720 %{
9721   match(Set dst (OrL dst (LoadL src)));
9722   effect(KILL cr);
9723 
9724   ins_cost(125);
9725   format %{ "orq     $dst, $src\t# long" %}
9726   opcode(0x0B);
9727   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9728   ins_pipe(ialu_reg_mem);
9729 %}
9730 
9731 // Or Memory with Register
// 64-bit OR, memory with register (read-modify-write): matches a StoreL to
// dst whose value is (LoadL dst) | src, so the load, OR and store collapse
// into one orq with a memory destination (opcode 0x09 /r).
// Cost 150; the flags register is declared killed.
9732 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9733 %{
9734   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9735   effect(KILL cr);
9736 
9737   ins_cost(150);
9738   format %{ "orq     $dst, $src\t# long" %}
9739   opcode(0x09); /* Opcode 09 /r */
9740   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9741   ins_pipe(ialu_mem_reg);
9742 %}
9743 
9744 // Or Memory with Immediate
// 64-bit OR, memory with an immL32 constant (read-modify-write): matches a
// StoreL to dst whose value is (LoadL dst) | src. Uses the 0x81 /1 opcode
// group; the secondary opcode (0x1) is passed to RM_opc_mem.
// Cost 125; the flags register is declared killed.
9745 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9746 %{
9747   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
9748   effect(KILL cr);
9749 
9750   ins_cost(125);
9751   format %{ "orq     $dst, $src\t# long" %}
9752   opcode(0x81, 0x1); /* Opcode 81 /1 id */
9753   ins_encode(REX_mem_wide(dst), OpcSE(src),
9754              RM_opc_mem(secondary, dst), Con8or32(src));
9755   ins_pipe(ialu_mem_imm);
9756 %}
9757 
9758 // Xor Instructions
9759 // Xor Register with Register
// 64-bit XOR, register with register: dst = dst ^ src (dst is both an input
// and the result). Emitted through the REX_reg_reg_wide, OpcP (opcode 0x33)
// and reg_reg encoding classes. The flags register is declared killed.
9760 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
9761 %{
9762   match(Set dst (XorL dst src));
9763   effect(KILL cr);
9764 
9765   format %{ "xorq    $dst, $src\t# long" %}
9766   opcode(0x33);
9767   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
9768   ins_pipe(ialu_reg_reg);
9769 %}
9770 
9771 // Xor Register with Immediate -1
// 64-bit XOR with the constant -1 (immL_M1), i.e. bitwise complement:
// emitted as a single notq on dst. Unlike the other XOR forms in this group,
// this instruct takes no flags operand and declares no KILL effect.
9772 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
9773   match(Set dst (XorL dst imm));
9774 
9775   format %{ "notq   $dst" %}
9776   ins_encode %{
9777      __ notq($dst$$Register);
9778   %}
9779   ins_pipe(ialu_reg);
9780 %}
9781 
9782 // Xor Register with Immediate
// 64-bit XOR, register with an immL32 constant: dst = dst ^ src.
// Uses the 0x81 /6 opcode group.
// NOTE(review): OpcSErm_wide / Con8or32 presumably select the shorter
// sign-extended 8-bit immediate form when the constant fits -- confirm
// against the enc_class definitions.
// The flags register is declared killed.
9783 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
9784 %{
9785   match(Set dst (XorL dst src));
9786   effect(KILL cr);
9787 
9788   format %{ "xorq    $dst, $src\t# long" %}
9789   opcode(0x81, 0x06); /* Opcode 81 /6 id */
9790   ins_encode(OpcSErm_wide(dst, src), Con8or32(src));
9791   ins_pipe(ialu_reg);
9792 %}
9793 
9794 // Xor Register with Memory
// 64-bit XOR, register with memory: dst = dst ^ (long loaded from src).
// Folds the LoadL into the xorq (opcode 0x33 with a memory operand).
// Cost 125; the flags register is declared killed.
9795 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
9796 %{
9797   match(Set dst (XorL dst (LoadL src)));
9798   effect(KILL cr);
9799 
9800   ins_cost(125);
9801   format %{ "xorq    $dst, $src\t# long" %}
9802   opcode(0x33);
9803   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
9804   ins_pipe(ialu_reg_mem);
9805 %}
9806 
9807 // Xor Memory with Register
// 64-bit XOR, memory with register (read-modify-write): matches a StoreL to
// dst whose value is (LoadL dst) ^ src, so the load, XOR and store collapse
// into one xorq with a memory destination (opcode 0x31 /r).
// Cost 150; the flags register is declared killed.
9808 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
9809 %{
9810   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9811   effect(KILL cr);
9812 
9813   ins_cost(150);
9814   format %{ "xorq    $dst, $src\t# long" %}
9815   opcode(0x31); /* Opcode 31 /r */
9816   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
9817   ins_pipe(ialu_mem_reg);
9818 %}
9819 
9820 // Xor Memory with Immediate
// 64-bit XOR, memory with an immL32 constant (read-modify-write): matches a
// StoreL to dst whose value is (LoadL dst) ^ src. Uses the 0x81 /6 opcode
// group; the secondary opcode (0x6) is passed to RM_opc_mem.
// Cost 125; the flags register is declared killed.
9821 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
9822 %{
9823   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
9824   effect(KILL cr);
9825 
9826   ins_cost(125);
9827   format %{ "xorq    $dst, $src\t# long" %}
9828   opcode(0x81, 0x6); /* Opcode 81 /6 id */
9829   ins_encode(REX_mem_wide(dst), OpcSE(src),
9830              RM_opc_mem(secondary, dst), Con8or32(src));
9831   ins_pipe(ialu_mem_imm);
9832 %}
9833 
9834 // Convert Int to Boolean
// Conv2B of an int: dst = (src != 0) ? 1 : 0.
// Three-instruction sequence, as listed in the format text:
//   testl src, src   -- set ZF from src
//   setnz dst        -- low byte of dst = 1 if src was non-zero
//   movzbl dst, dst  -- zero-extend that byte to the full int
// The flags register is declared killed.
9835 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
9836 %{
9837   match(Set dst (Conv2B src));
9838   effect(KILL cr);
9839 
9840   format %{ "testl   $src, $src\t# ci2b\n\t"
9841             "setnz   $dst\n\t"
9842             "movzbl  $dst, $dst" %}
9843   ins_encode(REX_reg_reg(src, src), opc_reg_reg(0x85, src, src), // testl
9844              setNZ_reg(dst),
9845              REX_reg_breg(dst, dst), // movzbl
9846              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9847   ins_pipe(pipe_slow); // XXX
9848 %}
9849 
9850 // Convert Pointer to Boolean
// Conv2B of a pointer: dst = (src != NULL) ? 1 : 0.
// Same sequence as convI2B, except the test uses the wide (64-bit) REX
// encoding class because src is a pointer register:
//   testq src, src ; setnz dst ; movzbl dst, dst
// The flags register is declared killed.
9851 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
9852 %{
9853   match(Set dst (Conv2B src));
9854   effect(KILL cr);
9855 
9856   format %{ "testq   $src, $src\t# cp2b\n\t"
9857             "setnz   $dst\n\t"
9858             "movzbl  $dst, $dst" %}
9859   ins_encode(REX_reg_reg_wide(src, src), opc_reg_reg(0x85, src, src), // testq
9860              setNZ_reg(dst),
9861              REX_reg_breg(dst, dst), // movzbl
9862              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst));
9863   ins_pipe(pipe_slow); // XXX
9864 %}
9865 
// CmpLTMask, general form: dst = (p < q) ? -1 : 0 (signed compare).
// Sequence, as listed in the format text:
//   cmpl p, q ; setlt dst ; movzbl dst, dst ; negl dst
// setlt/movzbl produce 0 or 1, and the final negl turns 1 into the all-ones
// mask. Cost 400; the flags register is declared killed.
9866 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
9867 %{
9868   match(Set dst (CmpLTMask p q));
9869   effect(KILL cr);
9870 
9871   ins_cost(400);
9872   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
9873             "setlt   $dst\n\t"
9874             "movzbl  $dst, $dst\n\t"
9875             "negl    $dst" %}
9876   ins_encode(REX_reg_reg(p, q), opc_reg_reg(0x3B, p, q), // cmpl
9877              setLT_reg(dst),
9878              REX_reg_breg(dst, dst), // movzbl
9879              Opcode(0x0F), Opcode(0xB6), reg_reg(dst, dst),
9880              neg_reg(dst));
9881   ins_pipe(pipe_slow);
9882 %}
9883 
// CmpLTMask against constant zero: dst = (dst < 0) ? -1 : 0.
// A single arithmetic right shift by 31 replicates the sign bit across the
// whole 32-bit register, which is exactly that mask.
// Cost 100 (versus 400 for the general cmpLTMask); flags declared killed.
9884 instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
9885 %{
9886   match(Set dst (CmpLTMask dst zero));
9887   effect(KILL cr);
9888 
9889   ins_cost(100);
9890   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
9891   ins_encode %{
9892   __ sarl($dst$$Register, 31);
9893   %}
9894   ins_pipe(ialu_reg);
9895 %}
9896 
9897 /* Better to save a register than avoid a branch */
// Conditional add fused with CmpLTMask:
//   p = ((p < q) ? y : 0) + (p - q)
// Implemented with a short branch instead of materializing the mask:
// subtract first, then add y only on the "less" outcome of that subtraction.
// Cost 300; the flags register is declared killed.
9898 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9899 %{
9900   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9901   effect(KILL cr);
9902   ins_cost(300);
9903   format %{ "subl   $p,$q\t# cadd_cmpLTMask\n\t"
9904             "jge    done\n\t"
9905             "addl   $p,$y\n"
9906             "done:  " %}
9907   ins_encode %{
9908     Register Rp = $p$$Register;
9909     Register Rq = $q$$Register;
9910     Register Ry = $y$$Register;
9911     Label done;
9912     // p -= q; the flags from this subtraction drive the branch below.
9913     __ subl(Rp, Rq);
9914     // Skip the add when the signed result is >= (mask would be 0).
9915     __ jccb(Assembler::greaterEqual, done);
9916     __ addl(Rp, Ry);
9917     __ bind(done);
9918   %}
9919   ins_pipe(pipe_cmplt);
9920 %}
9919 
9920 /* Better to save a register than avoid a branch */
// AND fused with CmpLTMask: y = (p < q) ? y : 0.
// Implemented with a short branch instead of materializing the mask:
// compare, keep y untouched when p < q, otherwise clear it.
// Cost 300; the flags register is declared killed.
9921 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
9922 %{
9923   match(Set y (AndI (CmpLTMask p q) y));
9924   effect(KILL cr);
9925 
9926   ins_cost(300);
9927 
9928   format %{ "cmpl     $p, $q\t# and_cmpLTMask\n\t"
9929             "jlt      done\n\t"
9930             "xorl     $y, $y\n"
9931             "done:  " %}
9932   ins_encode %{
9933     Register Rp = $p$$Register;
9934     Register Rq = $q$$Register;
9935     Register Ry = $y$$Register;
9936     Label done;
9937     __ cmpl(Rp, Rq);
9938     // p < q: the mask is all ones, so y is left as it is.
9939     __ jccb(Assembler::less, done);
9940     // Otherwise the mask is zero: clear y.
9941     __ xorl(Ry, Ry);
9942     __ bind(done);
9943   %}
9944   ins_pipe(pipe_cmplt);
9945 %}
9944 
9945 
9946 //---------- FP Instructions------------------------------------------------
9947 
// Float compare into the flags (rFlagsRegU), register/register.
// Emits ucomiss followed by emit_cmpfp_fixup. Per the format text, the fixup
// is skipped when the parity flag is clear; otherwise ("saw NaN") it rewrites
// the saved flags on the stack with mask 0xffffff2b and restores them.
// Cost 145, versus 100 for the fixup-free cmpF_cc_reg_CF variant.
9948 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
9949 %{
9950   match(Set cr (CmpF src1 src2));
9951 
9952   ins_cost(145);
9953   format %{ "ucomiss $src1, $src2\n\t"
9954             "jnp,s   exit\n\t"
9955             "pushfq\t# saw NaN, set CF\n\t"
9956             "andq    [rsp], #0xffffff2b\n\t"
9957             "popfq\n"
9958     "exit:" %}
9959   ins_encode %{
9960     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9961     emit_cmpfp_fixup(_masm);
9962   %}
9963   ins_pipe(pipe_slow);
9964 %}
9965 
// Float compare into the flags, register/register, without the NaN fixup:
// a bare ucomiss whose result is typed rFlagsRegUCF instead of rFlagsRegU.
// NOTE(review): rFlagsRegUCF presumably restricts consumers to conditions
// that stay correct without the fixup -- confirm at the operand definition.
// Cost 100.
9966 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
9967   match(Set cr (CmpF src1 src2));
9968 
9969   ins_cost(100);
9970   format %{ "ucomiss $src1, $src2" %}
9971   ins_encode %{
9972     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9973   %}
9974   ins_pipe(pipe_slow);
9975 %}
9976 
// Float compare into the flags (rFlagsRegU), register against a float loaded
// from memory; the LoadF is folded into the ucomiss memory operand.
// Followed by emit_cmpfp_fixup; see the format text for the fixup sequence
// taken when the parity flag is set ("saw NaN"). Cost 145.
9977 instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
9978 %{
9979   match(Set cr (CmpF src1 (LoadF src2)));
9980 
9981   ins_cost(145);
9982   format %{ "ucomiss $src1, $src2\n\t"
9983             "jnp,s   exit\n\t"
9984             "pushfq\t# saw NaN, set CF\n\t"
9985             "andq    [rsp], #0xffffff2b\n\t"
9986             "popfq\n"
9987     "exit:" %}
9988   ins_encode %{
9989     __ ucomiss($src1$$XMMRegister, $src2$$Address);
9990     emit_cmpfp_fixup(_masm);
9991   %}
9992   ins_pipe(pipe_slow);
9993 %}
9994 
// Float compare into the flags, register against memory, without the NaN
// fixup: a bare ucomiss with a memory operand, result typed rFlagsRegUCF.
// Cost 100.
9995 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
9996   match(Set cr (CmpF src1 (LoadF src2)));
9997 
9998   ins_cost(100);
9999   format %{ "ucomiss $src1, $src2" %}
10000   ins_encode %{
10001     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10002   %}
10003   ins_pipe(pipe_slow);
10004 %}
10005 
// Float compare into the flags (rFlagsRegU), register against a float
// constant. The constant is addressed through $constantaddress (the format
// text calls this a load from the constant table). Followed by
// emit_cmpfp_fixup, as in cmpF_cc_reg. Cost 145.
10006 instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
10007   match(Set cr (CmpF src con));
10008 
10009   ins_cost(145);
10010   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10011             "jnp,s   exit\n\t"
10012             "pushfq\t# saw NaN, set CF\n\t"
10013             "andq    [rsp], #0xffffff2b\n\t"
10014             "popfq\n"
10015     "exit:" %}
10016   ins_encode %{
10017     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10018     emit_cmpfp_fixup(_masm);
10019   %}
10020   ins_pipe(pipe_slow);
10021 %}
10022 
// Float compare into the flags, register against a float constant, without
// the NaN fixup: a bare ucomiss against $constantaddress, result typed
// rFlagsRegUCF. Cost 100.
10023 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10024   match(Set cr (CmpF src con));
10025   ins_cost(100);
10026   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10027   ins_encode %{
10028     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10029   %}
10030   ins_pipe(pipe_slow);
10031 %}
10032 
// Double compare into the flags (rFlagsRegU), register/register.
// Emits ucomisd followed by emit_cmpfp_fixup. Per the format text, the fixup
// is skipped when the parity flag is clear; otherwise ("saw NaN") it rewrites
// the saved flags on the stack with mask 0xffffff2b and restores them.
// Cost 145, versus 100 for the fixup-free cmpD_cc_reg_CF variant.
10033 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10034 %{
10035   match(Set cr (CmpD src1 src2));
10036 
10037   ins_cost(145);
10038   format %{ "ucomisd $src1, $src2\n\t"
10039             "jnp,s   exit\n\t"
10040             "pushfq\t# saw NaN, set CF\n\t"
10041             "andq    [rsp], #0xffffff2b\n\t"
10042             "popfq\n"
10043     "exit:" %}
10044   ins_encode %{
10045     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10046     emit_cmpfp_fixup(_masm);
10047   %}
10048   ins_pipe(pipe_slow);
10049 %}
10050 
// Double compare into the flags, register/register, without the NaN fixup:
// a bare ucomisd whose result is typed rFlagsRegUCF instead of rFlagsRegU.
// NOTE(review): rFlagsRegUCF presumably restricts consumers to conditions
// that stay correct without the fixup -- confirm at the operand definition.
// Cost 100.
10051 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10052   match(Set cr (CmpD src1 src2));
10053 
10054   ins_cost(100);
10055   format %{ "ucomisd $src1, $src2" %}
10056   ins_encode %{
10057     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10058   %}
10059   ins_pipe(pipe_slow);
10060 %}
10061 
// Double compare into the flags (rFlagsRegU), register against a double
// loaded from memory; the LoadD is folded into the ucomisd memory operand.
// Followed by emit_cmpfp_fixup; see the format text for the fixup sequence
// taken when the parity flag is set ("saw NaN"). Cost 145.
10062 instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
10063 %{
10064   match(Set cr (CmpD src1 (LoadD src2)));
10065 
10066   ins_cost(145);
10067   format %{ "ucomisd $src1, $src2\n\t"
10068             "jnp,s   exit\n\t"
10069             "pushfq\t# saw NaN, set CF\n\t"
10070             "andq    [rsp], #0xffffff2b\n\t"
10071             "popfq\n"
10072     "exit:" %}
10073   ins_encode %{
10074     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10075     emit_cmpfp_fixup(_masm);
10076   %}
10077   ins_pipe(pipe_slow);
10078 %}
10079 
// Double compare into the flags, register against memory, without the NaN
// fixup: a bare ucomisd with a memory operand, result typed rFlagsRegUCF.
// Cost 100.
10080 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10081   match(Set cr (CmpD src1 (LoadD src2)));
10082 
10083   ins_cost(100);
10084   format %{ "ucomisd $src1, $src2" %}
10085   ins_encode %{
10086     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10087   %}
10088   ins_pipe(pipe_slow);
10089 %}
10090 
// Double compare into the flags (rFlagsRegU), register against a double
// constant. The constant is addressed through $constantaddress (the format
// text calls this a load from the constant table). Followed by
// emit_cmpfp_fixup, as in cmpD_cc_reg. Cost 145.
10091 instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
10092   match(Set cr (CmpD src con));
10093 
10094   ins_cost(145);
10095   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10096             "jnp,s   exit\n\t"
10097             "pushfq\t# saw NaN, set CF\n\t"
10098             "andq    [rsp], #0xffffff2b\n\t"
10099             "popfq\n"
10100     "exit:" %}
10101   ins_encode %{
10102     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10103     emit_cmpfp_fixup(_masm);
10104   %}
10105   ins_pipe(pipe_slow);
10106 %}
10107 
// Double compare into the flags, register against a double constant, without
// the NaN fixup: a bare ucomisd against $constantaddress, result typed
// rFlagsRegUCF. Cost 100.
10108 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10109   match(Set cr (CmpD src con));
10110   ins_cost(100);
10111   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10112   ins_encode %{
10113     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10114   %}
10115   ins_pipe(pipe_slow);
10116 %}
10117 
10118 // Compare into -1,0,1
// Three-way float compare (CmpF3), register/register, result in an int reg.
// Emits ucomiss followed by emit_cmpfp3. Per the format text, dst is preset
// to -1 and kept when the compare is unordered (jp) or below (jb); otherwise
// setne/movzbl leave 1 for "not equal" and 0 for "equal".
// Cost 275; the flags register is declared killed.
10119 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10120 %{
10121   match(Set dst (CmpF3 src1 src2));
10122   effect(KILL cr);
10123 
10124   ins_cost(275);
10125   format %{ "ucomiss $src1, $src2\n\t"
10126             "movl    $dst, #-1\n\t"
10127             "jp,s    done\n\t"
10128             "jb,s    done\n\t"
10129             "setne   $dst\n\t"
10130             "movzbl  $dst, $dst\n"
10131     "done:" %}
10132   ins_encode %{
10133     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10134     emit_cmpfp3(_masm, $dst$$Register);
10135   %}
10136   ins_pipe(pipe_slow);
10137 %}
10138 
10139 // Compare into -1,0,1
// Three-way float compare (CmpF3), register against a float loaded from
// memory; the LoadF is folded into the ucomiss memory operand.
// Result materialization is delegated to emit_cmpfp3; the format text lists
// the -1 / 0 / 1 sequence. Cost 275; the flags register is declared killed.
10140 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10141 %{
10142   match(Set dst (CmpF3 src1 (LoadF src2)));
10143   effect(KILL cr);
10144 
10145   ins_cost(275);
10146   format %{ "ucomiss $src1, $src2\n\t"
10147             "movl    $dst, #-1\n\t"
10148             "jp,s    done\n\t"
10149             "jb,s    done\n\t"
10150             "setne   $dst\n\t"
10151             "movzbl  $dst, $dst\n"
10152     "done:" %}
10153   ins_encode %{
10154     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10155     emit_cmpfp3(_masm, $dst$$Register);
10156   %}
10157   ins_pipe(pipe_slow);
10158 %}
10159 
10160 // Compare into -1,0,1
// Three-way float compare (CmpF3), register against a float constant
// addressed through $constantaddress.
// Result materialization is delegated to emit_cmpfp3; the format text lists
// the -1 / 0 / 1 sequence. Cost 275; the flags register is declared killed.
10161 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10162   match(Set dst (CmpF3 src con));
10163   effect(KILL cr);
10164 
10165   ins_cost(275);
10166   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10167             "movl    $dst, #-1\n\t"
10168             "jp,s    done\n\t"
10169             "jb,s    done\n\t"
10170             "setne   $dst\n\t"
10171             "movzbl  $dst, $dst\n"
10172     "done:" %}
10173   ins_encode %{
10174     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10175     emit_cmpfp3(_masm, $dst$$Register);
10176   %}
10177   ins_pipe(pipe_slow);
10178 %}
10179 
10180 // Compare into -1,0,1
// Three-way double compare (CmpD3), register/register, result in an int reg.
// Emits ucomisd followed by emit_cmpfp3. Per the format text, dst is preset
// to -1 and kept when the compare is unordered (jp) or below (jb); otherwise
// setne/movzbl leave 1 for "not equal" and 0 for "equal".
// Cost 275; the flags register is declared killed.
10181 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10182 %{
10183   match(Set dst (CmpD3 src1 src2));
10184   effect(KILL cr);
10185 
10186   ins_cost(275);
10187   format %{ "ucomisd $src1, $src2\n\t"
10188             "movl    $dst, #-1\n\t"
10189             "jp,s    done\n\t"
10190             "jb,s    done\n\t"
10191             "setne   $dst\n\t"
10192             "movzbl  $dst, $dst\n"
10193     "done:" %}
10194   ins_encode %{
10195     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10196     emit_cmpfp3(_masm, $dst$$Register);
10197   %}
10198   ins_pipe(pipe_slow);
10199 %}
10200 
10201 // Compare into -1,0,1
// Three-way double compare (CmpD3), register against a double loaded from
// memory; the LoadD is folded into the ucomisd memory operand.
// Result materialization is delegated to emit_cmpfp3; the format text lists
// the -1 / 0 / 1 sequence. Cost 275; the flags register is declared killed.
10202 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10203 %{
10204   match(Set dst (CmpD3 src1 (LoadD src2)));
10205   effect(KILL cr);
10206 
10207   ins_cost(275);
10208   format %{ "ucomisd $src1, $src2\n\t"
10209             "movl    $dst, #-1\n\t"
10210             "jp,s    done\n\t"
10211             "jb,s    done\n\t"
10212             "setne   $dst\n\t"
10213             "movzbl  $dst, $dst\n"
10214     "done:" %}
10215   ins_encode %{
10216     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10217     emit_cmpfp3(_masm, $dst$$Register);
10218   %}
10219   ins_pipe(pipe_slow);
10220 %}
10221 
10222 // Compare into -1,0,1
// Three-way double compare (CmpD3), register against a double constant
// addressed through $constantaddress.
// Result materialization is delegated to emit_cmpfp3; the format text lists
// the -1 / 0 / 1 sequence. Cost 275; the flags register is declared killed.
10223 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10224   match(Set dst (CmpD3 src con));
10225   effect(KILL cr);
10226 
10227   ins_cost(275);
10228   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10229             "movl    $dst, #-1\n\t"
10230             "jp,s    done\n\t"
10231             "jb,s    done\n\t"
10232             "setne   $dst\n\t"
10233             "movzbl  $dst, $dst\n"
10234     "done:" %}
10235   ins_encode %{
10236     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10237     emit_cmpfp3(_masm, $dst$$Register);
10238   %}
10239   ins_pipe(pipe_slow);
10240 %}
10241 
10242 //----------Arithmetic Conversion Instructions---------------------------------
10243 
// RoundFloat is a no-op here: the rule matches RoundFloat applied to dst in
// place, has zero cost, an empty encoding (no bytes emitted) and the empty
// pipeline class.
10244 instruct roundFloat_nop(regF dst)
10245 %{
10246   match(Set dst (RoundFloat dst));
10247 
10248   ins_cost(0);
10249   ins_encode();
10250   ins_pipe(empty);
10251 %}
10252 
// RoundDouble is a no-op here: the rule matches RoundDouble applied to dst in
// place, has zero cost, an empty encoding (no bytes emitted) and the empty
// pipeline class.
10253 instruct roundDouble_nop(regD dst)
10254 %{
10255   match(Set dst (RoundDouble dst));
10256 
10257   ins_cost(0);
10258   ins_encode();
10259   ins_pipe(empty);
10260 %}
10261 
// Float -> double conversion, XMM register to XMM register, via cvtss2sd.
// No flags operand and no explicit cost are declared.
10262 instruct convF2D_reg_reg(regD dst, regF src)
10263 %{
10264   match(Set dst (ConvF2D src));
10265 
10266   format %{ "cvtss2sd $dst, $src" %}
10267   ins_encode %{
10268     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10269   %}
10270   ins_pipe(pipe_slow); // XXX
10271 %}
10272 
// Float -> double conversion with the float loaded from memory: the LoadF is
// folded into the cvtss2sd memory operand.
10273 instruct convF2D_reg_mem(regD dst, memory src)
10274 %{
10275   match(Set dst (ConvF2D (LoadF src)));
10276 
10277   format %{ "cvtss2sd $dst, $src" %}
10278   ins_encode %{
10279     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
10280   %}
10281   ins_pipe(pipe_slow); // XXX
10282 %}
10283 
// Double -> float conversion, XMM register to XMM register, via cvtsd2ss.
// No flags operand and no explicit cost are declared.
10284 instruct convD2F_reg_reg(regF dst, regD src)
10285 %{
10286   match(Set dst (ConvD2F src));
10287 
10288   format %{ "cvtsd2ss $dst, $src" %}
10289   ins_encode %{
10290     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10291   %}
10292   ins_pipe(pipe_slow); // XXX
10293 %}
10294 
// Double -> float conversion with the double loaded from memory: the LoadD is
// folded into the cvtsd2ss memory operand.
10295 instruct convD2F_reg_mem(regF dst, memory src)
10296 %{
10297   match(Set dst (ConvD2F (LoadD src)));
10298 
10299   format %{ "cvtsd2ss $dst, $src" %}
10300   ins_encode %{
10301     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
10302   %}
10303   ins_pipe(pipe_slow); // XXX
10304 %}
10305 
10306 // XXX do mem variants
// Float -> int conversion with a slow-path fixup.
// Fast path: cvttss2sil (truncating convert). If the result is anything other
// than 0x80000000 it is used directly. Otherwise src is spilled to a fresh
// 8-byte stack slot, the f2i_fixup stub is called, and the result is popped
// from that slot into dst.
// NOTE(review): 0x80000000 is presumably the value cvttss2si returns for NaN
// or out-of-range input (see the Intel SDM) -- confirm.
// The flags register is declared killed.
10307 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
10308 %{
10309   match(Set dst (ConvF2I src));
10310   effect(KILL cr);
10311 
10312   format %{ "cvttss2sil $dst, $src\t# f2i\n\t"
10313             "cmpl    $dst, #0x80000000\n\t"
10314             "jne,s   done\n\t"
10315             "subq    rsp, #8\n\t"
10316             "movss   [rsp], $src\n\t"
10317             "call    f2i_fixup\n\t"
10318             "popq    $dst\n"
10319     "done:   "%}
10320   ins_encode %{
10321     Label done;
10322     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10323     __ cmpl($dst$$Register, 0x80000000);
10324     __ jccb(Assembler::notEqual, done);
10325     // Slow path: pass src to the stub in a stack slot, result comes back there.
10326     __ subptr(rsp, 8);
10327     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10328     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
10329     __ pop($dst$$Register);
10330     __ bind(done);
10331   %}
10332   ins_pipe(pipe_slow);
10333 %}
10333 
// Float -> long conversion with a slow-path fixup.
// Fast path: cvttss2siq (truncating convert). The result is compared against
// the 64-bit value stored at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the format text); if it differs it is used directly.
// Otherwise src is spilled to a fresh 8-byte stack slot, the f2l_fixup stub
// is called, and the result is popped from that slot into dst.
// The flags register is declared killed.
10334 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
10335 %{
10336   match(Set dst (ConvF2L src));
10337   effect(KILL cr);
10338 
10339   format %{ "cvttss2siq $dst, $src\t# f2l\n\t"
10340             "cmpq    $dst, [0x8000000000000000]\n\t"
10341             "jne,s   done\n\t"
10342             "subq    rsp, #8\n\t"
10343             "movss   [rsp], $src\n\t"
10344             "call    f2l_fixup\n\t"
10345             "popq    $dst\n"
10346     "done:   "%}
10347   ins_encode %{
10348     Label done;
10349     __ cvttss2siq($dst$$Register, $src$$XMMRegister);
10350     __ cmp64($dst$$Register,
10351              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10352     __ jccb(Assembler::notEqual, done);
10353     // Slow path: pass src to the stub in a stack slot, result comes back there.
10354     __ subptr(rsp, 8);
10355     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10356     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
10357     __ pop($dst$$Register);
10358     __ bind(done);
10359   %}
10360   ins_pipe(pipe_slow);
10361 %}
10361 
// Double -> int conversion with a slow-path fixup.
// Fast path: cvttsd2sil (truncating convert). If the result is anything other
// than 0x80000000 it is used directly. Otherwise src is spilled to a fresh
// 8-byte stack slot, the d2i_fixup stub is called, and the result is popped
// from that slot into dst.
// NOTE(review): 0x80000000 is presumably the value cvttsd2si returns for NaN
// or out-of-range input (see the Intel SDM) -- confirm.
// The flags register is declared killed.
10362 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
10363 %{
10364   match(Set dst (ConvD2I src));
10365   effect(KILL cr);
10366 
10367   format %{ "cvttsd2sil $dst, $src\t# d2i\n\t"
10368             "cmpl    $dst, #0x80000000\n\t"
10369             "jne,s   done\n\t"
10370             "subq    rsp, #8\n\t"
10371             "movsd   [rsp], $src\n\t"
10372             "call    d2i_fixup\n\t"
10373             "popq    $dst\n"
10374     "done:   "%}
10375   ins_encode %{
10376     Label done;
10377     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10378     __ cmpl($dst$$Register, 0x80000000);
10379     __ jccb(Assembler::notEqual, done);
10380     // Slow path: pass src to the stub in a stack slot, result comes back there.
10381     __ subptr(rsp, 8);
10382     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10383     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
10384     __ pop($dst$$Register);
10385     __ bind(done);
10386   %}
10387   ins_pipe(pipe_slow);
10388 %}
10388 
// Double -> long conversion with a slow-path fixup.
// Fast path: cvttsd2siq (truncating convert). The result is compared against
// the 64-bit value stored at StubRoutines::x86::double_sign_flip() (shown as
// 0x8000000000000000 in the format text); if it differs it is used directly.
// Otherwise src is spilled to a fresh 8-byte stack slot, the d2l_fixup stub
// is called, and the result is popped from that slot into dst.
// The flags register is declared killed.
10389 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
10390 %{
10391   match(Set dst (ConvD2L src));
10392   effect(KILL cr);
10393 
10394   format %{ "cvttsd2siq $dst, $src\t# d2l\n\t"
10395             "cmpq    $dst, [0x8000000000000000]\n\t"
10396             "jne,s   done\n\t"
10397             "subq    rsp, #8\n\t"
10398             "movsd   [rsp], $src\n\t"
10399             "call    d2l_fixup\n\t"
10400             "popq    $dst\n"
10401     "done:   "%}
10402   ins_encode %{
10403     Label done;
10404     __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
10405     __ cmp64($dst$$Register,
10406              ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
10407     __ jccb(Assembler::notEqual, done);
10408     // Slow path: pass src to the stub in a stack slot, result comes back there.
10409     __ subptr(rsp, 8);
10410     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10411     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
10412     __ pop($dst$$Register);
10413     __ bind(done);
10414   %}
10415   ins_pipe(pipe_slow);
10416 %}
10416 
// Int -> float conversion from a general register via cvtsi2ssl.
// Selected only when UseXmmI2F is off; convXI2F_reg handles the same ideal
// node when the flag is on.
10417 instruct convI2F_reg_reg(regF dst, rRegI src)
10418 %{
10419   predicate(!UseXmmI2F);
10420   match(Set dst (ConvI2F src));
10421 
10422   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10423   ins_encode %{
10424     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10425   %}
10426   ins_pipe(pipe_slow); // XXX
10427 %}
10428 
// Int -> float conversion with the int loaded from memory: the LoadI is
// folded into the cvtsi2ssl memory operand. No predicate is attached, so this
// form does not depend on UseXmmI2F.
10429 instruct convI2F_reg_mem(regF dst, memory src)
10430 %{
10431   match(Set dst (ConvI2F (LoadI src)));
10432 
10433   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
10434   ins_encode %{
10435     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
10436   %}
10437   ins_pipe(pipe_slow); // XXX
10438 %}
10439 
// Int -> double conversion from a general register via cvtsi2sdl.
// Selected only when UseXmmI2D is off; convXI2D_reg handles the same ideal
// node when the flag is on.
10440 instruct convI2D_reg_reg(regD dst, rRegI src)
10441 %{
10442   predicate(!UseXmmI2D);
10443   match(Set dst (ConvI2D src));
10444 
10445   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10446   ins_encode %{
10447     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10448   %}
10449   ins_pipe(pipe_slow); // XXX
10450 %}
10451 
// Int -> double conversion with the int loaded from memory: the LoadI is
// folded into the cvtsi2sdl memory operand. No predicate is attached, so this
// form does not depend on UseXmmI2D.
10452 instruct convI2D_reg_mem(regD dst, memory src)
10453 %{
10454   match(Set dst (ConvI2D (LoadI src)));
10455 
10456   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
10457   ins_encode %{
10458     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
10459   %}
10460   ins_pipe(pipe_slow); // XXX
10461 %}
10462 
// Int -> float conversion done inside the XMM unit, selected when UseXmmI2F
// is on: the int is first moved into dst with movdl, then converted in place
// with the packed cvtdq2ps.
10463 instruct convXI2F_reg(regF dst, rRegI src)
10464 %{
10465   predicate(UseXmmI2F);
10466   match(Set dst (ConvI2F src));
10467 
10468   format %{ "movdl $dst, $src\n\t"
10469             "cvtdq2psl $dst, $dst\t# i2f" %}
10470   ins_encode %{
10471     __ movdl($dst$$XMMRegister, $src$$Register);
10472     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10473   %}
10474   ins_pipe(pipe_slow); // XXX
10475 %}
10476 
// Int -> double conversion done inside the XMM unit, selected when UseXmmI2D
// is on: the int is first moved into dst with movdl, then converted in place
// with the packed cvtdq2pd.
10477 instruct convXI2D_reg(regD dst, rRegI src)
10478 %{
10479   predicate(UseXmmI2D);
10480   match(Set dst (ConvI2D src));
10481 
10482   format %{ "movdl $dst, $src\n\t"
10483             "cvtdq2pdl $dst, $dst\t# i2d" %}
10484   ins_encode %{
10485     __ movdl($dst$$XMMRegister, $src$$Register);
10486     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10487   %}
10488   ins_pipe(pipe_slow); // XXX
10489 %}
10490 
// Long -> float conversion from a general register via cvtsi2ssq.
10491 instruct convL2F_reg_reg(regF dst, rRegL src)
10492 %{
10493   match(Set dst (ConvL2F src));
10494 
10495   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10496   ins_encode %{
10497     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
10498   %}
10499   ins_pipe(pipe_slow); // XXX
10500 %}
10501 
// Long -> float conversion with the long loaded from memory: the LoadL is
// folded into the cvtsi2ssq memory operand.
10502 instruct convL2F_reg_mem(regF dst, memory src)
10503 %{
10504   match(Set dst (ConvL2F (LoadL src)));
10505 
10506   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
10507   ins_encode %{
10508     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
10509   %}
10510   ins_pipe(pipe_slow); // XXX
10511 %}
10512 
// Long -> double conversion from a general register via cvtsi2sdq.
10513 instruct convL2D_reg_reg(regD dst, rRegL src)
10514 %{
10515   match(Set dst (ConvL2D src));
10516 
10517   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10518   ins_encode %{
10519     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
10520   %}
10521   ins_pipe(pipe_slow); // XXX
10522 %}
10523 
// Long -> double conversion with the long loaded from memory: the LoadL is
// folded into the cvtsi2sdq memory operand.
10524 instruct convL2D_reg_mem(regD dst, memory src)
10525 %{
10526   match(Set dst (ConvL2D (LoadL src)));
10527 
10528   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
10529   ins_encode %{
10530     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
10531   %}
10532   ins_pipe(pipe_slow); // XXX
10533 %}
10534 
// Int -> long conversion (sign-extending) via movslq. Cost 125.
// No flags operand is declared.
10535 instruct convI2L_reg_reg(rRegL dst, rRegI src)
10536 %{
10537   match(Set dst (ConvI2L src));
10538 
10539   ins_cost(125);
10540   format %{ "movslq  $dst, $src\t# i2l" %}
10541   ins_encode %{
10542     __ movslq($dst$$Register, $src$$Register);
10543   %}
10544   ins_pipe(ialu_reg_reg);
10545 %}
10546 
10547 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
10548 // %{
10549 //   match(Set dst (ConvI2L src));
10550 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
10551 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
10552 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
10553 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
10554 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
10555 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
10556 
10557 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
10558 //   ins_encode(enc_copy(dst, src));
10559 // //   opcode(0x63); // needs REX.W
10560 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
10561 //   ins_pipe(ialu_reg_reg);
10562 // %}
10563 
10564 // Zero-extend convert int to long
// Zero-extending int -> long: matches (ConvI2L src) & mask, where mask is an
// immL_32bits constant, and replaces the sign-extend + AND pair with one
// 32-bit movl.
// NOTE(review): the move is skipped when dst and src are the same register,
// which assumes the upper 32 bits of that register are already zero --
// confirm this invariant for rRegI values.
10565 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
10566 %{
10567   match(Set dst (AndL (ConvI2L src) mask));
10568 
10569   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10570   ins_encode %{
10571     if ($dst$$reg != $src$$reg) {
10572       __ movl($dst$$Register, $src$$Register);
10573     }
10574   %}
10575   ins_pipe(ialu_reg_reg);
10576 %}
10577 
10578 // Zero-extend convert int to long
// Zero-extending int -> long with the int loaded from memory: matches
// (ConvI2L (LoadI src)) & mask, where mask is an immL_32bits constant, and
// emits a single 32-bit movl load into dst.
10579 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
10580 %{
10581   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
10582 
10583   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
10584   ins_encode %{
10585     __ movl($dst$$Register, $src$$Address);
10586   %}
10587   ins_pipe(ialu_reg_mem);
10588 %}
10589 
// Long AND with an immL_32bits mask, implemented as a 32-bit register move
// (movl) instead of an andq. The move is always emitted, even when dst and
// src are the same register.
10590 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
10591 %{
10592   match(Set dst (AndL src mask));
10593 
10594   format %{ "movl    $dst, $src\t# zero-extend long" %}
10595   ins_encode %{
10596     __ movl($dst$$Register, $src$$Register);
10597   %}
10598   ins_pipe(ialu_reg_reg);
10599 %}
10600 
// Long -> int conversion (truncation) via a 32-bit register move (movl).
// The move is always emitted, even when dst and src are the same register.
10601 instruct convL2I_reg_reg(rRegI dst, rRegL src)
10602 %{
10603   match(Set dst (ConvL2I src));
10604 
10605   format %{ "movl    $dst, $src\t# l2i" %}
10606   ins_encode %{
10607     __ movl($dst$$Register, $src$$Register);
10608   %}
10609   ins_pipe(ialu_reg_reg);
10610 %}
10611 
10612 
// MoveF2I (raw float bits -> int), stack slot to general register:
// a 32-bit movl load from [rsp + src displacement]. Cost 125.
10613 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
10614   match(Set dst (MoveF2I src));
10615   effect(DEF dst, USE src);
10616 
10617   ins_cost(125);
10618   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
10619   ins_encode %{
10620     __ movl($dst$$Register, Address(rsp, $src$$disp));
10621   %}
10622   ins_pipe(ialu_reg_mem);
10623 %}
10624 
// MoveI2F (raw int bits -> float), stack slot to XMM register:
// a movflt load from [rsp + src displacement]. Cost 125.
10625 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
10626   match(Set dst (MoveI2F src));
10627   effect(DEF dst, USE src);
10628 
10629   ins_cost(125);
10630   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
10631   ins_encode %{
10632     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
10633   %}
10634   ins_pipe(pipe_slow);
10635 %}
10636 
// MoveD2L (raw double bits -> long), stack slot to general register:
// a 64-bit movq load from [rsp + src displacement]. Cost 125.
10637 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
10638   match(Set dst (MoveD2L src));
10639   effect(DEF dst, USE src);
10640 
10641   ins_cost(125);
10642   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
10643   ins_encode %{
10644     __ movq($dst$$Register, Address(rsp, $src$$disp));
10645   %}
10646   ins_pipe(ialu_reg_mem);
10647 %}
10648 
// MoveL2D (raw long bits -> double), stack slot to XMM register, variant
// selected when UseXmmLoadAndClearUpper is off. The encoding is the same
// movdbl call as in MoveL2D_stack_reg; only the predicate and the format
// mnemonic (movlpd here) differ.
// NOTE(review): movdbl presumably picks the actual instruction from the same
// flag -- confirm in the macro assembler.
// Cost 125.
10649 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
10650   predicate(!UseXmmLoadAndClearUpper);
10651   match(Set dst (MoveL2D src));
10652   effect(DEF dst, USE src);
10653 
10654   ins_cost(125);
10655   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
10656   ins_encode %{
10657     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10658   %}
10659   ins_pipe(pipe_slow);
10660 %}
10661 
// MoveL2D (raw long bits -> double), stack slot to XMM register, variant
// selected when UseXmmLoadAndClearUpper is on. Loads from
// [rsp + src displacement] with movdbl (shown as movsd in the format text).
// Cost 125.
10662 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
10663   predicate(UseXmmLoadAndClearUpper);
10664   match(Set dst (MoveL2D src));
10665   effect(DEF dst, USE src);
10666 
10667   ins_cost(125);
10668   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
10669   ins_encode %{
10670     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
10671   %}
10672   ins_pipe(pipe_slow);
10673 %}
10674 
10675 
// MoveF2I (raw float bits -> int), XMM register to stack slot:
// a movflt store to [rsp + dst displacement]. Cost 95.
10676 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
10677   match(Set dst (MoveF2I src));
10678   effect(DEF dst, USE src);
10679 
10680   ins_cost(95); // XXX
10681   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
10682   ins_encode %{
10683     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
10684   %}
10685   ins_pipe(pipe_slow);
10686 %}
10687 
// MoveI2F (raw int bits -> float), general register to stack slot:
// a 32-bit movl store to [rsp + dst displacement]. Cost 100.
10688 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
10689   match(Set dst (MoveI2F src));
10690   effect(DEF dst, USE src);
10691 
10692   ins_cost(100);
10693   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
10694   ins_encode %{
10695     __ movl(Address(rsp, $dst$$disp), $src$$Register);
10696   %}
10697   ins_pipe( ialu_mem_reg );
10698 %}
10699 
10700 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
10701   // MoveD2L into a long stack slot: movdbl stores the XMM source at [rsp + disp].
10702   match(Set dst (MoveD2L src));
10703   effect(DEF dst, USE src);
10704   ins_cost(95); // XXX
10705   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
10706   ins_encode %{
10707     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
10708   %}
10709   ins_pipe(pipe_slow);
10710 %}
10711 
10712 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
10713   // MoveL2D into a double stack slot: movq stores the long register at [rsp + disp].
10714   match(Set dst (MoveL2D src));
10715   effect(DEF dst, USE src);
10716   ins_cost(100);
10717   ins_pipe(ialu_mem_reg);
10718   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
10719   ins_encode %{
10720     __ movq(Address(rsp, $dst$$disp), $src$$Register);
10721   %}
10722 %}
10723 
10724 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
10725   match(Set dst (MoveF2I src));  // register form: XMM source, general-register result
10726   effect(DEF dst, USE src);
10727   ins_cost(85);
10728   ins_pipe(pipe_slow);
10729   format %{ "movd    $dst,$src\t# MoveF2I" %}
10730   ins_encode %{
10731     __ movdl($dst$$Register, $src$$XMMRegister);
10732   %}
10733 %}
10734 
10735 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
10736   match(Set dst (MoveD2L src));  // register form: XMM source, general-register result
10737   effect(DEF dst, USE src);
10738   ins_cost(85);
10739   ins_pipe(pipe_slow);
10740   format %{ "movd    $dst,$src\t# MoveD2L" %}
10741   ins_encode %{
10742     __ movdq($dst$$Register, $src$$XMMRegister);
10743   %}
10744 %}
10745 
10746 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
10747   match(Set dst (MoveI2F src));  // register form: general-register source, XMM result
10748   effect(DEF dst, USE src);
10749   ins_cost(100);
10750   ins_pipe(pipe_slow);
10751   format %{ "movd    $dst,$src\t# MoveI2F" %}
10752   ins_encode %{
10753     __ movdl($dst$$XMMRegister, $src$$Register);
10754   %}
10755 %}
10756 
10757 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10758   match(Set dst (MoveL2D src));  // register form: general-register source, XMM result
10759   effect(DEF dst, USE src);
10760   ins_cost(100);
10761   ins_pipe(pipe_slow);
10762   format %{ "movd    $dst,$src\t# MoveL2D" %}
10763   ins_encode %{
10764     __ movdq($dst$$XMMRegister, $src$$Register);
10765   %}
10766 %}
10767 
10768 
10769 // =======================================================================
10770 // fast clearing of an array
10771 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10772                   Universe dummy, rFlagsReg cr) %{
10773   // ClearArray for nodes that are neither is_large() nor word_copy_only(); the code is
10774   // emitted by clear_mem. base is rdi, cnt is rcx, val is rax, so the listing stores via rdi.
10775   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10776   match(Set dummy (ClearArray (Binary cnt base) val));
10777   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10778   format %{ $$template
10779     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10780     $$emit$$"jg      LARGE\n\t"
10781     $$emit$$"dec     rcx\n\t"
10782     $$emit$$"js      DONE\t# Zero length\n\t"
10783     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10784     $$emit$$"dec     rcx\n\t"
10785     $$emit$$"jge     LOOP\n\t"
10786     $$emit$$"jmp     DONE\n\t"
10787     $$emit$$"# LARGE:\n\t"
10788     if (UseFastStosb) {
10789        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10790        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10791     } else if (UseXMMForObjInit) {
10792        $$emit$$"movdq   $tmp, $val\n\t"
10793        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10794        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10795        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10796        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10797        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10798        $$emit$$"vmovdqu $tmp,0x20(rdi)\n\t"
10799        $$emit$$"add     0x40,rdi\n\t"
10800        $$emit$$"# L_zero_64_bytes:\n\t"
10801        $$emit$$"sub     0x8,rcx\n\t"
10802        $$emit$$"jge     L_loop\n\t"
10803        $$emit$$"add     0x4,rcx\n\t"
10804        $$emit$$"jl      L_tail\n\t"
10805        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10806        $$emit$$"add     0x20,rdi\n\t"
10807        $$emit$$"sub     0x4,rcx\n\t"
10808        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10809        $$emit$$"add     0x4,rcx\n\t"
10810        $$emit$$"jle     L_end\n\t"
10811        $$emit$$"dec     rcx\n\t"
10812        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10813        $$emit$$"vmovq   $tmp,(rdi)\n\t"
10814        $$emit$$"add     0x8,rdi\n\t"
10815        $$emit$$"dec     rcx\n\t"
10816        $$emit$$"jge     L_sloop\n\t"
10817        $$emit$$"# L_end:\n\t"
10818     } else {
10819        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10820     }
10821     $$emit$$"# DONE"
10822   %}
10823   ins_encode %{
10824     __ clear_mem($base$$Register, $cnt$$Register,
10825                  $val$$Register, $tmp$$XMMRegister, false, false);
10826   %}
10827   ins_pipe(pipe_slow);
10828 %}
10829 
10830 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10831                   Universe dummy, rFlagsReg cr) %{
10832   // ClearArray for nodes that are word_copy_only() but not is_large(); the code is
10833   // emitted by clear_mem. base is rdi, cnt is rcx, val is rax, so the listing stores via rdi.
10834   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
10835   match(Set dummy (ClearArray (Binary cnt base) val));
10836   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10837   format %{ $$template
10838     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10839     $$emit$$"jg      LARGE\n\t"
10840     $$emit$$"dec     rcx\n\t"
10841     $$emit$$"js      DONE\t# Zero length\n\t"
10842     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10843     $$emit$$"dec     rcx\n\t"
10844     $$emit$$"jge     LOOP\n\t"
10845     $$emit$$"jmp     DONE\n\t"
10846     $$emit$$"# LARGE:\n\t"
10847     if (UseXMMForObjInit) {
10848        $$emit$$"movdq   $tmp, $val\n\t"
10849        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10850        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10851        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10852        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10853        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10854        $$emit$$"vmovdqu $tmp,0x20(rdi)\n\t"
10855        $$emit$$"add     0x40,rdi\n\t"
10856        $$emit$$"# L_zero_64_bytes:\n\t"
10857        $$emit$$"sub     0x8,rcx\n\t"
10858        $$emit$$"jge     L_loop\n\t"
10859        $$emit$$"add     0x4,rcx\n\t"
10860        $$emit$$"jl      L_tail\n\t"
10861        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10862        $$emit$$"add     0x20,rdi\n\t"
10863        $$emit$$"sub     0x4,rcx\n\t"
10864        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10865        $$emit$$"add     0x4,rcx\n\t"
10866        $$emit$$"jle     L_end\n\t"
10867        $$emit$$"dec     rcx\n\t"
10868        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10869        $$emit$$"vmovq   $tmp,(rdi)\n\t"
10870        $$emit$$"add     0x8,rdi\n\t"
10871        $$emit$$"dec     rcx\n\t"
10872        $$emit$$"jge     L_sloop\n\t"
10873        $$emit$$"# L_end:\n\t"
10874     } else {
10875        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10876     }
10877     $$emit$$"# DONE"
10878   %}
10879   ins_encode %{
10880     __ clear_mem($base$$Register, $cnt$$Register,
10881                  $val$$Register, $tmp$$XMMRegister, false, true);
10882   %}
10883   ins_pipe(pipe_slow);
10884 %}
10885 
10886 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10887                         Universe dummy, rFlagsReg cr) %{
10888   // ClearArray for nodes that are is_large() but not word_copy_only(); the code is
10889   // emitted by clear_mem. base is rdi, cnt is rcx, val is rax, so the listing stores via rdi.
10890   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10891   match(Set dummy (ClearArray (Binary cnt base) val));
10892   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10893   format %{ $$template
10894     if (UseFastStosb) {
10895        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10896        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10897     } else if (UseXMMForObjInit) {
10898        $$emit$$"movdq   $tmp, $val\n\t"
10899        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10900        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10901        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10902        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10903        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10904        $$emit$$"vmovdqu $tmp,0x20(rdi)\n\t"
10905        $$emit$$"add     0x40,rdi\n\t"
10906        $$emit$$"# L_zero_64_bytes:\n\t"
10907        $$emit$$"sub     0x8,rcx\n\t"
10908        $$emit$$"jge     L_loop\n\t"
10909        $$emit$$"add     0x4,rcx\n\t"
10910        $$emit$$"jl      L_tail\n\t"
10911        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10912        $$emit$$"add     0x20,rdi\n\t"
10913        $$emit$$"sub     0x4,rcx\n\t"
10914        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10915        $$emit$$"add     0x4,rcx\n\t"
10916        $$emit$$"jle     L_end\n\t"
10917        $$emit$$"dec     rcx\n\t"
10918        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10919        $$emit$$"vmovq   $tmp,(rdi)\n\t"
10920        $$emit$$"add     0x8,rdi\n\t"
10921        $$emit$$"dec     rcx\n\t"
10922        $$emit$$"jge     L_sloop\n\t"
10923        $$emit$$"# L_end:\n\t"
10924     } else {
10925        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10926     }
10927   %}
10928   ins_encode %{
10929     __ clear_mem($base$$Register, $cnt$$Register,
10930                  $val$$Register, $tmp$$XMMRegister, true, false);
10931   %}
10932   ins_pipe(pipe_slow);
10933 %}
10934 
10935 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10936                         Universe dummy, rFlagsReg cr) %{
10937   // ClearArray for nodes that are both is_large() and word_copy_only(); the code is
10938   // emitted by clear_mem. base is rdi, cnt is rcx, val is rax, so the listing stores via rdi.
10939   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
10940   match(Set dummy (ClearArray (Binary cnt base) val));
10941   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10942   format %{ $$template
10943     if (UseXMMForObjInit) {
10944        $$emit$$"movdq   $tmp, $val\n\t"
10945        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10946        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10947        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10948        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10949        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10950        $$emit$$"vmovdqu $tmp,0x20(rdi)\n\t"
10951        $$emit$$"add     0x40,rdi\n\t"
10952        $$emit$$"# L_zero_64_bytes:\n\t"
10953        $$emit$$"sub     0x8,rcx\n\t"
10954        $$emit$$"jge     L_loop\n\t"
10955        $$emit$$"add     0x4,rcx\n\t"
10956        $$emit$$"jl      L_tail\n\t"
10957        $$emit$$"vmovdqu $tmp,(rdi)\n\t"
10958        $$emit$$"add     0x20,rdi\n\t"
10959        $$emit$$"sub     0x4,rcx\n\t"
10960        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10961        $$emit$$"add     0x4,rcx\n\t"
10962        $$emit$$"jle     L_end\n\t"
10963        $$emit$$"dec     rcx\n\t"
10964        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10965        $$emit$$"vmovq   $tmp,(rdi)\n\t"
10966        $$emit$$"add     0x8,rdi\n\t"
10967        $$emit$$"dec     rcx\n\t"
10968        $$emit$$"jge     L_sloop\n\t"
10969        $$emit$$"# L_end:\n\t"
10970     } else {
10971        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10972     }
10973   %}
10974   ins_encode %{
10975     __ clear_mem($base$$Register, $cnt$$Register,
10976                  $val$$Register, $tmp$$XMMRegister, true, true);
10977   %}
10978   ins_pipe(pipe_slow);
10979 %}
10980 
10981 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10982                          rax_RegI result, regD tmp1, rFlagsReg cr) %{
10983   // StrComp for nodes whose encoding() is StrIntrinsicNode::LL; the comparison is
10984   // emitted by string_compare. Both string pointers and both counts are USE_KILL.
10985   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10986   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10987   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10988   ins_pipe(pipe_slow);
10989   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10990   ins_encode %{
10991     __ string_compare($str1$$Register, $str2$$Register,
10992                       $cnt1$$Register, $cnt2$$Register,
10993                       $result$$Register, $tmp1$$XMMRegister, StrIntrinsicNode::LL);
10994   %}
10995 %}
10996 
10997 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10998                          rax_RegI result, regD tmp1, rFlagsReg cr) %{
10999   // StrComp for nodes whose encoding() is StrIntrinsicNode::UU; the comparison is
11000   // emitted by string_compare. Both string pointers and both counts are USE_KILL.
11001   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11002   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11003   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11004   ins_pipe(pipe_slow);
11005   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11006   ins_encode %{
11007     __ string_compare($str1$$Register, $str2$$Register,
11008                       $cnt1$$Register, $cnt2$$Register,
11009                       $result$$Register, $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11010   %}
11011 %}
11012 
11013 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11014                           rax_RegI result, regD tmp1, rFlagsReg cr) %{
11015   // StrComp for nodes whose encoding() is StrIntrinsicNode::LU; the comparison is
11016   // emitted by string_compare. Both string pointers and both counts are USE_KILL.
11017   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11018   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11019   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11020   ins_pipe(pipe_slow);
11021   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11022   ins_encode %{
11023     __ string_compare($str1$$Register, $str2$$Register,
11024                       $cnt1$$Register, $cnt2$$Register,
11025                       $result$$Register, $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11026   %}
11027 %}
11028 
11029 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
11030                           rax_RegI result, regD tmp1, rFlagsReg cr) %{
11031   // StrComp for UL-encoded nodes. Here str1/cnt1 are bound to rsi/rdx and str2/cnt2
11032   // to rdi/rcx, and string_compare is handed str2/cnt2 ahead of str1/cnt1.
11033   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11034   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11035   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11036   ins_pipe(pipe_slow);
11037   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11038   ins_encode %{
11039     __ string_compare($str2$$Register, $str1$$Register,
11040                       $cnt2$$Register, $cnt1$$Register,
11041                       $result$$Register, $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11042   %}
11043 %}
11044 
11045 // Fast substring search (StrIndexOf) where the substring length is the immediate
11046 // int_cnt2. Applies to LL-encoded nodes and only when UseSSE42Intrinsics is set.
11047 // cnt2 (rax) and tmp (rcx) are not inputs of the match; they are only killed.
11048 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11049                              rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
11050   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11051   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11052   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11053   ins_pipe(pipe_slow);
11054   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11055   ins_encode %{
11056     const int icnt2 = (int)$int_cnt2$$constant;
11057     if (icnt2 < 16) {
11058       // Small strings are loaded through stack if they cross page boundary.
11059       __ string_indexof($str1$$Register, $str2$$Register,
11060                         $cnt1$$Register, $cnt2$$Register,
11061                         icnt2, $result$$Register,
11062                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11063     } else {
11064       // IndexOf for constant substrings with size >= 16 elements
11065       // which don't need to be loaded through stack.
11066       __ string_indexofC8($str1$$Register, $str2$$Register,
11067                           $cnt1$$Register, $cnt2$$Register,
11068                           icnt2, $result$$Register,
11069                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11070     }
11071   %}
11072 %}
11073 
11074 // Fast substring search (StrIndexOf) where the substring length is the immediate
11075 // int_cnt2. Applies to UU-encoded nodes and only when UseSSE42Intrinsics is set.
11076 // cnt2 (rax) and tmp (rcx) are not inputs of the match; they are only killed.
11077 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11078                              rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
11079   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11080   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11081   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11082   ins_pipe(pipe_slow);
11083   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11084   ins_encode %{
11085     const int icnt2 = (int)$int_cnt2$$constant;
11086     if (icnt2 < 8) {
11087       // Small strings are loaded through stack if they cross page boundary.
11088       __ string_indexof($str1$$Register, $str2$$Register,
11089                         $cnt1$$Register, $cnt2$$Register,
11090                         icnt2, $result$$Register,
11091                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11092     } else {
11093       // IndexOf for constant substrings with size >= 8 elements
11094       // which don't need to be loaded through stack.
11095       __ string_indexofC8($str1$$Register, $str2$$Register,
11096                           $cnt1$$Register, $cnt2$$Register,
11097                           icnt2, $result$$Register,
11098                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11099     }
11100   %}
11101 %}
11102 
11103 // Fast substring search (StrIndexOf) where the substring length is the immediate
11104 // int_cnt2. Applies to UL-encoded nodes and only when UseSSE42Intrinsics is set.
11105 // cnt2 (rax) and tmp (rcx) are not inputs of the match; they are only killed.
11106 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11107                              rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
11108   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11109   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11110   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11111   ins_pipe(pipe_slow);
11112   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11113   ins_encode %{
11114     const int icnt2 = (int)$int_cnt2$$constant;
11115     if (icnt2 < 8) {
11116       // Small strings are loaded through stack if they cross page boundary.
11117       __ string_indexof($str1$$Register, $str2$$Register,
11118                         $cnt1$$Register, $cnt2$$Register,
11119                         icnt2, $result$$Register,
11120                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11121     } else {
11122       // IndexOf for constant substrings with size >= 8 elements
11123       // which don't need to be loaded through stack.
11124       __ string_indexofC8($str1$$Register, $str2$$Register,
11125                           $cnt1$$Register, $cnt2$$Register,
11126                           icnt2, $result$$Register,
11127                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11128     }
11129   %}
11130 %}
11131 
11132 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11133                          rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr) %{
11134   // StrIndexOf for LL-encoded nodes with the substring length in a register (cnt2, rax).
11135   // string_indexof is passed -1 in the slot where the _con rules pass the constant length.
11136   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11137   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11138   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11139   ins_pipe(pipe_slow);
11140   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11141   ins_encode %{
11142     __ string_indexof($str1$$Register, $str2$$Register,
11143                       $cnt1$$Register, $cnt2$$Register,
11144                       (-1), $result$$Register,
11145                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11146   %}
11147 %}
11148 
11149 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11150                          rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr) %{
11151   // StrIndexOf for UU-encoded nodes with the substring length in a register (cnt2, rax).
11152   // string_indexof is passed -1 in the slot where the _con rules pass the constant length.
11153   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11154   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11155   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11156   ins_pipe(pipe_slow);
11157   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11158   ins_encode %{
11159     __ string_indexof($str1$$Register, $str2$$Register,
11160                       $cnt1$$Register, $cnt2$$Register,
11161                       (-1), $result$$Register,
11162                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11163   %}
11164 %}
11165 
11166 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11167                          rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr) %{
11168   // StrIndexOf for UL-encoded nodes with the substring length in a register (cnt2, rax).
11169   // string_indexof is passed -1 in the slot where the _con rules pass the constant length.
11170   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11171   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11172   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11173   ins_pipe(pipe_slow);
11174   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11175   ins_encode %{
11176     __ string_indexof($str1$$Register, $str2$$Register,
11177                       $cnt1$$Register, $cnt2$$Register,
11178                       (-1), $result$$Register,
11179                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11180   %}
11181 %}
11182 
11183 instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11184                               rbx_RegI result, regD vec1, regD vec2, regD vec3, rcx_RegI tmp, rFlagsReg cr) %{
11185   // StrIndexOfChar under UseSSE42Intrinsics; emitted by string_indexof_char with three XMM temps.
11186   predicate(UseSSE42Intrinsics);
11187   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11188   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11189   ins_pipe(pipe_slow);
11190   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11191   ins_encode %{
11192     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11193                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11194   %}
11195 %}
11196 
11197 // Fast string equals (StrEquals). The work is done by arrays_equals, called with
11198 // false as its first argument and false for the trailing "char" flag. str1, str2
11199 // and cnt are USE_KILL; tmp3 (rbx) and the flags are killed.
11200 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11201                        regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{
11202   match(Set result (StrEquals (Binary str1 str2) cnt));
11203   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11204   ins_pipe(pipe_slow);
11205   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11206   ins_encode %{
11207     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11208                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11209                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11210   %}
11211 %}
11212 
11213 // Fast array equals (AryEq) for nodes whose encoding() is StrIntrinsicNode::LL.
11214 // arrays_equals is called with true as its first argument and false for the
11215 // trailing "char" flag; tmp3 (rcx) is passed in the slot string_equals uses for cnt.
11216 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11217                        regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
11218   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11219   match(Set result (AryEq ary1 ary2));
11220   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11221   ins_pipe(pipe_slow);
11222   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11223   ins_encode %{
11224     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11225                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11226                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11227   %}
11228 %}
11229 
11230 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11231                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
11232   // AryEq for nodes whose encoding() is StrIntrinsicNode::UU; same arrays_equals call
11233   // as the byte[] rule except that the trailing "char" flag is true.
11234   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11235   match(Set result (AryEq ary1 ary2));
11236   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11237   ins_pipe(pipe_slow);
11238   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11239   ins_encode %{
11240     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11241                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11242                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11243   %}
11244 %}
11245 
11246 instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
11247                       regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{
11248   // HasNegatives over (ary1, len), result in rax; emitted by the has_negatives
11249   // assembler routine. ary1 and len are USE_KILL, tmp3 (rbx) and the flags are killed.
11250   match(Set result (HasNegatives ary1 len));
11251   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11252   ins_pipe(pipe_slow);
11253   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11254   ins_encode %{
11255     __ has_negatives($ary1$$Register, $len$$Register,
11256                      $result$$Register, $tmp3$$Register,
11257                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11258   %}
11259 %}
11260 
11261 // Fast char[] to byte[] compression (StrCompressedCopy), emitted by char_array_compress.
11262 // src, dst and len are USE_KILL; tmp5 (rcx) and the flags are killed; result is in rax.
11263 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11264                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
11265   match(Set result (StrCompressedCopy src (Binary dst len)));
11266   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11267   ins_pipe(pipe_slow);
11268   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11269   ins_encode %{
11270     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11271                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11272                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11273   %}
11274 %}
11275 
11276 // Fast byte[] to char[] inflation (StrInflatedCopy), emitted by byte_array_inflate.
11277 // The match produces only the Universe dummy; src, dst and len are USE_KILL.
11278 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11279                         regD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
11280   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11281   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11282   ins_pipe(pipe_slow);
11283   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11284   ins_encode %{
11285     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11286                           $tmp1$$XMMRegister, $tmp2$$Register);
11287   %}
11288 %}
11289 
11290 // Encode char[] to byte[] in ISO_8859_1 (EncodeISOArray), emitted by encode_iso_array.
11291 // src, dst and len are USE_KILL; tmp5 (rcx) and the flags are killed; result is in rax.
11292 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11293                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11294                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
11295   match(Set result (EncodeISOArray src (Binary dst len)));
11296   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11297   ins_pipe(pipe_slow);
11298   format %{ "Encode array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
11299   ins_encode %{
11300     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11301                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11302                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11303   %}
11304 %}
11305 
11306 //----------Overflow Math Instructions-----------------------------------------
11307 
11308 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
11309   // OverflowAddI, register/register: the rule's result is the flags register,
11310   // produced by an in-place addl. The add overwrites op1 (rax), hence
11311   // USE_KILL on op1.
11312   match(Set cr (OverflowAddI op1 op2));
11313   effect(DEF cr, USE_KILL op1, USE op2);
11314   ins_pipe(ialu_reg_reg);
11315   format %{ "addl    $op1, $op2\t# overflow check int" %}
11316   ins_encode %{
11317     __ addl($op1$$Register, $op2$$Register);
11318   %}
11319 %}
11320 
11321 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
11322   // OverflowAddI, register/immediate: the rule's result is the flags register,
11323   // produced by an in-place addl with the constant. The add overwrites op1
11324   // (rax), hence USE_KILL on op1.
11325   match(Set cr (OverflowAddI op1 op2));
11326   effect(DEF cr, USE_KILL op1, USE op2);
11327   ins_pipe(ialu_reg_reg);
11328   format %{ "addl    $op1, $op2\t# overflow check int" %}
11329   ins_encode %{
11330     __ addl($op1$$Register, $op2$$constant);
11331   %}
11332 %}
11333 
11334 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
11335   // OverflowAddL, register/register: flags come from an in-place addq, which
11336   // overwrites op1 (rax), hence USE_KILL on op1.
11337   match(Set cr (OverflowAddL op1 op2));
11338   effect(DEF cr, USE_KILL op1, USE op2);
11339   ins_pipe(ialu_reg_reg);
11340   format %{ "addq    $op1, $op2\t# overflow check long" %}
11341   ins_encode %{
11342     __ addq($op1$$Register, $op2$$Register);
11343   %}
11344 %}
11345 
11346 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
11347   // OverflowAddL, register/immL32: flags come from an in-place addq with the
11348   // constant, which overwrites op1 (rax), hence USE_KILL on op1.
11349   match(Set cr (OverflowAddL op1 op2));
11350   effect(DEF cr, USE_KILL op1, USE op2);
11351   ins_pipe(ialu_reg_reg);
11352   format %{ "addq    $op1, $op2\t# overflow check long" %}
11353   ins_encode %{
11354     __ addq($op1$$Register, $op2$$constant);
11355   %}
11356 %}
11357 
11358 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
11359   // OverflowSubI, register/register: flags come from cmpl, so there is no
11360   // effect clause and any int registers may be used.
11361   match(Set cr (OverflowSubI op1 op2));
11362   ins_pipe(ialu_reg_reg);
11363   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11364   ins_encode %{
11365     __ cmpl($op1$$Register, $op2$$Register);
11366   %}
11367 %}
11368 
11369 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
11370   // OverflowSubI, register/immediate: flags come from cmpl against the
11371   // constant, so there is no effect clause.
11372   match(Set cr (OverflowSubI op1 op2));
11373   ins_pipe(ialu_reg_reg);
11374   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11375   ins_encode %{
11376     __ cmpl($op1$$Register, $op2$$constant);
11377   %}
11378 %}
11379 
11380 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
11381   // OverflowSubL, register/register: flags come from cmpq, so there is no
11382   // effect clause and any long registers may be used.
11383   match(Set cr (OverflowSubL op1 op2));
11384   ins_pipe(ialu_reg_reg);
11385   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11386   ins_encode %{
11387     __ cmpq($op1$$Register, $op2$$Register);
11388   %}
11389 %}
11390 
11391 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
11392   // OverflowSubL, register/immL32: flags come from cmpq against the
11393   // constant, so there is no effect clause.
11394   match(Set cr (OverflowSubL op1 op2));
11395   ins_pipe(ialu_reg_reg);
11396   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11397   ins_encode %{
11398     __ cmpq($op1$$Register, $op2$$constant);
11399   %}
11400 %}
11401 
11402 instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2) %{
11403   // OverflowSubI whose left operand is the immI0 constant: flags come from
11404   // an in-place negl, which overwrites op2 (rax), hence USE_KILL on op2.
11405   match(Set cr (OverflowSubI zero op2));
11406   effect(DEF cr, USE_KILL op2);
11407   ins_pipe(ialu_reg_reg);
11408   format %{ "negl    $op2\t# overflow check int" %}
11409   ins_encode %{
11410     __ negl($op2$$Register);
11411   %}
11412 %}
11413 
// Long negate overflow check: matches OverflowSubL with a zero left operand
// and negates op2 in place, hence the USE_KILL on op2.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    Register value = $op2$$Register;  // overwritten by the negate
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
11425 
// Int multiply overflow check, register/register form.  The product is
// written to op1 (USE_KILL); only the resulting flags are the matched value.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register product    = $op1$$Register;  // clobbered
    Register multiplier = $op2$$Register;
    __ imull(product, multiplier);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11437 
// Int multiply overflow check, register/immediate form.  The three-operand
// multiply writes its product to a TEMP register so op1 is left intact.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imull(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11449 
// Long multiply overflow check, register/register form.  The product is
// written to op1 (USE_KILL); only the resulting flags are the matched value.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product    = $op1$$Register;  // clobbered
    Register multiplier = $op2$$Register;
    __ imulq(product, multiplier);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11461 
// Long multiply overflow check, register/32-bit-immediate form.  The
// three-operand multiply writes its product to a TEMP register.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imulq(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11473 
11474 
11475 //----------Control Flow Instructions------------------------------------------
11476 // Signed compare Instructions
11477 
11478 // XXX more variants!!
// Signed int compare, register/register form.  The explicit effect list
// mirrors the match rule: flags are defined, both inputs are only read.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_reg(op1, op2),
             OpcP,
             reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11489 
// Signed int compare, register/immediate form.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  opcode(0x81, 0x07);  // Opcode 81 /7
  ins_encode(OpcSErm(op1, op2),
             Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11499 
// Signed int compare of a register against an int loaded from memory.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "cmpl    $op1, $op2" %}
  ins_cost(500); // XXX
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_mem(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11510 
// Int compare against constant zero, encoded as a test of the register
// against itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11520 
// Folds (src & con) compared with zero into one test-with-immediate.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg(src),
             OpcP,
             reg_opc(src),
             Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11530 
// Folds (src & LoadI mem) compared with zero into one test against memory.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem(src, mem),
             OpcP,
             reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11540 
11541 // Unsigned compare Instructions; really, same as signed except they
11542 // produce an rFlagsRegU instead of rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  // Register/register form; the result is typed as rFlagsRegU.
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_reg(op1, op2),
             OpcP,
             reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11552 
// Unsigned int compare, register/immediate form.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x81, 0x07);  // Opcode 81 /7
  ins_encode(OpcSErm(op1, op2),
             Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11562 
// Unsigned int compare of a register against an int loaded from memory.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_cost(500); // XXX
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_mem(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11573 
11574 // // // Cisc-spilled version of cmpU_rReg
11575 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
11576 // //%{
11577 // //  match(Set cr (CmpU (LoadI op1) op2));
11578 // //
11579 // //  format %{ "CMPu   $op1,$op2" %}
11580 // //  ins_cost(500);
11581 // //  opcode(0x39);  /* Opcode 39 /r */
11582 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11583 // //%}
11584 
// Unsigned int compare against constant zero, encoded as a test of the
// register against itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl  $src, $src\t# unsigned" %}
  opcode(0x85);
  ins_encode(REX_reg_reg(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11594 
// Pointer compare, register/register form.  Selected only when the CmpP node
// reports neither followed_by_equals() nor not_followed_by_equals(); those
// two cases have their own rules.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  predicate(!((CmpPNode*)n)->followed_by_equals() && !((CmpPNode*)n)->not_followed_by_equals());
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
11606 
// Pointer compare of a register against a pointer loaded from memory.
// Selected only when the CmpP node reports neither followed_by_equals() nor
// not_followed_by_equals().
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(!((CmpPNode*)n)->followed_by_equals() && !((CmpPNode*)n)->not_followed_by_equals());
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_cost(500); // XXX
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11619 
// Pointer compare (register/register) for CmpP nodes that report
// followed_by_equals().  Before the compare, the encoding atomically adds 1
// to the 64-bit location `followed_by_equals`, using tmp to hold its address.
// The format lists all three emitted instructions so printed assembly shows
// the memory update as well as the compare.
instruct compP_rReg_followed_by_equals(rFlagsRegU cr, rRegP op1, rRegP op2, rRegP tmp)
%{
  match(Set cr (CmpP op1 op2));
  predicate(((CmpPNode*)n)->followed_by_equals());
  effect(TEMP tmp);

  format %{ "lea     $tmp, [followed_by_equals]\n\t"
            "lock addq [$tmp], 1\n\t"
            "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ lea($tmp$$Register, ExternalAddress((address)&followed_by_equals));
    __ lock(); __ addq(Address($tmp$$Register, 0), 1);
    // The compare is emitted last, so the flags it sets are the ones consumed.
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
11634 
// Pointer compare (register against loaded pointer) for CmpP nodes that
// report followed_by_equals().  Before the compare, the encoding atomically
// adds 1 to the 64-bit location `followed_by_equals`, using tmp to hold its
// address.  The format lists all three emitted instructions so printed
// assembly shows the memory update as well as the compare.
instruct compP_rReg_mem_followed_by_equals(rFlagsRegU cr, rRegP op1, memory op2, rRegP tmp)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(((CmpPNode*)n)->followed_by_equals());
  effect(TEMP tmp);

  ins_cost(500); // XXX
  format %{ "lea     $tmp, [followed_by_equals]\n\t"
            "lock addq [$tmp], 1\n\t"
            "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ lea($tmp$$Register, ExternalAddress((address)&followed_by_equals));
    __ lock(); __ addq(Address($tmp$$Register, 0), 1);
    // The compare is emitted last, so the flags it sets are the ones consumed.
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11650 
// Pointer compare (register/register) for CmpP nodes that report
// not_followed_by_equals().  Before the compare, the encoding atomically adds
// 1 to the 64-bit location `not_followed_by_equals`, using tmp to hold its
// address.  The format lists all three emitted instructions so printed
// assembly shows the memory update as well as the compare.
instruct compP_rReg_not_followed_by_equals(rFlagsRegU cr, rRegP op1, rRegP op2, rRegP tmp)
%{
  match(Set cr (CmpP op1 op2));
  predicate(((CmpPNode*)n)->not_followed_by_equals());
  effect(TEMP tmp);

  format %{ "lea     $tmp, [not_followed_by_equals]\n\t"
            "lock addq [$tmp], 1\n\t"
            "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ lea($tmp$$Register, ExternalAddress((address)&not_followed_by_equals));
    __ lock(); __ addq(Address($tmp$$Register, 0), 1);
    // The compare is emitted last, so the flags it sets are the ones consumed.
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
11665 
// Pointer compare (register against loaded pointer) for CmpP nodes that
// report not_followed_by_equals().  Before the compare, the encoding
// atomically adds 1 to the 64-bit location `not_followed_by_equals`, using
// tmp to hold its address.  The format lists all three emitted instructions
// so printed assembly shows the memory update as well as the compare.
instruct compP_rReg_mem_not_followed_by_equals(rFlagsRegU cr, rRegP op1, memory op2, rRegP tmp)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(((CmpPNode*)n)->not_followed_by_equals());
  effect(TEMP tmp);

  ins_cost(500); // XXX
  format %{ "lea     $tmp, [not_followed_by_equals]\n\t"
            "lock addq [$tmp], 1\n\t"
            "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ lea($tmp$$Register, ExternalAddress((address)&not_followed_by_equals));
    __ lock(); __ addq(Address($tmp$$Register, 0), 1);
    // The compare is emitted last, so the flags it sets are the ones consumed.
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11681 
11682 
11683 // // // Cisc-spilled version of cmpP_rReg
11684 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
11685 // //%{
11686 // //  match(Set cr (CmpP (LoadP op1) op2));
11687 // //
11688 // //  format %{ "CMPu   $op1,$op2" %}
11689 // //  ins_cost(500);
11690 // //  opcode(0x39);  /* Opcode 39 /r */
11691 // //  ins_encode( OpcP, reg_mem( op1, op2) );
11692 // //%}
11693 
11694 // XXX this is generalized by compP_rReg_mem???
11695 // Compare raw pointer (used in out-of-heap check).
11696 // Only works because non-oop pointers must be raw pointers
11697 // and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  // Applies only when the loaded pointer's type carries no relocation.
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_mem_wide(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11708 
11709 // This will generate a signed flags result. This should be OK since
11710 // any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  // Null check of a pointer register: test the register against itself.
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11720 
11721 // This will generate a signed flags result. This should be OK since
11722 // any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  match(Set cr (CmpP (LoadP op) zero));
  // The complementary configuration (compressed oops with a NULL base) is
  // covered by testP_mem_reg0 when the klass base is NULL as well.
  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));

  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_cost(500); // XXX
  opcode(0xF7);  // Opcode F7 /0
  ins_encode(REX_mem_wide(op), OpcP, RM_opc_mem(0x00, op), Con_d32(0xFFFFFFFF));
  ins_pipe(ialu_cr_reg_imm);
%}
11735 
// Null check of a pointer in memory by comparing it with r12.  Enabled only
// for compressed oops when both the narrow oop base and the narrow klass base
// are NULL; the format string annotates this case as R12_heapbase==0.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  match(Set cr (CmpP (LoadP mem) zero));
  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
11747 
// Compressed-pointer compare, register/register form (32-bit compare).
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
11756 
// Compressed-pointer compare of a register against a narrow oop in memory.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{ __ cmpl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
11767 
// Compressed-pointer compare of a register against a narrow-oop constant.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmp_narrow_oop(lhs, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11777 
// Compressed-pointer compare of a narrow oop in memory against a narrow-oop
// constant; the constant is the left input of the matched CmpN.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
11788 
// Compressed-klass compare of a register against a narrow-klass constant.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmp_narrow_klass(lhs, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11798 
// Compressed-klass compare of a narrow klass in memory against a
// narrow-klass constant; the constant is the left input of the matched CmpN.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
11809 
// Null check of a compressed pointer held in a register: test it against
// itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
11817 
// Null check of a compressed pointer loaded from memory, used when the narrow
// oop base is non-NULL (testN_mem_reg0 handles the zero-base configuration).
// The flags must describe "loaded value compared with zero".
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(Universe::narrow_oop_base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    // Compare with zero so that ZF is set exactly when the loaded narrow oop
    // is null.  Comparing with 0xFFFFFFFF would instead set ZF for an
    // all-ones value and would not implement the matched CmpN-with-zero.
    __ cmpl($mem$$Address, (int32_t)0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11830 
// Null check of a compressed pointer in memory by comparing it with r12.
// Enabled only when both the narrow oop base and the narrow klass base are
// NULL; the format string annotates this case as R12_heapbase==0.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
11842 
11843 // Yanked all unsigned pointer compare operations.
11844 // Pointer compares are done with CmpP which is already unsigned.
11845 
// Signed long compare, register/register form.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_reg_wide(op1, op2),
             OpcP,
             reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11855 
// Signed long compare, register/32-bit-immediate form.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x81, 0x07);  // Opcode 81 /7
  ins_encode(OpcSErm_wide(op1, op2),
             Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11865 
// Signed long compare of a register against a long loaded from memory.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_mem_wide(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11875 
// Long compare against constant zero, encoded as a test of the register
// against itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11885 
// Folds (src & con) compared with zero into one 64-bit test-with-immediate.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg_wide(src),
             OpcP,
             reg_opc(src),
             Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11895 
// Folds (src & LoadL mem) compared with zero into one 64-bit test against
// memory.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem),
             OpcP,
             reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11905 
// Same shape as testL_reg_mem, but the register input is a pointer that the
// ideal graph converts with CastP2X before the AndL.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem),
             OpcP,
             reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11915 
11916 // Manifest a CmpL result in an integer register.  Very painful.
11917 // This is the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags) %{
  // Produces the CmpL3 result in an int register; the flags register is
  // destroyed along the way (KILL flags).
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "done:" %}
  ins_cost(275); // XXX
  ins_encode(cmpl3_flag(src1, src2, dst));
  ins_pipe(pipe_slow);
%}
11933 
11934 // Unsigned long compare Instructions; really, same as signed long except they
11935 // produce an rFlagsRegU instead of rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  // Register/register form; the result is typed as rFlagsRegU.
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_reg_wide(op1, op2),
             OpcP,
             reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11945 
// Unsigned long compare, register/32-bit-immediate form.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x81, 0x07);  // Opcode 81 /7
  ins_encode(OpcSErm_wide(op1, op2),
             Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
11955 
// Unsigned long compare of a register against a long loaded from memory.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  opcode(0x3B);  // Opcode 3B /r
  ins_encode(REX_reg_mem_wide(op1, op2),
             OpcP,
             reg_mem(op1, op2));
  ins_pipe(ialu_cr_reg_mem);
%}
11965 
// Unsigned long compare against constant zero, encoded as a test of the
// register against itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  opcode(0x85);
  ins_encode(REX_reg_reg_wide(src, src),
             OpcP,
             reg_reg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
11975 
// Compares a byte loaded from memory with an 8-bit immediate using a single
// byte-sized compare against memory.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  format %{ "cmpb    $mem, $imm" %}
  ins_cost(125);
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11985 
// Folds ((LoadB mem) & imm) compared with zero into a single byte-sized test
// against memory.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  format %{ "testb   $mem, $imm" %}
  ins_cost(125);
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
11995 
11996 //----------Max and Min--------------------------------------------------------
11997 // Min Instructions
11998 
// Helper with no match rule of its own; minI_rReg instantiates it from its
// expand block.  Conditionally moves src into dst based on existing flags.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  opcode(0x0F, 0x4F);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             OpcS,
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12008 
12009 
// Int minimum, expanded into a compare followed by a conditional move.
instruct minI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MinI dst src));
  ins_cost(200);

  expand %{
    rFlagsReg cmp_flags;                  // flags passed from compare to cmov
    compI_rReg(cmp_flags, dst, src);
    cmovI_reg_g(dst, src, cmp_flags);
  %}
%}
12021 
// Helper with no match rule of its own; maxI_rReg instantiates it from its
// expand block.  Conditionally moves src into dst based on existing flags.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  opcode(0x0F, 0x4C);
  ins_encode(REX_reg_reg(dst, src),
             OpcP,
             OpcS,
             reg_reg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12031 
12032 
// Int maximum, expanded into a compare followed by a conditional move.
instruct maxI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MaxI dst src));
  ins_cost(200);

  expand %{
    rFlagsReg cmp_flags;                  // flags passed from compare to cmov
    compI_rReg(cmp_flags, dst, src);
    cmovI_reg_l(dst, src, cmp_flags);
  %}
%}
12044 
12045 // ============================================================================
12046 // Branch Instructions
12047 
12048 // Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  format %{ "jmp     $labl" %}
  ins_cost(300);
  size(5);  // declared size of the long-form jump
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
12063 
12064 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  format %{ "j$cop     $labl" %}
  ins_cost(300);
  size(6);  // declared size of the long-form conditional jump
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12079 
12080 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  // Loop ends with a vector mask set use the *_and_restoreMask rule instead.
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop     $labl\t# loop end" %}
  ins_cost(300);
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12096 
12097 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  // Loop ends with a vector mask set use the *_and_restoreMask rule instead.
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop,u   $labl\t# loop end" %}
  ins_cost(300);
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12112 
// Counted-loop-end branch for the cmpOpUCF / rFlagsRegUCF operand pair, long
// form.  Not used when the loop end has a vector mask set.
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop,u   $labl\t# loop end" %}
  ins_cost(200);
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12127 
12128 // mask version
12129 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop     $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_cost(400);
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
    // Emitted after the branch, so it runs only on the fall-through path.
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12147 
12148 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop,u   $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_cost(400);
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
    // Emitted after the branch, so it runs only on the fall-through path.
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12165 
// Counted-loop-end branch for the cmpOpUCF / rFlagsRegUCF operand pair when
// the loop end has a vector mask set; the mask restore follows the branch.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  format %{ "j$cop,u   $labl\t# loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_cost(300);
  size(10);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe(pipe_jcc);
%}
12182 
12183 // Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  format %{ "j$cop,u  $labl" %}
  ins_cost(300);
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12197 
// Conditional branch for the cmpOpUCF / rFlagsRegUCF operand pair, long form.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  format %{ "j$cop,u  $labl" %}
  ins_cost(200);
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
12211 
// Conditional branch for cmpOpUCF2, which emits a parity jump ahead of the
// equal / not-equal jump:
//   notEqual: branch to the target on parity, and also on not-equal.
//   equal:    skip the equal branch on parity, otherwise branch on equal.
// Any other condition code is treated as unreachable.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u   $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u   done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cc = $cop$$cmpcode;
    if (cc == Assembler::equal) {
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12243 
12244 // ============================================================================
12245 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12246 // superklass array for an instance of the superklass.  Set a hidden
12247 // internal cache on a hit (cache is checked with exposed code in
12248 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12249 // encoding ALSO sets flags.
12250 
// Value-producing form of the check: the result is delivered in rdi, and rcx
// and the flags are killed.  opcode(0x1) selects the variant of the shared
// encoding that XORs RDI.
instruct partialSubtypeCheck(rdi_RegP result, rsi_RegP sub, rax_RegP super,
                             rcx_RegI rcx, rFlagsReg cr) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}
  ins_cost(1100);  // slightly larger than the next version

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12272 
// Flags-only form: matches the check compared directly against null, so the
// flags are the result and rdi is merely killed.  opcode(0x0) selects the
// variant of the shared encoding that skips the XOR of RDI.
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, rsi_RegP sub, rax_RegP super,
                                     rcx_RegI rcx, immP0 zero, rdi_RegP result) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "miss:\t" %}
  ins_cost(1000);

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12294 
12295 // ============================================================================
12296 // Branch Instructions -- short offset versions
12297 //
12298 // These instructions are used to replace jumps of a long offset (the default
12299 // match) with jumps of a shorter offset.  These instructions are all tagged
12300 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12301 // match rules in general matching.  Instead, the ADLC generates a conversion
12302 // method in the MachNode which can be used to do in-place replacement of the
12303 // long variant with the shorter variant.  The compiler will determine if a
12304 // branch can be taken by the is_short_branch_offset() predicate in the machine
12305 // specific code section of the file.
12306 
12307 // Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  format %{ "jmp,s   $labl" %}
  ins_cost(300);
  size(2);  // declared size of the short-form jump
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
12322 
12323 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  format %{ "j$cop,s   $labl" %}
  ins_cost(300);
  size(2);  // declared size of the short-form conditional jump
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12338 
12339 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12340 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset form of the counted-loop back branch: same encoding as
        // jmpCon_short, but matched against CountedLoopEnd instead of If.
12341   match(CountedLoopEnd cop cr);
12342   effect(USE labl);
12343 
12344   ins_cost(300);
12345   format %{ "j$cop,s   $labl\t# loop end" %}
        // Declared length of the emitted instruction, in bytes.
12346   size(2);
12347   ins_encode %{
12348     Label* L = $labl$$label;
          // The branch condition is the cmpcode carried by the cop operand.
12349     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12350   %}
12351   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12352   ins_short_branch(1);
12353 %}
12354 
12355 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12356 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset counted-loop back branch for the cmpOpU / rFlagsRegU
        // operand pair (the unsigned-compare flavour, going by the operand
        // names; the operand classes themselves are defined elsewhere).
12357   match(CountedLoopEnd cop cmp);
12358   effect(USE labl);
12359 
12360   ins_cost(300);
12361   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared length of the emitted instruction, in bytes.
12362   size(2);
12363   ins_encode %{
12364     Label* L = $labl$$label;
          // The branch condition is the cmpcode carried by the cop operand.
12365     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12366   %}
12367   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12368   ins_short_branch(1);
12369 %}
12370 
12371 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset counted-loop back branch for the cmpOpUCF / rFlagsRegUCF
        // operand pair. NOTE(review): presumably flags produced by an unordered
        // floating-point compare; the operand classes are defined elsewhere.
12372   match(CountedLoopEnd cop cmp);
12373   effect(USE labl);
12374 
12375   ins_cost(300);
12376   format %{ "j$cop,us  $labl\t# loop end" %}
        // Declared length of the emitted instruction, in bytes.
12377   size(2);
12378   ins_encode %{
12379     Label* L = $labl$$label;
          // The branch condition is the cmpcode carried by the cop operand.
12380     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12381   %}
12382   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12383   ins_short_branch(1);
12384 %}
12385 
12386 // Jump Direct Conditional - using unsigned comparison
12387 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset conditional branch on an If node whose condition and
        // flags use the cmpOpU / rFlagsRegU operand classes.
12388   match(If cop cmp);
12389   effect(USE labl);
12390 
12391   ins_cost(300);
12392   format %{ "j$cop,us  $labl" %}
        // Declared length of the emitted instruction, in bytes.
12393   size(2);
12394   ins_encode %{
12395     Label* L = $labl$$label;
          // The branch condition is the cmpcode carried by the cop operand.
12396     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12397   %}
12398   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12399   ins_short_branch(1);
12400 %}
12401 
12402 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset conditional branch on an If node whose condition and
        // flags use the cmpOpUCF / rFlagsRegUCF operand classes.
        // NOTE(review): presumably flags from an unordered floating-point
        // compare; the operand classes are defined elsewhere.
12403   match(If cop cmp);
12404   effect(USE labl);
12405 
12406   ins_cost(300);
12407   format %{ "j$cop,us  $labl" %}
        // Declared length of the emitted instruction, in bytes.
12408   size(2);
12409   ins_encode %{
12410     Label* L = $labl$$label;
          // The branch condition is the cmpcode carried by the cop operand.
12411     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12412   %}
12413   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12414   ins_short_branch(1);
12415 %}
12416 
12417 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset conditional branch that needs two jumps: the parity flag
        // is tested first, then the equal / notEqual condition itself.
        // NOTE(review): presumably parity signals an unordered floating-point
        // compare result; confirm against the cmpOpUCF2 operand definition.
12418   match(If cop cmp);
12419   effect(USE labl);
12420 
12421   ins_cost(300);
        // The format text mirrors the two encoder branches below. Its else arm
        // covers every condition other than notEqual, whereas the encoder only
        // accepts equal there.
12422   format %{ $$template
12423     if ($cop$$cmpcode == Assembler::notEqual) {
12424       $$emit$$"jp,u,s   $labl\n\t"
12425       $$emit$$"j$cop,u,s   $labl"
12426     } else {
12427       $$emit$$"jp,u,s   done\n\t"
12428       $$emit$$"j$cop,u,s  $labl\n\t"
12429       $$emit$$"done:"
12430     }
12431   %}
        // Two short branches are emitted on either path (each single-branch
        // sibling above declares size 2).
12432   size(4);
12433   ins_encode %{
12434     Label* l = $labl$$label;
12435     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target when parity is set or when the
            // notEqual condition holds.
12436       __ jccb(Assembler::parity, *l);
12437       __ jccb(Assembler::notEqual, *l);
12438     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity set skips the branch entirely (falls through at
            // the local label); otherwise branch to the target on equal.
12439       Label done;
12440       __ jccb(Assembler::parity, done);
12441       __ jccb(Assembler::equal, *l);
12442       __ bind(done);
12443     } else {
            // Only equal and notEqual are handled by this instruct.
12444        ShouldNotReachHere();
12445     }
12446   %}
12447   ins_pipe(pipe_jcc);
        // Tags this instruct as a short-branch variant.
12448   ins_short_branch(1);
12449 %}
12450 
12451 // ============================================================================
12452 // inlined locking and unlocking
12453 
12454 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
        // Inlined monitor-enter used only when the current compilation has RTM
        // enabled; the result of the FastLock node is delivered in the flags.
12455   predicate(Compile::current()->use_rtm());
12456   match(Set cr (FastLock object box));
        // tmp, scr, cx1 and cx2 are scratch registers; box is read and then
        // clobbered.
12457   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12458   ins_cost(300);
12459   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12460   ins_encode %{
          // The whole sequence is generated by the macro assembler. The RTM
          // counters and the method's MethodData are passed along with the
          // compilation's profile_rtm() setting.
          // NOTE(review): the literal true is presumably the use_rtm argument;
          // confirm against the fast_lock signature.
12461     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12462                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12463                  _counters, _rtm_counters, _stack_rtm_counters,
12464                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12465                  true, ra_->C->profile_rtm());
12466   %}
12467   ins_pipe(pipe_slow);
12468 %}
12469 
12470 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
        // Inlined monitor-enter used when the current compilation does not use
        // RTM (the complement of cmpFastLockRTM's predicate); the result of the
        // FastLock node is delivered in the flags.
12471   predicate(!Compile::current()->use_rtm());
12472   match(Set cr (FastLock object box));
        // tmp and scr are scratch registers; box is read and then clobbered.
12473   effect(TEMP tmp, TEMP scr, USE_KILL box);
12474   ins_cost(300);
12475   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
12476   ins_encode %{
          // Same macro-assembler entry point as the RTM variant, with the two
          // extra scratch registers passed as noreg, the RTM counter and
          // MethodData arguments passed as NULL, and both trailing flags false.
12477     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12478                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12479   %}
12480   ins_pipe(pipe_slow);
12481 %}
12482 
12483 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
        // Inlined monitor-exit; the result of the FastUnlock node is delivered
        // in the flags. There is a single rule for both RTM and non-RTM
        // compilations.
12484   match(Set cr (FastUnlock object box));
        // tmp is a scratch register; box is read and then clobbered.
12485   effect(TEMP tmp, USE_KILL box);
12486   ins_cost(300);
12487   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
12488   ins_encode %{
          // Whether RTM is in use is decided at encode time from the
          // compilation object rather than by a predicate.
12489     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12490   %}
12491   ins_pipe(pipe_slow);
12492 %}
12493 
12494 
12495 // ============================================================================
12496 // Safepoint Instructions
12497 instruct safePoint_poll(rFlagsReg cr)
12498 %{
        // Safepoint poll against the global polling page, used when that page
        // is not "far" and global page polling is in effect. The SafePoint node
        // is matched without a poll-address input.
12499   predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
12500   match(SafePoint);
        // The test instruction overwrites the flags.
12501   effect(KILL cr);
12502 
12503   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12504             "# Safepoint: poll for GC" %}
12505   ins_cost(125);
12506   ins_encode %{
          // The polling page address is wrapped with a poll_type relocation and
          // used directly as the memory operand of the test.
12507     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type);
12508     __ testl(rax, addr);
12509   %}
12510   ins_pipe(ialu_reg_mem);
12511 %}
12512 
12513 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll)
12514 %{
        // Safepoint poll against the global polling page when that page is
        // "far": the SafePoint node carries the poll address as an input, which
        // arrives in the poll register.
12515   predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll());
12516   match(SafePoint poll);
        // The test instruction overwrites the flags; poll is only read.
12517   effect(KILL cr, USE poll);
12518 
12519   format %{ "testl  rax, [$poll]\t"
12520             "# Safepoint: poll for GC" %}
12521   ins_cost(125);
12522   ins_encode %{
          // Record a poll_type relocation at the current position, then emit
          // the test through the poll register with a zero displacement.
12523     __ relocate(relocInfo::poll_type);
12524     __ testl(rax, Address($poll$$Register, 0));
12525   %}
12526   ins_pipe(ialu_reg_mem);
12527 %}
12528 
12529 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
12530 %{
        // Safepoint poll used when thread-local polling is in effect: the
        // SafePoint node carries the poll address as an input, which arrives in
        // the poll register.
12531   predicate(SafepointMechanism::uses_thread_local_poll());
12532   match(SafePoint poll);
        // The test instruction overwrites the flags; poll is only read.
12533   effect(KILL cr, USE poll);
12534 
12535   format %{ "testl  rax, [$poll]\t"
12536             "# Safepoint: poll for GC" %}
12537   ins_cost(125);
12538   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
12539   ins_encode %{
12540     __ relocate(relocInfo::poll_type);
          // Remember where the instruction starts so the assert below can
          // check that what was emitted is recognized as a safepoint poll.
12541     address pre_pc = __ pc();
12542     __ testl(rax, Address($poll$$Register, 0));
12543     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
12544   %}
12545   ins_pipe(ialu_reg_mem);
12546 %}
12547 
12548 // ============================================================================
12549 // Procedure Call/Return Instructions
12550 // Call Java Static Instruction
12551 // Note: If this code changes, the corresponding ret_addr_offset() and
12552 //       compute_padding() functions will have to be adjusted.
12553 instruct CallStaticJavaDirect(method meth) %{
        // Direct call for the ideal CallStaticJava node; meth is the call
        // target operand.
12554   match(CallStaticJava);
12555   effect(USE meth);
12556 
12557   ins_cost(300);
12558   format %{ "call,static " %}
12559   opcode(0xE8); /* E8 cd */
        // Encoding is the clear_avx, Java_Static_Call and call_epilog encoding
        // classes, in that order (all defined elsewhere in this file).
12560   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
12561   ins_pipe(pipe_slow);
        // Requested instruction alignment; per the note above this instruct,
        // compute_padding() has to stay in sync with this encoding.
12562   ins_alignment(4);
12563 %}
12564 
12565 // Call Java Dynamic Instruction
12566 // Note: If this code changes, the corresponding ret_addr_offset() and
12567 //       compute_padding() functions will have to be adjusted.
12568 instruct CallDynamicJavaDirect(method meth)
12569 %{
        // Call for the ideal CallDynamicJava node; meth is the call target
        // operand.
12570   match(CallDynamicJava);
12571   effect(USE meth);
12572 
12573   ins_cost(300);
        // The format shows a load of Universe::non_oop_word() into rax ahead of
        // the call; that sequence is produced by the Java_Dynamic_Call encoding
        // class (defined elsewhere in this file).
12574   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12575             "call,dynamic " %}
12576   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
12577   ins_pipe(pipe_slow);
        // Requested instruction alignment; per the note above this instruct,
        // compute_padding() has to stay in sync with this encoding.
12578   ins_alignment(4);
12579 %}
12580 
12581 // Call Runtime Instruction
12582 instruct CallRuntimeDirect(method meth)
12583 %{
        // Call for the ideal CallRuntime node; meth is the call target operand.
12584   match(CallRuntime);
12585   effect(USE meth);
12586 
12587   ins_cost(300);
12588   format %{ "call,runtime " %}
        // Encoding is clear_avx followed by the Java_To_Runtime encoding class;
        // unlike the Java call instructs there is no call_epilog.
12589   ins_encode(clear_avx, Java_To_Runtime(meth));
12590   ins_pipe(pipe_slow);
12591 %}
12592 
12593 // Call runtime without safepoint
12594 instruct CallLeafDirect(method meth)
12595 %{
        // Call for the ideal CallLeaf node; meth is the call target operand.
12596   match(CallLeaf);
12597   effect(USE meth);
12598 
12599   ins_cost(300);
12600   format %{ "call_leaf,runtime " %}
        // Same encoding as CallRuntimeDirect: clear_avx, then Java_To_Runtime.
12601   ins_encode(clear_avx, Java_To_Runtime(meth));
12602   ins_pipe(pipe_slow);
12603 %}
12604 
12605 // Call runtime without safepoint
12606 // entry point is null, target holds the address to call
12607 instruct CallLeafNoFPInDirect(rRegP target)
12608 %{
        // Indirect form of CallLeafNoFP: selected when the call node has no
        // entry point, in which case the address to call is supplied in the
        // target register as an input of the node.
12609   predicate(n->as_Call()->entry_point() == NULL);
12610   match(CallLeafNoFP target);
12611 
12612   ins_cost(300);
12613   format %{ "call_leaf_nofp,runtime indirect " %}
12614   ins_encode %{
           // NOTE(review): unlike the other runtime-call instructs in this
           // section, no clear_avx step precedes the call; confirm that this
           // is intentional.
12615      __ call($target$$Register);
12616   %}
12617 
12618   ins_pipe(pipe_slow);
12619 %}
12620 
12621 instruct CallLeafNoFPDirect(method meth)
12622 %{
        // Direct form of CallLeafNoFP: selected when the call node has a
        // non-NULL entry point (the complement of CallLeafNoFPInDirect).
12623   predicate(n->as_Call()->entry_point() != NULL);
12624   match(CallLeafNoFP);
12625   effect(USE meth);
12626 
12627   ins_cost(300);
12628   format %{ "call_leaf_nofp,runtime " %}
        // Same encoding as CallLeafDirect: clear_avx, then Java_To_Runtime.
12629   ins_encode(clear_avx, Java_To_Runtime(meth));
12630   ins_pipe(pipe_slow);
12631 %}
12632 
12633 // Return Instruction
12634 // Remove the return address & jump to it.
12635 // NOTE(review): the encoding below emits only the one-byte RET opcode (0xC3).
12636 // No nop for safepoint patching is emitted by this instruct itself; confirm whether padding is still needed.
12637 instruct Ret()
12638 %{
12639   match(Return);
12640 
12641   format %{ "ret" %}
        // OpcP emits the primary opcode declared here.
12642   opcode(0xC3);
12643   ins_encode(OpcP);
12644   ins_pipe(pipe_jmp);
12645 %}
12646 
12647 // Tail Call; Jump from runtime stub to Java code.
12648 // Also known as an 'interprocedural jump'.
12649 // Target of jump will eventually return to caller.
12650 // TailJump below removes the return address.
12651 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_oop)
12652 %{
        // Indirect jump for the ideal TailCall node. The jump target comes from
        // a register class that excludes rbp (per its name); the method oop
        // input is pinned to rbx and is not referenced by the encoding.
12653   match(TailCall jump_target method_oop);
12654 
12655   ins_cost(300);
12656   format %{ "jmp     $jump_target\t# rbx holds method oop" %}
12657   opcode(0xFF, 0x4); /* Opcode FF /4 */
        // Encoding: REX prefix for the target register, the primary opcode, then
        // the register/opcode-extension byte.
12658   ins_encode(REX_reg(jump_target), OpcP, reg_opc(jump_target));
12659   ins_pipe(pipe_jmp);
12660 %}
12661 
12662 // Tail Jump; remove the return address; jump to target.
12663 // TailCall above leaves the return address around.
12664 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12665 %{
        // Indirect jump for the ideal TailJump node: first pops the return
        // address off the stack into rdx, then jumps through jump_target. The
        // ex_oop input is pinned to rax and is not referenced by the encoding.
        // NOTE(review): rdx is overwritten by the pop but does not appear in any
        // effect clause; presumably fine because control does not come back
        // here. Confirm.
12666   match(TailJump jump_target ex_oop);
12667 
12668   ins_cost(300);
12669   format %{ "popq    rdx\t# pop return address\n\t"
12670             "jmp     $jump_target" %}
12671   opcode(0xFF, 0x4); /* Opcode FF /4 */
12672   ins_encode(Opcode(0x5a), // popq rdx
12673              REX_reg(jump_target), OpcP, reg_opc(jump_target));
12674   ins_pipe(pipe_jmp);
12675 %}
12676 
12677 // Create exception oop: created by stack-crawling runtime code.
12678 // Created exception is now available to this handler, and is setup
12679 // just prior to jumping to this handler.  No code emitted.
12680 instruct CreateException(rax_RegP ex_oop)
12681 %{
        // Placeholder for the ideal CreateEx node: it defines ex_oop in rax but
        // emits no machine code.
12682   match(Set ex_oop (CreateEx));
12683 
        // Zero bytes are emitted; the encoding below is empty.
12684   size(0);
12685   // use the following format syntax
12686   format %{ "# exception oop is in rax; no code emitted" %}
12687   ins_encode();
12688   ins_pipe(empty);
12689 %}
12690 
12691 // Rethrow exception:
12692 // The exception oop will come in the first argument position.
12693 // Then JUMP (not call) to the rethrow stub code.
12694 instruct RethrowException()
12695 %{
        // Matches the ideal Rethrow node. It takes no operands; the emitted
        // code comes entirely from the enc_rethrow encoding class (defined
        // elsewhere in this file).
12696   match(Rethrow);
12697 
12698   // use the following format syntax
12699   format %{ "jmp     rethrow_stub" %}
12700   ins_encode(enc_rethrow);
12701   ins_pipe(pipe_jmp);
12702 %}
12703 
12704 //
12705 // Execute ZGC load barrier (strong) slow path
12706 //
12707 
12708 // When running without XMM regs
12709 instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{
        // Strong ZGC load-barrier slow path for configurations with
        // MaxVectorSize below 16, where no vector registers are declared
        // killed.
12710 
12711   match(Set dst (LoadBarrierSlowReg mem));
12712   predicate(MaxVectorSize < 16);
12713 
        // dst is written by this instruct; the flags are clobbered.
12714   effect(DEF dst, KILL cr);
12715 
12716   format %{"LoadBarrierSlowRegNoVec $dst, $mem" %}
12717   ins_encode %{
12718 #if INCLUDE_ZGC
12719     Register d = $dst$$Register;
12720     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12721 
          // The destination must not be one of these three registers.
12722     assert(d != r12, "Can't be R12!");
12723     assert(d != r15, "Can't be R15!");
12724     assert(d != rsp, "Can't be RSP!");
12725 
          // Put the address of the field (not its value) into dst, then call the
          // strong slow-path stub chosen for that register.
          // NOTE(review): presumably the stub leaves its result in the same
          // register; confirm in ZBarrierSetAssembler.
12726     __ lea(d, $mem$$Address);
12727     __ call(RuntimeAddress(bs->load_barrier_slow_stub(d)));
12728 #else
          // Builds without ZGC must never select this instruct.
12729     ShouldNotReachHere();
12730 #endif
12731   %}
12732   ins_pipe(pipe_slow);
12733 %}
12734 
12735 // For XMM and YMM enabled processors
12736 instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr,
12737                                      rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
12738                                      rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
12739                                      rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
12740                                      rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
        // Strong ZGC load-barrier slow path for SSE/AVX configurations up to
        // AVX2 with MaxVectorSize of at least 16. The x0..x15 operands exist
        // only so that those registers can be declared killed.
12741 
12742   match(Set dst (LoadBarrierSlowReg mem));
12743   predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16));
12744 
        // dst is written by this instruct; the flags and x0..x15 are clobbered.
12745   effect(DEF dst, KILL cr,
12746          KILL x0, KILL x1, KILL x2, KILL x3,
12747          KILL x4, KILL x5, KILL x6, KILL x7,
12748          KILL x8, KILL x9, KILL x10, KILL x11,
12749          KILL x12, KILL x13, KILL x14, KILL x15);
12750 
12751   format %{"LoadBarrierSlowRegXmm $dst, $mem" %}
12752   ins_encode %{
12753 #if INCLUDE_ZGC
12754     Register d = $dst$$Register;
12755     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12756 
          // The destination must not be one of these three registers.
12757     assert(d != r12, "Can't be R12!");
12758     assert(d != r15, "Can't be R15!");
12759     assert(d != rsp, "Can't be RSP!");
12760 
          // Put the address of the field (not its value) into dst, then call the
          // strong slow-path stub chosen for that register.
12761     __ lea(d, $mem$$Address);
12762     __ call(RuntimeAddress(bs->load_barrier_slow_stub(d)));
12763 #else
          // Builds without ZGC must never select this instruct.
12764     ShouldNotReachHere();
12765 #endif
12766   %}
12767   ins_pipe(pipe_slow);
12768 %}
12769 
12770 // For ZMM enabled processors
12771 instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr,
12772                                rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
12773                                rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
12774                                rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
12775                                rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
12776                                rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
12777                                rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
12778                                rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
12779                                rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
        // Strong ZGC load-barrier slow path for UseAVX == 3 with MaxVectorSize
        // of at least 16. The x0..x31 operands exist only so that those
        // registers can be declared killed.
12780 
12781   match(Set dst (LoadBarrierSlowReg mem));
12782   predicate((UseAVX == 3) && (MaxVectorSize >= 16));
12783 
        // dst is written by this instruct; the flags and x0..x31 are clobbered.
12784   effect(DEF dst, KILL cr,
12785          KILL x0, KILL x1, KILL x2, KILL x3,
12786          KILL x4, KILL x5, KILL x6, KILL x7,
12787          KILL x8, KILL x9, KILL x10, KILL x11,
12788          KILL x12, KILL x13, KILL x14, KILL x15,
12789          KILL x16, KILL x17, KILL x18, KILL x19,
12790          KILL x20, KILL x21, KILL x22, KILL x23,
12791          KILL x24, KILL x25, KILL x26, KILL x27,
12792          KILL x28, KILL x29, KILL x30, KILL x31);
12793 
12794   format %{"LoadBarrierSlowRegZmm $dst, $mem" %}
12795   ins_encode %{
12796 #if INCLUDE_ZGC
12797     Register d = $dst$$Register;
12798     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12799 
          // The destination must not be one of these three registers.
12800     assert(d != r12, "Can't be R12!");
12801     assert(d != r15, "Can't be R15!");
12802     assert(d != rsp, "Can't be RSP!");
12803 
          // Put the address of the field (not its value) into dst, then call the
          // strong slow-path stub chosen for that register.
12804     __ lea(d, $mem$$Address);
12805     __ call(RuntimeAddress(bs->load_barrier_slow_stub(d)));
12806 #else
          // Builds without ZGC must never select this instruct.
12807     ShouldNotReachHere();
12808 #endif
12809   %}
12810   ins_pipe(pipe_slow);
12811 %}
12812 
12813 //
12814 // Execute ZGC load barrier (weak) slow path
12815 //
12816 
12817 // When running without XMM regs
12818 instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{
        // Weak ZGC load-barrier slow path for configurations with
        // MaxVectorSize below 16, where no vector registers are declared
        // killed.
12819 
        // Must match the weak node, like the other loadBarrierWeakSlowReg*
        // rules. Matching LoadBarrierSlowReg here would duplicate
        // loadBarrierSlowRegNoVec (same predicate) and leave
        // LoadBarrierWeakSlowReg without a rule when MaxVectorSize < 16.
12820   match(Set dst (LoadBarrierWeakSlowReg mem));
12821   predicate(MaxVectorSize < 16);
12822 
        // dst is written by this instruct; the flags are clobbered.
12823   effect(DEF dst, KILL cr);
12824 
12825   format %{"LoadBarrierWeakSlowRegNoVec $dst, $mem" %}
12826   ins_encode %{
12827 #if INCLUDE_ZGC
12828     Register d = $dst$$Register;
12829     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12830 
          // The destination must not be one of these three registers.
12831     assert(d != r12, "Can't be R12!");
12832     assert(d != r15, "Can't be R15!");
12833     assert(d != rsp, "Can't be RSP!");
12834 
          // Put the address of the field (not its value) into dst, then call the
          // weak slow-path stub chosen for that register.
12835     __ lea(d, $mem$$Address);
12836     __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d)));
12837 #else
          // Builds without ZGC must never select this instruct.
12838     ShouldNotReachHere();
12839 #endif
12840   %}
12841   ins_pipe(pipe_slow);
12842 %}
12843 
12844 // For XMM and YMM enabled processors
12845 instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr,
12846                                          rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
12847                                          rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
12848                                          rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
12849                                          rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{
        // Weak ZGC load-barrier slow path for SSE/AVX configurations up to AVX2
        // with MaxVectorSize of at least 16. The x0..x15 operands exist only so
        // that those registers can be declared killed.
12850 
12851   match(Set dst (LoadBarrierWeakSlowReg mem));
12852   predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16));
12853 
        // dst is written by this instruct; the flags and x0..x15 are clobbered.
12854   effect(DEF dst, KILL cr,
12855          KILL x0, KILL x1, KILL x2, KILL x3,
12856          KILL x4, KILL x5, KILL x6, KILL x7,
12857          KILL x8, KILL x9, KILL x10, KILL x11,
12858          KILL x12, KILL x13, KILL x14, KILL x15);
12859 
12860   format %{"LoadBarrierWeakSlowRegXmm $dst, $mem" %}
12861   ins_encode %{
12862 #if INCLUDE_ZGC
12863     Register d = $dst$$Register;
12864     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12865 
          // The destination must not be one of these three registers.
12866     assert(d != r12, "Can't be R12!");
12867     assert(d != r15, "Can't be R15!");
12868     assert(d != rsp, "Can't be RSP!");
12869 
          // Put the address of the field (not its value) into dst, then call the
          // weak slow-path stub chosen for that register.
12870     __ lea(d,$mem$$Address);
12871     __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d)));
12872 #else
          // Builds without ZGC must never select this instruct.
12873     ShouldNotReachHere();
12874 #endif
12875   %}
12876   ins_pipe(pipe_slow);
12877 %}
12878 
12879 // For ZMM enabled processors
12880 instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr,
12881                                    rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3,
12882                                    rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7,
12883                                    rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11,
12884                                    rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15,
12885                                    rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19,
12886                                    rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23,
12887                                    rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27,
12888                                    rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{
        // Weak ZGC load-barrier slow path for UseAVX == 3 with MaxVectorSize of
        // at least 16. The x0..x31 operands exist only so that those registers
        // can be declared killed.
12889 
12890   match(Set dst (LoadBarrierWeakSlowReg mem));
12891   predicate((UseAVX == 3) && (MaxVectorSize >= 16));
12892 
        // dst is written by this instruct; the flags and x0..x31 are clobbered.
12893   effect(DEF dst, KILL cr,
12894          KILL x0, KILL x1, KILL x2, KILL x3,
12895          KILL x4, KILL x5, KILL x6, KILL x7,
12896          KILL x8, KILL x9, KILL x10, KILL x11,
12897          KILL x12, KILL x13, KILL x14, KILL x15,
12898          KILL x16, KILL x17, KILL x18, KILL x19,
12899          KILL x20, KILL x21, KILL x22, KILL x23,
12900          KILL x24, KILL x25, KILL x26, KILL x27,
12901          KILL x28, KILL x29, KILL x30, KILL x31);
12902 
12903   format %{"LoadBarrierWeakSlowRegZmm $dst, $mem" %}
12904   ins_encode %{
12905 #if INCLUDE_ZGC
12906     Register d = $dst$$Register;
12907     ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
12908 
          // The destination must not be one of these three registers.
12909     assert(d != r12, "Can't be R12!");
12910     assert(d != r15, "Can't be R15!");
12911     assert(d != rsp, "Can't be RSP!");
12912 
          // Put the address of the field (not its value) into dst, then call the
          // weak slow-path stub chosen for that register.
12913     __ lea(d,$mem$$Address);
12914     __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d)));
12915 #else
          // Builds without ZGC must never select this instruct.
12916     ShouldNotReachHere();
12917 #endif
12918   %}
12919   ins_pipe(pipe_slow);
12920 %}
12921 
12922 // ============================================================================
12923 // This name is KNOWN by the ADLC and cannot be changed.
12924 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12925 // for this guy.
12926 instruct tlsLoadP(r15_RegP dst) %{
        // Matches the ideal ThreadLocal node. The result operand is pinned to
        // r15, so the value is already in place and no code is emitted.
12927   match(Set dst (ThreadLocal));
12928   effect(DEF dst);
12929 
        // Zero bytes are emitted; the encoding below is empty.
12930   size(0);
12931   format %{ "# TLS is in R15" %}
12932   ins_encode( /*empty encoding*/ );
12933   ins_pipe(ialu_reg_reg);
12934 %}
12935 
12936 
12937 //----------PEEPHOLE RULES-----------------------------------------------------
12938 // These must follow all instruction definitions as they use the names
12939 // defined in the instructions definitions.
12940 //
12941 // peepmatch ( root_instr_name [preceding_instruction]* );
12942 //
12943 // peepconstraint %{
12944 // (instruction_number.operand_name relational_op instruction_number.operand_name
12945 //  [, ...] );
12946 // // instruction numbers are zero-based using left to right order in peepmatch
12947 //
12948 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12949 // // provide an instruction_number.operand_name for each operand that appears
12950 // // in the replacement instruction's match rule
12951 //
12952 // ---------VM FLAGS---------------------------------------------------------
12953 //
12954 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12955 //
12956 // Each peephole rule is given an identifying number starting with zero and
12957 // increasing by one in the order seen by the parser.  An individual peephole
12958 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12959 // on the command-line.
12960 //
12961 // ---------CURRENT LIMITATIONS----------------------------------------------
12962 //
12963 // Only match adjacent instructions in same basic block
12964 // Only equality constraints
12965 // Only constraints between operands, not (0.dest_reg == RAX_enc)
12966 // Only one replacement instruction
12967 //
12968 // ---------EXAMPLE----------------------------------------------------------
12969 //
12970 // // pertinent parts of existing instructions in architecture description
12971 // instruct movI(rRegI dst, rRegI src)
12972 // %{
12973 //   match(Set dst (CopyI src));
12974 // %}
12975 //
12976 // instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
12977 // %{
12978 //   match(Set dst (AddI dst src));
12979 //   effect(KILL cr);
12980 // %}
12981 //
12982 // // Change (inc mov) to lea
12983 // peephole %{
12984 //   // increment preceded by register-register move
12985 //   peepmatch ( incI_rReg movI );
12986 //   // require that the destination register of the increment
12987 //   // match the destination register of the move
12988 //   peepconstraint ( 0.dst == 1.dst );
12989 //   // construct a replacement instruction that sets
12990 //   // the destination to ( move's source register + one )
12991 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
12992 // %}
12993 //
12994 
12995 // Implementation no longer uses movX instructions since
12996 // machine-independent system no longer uses CopyX nodes.
12997 //
12998 // peephole
12999 // %{
13000 //   peepmatch (incI_rReg movI);
13001 //   peepconstraint (0.dst == 1.dst);
13002 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13003 // %}
13004 
13005 // peephole
13006 // %{
13007 //   peepmatch (decI_rReg movI);
13008 //   peepconstraint (0.dst == 1.dst);
13009 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13010 // %}
13011 
13012 // peephole
13013 // %{
13014 //   peepmatch (addI_rReg_imm movI);
13015 //   peepconstraint (0.dst == 1.dst);
13016 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13017 // %}
13018 
13019 // peephole
13020 // %{
13021 //   peepmatch (incL_rReg movL);
13022 //   peepconstraint (0.dst == 1.dst);
13023 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13024 // %}
13025 
13026 // peephole
13027 // %{
13028 //   peepmatch (decL_rReg movL);
13029 //   peepconstraint (0.dst == 1.dst);
13030 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13031 // %}
13032 
13033 // peephole
13034 // %{
13035 //   peepmatch (addL_rReg_imm movL);
13036 //   peepconstraint (0.dst == 1.dst);
13037 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13038 // %}
13039 
13040 // peephole
13041 // %{
13042 //   peepmatch (addP_rReg_imm movP);
13043 //   peepconstraint (0.dst == 1.dst);
13044 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13045 // %}
13046 
13047 // // Change load of spilled value to only a spill
13048 // instruct storeI(memory mem, rRegI src)
13049 // %{
13050 //   match(Set mem (StoreI mem src));
13051 // %}
13052 //
13053 // instruct loadI(rRegI dst, memory mem)
13054 // %{
13055 //   match(Set dst (LoadI mem));
13056 // %}
13057 //
13058 
13059 peephole
13060 %{
        // Drops a 32-bit load that immediately follows a store of the same
        // register to the same memory operand. Instruction 0 is the root
        // (loadI) and instruction 1 is the preceding storeI, per the numbering
        // rule in the PEEPHOLE RULES header above.
13061   peepmatch (loadI storeI);
        // The store's source register must be the load's destination, and both
        // must use the same memory operand.
13062   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
13063   peepreplace (storeI(1.mem 1.mem 1.src));
13064 %}
13065 
13066 peephole
13067 %{
        // 64-bit counterpart of the loadI/storeI rule: drops a loadL that
        // immediately follows a storeL of the same register to the same memory
        // operand. Instruction 0 is the root (loadL), instruction 1 the
        // preceding storeL.
13068   peepmatch (loadL storeL);
        // The store's source register must be the load's destination, and both
        // must use the same memory operand.
13069   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
        // Keep only the store, rebuilt from the original store's operands.
13070   peepreplace (storeL(1.mem 1.mem 1.src));
13071 %}
13072 
13073 //----------SMARTSPILL RULES---------------------------------------------------
13074 // These must follow all instruction definitions as they use the names
13075 // defined in the instructions definitions.