1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
//   r8-r9 invisible to the allocator (so we can use them as scratch regs)
//
// as regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Columns: (Java/allocator save type, C-ABI save type, ideal type,
// encoding, VMReg). Each 64-bit register is split into a real low half
// (Rn) and a virtual high half (Rn_H) for the allocator.

// r0-r18: caller-save for both Java and the C ABI; r0-r7 are the
// argument registers.
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
// r8/r9 are deliberately not defined: they are reserved as scratch
// registers invisible to the allocator (see the header comment above).
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: callee-saved in the C ABI (SOE in the second column) but
// caller-save (SOC) for Java code, so frames stay easy to deoptimize.
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: reserved registers, marked NS (no save, no allocate) for Java.
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save). Float
// registers v16-v31 are SOC as per the platform spec.
 163 
  // Each 128-bit vector register contributes four 32-bit allocator
  // slots: Vn (low word), Vn_H, Vn_J and Vn_K.

  // v0-v7: FP/SIMD argument registers (SOC for Java and the C ABI).
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15: callee-saved in the platform C ABI, but declared SOC here
  // so Java code treats them as caller-save.
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  // v16-v31: caller-save (SOC) per the platform ABI.
  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// The AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. The FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation order for the general registers: scratch/volatile
// registers first, then the argument registers, then the (C-ABI)
// non-volatiles, with the reserved, non-allocatable registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
// Allocation order for the FP/SIMD registers: the freely-clobberable
// v16-v31 first, then the argument registers v0-v7, then v8-v15
// (callee-saved in the C ABI) last.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
// Note: fp (r29) and lr (r30) are included here; only sp (r31) is
// excluded.
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
// All 64-bit integer registers, including sp (r31). (The header
// comment above says "RSP" in x86 terms; on AArch64 this is sp.)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
// Variant that excludes fp (r29) as well as the reserved registers.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Variant that additionally allows fp (r29) to be allocated.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Selects between the two variants at runtime based on
// PreserveFramePointer (presumably keeping fp out of the allocatable
// set when the flag is on — confirm against ADLC reg_class_dynamic
// semantics).
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
// 64-bit variant that excludes fp (r29) as well as the reserved
// registers.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// 64-bit variant that additionally allows fp (r29) to be allocated.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects between the two variants at runtime based on
// PreserveFramePointer (see the note on no_special_reg32 above for the
// same construct).
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (rmethod, r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
 726 // Class for all pointer registers
// Every 64-bit register including the reserved ones (r27-r31).
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers
// (same as ptr_reg minus heapbase, thread, fp, lr and sp)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
 794 // Class for all float registers
// Single-precision values need only the low 32-bit slot (Vn).
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (Vn plus the virtual Vn_H slot)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
 868 // Class for all 64bit vector registers
// 64-bit vectors use two 32-bit slots per register (Vn, Vn_H).
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers
// (all four 32-bit slots per register: Vn, Vn_H, Vn_J, Vn_K)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
// Singleton class for the condition code register (NZCV flags)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register operation.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references carry a large cost (see cost-model note above).
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 emits no trampoline stubs for calls, so both the size
  // and the relocation count below are zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // Emitters for the exception and deoptimization handler stubs
  // (defined elsewhere in this file's source block).
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Worst-case size of the exception handler: a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  // Worst-case size of the deopt handler.
  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
  // graph traversal helpers used by the volatile put/get and CAS
  // optimization predicates (definitions in the source block below)

  // navigate between a node and its parent/child membar
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  bool leading_membar(const MemBarNode *barrier);

  // classify membars and opcodes
  bool is_card_mark_membar(const MemBarNode *barrier);
  bool is_CAS(int opcode);

  // walk between the leading, card mark and trailing membars of a
  // volatile put/CAS subgraph
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1073 %}
1074 
1075 source %{
1076 
1077   // Optimizaton of volatile gets and puts
1078   // -------------------------------------
1079   //
1080   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1081   // use to implement volatile reads and writes. For a volatile read
1082   // we simply need
1083   //
1084   //   ldar<x>
1085   //
1086   // and for a volatile write we need
1087   //
1088   //   stlr<x>
1089   //
1090   // Alternatively, we can implement them by pairing a normal
1091   // load/store with a memory barrier. For a volatile read we need
1092   //
1093   //   ldr<x>
1094   //   dmb ishld
1095   //
1096   // for a volatile write
1097   //
1098   //   dmb ish
1099   //   str<x>
1100   //   dmb ish
1101   //
1102   // We can also use ldaxr and stlxr to implement compare and swap CAS
1103   // sequences. These are normally translated to an instruction
1104   // sequence like the following
1105   //
1106   //   dmb      ish
1107   // retry:
1108   //   ldxr<x>   rval raddr
1109   //   cmp       rval rold
1110   //   b.ne done
1111   //   stlxr<x>  rval, rnew, rold
1112   //   cbnz      rval retry
1113   // done:
1114   //   cset      r0, eq
1115   //   dmb ishld
1116   //
1117   // Note that the exclusive store is already using an stlxr
1118   // instruction. That is required to ensure visibility to other
1119   // threads of the exclusive write (assuming it succeeds) before that
1120   // of any subsequent writes.
1121   //
1122   // The following instruction sequence is an improvement on the above
1123   //
1124   // retry:
1125   //   ldaxr<x>  rval raddr
1126   //   cmp       rval rold
1127   //   b.ne done
1128   //   stlxr<x>  rval, rnew, rold
1129   //   cbnz      rval retry
1130   // done:
1131   //   cset      r0, eq
1132   //
1133   // We don't need the leading dmb ish since the stlxr guarantees
1134   // visibility of prior writes in the case that the swap is
1135   // successful. Crucially we don't have to worry about the case where
1136   // the swap is not successful since no valid program should be
1137   // relying on visibility of prior changes by the attempting thread
1138   // in the case where the CAS fails.
1139   //
1140   // Similarly, we don't need the trailing dmb ishld if we substitute
1141   // an ldaxr instruction since that will provide all the guarantees we
1142   // require regarding observation of changes made by other threads
1143   // before any change to the CAS address observed by the load.
1144   //
1145   // In order to generate the desired instruction sequence we need to
1146   // be able to identify specific 'signature' ideal graph node
1147   // sequences which i) occur as a translation of a volatile reads or
1148   // writes or CAS operations and ii) do not occur through any other
1149   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1151   // sequences to the desired machine code sequences. Selection of the
1152   // alternative rules can be implemented by predicates which identify
1153   // the relevant node sequences.
1154   //
1155   // The ideal graph generator translates a volatile read to the node
1156   // sequence
1157   //
1158   //   LoadX[mo_acquire]
1159   //   MemBarAcquire
1160   //
1161   // As a special case when using the compressed oops optimization we
1162   // may also see this variant
1163   //
1164   //   LoadN[mo_acquire]
1165   //   DecodeN
1166   //   MemBarAcquire
1167   //
1168   // A volatile write is translated to the node sequence
1169   //
1170   //   MemBarRelease
1171   //   StoreX[mo_release] {CardMark}-optional
1172   //   MemBarVolatile
1173   //
1174   // n.b. the above node patterns are generated with a strict
1175   // 'signature' configuration of input and output dependencies (see
1176   // the predicates below for exact details). The card mark may be as
1177   // simple as a few extra nodes or, in a few GC configurations, may
1178   // include more complex control flow between the leading and
1179   // trailing memory barriers. However, whatever the card mark
1180   // configuration these signatures are unique to translated volatile
1181   // reads/stores -- they will not appear as a result of any other
1182   // bytecode translation or inlining nor as a consequence of
1183   // optimizing transforms.
1184   //
1185   // We also want to catch inlined unsafe volatile gets and puts and
1186   // be able to implement them using either ldar<x>/stlr<x> or some
1187   // combination of ldr<x>/stlr<x> and dmb instructions.
1188   //
1189   // Inlined unsafe volatiles puts manifest as a minor variant of the
1190   // normal volatile put node sequence containing an extra cpuorder
1191   // membar
1192   //
1193   //   MemBarRelease
1194   //   MemBarCPUOrder
1195   //   StoreX[mo_release] {CardMark}-optional
1196   //   MemBarCPUOrder
1197   //   MemBarVolatile
1198   //
1199   // n.b. as an aside, a cpuorder membar is not itself subject to
1200   // matching and translation by adlc rules.  However, the rule
1201   // predicates need to detect its presence in order to correctly
1202   // select the desired adlc rules.
1203   //
1204   // Inlined unsafe volatile gets manifest as a slightly different
1205   // node sequence to a normal volatile get because of the
1206   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1209   // present
1210   //
1211   //   MemBarCPUOrder
1212   //        ||       \\
1213   //   MemBarCPUOrder LoadX[mo_acquire]
1214   //        ||            |
1215   //        ||       {DecodeN} optional
1216   //        ||       /
1217   //     MemBarAcquire
1218   //
1219   // In this case the acquire membar does not directly depend on the
1220   // load. However, we can be sure that the load is generated from an
1221   // inlined unsafe volatile get if we see it dependent on this unique
1222   // sequence of membar nodes. Similarly, given an acquire membar we
1223   // can know that it was added because of an inlined unsafe volatile
1224   // get if it is fed and feeds a cpuorder membar and if its feed
1225   // membar also feeds an acquiring load.
1226   //
1227   // Finally an inlined (Unsafe) CAS operation is translated to the
1228   // following ideal graph
1229   //
1230   //   MemBarRelease
1231   //   MemBarCPUOrder
1232   //   CompareAndSwapX {CardMark}-optional
1233   //   MemBarCPUOrder
1234   //   MemBarAcquire
1235   //
1236   // So, where we can identify these volatile read and write
1237   // signatures we can choose to plant either of the above two code
1238   // sequences. For a volatile read we can simply plant a normal
1239   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1240   // also choose to inhibit translation of the MemBarAcquire and
1241   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1242   //
1243   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1245   // normal str<x> and then a dmb ish for the MemBarVolatile.
1246   // Alternatively, we can inhibit translation of the MemBarRelease
1247   // and MemBarVolatile and instead plant a simple stlr<x>
1248   // instruction.
1249   //
1250   // when we recognise a CAS signature we can choose to plant a dmb
1251   // ish as a translation for the MemBarRelease, the conventional
1252   // macro-instruction sequence for the CompareAndSwap node (which
1253   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1254   // Alternatively, we can elide generation of the dmb instructions
1255   // and plant the alternative CompareAndSwap macro-instruction
1256   // sequence (which uses ldaxr<x>).
1257   //
1258   // Of course, the above only applies when we see these signature
1259   // configurations. We still want to plant dmb instructions in any
1260   // other cases where we may see a MemBarAcquire, MemBarRelease or
1261   // MemBarVolatile. For example, at the end of a constructor which
1262   // writes final/volatile fields we will see a MemBarRelease
1263   // instruction and this needs a 'dmb ish' lest we risk the
1264   // constructed object being visible without making the
1265   // final/volatile field writes visible.
1266   //
1267   // n.b. the translation rules below which rely on detection of the
1268   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1269   // If we see anything other than the signature configurations we
1270   // always just translate the loads and stores to ldr<x> and str<x>
1271   // and translate acquire, release and volatile membars to the
1272   // relevant dmb instructions.
1273   //
1274 
1275   // graph traversal helpers used for volatile put/get and CAS
1276   // optimization
1277 
1278   // 1) general purpose helpers
1279 
1280   // if node n is linked to a parent MemBarNode by an intervening
1281   // Control and Memory ProjNode return the MemBarNode otherwise return
1282   // NULL.
1283   //
1284   // n may only be a Load or a MemBar.
1285 
1286   MemBarNode *parent_membar(const Node *n)
1287   {
1288     Node *ctl = NULL;
1289     Node *mem = NULL;
1290     Node *membar = NULL;
1291 
1292     if (n->is_Load()) {
1293       ctl = n->lookup(LoadNode::Control);
1294       mem = n->lookup(LoadNode::Memory);
1295     } else if (n->is_MemBar()) {
1296       ctl = n->lookup(TypeFunc::Control);
1297       mem = n->lookup(TypeFunc::Memory);
1298     } else {
1299         return NULL;
1300     }
1301 
1302     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1303       return NULL;
1304     }
1305 
1306     membar = ctl->lookup(0);
1307 
1308     if (!membar || !membar->is_MemBar()) {
1309       return NULL;
1310     }
1311 
1312     if (mem->lookup(0) != membar) {
1313       return NULL;
1314     }
1315 
1316     return membar->as_MemBar();
1317   }
1318 
1319   // if n is linked to a child MemBarNode by intervening Control and
1320   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1321 
  MemBarNode *child_membar(const MemBarNode *n)
  {
    ProjNode *ctl = n->proj_out_or_null(TypeFunc::Control);
    ProjNode *mem = n->proj_out_or_null(TypeFunc::Memory);

    // MemBar needs to have both a Ctl and Mem projection
    if (! ctl || ! mem)
      return NULL;

    MemBarNode *child = NULL;
    Node *x;

    // first find a membar fed by the Ctl projection
    for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
      x = ctl->fast_out(i);
      // if we see a membar we keep hold of it. we may also see a new
      // arena copy of the original but it will appear later
      if (x->is_MemBar()) {
          child = x->as_MemBar();
          break;
      }
    }

    if (child == NULL) {
      return NULL;
    }

    // the same membar must also consume the Mem projection, otherwise
    // it is not a true child of n
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x == child) {
        return child;
      }
    }
    // Ctl and Mem feed different membars -- no child found
    return NULL;
  }
1358 
1359   // helper predicate use to filter candidates for a leading memory
1360   // barrier
1361   //
1362   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1363   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1364 
1365   bool leading_membar(const MemBarNode *barrier)
1366   {
1367     int opcode = barrier->Opcode();
1368     // if this is a release membar we are ok
1369     if (opcode == Op_MemBarRelease) {
1370       return true;
1371     }
1372     // if its a cpuorder membar . . .
1373     if (opcode != Op_MemBarCPUOrder) {
1374       return false;
1375     }
1376     // then the parent has to be a release membar
1377     MemBarNode *parent = parent_membar(barrier);
1378     if (!parent) {
1379       return false;
1380     }
1381     opcode = parent->Opcode();
1382     return opcode == Op_MemBarRelease;
1383   }
1384 
1385   // 2) card mark detection helper
1386 
1387   // helper predicate which can be used to detect a volatile membar
1388   // introduced as part of a conditional card mark sequence either by
1389   // G1 or by CMS when UseCondCardMark is true.
1390   //
1391   // membar can be definitively determined to be part of a card mark
1392   // sequence if and only if all the following hold
1393   //
1394   // i) it is a MemBarVolatile
1395   //
1396   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1397   // true
1398   //
1399   // iii) the node's Mem projection feeds a StoreCM node.
1400 
  bool is_card_mark_membar(const MemBarNode *barrier)
  {
    // only G1 and CMS-with-conditional-card-marking insert card mark
    // membars (condition ii above)
    if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
      return false;
    }

    // condition i: must be a MemBarVolatile
    if (barrier->Opcode() != Op_MemBarVolatile) {
      return false;
    }

    // condition iii: the Mem projection must feed a StoreCM node,
    // which is the unique marker for a card mark membar
    ProjNode *mem = barrier->proj_out(TypeFunc::Memory);

    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
      Node *y = mem->fast_out(i);
      if (y->Opcode() == Op_StoreCM) {
        return true;
      }
    }

    return false;
  }
1422 
1423 
1424   // 3) helper predicates to traverse volatile put or CAS graphs which
1425   // may contain GC barrier subgraphs
1426 
1427   // Preamble
1428   // --------
1429   //
1430   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1432   // leading MemBarRelease and a trailing MemBarVolatile as follows
1433   //
1434   //   MemBarRelease
1435   //  {      ||      } -- optional
1436   //  {MemBarCPUOrder}
1437   //         ||     \\
1438   //         ||     StoreX[mo_release]
1439   //         | \     /
1440   //         | MergeMem
1441   //         | /
1442   //  {MemBarCPUOrder} -- optional
1443   //  {      ||      }
1444   //   MemBarVolatile
1445   //
1446   // where
1447   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1448   //  | \ and / indicate further routing of the Ctl and Mem feeds
1449   //
1450   // this is the graph we see for non-object stores. however, for a
1451   // volatile Object store (StoreN/P) we may see other nodes below the
1452   // leading membar because of the need for a GC pre- or post-write
1453   // barrier.
1454   //
  // with most GC configurations we will see this simple variant which
1456   // includes a post-write barrier card mark.
1457   //
1458   //   MemBarRelease______________________________
1459   //         ||    \\               Ctl \        \\
1460   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1461   //         | \     /                       . . .  /
1462   //         | MergeMem
1463   //         | /
1464   //         ||      /
1465   //  {MemBarCPUOrder} -- optional
1466   //  {      ||      }
1467   //   MemBarVolatile
1468   //
1469   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1470   // the object address to an int used to compute the card offset) and
1471   // Ctl+Mem to a StoreB node (which does the actual card mark).
1472   //
1473   // n.b. a StoreCM node will only appear in this configuration when
1474   // using CMS or G1. StoreCM differs from a normal card mark write (StoreB)
1475   // because it implies a requirement to order visibility of the card
1476   // mark (StoreCM) relative to the object put (StoreP/N) using a
1477   // StoreStore memory barrier (arguably this ought to be represented
1478   // explicitly in the ideal graph but that is not how it works). This
1479   // ordering is required for both non-volatile and volatile
1480   // puts. Normally that means we need to translate a StoreCM using
1481   // the sequence
1482   //
1483   //   dmb ishst
1484   //   strb
1485   //
1486   // However, when using G1 or CMS with conditional card marking (as
1487   // we shall see) we don't need to insert the dmb when translating
1488   // StoreCM because there is already an intervening StoreLoad barrier
1489   // between it and the StoreP/N.
1490   //
1491   // It is also possible to perform the card mark conditionally on it
1492   // currently being unmarked in which case the volatile put graph
1493   // will look slightly different
1494   //
1495   //   MemBarRelease____________________________________________
1496   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1497   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1498   //         | \     /                              \            |
1499   //         | MergeMem                            . . .      StoreB
1500   //         | /                                                /
1501   //         ||     /
1502   //   MemBarVolatile
1503   //
1504   // It is worth noting at this stage that both the above
1505   // configurations can be uniquely identified by checking that the
1506   // memory flow includes the following subgraph:
1507   //
1508   //   MemBarRelease
1509   //  {MemBarCPUOrder}
1510   //          |  \      . . .
1511   //          |  StoreX[mo_release]  . . .
1512   //          |   /
1513   //         MergeMem
1514   //          |
1515   //  {MemBarCPUOrder}
1516   //   MemBarVolatile
1517   //
1518   // This is referred to as a *normal* subgraph. It can easily be
1519   // detected starting from any candidate MemBarRelease,
1520   // StoreX[mo_release] or MemBarVolatile.
1521   //
1522   // A simple variation on this normal case occurs for an unsafe CAS
1523   // operation. The basic graph for a non-object CAS is
1524   //
1525   //   MemBarRelease
1526   //         ||
1527   //   MemBarCPUOrder
1528   //         ||     \\   . . .
1529   //         ||     CompareAndSwapX
1530   //         ||       |
1531   //         ||     SCMemProj
1532   //         | \     /
1533   //         | MergeMem
1534   //         | /
1535   //   MemBarCPUOrder
1536   //         ||
1537   //   MemBarAcquire
1538   //
1539   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1542   // tail of the graph is a pair comprising a MemBarCPUOrder +
1543   // MemBarAcquire.
1544   //
1545   // So, in the case of a CAS the normal graph has the variant form
1546   //
1547   //   MemBarRelease
1548   //   MemBarCPUOrder
1549   //          |   \      . . .
1550   //          |  CompareAndSwapX  . . .
1551   //          |    |
1552   //          |   SCMemProj
1553   //          |   /  . . .
1554   //         MergeMem
1555   //          |
1556   //   MemBarCPUOrder
1557   //   MemBarAcquire
1558   //
1559   // This graph can also easily be detected starting from any
1560   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1561   //
1562   // the code below uses two helper predicates, leading_to_normal and
1563   // normal_to_leading to identify these normal graphs, one validating
1564   // the layout starting from the top membar and searching down and
1565   // the other validating the layout starting from the lower membar
1566   // and searching up.
1567   //
1568   // There are two special case GC configurations when a normal graph
1569   // may not be generated: when using G1 (which always employs a
1570   // conditional card mark); and when using CMS with conditional card
1571   // marking configured. These GCs are both concurrent rather than
1572   // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
1573   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1575   // object put and the corresponding conditional card mark. CMS
1576   // employs a post-write GC barrier while G1 employs both a pre- and
1577   // post-write GC barrier. Of course the extra nodes may be absent --
1578   // they are only inserted for object puts/swaps. This significantly
1579   // complicates the task of identifying whether a MemBarRelease,
1580   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1581   // when using these GC configurations (see below). It adds similar
1582   // complexity to the task of identifying whether a MemBarRelease,
1583   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1584   //
1585   // In both cases the post-write subtree includes an auxiliary
1586   // MemBarVolatile (StoreLoad barrier) separating the object put/swap
1587   // and the read of the corresponding card. This poses two additional
1588   // problems.
1589   //
1590   // Firstly, a card mark MemBarVolatile needs to be distinguished
1591   // from a normal trailing MemBarVolatile. Resolving this first
1592   // problem is straightforward: a card mark MemBarVolatile always
1593   // projects a Mem feed to a StoreCM node and that is a unique marker
1594   //
1595   //      MemBarVolatile (card mark)
1596   //       C |    \     . . .
1597   //         |   StoreCM   . . .
1598   //       . . .
1599   //
1600   // The second problem is how the code generator is to translate the
1601   // card mark barrier? It always needs to be translated to a "dmb
1602   // ish" instruction whether or not it occurs as part of a volatile
1603   // put. A StoreLoad barrier is needed after the object put to ensure
1604   // i) visibility to GC threads of the object put and ii) visibility
1605   // to the mutator thread of any card clearing write by a GC
1606   // thread. Clearly a normal store (str) will not guarantee this
1607   // ordering but neither will a releasing store (stlr). The latter
1608   // guarantees that the object put is visible but does not guarantee
1609   // that writes by other threads have also been observed.
1610   //
1611   // So, returning to the task of translating the object put and the
1612   // leading/trailing membar nodes: what do the non-normal node graph
1613   // look like for these 2 special cases? and how can we determine the
1614   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1615   // in both normal and non-normal cases?
1616   //
1617   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1619   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1620   // intervening StoreLoad barrier (MemBarVolatile).
1621   //
1622   // So, with CMS we may see a node graph for a volatile object store
1623   // which looks like this
1624   //
1625   //   MemBarRelease
1626   //  {MemBarCPUOrder}_(leading)_________________
1627   //     C |    M \       \\                   C \
1628   //       |       \    StoreN/P[mo_release]  CastP2X
1629   //       |    Bot \    /
1630   //       |       MergeMem
1631   //       |         /
1632   //      MemBarVolatile (card mark)
1633   //     C |  ||    M |
1634   //       | LoadB    |
1635   //       |   |      |
1636   //       | Cmp      |\
1637   //       | /        | \
1638   //       If         |  \
1639   //       | \        |   \
1640   // IfFalse  IfTrue  |    \
1641   //       \     / \  |     \
1642   //        \   / StoreCM    |
1643   //         \ /      |      |
1644   //        Region   . . .   |
1645   //          | \           /
1646   //          |  . . .  \  / Bot
1647   //          |       MergeMem
1648   //          |          |
1649   //       {MemBarCPUOrder}
1650   //        MemBarVolatile (trailing)
1651   //
1652   // The first MergeMem merges the AliasIdxBot Mem slice from the
1653   // leading membar and the oopptr Mem slice from the Store into the
1654   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1655   // Mem slice from the card mark membar and the AliasIdxRaw slice
1656   // from the StoreCM into the trailing membar (n.b. the latter
1657   // proceeds via a Phi associated with the If region).
1658   //
1659   // The graph for a CAS varies slightly, the difference being
1660   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1661   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1662   // MemBarAcquire pair (also the MemBarCPUOrder nodes are not optional).
1663   //
1664   //   MemBarRelease
1665   //   MemBarCPUOrder_(leading)_______________
1666   //     C |    M \       \\                C \
1667   //       |       \    CompareAndSwapN/P  CastP2X
1668   //       |        \      |
1669   //       |         \   SCMemProj
1670   //       |      Bot \   /
1671   //       |        MergeMem
1672   //       |         /
1673   //      MemBarVolatile (card mark)
1674   //     C |  ||    M |
1675   //       | LoadB    |
1676   //       |   |      |
1677   //       | Cmp      |\
1678   //       | /        | \
1679   //       If         |  \
1680   //       | \        |   \
1681   // IfFalse  IfTrue  |    \
1682   //       \     / \  |     \
1683   //        \   / StoreCM    |
1684   //         \ /      |      |
1685   //        Region   . . .   |
1686   //          | \           /
1687   //          |  . . .  \  / Bot
1688   //          |       MergeMem
1689   //          |          |
1690   //        MemBarCPUOrder
1691   //        MemBarVolatile (trailing)
1692   //
1693   //
1694   // G1 is quite a lot more complicated. The nodes inserted on behalf
1695   // of G1 may comprise: a pre-write graph which adds the old value to
1696   // the SATB queue; the releasing store itself; and, finally, a
1697   // post-write graph which performs a card mark.
1698   //
1699   // The pre-write graph may be omitted, but only when the put is
1700   // writing to a newly allocated (young gen) object and then only if
1701   // there is a direct memory chain to the Initialize node for the
1702   // object allocation. This will not happen for a volatile put since
1703   // any memory chain passes through the leading membar.
1704   //
1705   // The pre-write graph includes a series of 3 If tests. The outermost
1706   // If tests whether SATB is enabled (no else case). The next If tests
1707   // whether the old value is non-NULL (no else case). The third tests
1708   // whether the SATB queue index is > 0, if so updating the queue. The
1709   // else case for this third If calls out to the runtime to allocate a
1710   // new queue buffer.
1711   //
1712   // So with G1 the pre-write and releasing store subgraph looks like
1713   // this (the nested Ifs are omitted).
1714   //
1715   //  MemBarRelease
1716   // {MemBarCPUOrder}_(leading)___________
1717   //     C |  ||  M \   M \    M \  M \ . . .
1718   //       | LoadB   \  LoadL  LoadN   \
1719   //       | /        \                 \
1720   //       If         |\                 \
1721   //       | \        | \                 \
1722   //  IfFalse  IfTrue |  \                 \
1723   //       |     |    |   \                 |
1724   //       |     If   |   /\                |
1725   //       |     |          \               |
1726   //       |                 \              |
1727   //       |    . . .         \             |
1728   //       | /       | /       |            |
1729   //      Region  Phi[M]       |            |
1730   //       | \       |         |            |
1731   //       |  \_____ | ___     |            |
1732   //     C | C \     |   C \ M |            |
1733   //       | CastP2X | StoreN/P[mo_release] |
1734   //       |         |         |            |
1735   //     C |       M |       M |          M |
1736   //        \        |         |           /
1737   //                  . . .
1738   //          (post write subtree elided)
1739   //                    . . .
1740   //             C \         M /
1741   //                \         /
1742   //             {MemBarCPUOrder}
1743   //              MemBarVolatile (trailing)
1744   //
1745   // n.b. the LoadB in this subgraph is not the card read -- it's a
1746   // read of the SATB queue active flag.
1747   //
1748   // The G1 post-write subtree is also optional, this time when the
1749   // new value being written is either null or can be identified as a
1750   // newly allocated (young gen) object with no intervening control
  // flow. The latter cannot happen but the former may, in which case
  // the card mark membar is omitted and the memory feeds from the
  // leading membar and the StoreN/P are merged direct into the
  // trailing membar as per the normal subgraph. So, the only special
1755   // case which arises is when the post-write subgraph is generated.
1756   //
1757   // The kernel of the post-write G1 subgraph is the card mark itself
1758   // which includes a card mark memory barrier (MemBarVolatile), a
1759   // card test (LoadB), and a conditional update (If feeding a
1760   // StoreCM). These nodes are surrounded by a series of nested Ifs
1761   // which try to avoid doing the card mark. The top level If skips if
1762   // the object reference does not cross regions (i.e. it tests if
1763   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1764   // need not be recorded. The next If, which skips on a NULL value,
1765   // may be absent (it is not generated if the type of value is >=
1766   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1767   // checking if card_val != young).  n.b. although this test requires
1768   // a pre-read of the card it can safely be done before the StoreLoad
1769   // barrier. However that does not bypass the need to reread the card
1770   // after the barrier. A final, 4th If tests if the card is already
1771   // marked.
1772   //
1773   //                (pre-write subtree elided)
1774   //        . . .                  . . .    . . .  . . .
1775   //        C |                    M |     M |    M |
1776   //       Region                  Phi[M] StoreN    |
1777   //          |                     / \      |      |
1778   //         / \_______            /   \     |      |
1779   //      C / C \      . . .            \    |      |
1780   //       If   CastP2X . . .            |   |      |
1781   //       / \                           |   |      |
1782   //      /   \                          |   |      |
1783   // IfFalse IfTrue                      |   |      |
1784   //   |       |                         |   |     /|
1785   //   |       If                        |   |    / |
1786   //   |      / \                        |   |   /  |
1787   //   |     /   \                        \  |  /   |
1788   //   | IfFalse IfTrue                   MergeMem  |
1789   //   |  . . .    / \                       /      |
1790   //   |          /   \                     /       |
1791   //   |     IfFalse IfTrue                /        |
1792   //   |      . . .    |                  /         |
1793   //   |               If                /          |
1794   //   |               / \              /           |
1795   //   |              /   \            /            |
1796   //   |         IfFalse IfTrue       /             |
1797   //   |           . . .   |         /              |
1798   //   |                    \       /               |
1799   //   |                     \     /                |
1800   //   |             MemBarVolatile__(card mark)    |
1801   //   |                ||   C |  M \  M \          |
1802   //   |               LoadB   If    |    |         |
1803   //   |                      / \    |    |         |
1804   //   |                     . . .   |    |         |
1805   //   |                          \  |    |        /
1806   //   |                        StoreCM   |       /
1807   //   |                          . . .   |      /
1808   //   |                        _________/      /
1809   //   |                       /  _____________/
1810   //   |   . . .       . . .  |  /            /
1811   //   |    |                 | /   _________/
1812   //   |    |               Phi[M] /        /
1813   //   |    |                 |   /        /
1814   //   |    |                 |  /        /
1815   //   |  Region  . . .     Phi[M]  _____/
1816   //   |    /                 |    /
1817   //   |                      |   /
1818   //   | . . .   . . .        |  /
1819   //   | /                    | /
1820   // Region           |  |  Phi[M]
1821   //   |              |  |  / Bot
1822   //    \            MergeMem
1823   //     \            /
1824   //    {MemBarCPUOrder}
1825   //     MemBarVolatile
1826   //
1827   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1828   // from the leading membar and the oopptr Mem slice from the Store
1829   // into the card mark membar i.e. the memory flow to the card mark
1830   // membar still looks like a normal graph.
1831   //
1832   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1833   // Mem slices (from the StoreCM and other card mark queue stores).
1834   // However in this case the AliasIdxBot Mem slice does not come
1835   // direct from the card mark membar. It is merged through a series
1836   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1837   // from the leading membar with the Mem feed from the card mark
1838   // membar. Each Phi corresponds to one of the Ifs which may skip
1839   // around the card mark membar. So when the If implementing the NULL
1840   // value check has been elided the total number of Phis is 2
1841   // otherwise it is 3.
1842   //
1843   // The CAS graph when using G1GC also includes a pre-write subgraph
1844   // and an optional post-write subgraph. The same variations are
1845   // introduced as for CMS with conditional card marking i.e. the
1846   // StoreP/N is swapped for a CompareAndSwapP/N with a following
1847   // SCMemProj, the trailing MemBarVolatile for a MemBarCPUOrder +
1848   // MemBarAcquire pair. There may be an extra If test introduced in
1849   // the CAS case, when the boolean result of the CAS is tested by the
1850   // caller. In that case an extra Region and AliasIdxBot Phi may be
1851   // introduced before the MergeMem
1852   //
1853   // So, the upshot is that in all cases the subgraph will include a
  // *normal* memory subgraph between the leading membar and its child
1855   // membar: either a normal volatile put graph including a releasing
1856   // StoreX and terminating with a trailing volatile membar or card
1857   // mark volatile membar; or a normal CAS graph including a
1858   // CompareAndSwapX + SCMemProj pair and terminating with a card mark
1859   // volatile membar or a trailing cpu order and acquire membar
1860   // pair. If the child membar is not a (volatile) card mark membar
1861   // then it marks the end of the volatile put or CAS subgraph. If the
1862   // child is a card mark membar then the normal subgraph will form
1863   // part of a larger volatile put or CAS subgraph if and only if the
1864   // child feeds an AliasIdxBot Mem feed to a trailing barrier via a
1865   // MergeMem. That feed is either direct (for CMS) or via 2, 3 or 4
1866   // Phi nodes merging the leading barrier memory flow (for G1).
1867   //
1868   // The predicates controlling generation of instructions for store
1869   // and barrier nodes employ a few simple helper functions (described
1870   // below) which identify the presence or absence of all these
1871   // subgraph configurations and provide a means of traversing from
1872   // one node in the subgraph to another.
1873 
1874   // is_CAS(int opcode)
1875   //
1876   // return true if opcode is one of the possible CompareAndSwapX
1877   // values otherwise false.
1878 
1879   bool is_CAS(int opcode)
1880   {
1881     switch(opcode) {
1882       // We handle these
1883     case Op_CompareAndSwapI:
1884     case Op_CompareAndSwapL:
1885     case Op_CompareAndSwapP:
1886     case Op_CompareAndSwapN:
1887  // case Op_CompareAndSwapB:
1888  // case Op_CompareAndSwapS:
1889       return true;
1890       // These are TBD
1891     case Op_WeakCompareAndSwapB:
1892     case Op_WeakCompareAndSwapS:
1893     case Op_WeakCompareAndSwapI:
1894     case Op_WeakCompareAndSwapL:
1895     case Op_WeakCompareAndSwapP:
1896     case Op_WeakCompareAndSwapN:
1897     case Op_CompareAndExchangeB:
1898     case Op_CompareAndExchangeS:
1899     case Op_CompareAndExchangeI:
1900     case Op_CompareAndExchangeL:
1901     case Op_CompareAndExchangeP:
1902     case Op_CompareAndExchangeN:
1903       return false;
1904     default:
1905       return false;
1906     }
1907   }
1908 
1909   // helper to determine the maximum number of Phi nodes we may need to
1910   // traverse when searching from a card mark membar for the merge mem
1911   // feeding a trailing membar or vice versa
1912 
1913   int max_phis()
1914   {
1915     if (UseG1GC) {
1916       return 4;
1917     } else if (UseConcMarkSweepGC && UseCondCardMark) {
1918       return 1;
1919     } else {
1920       return 0;
1921     }
1922   }
1923 
1924   // leading_to_normal
1925   //
1926   // graph traversal helper which detects the normal case Mem feed
1927   // from a release membar (or, optionally, its cpuorder child) to a
1928   // dependent volatile or acquire membar i.e. it ensures that one of
1929   // the following 3 Mem flow subgraphs is present.
1930   //
1931   //   MemBarRelease
1932   //  {MemBarCPUOrder} {leading}
1933   //          |  \      . . .
1934   //          |  StoreN/P[mo_release]  . . .
1935   //          |   /
1936   //         MergeMem
1937   //          |
1938   //  {MemBarCPUOrder}
1939   //   MemBarVolatile {trailing or card mark}
1940   //
1941   //   MemBarRelease
1942   //   MemBarCPUOrder {leading}
1943   //          |  \      . . .
1944   //          |  CompareAndSwapX  . . .
1945   //          |   /
1946   //         MergeMem
1947   //          |
1948   //   MemBarVolatile {card mark}
1949   //
1950   //   MemBarRelease
1951   //   MemBarCPUOrder {leading}
1952   //          |  \      . . .
1953   //          |  CompareAndSwapX  . . .
1954   //          |   /
1955   //         MergeMem
1956   //          |
1957   //   MemBarCPUOrder
1958   //   MemBarAcquire {trailing}
1959   //
1960   // if the correct configuration is present returns the trailing
1961   // or cardmark membar otherwise NULL.
1962   //
1963   // the input membar is expected to be either a cpuorder membar or a
1964   // release membar. in the latter case it should not have a cpu membar
1965   // child.
1966   //
1967   // the returned value may be a card mark or trailing membar
1968   //
1969 
1970   MemBarNode *leading_to_normal(MemBarNode *leading)
1971   {
1972     assert((leading->Opcode() == Op_MemBarRelease ||
1973             leading->Opcode() == Op_MemBarCPUOrder),
1974            "expecting a volatile or cpuroder membar!");
1975 
1976     // check the mem flow
1977     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1978 
1979     if (!mem) {
1980       return NULL;
1981     }
1982 
1983     Node *x = NULL;
1984     StoreNode * st = NULL;
1985     LoadStoreNode *cas = NULL;
1986     MergeMemNode *mm = NULL;
1987 
1988     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1989       x = mem->fast_out(i);
1990       if (x->is_MergeMem()) {
1991         if (mm != NULL) {
1992           return NULL;
1993         }
1994         // two merge mems is one too many
1995         mm = x->as_MergeMem();
1996       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1997         // two releasing stores/CAS nodes is one too many
1998         if (st != NULL || cas != NULL) {
1999           return NULL;
2000         }
2001         st = x->as_Store();
2002       } else if (is_CAS(x->Opcode())) {
2003         if (st != NULL || cas != NULL) {
2004           return NULL;
2005         }
2006         cas = x->as_LoadStore();
2007       }
2008     }
2009 
2010     // must have a store or a cas
2011     if (!st && !cas) {
2012       return NULL;
2013     }
2014 
2015     // must have a merge
2016     if (!mm) {
2017       return NULL;
2018     }
2019 
2020     Node *feed = NULL;
2021     if (cas) {
2022       // look for an SCMemProj
2023       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2024         x = cas->fast_out(i);
2025         if (x->Opcode() == Op_SCMemProj) {
2026           feed = x;
2027           break;
2028         }
2029       }
2030       if (feed == NULL) {
2031         return NULL;
2032       }
2033     } else {
2034       feed = st;
2035     }
2036     // ensure the feed node feeds the existing mergemem;
2037     for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2038       x = feed->fast_out(i);
2039       if (x == mm) {
2040         break;
2041       }
2042     }
2043     if (x != mm) {
2044       return NULL;
2045     }
2046 
2047     MemBarNode *mbar = NULL;
2048     // ensure the merge feeds to the expected type of membar
2049     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2050       x = mm->fast_out(i);
2051       if (x->is_MemBar()) {
2052         if (x->Opcode() == Op_MemBarCPUOrder) {
2053           // with a store any cpu order membar should precede a
2054           // trailing volatile membar. with a cas it should precede a
2055           // trailing acquire membar. in either case try to skip to
2056           // that next membar
2057           MemBarNode *y =  x->as_MemBar();
2058           y = child_membar(y);
2059           if (y != NULL) {
2060             // skip to this new membar to do the check
2061             x = y;
2062           }
2063           
2064         }
2065         if (x->Opcode() == Op_MemBarVolatile) {
2066           mbar = x->as_MemBar();
2067           // for a volatile store this can be either a trailing membar
2068           // or a card mark membar. for a cas it must be a card mark
2069           // membar
2070           guarantee(cas == NULL || is_card_mark_membar(mbar),
2071                     "in CAS graph volatile membar must be a card mark");
2072         } else if (cas != NULL && x->Opcode() == Op_MemBarAcquire) {
2073           mbar = x->as_MemBar();
2074         }
2075         break;
2076       }
2077     }
2078 
2079     return mbar;
2080   }
2081 
2082   // normal_to_leading
2083   //
2084   // graph traversal helper which detects the normal case Mem feed
2085   // from either a card mark or a trailing membar to a preceding
2086   // release membar (optionally its cpuorder child) i.e. it ensures
2087   // that one of the following 3 Mem flow subgraphs is present.
2088   //
2089   //   MemBarRelease
2090   //  {MemBarCPUOrder} {leading}
2091   //          |  \      . . .
2092   //          |  StoreN/P[mo_release]  . . .
2093   //          |   /
2094   //         MergeMem
2095   //          |
2096   //  {MemBarCPUOrder}
2097   //   MemBarVolatile {trailing or card mark}
2098   //
2099   //   MemBarRelease
2100   //   MemBarCPUOrder {leading}
2101   //          |  \      . . .
2102   //          |  CompareAndSwapX  . . .
2103   //          |   /
2104   //         MergeMem
2105   //          |
2106   //   MemBarVolatile {card mark}
2107   //
2108   //   MemBarRelease
2109   //   MemBarCPUOrder {leading}
2110   //          |  \      . . .
2111   //          |  CompareAndSwapX  . . .
2112   //          |   /
2113   //         MergeMem
2114   //          |
2115   //   MemBarCPUOrder
2116   //   MemBarAcquire {trailing}
2117   //
2118   // this predicate checks for the same flow as the previous predicate
2119   // but starting from the bottom rather than the top.
2120   //
  // if the configuration is present returns the cpuorder membar for
2122   // preference or when absent the release membar otherwise NULL.
2123   //
2124   // n.b. the input membar is expected to be a MemBarVolatile but
2125   // need not be a card mark membar.
2126 
  MemBarNode *normal_to_leading(const MemBarNode *barrier)
  {
    // input must be a volatile membar
    assert((barrier->Opcode() == Op_MemBarVolatile ||
            barrier->Opcode() == Op_MemBarAcquire),
           "expecting a volatile or an acquire membar");
    bool barrier_is_acquire = barrier->Opcode() == Op_MemBarAcquire;

    // if we have an intervening cpu order membar then start the
    // search from it

    Node *x = parent_membar(barrier);

    if (x == NULL) {
      // stick with the original barrier
      x = (Node *)barrier;
    } else if (x->Opcode() != Op_MemBarCPUOrder) {
      // any other barrier means this is not the graph we want
      return NULL;
    }

    // the Mem feed to the membar should be a merge
    x = x ->in(TypeFunc::Memory);
    if (!x->is_MergeMem())
      return NULL;

    MergeMemNode *mm = x->as_MergeMem();

    // the merge should get its Bottom mem feed from the leading membar
    x = mm->in(Compile::AliasIdxBot);

    // ensure this is a non control projection
    if (!x->is_Proj() || x->is_CFG()) {
      return NULL;
    }
    // if it is fed by a membar that's the one we want
    x = x->in(0);

    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *leading = x->as_MemBar();
    // reject invalid candidates
    if (!leading_membar(leading)) {
      return NULL;
    }

    // ok, we have a leading membar, now for the sanity clauses

    // the leading membar must feed Mem to a releasing store or CAS
    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
    StoreNode *st = NULL;
    LoadStoreNode *cas = NULL;
    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
        // two stores or CASes is one too many
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        st = x->as_Store();
      } else if (is_CAS(x->Opcode())) {
        if (st != NULL || cas != NULL) {
          return NULL;
        }
        cas = x->as_LoadStore();
      }
    }

    // we must have found exactly one of a store or a cas (the scan
    // above already rejected graphs containing more than one)
    if (st == NULL && cas == NULL) {
      // we have neither -- this is not a normal graph
      return NULL;
    }
    if (st == NULL) {
      // if we started from a volatile membar and found a CAS then the
      // original membar ought to be for a card mark
      guarantee((barrier_is_acquire || is_card_mark_membar(barrier)),
                "unexpected volatile barrier (i.e. not card mark) in CAS graph");
      // check that the CAS feeds the merge we used to get here via an
      // intermediary SCMemProj
      Node *scmemproj = NULL;
      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
        x = cas->fast_out(i);
        if (x->Opcode() == Op_SCMemProj) {
          scmemproj = x;
          break;
        }
      }
      if (scmemproj == NULL) {
        return NULL;
      }
      // the SCMemProj must feed the same merge we walked up through
      for (DUIterator_Fast imax, i = scmemproj->fast_outs(imax); i < imax; i++) {
        x = scmemproj->fast_out(i);
        if (x == mm) {
          return leading;
        }
      }
    } else {
      // we should not have found a store if we started from an acquire
      guarantee(!barrier_is_acquire,
                "unexpected trailing acquire barrier in volatile store graph");

      // the store should feed the merge we used to get here
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm) {
          return leading;
        }
      }
    }

    return NULL;
  }
2241 
2242   // card_mark_to_trailing
2243   //
2244   // graph traversal helper which detects extra, non-normal Mem feed
2245   // from a card mark volatile membar to a trailing membar i.e. it
2246   // ensures that one of the following three GC post-write Mem flow
2247   // subgraphs is present.
2248   //
2249   // 1)
2250   //     . . .
2251   //       |
2252   //   MemBarVolatile (card mark)
2253   //      |          |
2254   //      |        StoreCM
2255   //      |          |
2256   //      |        . . .
2257   //  Bot |  /
2258   //   MergeMem
2259   //      |
2260   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2261   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2262   //                                 
2263   //
2264   // 2)
2265   //   MemBarRelease/CPUOrder (leading)
2266   //    |
2267   //    |
2268   //    |\       . . .
2269   //    | \        |
2270   //    |  \  MemBarVolatile (card mark)
2271   //    |   \   |     |
2272   //     \   \  |   StoreCM    . . .
2273   //      \   \ |
2274   //       \  Phi
2275   //        \ /
2276   //        Phi  . . .
2277   //     Bot |   /
2278   //       MergeMem
2279   //         |
2280   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2281   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2282   //
2283   // 3)
2284   //   MemBarRelease/CPUOrder (leading)
2285   //    |
2286   //    |\
2287   //    | \
2288   //    |  \      . . .
2289   //    |   \       |
2290   //    |\   \  MemBarVolatile (card mark)
2291   //    | \   \   |     |
2292   //    |  \   \  |   StoreCM    . . .
2293   //    |   \   \ |
2294   //     \   \  Phi
2295   //      \   \ /
2296   //       \  Phi
2297   //        \ /
2298   //        Phi  . . .
2299   //     Bot |   /
2300   //       MergeMem
2301   //         |
2302   //         |
2303   //   {MemBarCPUOrder}            OR  MemBarCPUOrder
2304   //    MemBarVolatile {trailing}      MemBarAcquire {trailing}
2305   //
2306   // 4)
2307   //   MemBarRelease/CPUOrder (leading)
2308   //    |
2309   //    |\
2310   //    | \
2311   //    |  \
2312   //    |   \
2313   //    |\   \
2314   //    | \   \
2315   //    |  \   \        . . .
2316   //    |   \   \         |
2317   //    |\   \   \   MemBarVolatile (card mark)
2318   //    | \   \   \   /   |
2319   //    |  \   \   \ /  StoreCM    . . .
2320   //    |   \   \  Phi
2321   //     \   \   \ /
2322   //      \   \  Phi
2323   //       \   \ /
2324   //        \  Phi
2325   //         \ /
2326   //         Phi  . . .
2327   //      Bot |   /
2328   //       MergeMem
2329   //          |
2330   //          |
2331   //    MemBarCPUOrder
2332   //    MemBarAcquire {trailing}
2333   //
2334   // configuration 1 is only valid if UseConcMarkSweepGC &&
2335   // UseCondCardMark
2336   //
2337   // configuration 2, is only valid if UseConcMarkSweepGC &&
2338   // UseCondCardMark or if UseG1GC
2339   //
2340   // configurations 3 and 4 are only valid if UseG1GC.
2341   //
2342   // if a valid configuration is present returns the trailing membar
2343   // otherwise NULL.
2344   //
2345   // n.b. the supplied membar is expected to be a card mark
2346   // MemBarVolatile i.e. the caller must ensure the input node has the
2347   // correct operand and feeds Mem to a StoreCM node
2348 
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
  {
    // input must be a card mark volatile membar
    assert(is_card_mark_membar(barrier), "expecting a card mark membar");

    Node *feed = barrier->proj_out(TypeFunc::Memory);
    Node *x;
    MergeMemNode *mm = NULL;

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                // current search count

    // walk down from the barrier's memory projection, possibly via a
    // chain of Bottom Phis, until we reach a MergeMem
    bool retry_feed = true;
    while (retry_feed) {
      // see if we have a direct MergeMem feed
      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
        x = feed->fast_out(i);
        // the correct Phi will be merging a Bot memory slice
        if (x->is_MergeMem()) {
          mm = x->as_MergeMem();
          break;
        }
      }
      if (mm) {
        retry_feed = false;
      } else if (phicount++ < MAX_PHIS) {
        // the barrier may feed indirectly via up to MAX_PHIS Phi nodes
        PhiNode *phi = NULL;
        for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
          x = feed->fast_out(i);
          // the correct Phi will be merging a Bot memory slice
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            phi = x->as_Phi();
            break;
          }
        }
        if (!phi) {
          return NULL;
        }
        // look for another merge below this phi
        feed = phi;
      } else {
        // couldn't find a merge
        return NULL;
      }
    }

    // sanity check this feed turns up as the expected slice
    guarantee(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");

    MemBarNode *trailing = NULL;
    // be sure we have a trailing membar fed by the merge
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        // if this is an intervening cpu order membar skip to the
        // following membar
        if (x->Opcode() == Op_MemBarCPUOrder) {
          MemBarNode *y =  x->as_MemBar();
          y = child_membar(y);
          if (y != NULL) {
            x = y;
          }
        }
        // accept either a trailing volatile or a trailing acquire
        if (x->Opcode() == Op_MemBarVolatile ||
            x->Opcode() == Op_MemBarAcquire) {
          trailing = x->as_MemBar();
        }
        break;
      }
    }

    return trailing;
  }
2423 
2424   // trailing_to_card_mark
2425   //
2426   // graph traversal helper which detects extra, non-normal Mem feed
2427   // from a trailing volatile membar to a preceding card mark volatile
2428   // membar i.e. it identifies whether one of the three possible extra
2429   // GC post-write Mem flow subgraphs is present
2430   //
2431   // this predicate checks for the same flow as the previous predicate
2432   // but starting from the bottom rather than the top.
2433   //
2434   // if the configuration is present returns the card mark membar
2435   // otherwise NULL
2436   //
2437   // n.b. the supplied membar is expected to be a trailing
2438   // MemBarVolatile or MemBarAcquire i.e. the caller must ensure the
2439   // input node has the correct opcode
2440 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile ||
           trailing->Opcode() == Op_MemBarAcquire,
           "expecting a volatile or acquire membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    Node *x = (Node *)trailing;

    // look for a preceding cpu order membar
    MemBarNode *y = parent_membar(x->as_MemBar());
    if (y != NULL) {
      // make sure it is a cpu order membar
      if (y->Opcode() != Op_MemBarCPUOrder) {
        // this is not the graph we were looking for
        return NULL;
      }
      // start the search from here
      x = y;
    }

    // the Mem feed to the membar should be a merge
    x = x->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = max_phis(); // max phis we will search through
    int phicount = 0;                    // current search count

    // walk up the Bottom slice through any intervening Phis until we
    // reach a Proj (hopefully fed by the card mark membar)
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      if (x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // classify this Phi's inputs: a Memory Proj from a volatile
        // membar, a further Bottom Phi, or a feed from a leading membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or this the
        // wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2532 
2533   // trailing_to_leading
2534   //
2535   // graph traversal helper which checks the Mem flow up the graph
2536   // from a (non-card mark) trailing membar attempting to locate and
2537   // return an associated leading membar. it first looks for a
2538   // subgraph in the normal configuration (relying on helper
2539   // normal_to_leading). failing that it then looks for one of the
2540   // possible post-write card mark subgraphs linking the trailing node
  // to the card mark membar (relying on helper
2542   // trailing_to_card_mark), and then checks that the card mark membar
2543   // is fed by a leading membar (once again relying on auxiliary
2544   // predicate normal_to_leading).
2545   //
2546   // if the configuration is valid returns the cpuorder member for
2547   // preference or when absent the release membar otherwise NULL.
2548   //
2549   // n.b. the input membar is expected to be either a volatile or
2550   // acquire membar but in the former case must *not* be a card mark
2551   // membar.
2552 
2553   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2554   {
2555     assert((trailing->Opcode() == Op_MemBarAcquire ||
2556             trailing->Opcode() == Op_MemBarVolatile),
2557            "expecting an acquire or volatile membar");
2558     assert((trailing->Opcode() != Op_MemBarVolatile ||
2559             !is_card_mark_membar(trailing)),
2560            "not expecting a card mark membar");
2561 
2562     MemBarNode *leading = normal_to_leading(trailing);
2563 
2564     if (leading) {
2565       return leading;
2566     }
2567 
2568     // there is no normal path from trailing to leading membar. see if
2569     // we can arrive via a card mark membar
2570 
2571     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2572 
2573     if (!card_mark_membar) {
2574       return NULL;
2575     }
2576 
2577     return normal_to_leading(card_mark_membar);
2578   }
2579 
2580   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2581 
2582 bool unnecessary_acquire(const Node *barrier)
2583 {
2584   assert(barrier->is_MemBar(), "expecting a membar");
2585 
2586   if (UseBarriersForVolatile) {
2587     // we need to plant a dmb
2588     return false;
2589   }
2590 
2591   // a volatile read derived from bytecode (or also from an inlined
2592   // SHA field read via LibraryCallKit::load_field_from_object)
2593   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2594   // with a bogus read dependency on it's preceding load. so in those
2595   // cases we will find the load node at the PARMS offset of the
2596   // acquire membar.  n.b. there may be an intervening DecodeN node.
2597 
2598   Node *x = barrier->lookup(TypeFunc::Parms);
2599   if (x) {
2600     // we are starting from an acquire and it has a fake dependency
2601     //
2602     // need to check for
2603     //
2604     //   LoadX[mo_acquire]
2605     //   {  |1   }
2606     //   {DecodeN}
2607     //      |Parms
2608     //   MemBarAcquire*
2609     //
2610     // where * tags node we were passed
2611     // and |k means input k
2612     if (x->is_DecodeNarrowPtr()) {
2613       x = x->in(1);
2614     }
2615 
2616     return (x->is_Load() && x->as_Load()->is_acquire());
2617   }
2618 
2619   // other option for unnecessary membar is that it is a trailing node
2620   // belonging to a CAS
2621 
2622   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2623 
2624   return leading != NULL;
2625 }
2626 
2627 bool needs_acquiring_load(const Node *n)
2628 {
2629   assert(n->is_Load(), "expecting a load");
2630   if (UseBarriersForVolatile) {
2631     // we use a normal load and a dmb
2632     return false;
2633   }
2634 
2635   LoadNode *ld = n->as_Load();
2636 
2637   if (!ld->is_acquire()) {
2638     return false;
2639   }
2640 
2641   // check if this load is feeding an acquire membar
2642   //
2643   //   LoadX[mo_acquire]
2644   //   {  |1   }
2645   //   {DecodeN}
2646   //      |Parms
2647   //   MemBarAcquire*
2648   //
2649   // where * tags node we were passed
2650   // and |k means input k
2651 
2652   Node *start = ld;
2653   Node *mbacq = NULL;
2654 
2655   // if we hit a DecodeNarrowPtr we reset the start node and restart
2656   // the search through the outputs
2657  restart:
2658 
2659   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2660     Node *x = start->fast_out(i);
2661     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2662       mbacq = x;
2663     } else if (!mbacq &&
2664                (x->is_DecodeNarrowPtr() ||
2665                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2666       start = x;
2667       goto restart;
2668     }
2669   }
2670 
2671   if (mbacq) {
2672     return true;
2673   }
2674 
2675   return false;
2676 }
2677 
2678 bool unnecessary_release(const Node *n)
2679 {
2680   assert((n->is_MemBar() &&
2681           n->Opcode() == Op_MemBarRelease),
2682          "expecting a release membar");
2683 
2684   if (UseBarriersForVolatile) {
2685     // we need to plant a dmb
2686     return false;
2687   }
2688 
2689   // if there is a dependent CPUOrder barrier then use that as the
2690   // leading
2691 
2692   MemBarNode *barrier = n->as_MemBar();
2693   // check for an intervening cpuorder membar
2694   MemBarNode *b = child_membar(barrier);
2695   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2696     // ok, so start the check from the dependent cpuorder barrier
2697     barrier = b;
2698   }
2699 
2700   // must start with a normal feed
2701   MemBarNode *child_barrier = leading_to_normal(barrier);
2702 
2703   if (!child_barrier) {
2704     return false;
2705   }
2706 
2707   if (!is_card_mark_membar(child_barrier)) {
2708     // this is the trailing membar and we are done
2709     return true;
2710   }
2711 
2712   // must be sure this card mark feeds a trailing membar
2713   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2714   return (trailing != NULL);
2715 }
2716 
2717 bool unnecessary_volatile(const Node *n)
2718 {
2719   // assert n->is_MemBar();
2720   if (UseBarriersForVolatile) {
2721     // we need to plant a dmb
2722     return false;
2723   }
2724 
2725   MemBarNode *mbvol = n->as_MemBar();
2726 
2727   // first we check if this is part of a card mark. if so then we have
2728   // to generate a StoreLoad barrier
2729 
2730   if (is_card_mark_membar(mbvol)) {
2731       return false;
2732   }
2733 
2734   // ok, if it's not a card mark then we still need to check if it is
2735   // a trailing membar of a volatile put graph.
2736 
2737   return (trailing_to_leading(mbvol) != NULL);
2738 }
2739 
2740 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2741 
2742 bool needs_releasing_store(const Node *n)
2743 {
2744   // assert n->is_Store();
2745   if (UseBarriersForVolatile) {
2746     // we use a normal store and dmb combination
2747     return false;
2748   }
2749 
2750   StoreNode *st = n->as_Store();
2751 
2752   // the store must be marked as releasing
2753   if (!st->is_release()) {
2754     return false;
2755   }
2756 
2757   // the store must be fed by a membar
2758 
2759   Node *x = st->lookup(StoreNode::Memory);
2760 
2761   if (! x || !x->is_Proj()) {
2762     return false;
2763   }
2764 
2765   ProjNode *proj = x->as_Proj();
2766 
2767   x = proj->lookup(0);
2768 
2769   if (!x || !x->is_MemBar()) {
2770     return false;
2771   }
2772 
2773   MemBarNode *barrier = x->as_MemBar();
2774 
2775   // if the barrier is a release membar or a cpuorder mmebar fed by a
2776   // release membar then we need to check whether that forms part of a
2777   // volatile put graph.
2778 
2779   // reject invalid candidates
2780   if (!leading_membar(barrier)) {
2781     return false;
2782   }
2783 
2784   // does this lead a normal subgraph?
2785   MemBarNode *mbvol = leading_to_normal(barrier);
2786 
2787   if (!mbvol) {
2788     return false;
2789   }
2790 
2791   // all done unless this is a card mark
2792   if (!is_card_mark_membar(mbvol)) {
2793     return true;
2794   }
2795 
2796   // we found a card mark -- just make sure we have a trailing barrier
2797 
2798   return (card_mark_to_trailing(mbvol) != NULL);
2799 }
2800 
2801 // predicate controlling translation of CAS
2802 //
2803 // returns true if CAS needs to use an acquiring load otherwise false
2804 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmbs are planted instead, so a plain ldxr suffices
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // debug-only sanity check: verify the CAS really is embedded in the
  // leading release+cpuorder ... trailing acquire subgraph the
  // compiler is expected to have built around it
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  guarantee(barrier->Opcode() == Op_MemBarCPUOrder,
            "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  guarantee(mbar != NULL, "CAS not embedded in normal graph!");

  // if this is a card mark membar check we have a trailing acquire

  if (is_card_mark_membar(mbar)) {
    mbar = card_mark_to_trailing(mbar);
  }

  guarantee(mbar != NULL, "card mark membar for CAS not embedded in normal graph!");

  guarantee(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2857 
2858 // predicate controlling translation of StoreCM
2859 //
2860 // returns true if a StoreStore must precede the card write otherwise
2861 // false
2862 
2863 bool unnecessary_storestore(const Node *storecm)
2864 {
2865   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2866 
2867   // we need to generate a dmb ishst between an object put and the
2868   // associated card mark when we are using CMS without conditional
2869   // card marking
2870 
2871   if (UseConcMarkSweepGC && !UseCondCardMark) {
2872     return false;
2873   }
2874 
2875   // a storestore is unnecesary in all other cases
2876 
2877   return true;
2878 }
2879 
2880 
2881 #define __ _masm.
2882 
2883 // advance declarations for helper functions to convert register
2884 // indices to register objects
2885 
2886 // the ad file has to provide implementations of certain methods
2887 // expected by the generic code
2888 //
2889 // REQUIRED FUNCTIONALITY
2890 
2891 //=============================================================================
2892 
2893 // !!!!! Special hack to get all types of calls to specify the byte offset
2894 //       from the start of the call to the point where the return address
2895 //       will point.
2896 
2897 int MachCallStaticJavaNode::ret_addr_offset()
2898 {
2899   // call should be a simple bl
2900   int off = 4;
2901   return off;
2902 }
2903 
2904 int MachCallDynamicJavaNode::ret_addr_offset()
2905 {
2906   return 16; // movz, movk, movk, bl
2907 }
2908 
2909 int MachCallRuntimeNode::ret_addr_offset() {
2910   // for generated stubs the call will be
2911   //   far_call(addr)
2912   // for real runtime callouts it will be six instructions
2913   // see aarch64_enc_java_to_runtime
2914   //   adr(rscratch2, retaddr)
2915   //   lea(rscratch1, RuntimeAddress(addr)
2916   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2917   //   blrt rscratch1
2918   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2919   if (cb) {
2920     return MacroAssembler::far_branch_size();
2921   } else {
2922     return 6 * NativeInstruction::instruction_size;
2923   }
2924 }
2925 
2926 // Indicate if the safepoint node needs the polling page as an input
2927 
2928 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2930 // instruction itself. so we cannot plant a mov of the safepoint poll
2931 // address followed by a load. setting this to true means the mov is
2932 // scheduled as a prior instruction. that's better for scheduling
2933 // anyway.
2934 
bool SafePointNode::needs_polling_address_input()
{
  // see the note above: the poll-address mov must be schedulable ahead
  // of the load so the oop map lands on the load instruction itself
  return true;
}
2939 
2940 //=============================================================================
2941 
2942 #ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // debug listing entry for the brk emitted below
  st->print("BREAKPOINT");
}
#endif

// emit a brk instruction, trapping into the debugger/simulator
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the shared code compute the size from the emitted instructions
  return MachNode::size(ra_);
}
2956 
2957 //=============================================================================
2958 
2959 #ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    // n.b. emit() below issues _count nop instructions (4 bytes each,
    // see size()), yet the label reads "bytes" -- NOTE(review):
    // confirm whether this should say instructions instead
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
2963 #endif
2964 
2965   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2966     MacroAssembler _masm(&cbuf);
2967     for (int i = 0; i < _count; i++) {
2968       __ nop();
2969     }
2970   }
2971 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is one fixed-width (4 byte) instruction
    return _count * NativeInstruction::instruction_size;
  }
2975 
2976 //=============================================================================
// the constant base produces no value in a register
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// constants are reached via absolute addressing on AArch64, so the
// constant table needs no base offset
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// the constant base node is never expanded after register allocation,
// so postalloc_expand must never be reached
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// no code is needed to materialize the (absolute) constant base
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  // nothing is emitted (see emit above)
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
3001 
3002 #ifndef PRODUCT
// print a listing that mirrors the frame-building sequence emitted by
// MachPrologNode::emit
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit an immediate-offset sub; larger ones need the
  // offset materialised in rscratch1 first
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3022 #endif
3023 
// build the method frame: patchable nop, optional stack bang, frame
// allocation, then bookkeeping hooks
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  // simulator-only hook marking method entry
  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3059 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  // the prolog emits no relocatable constants
  return 0;
}
3070 
3071 //=============================================================================
3072 
3073 #ifndef PRODUCT
// print a listing that mirrors the frame-teardown sequence emitted by
// MachEpilogNode::emit
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // small frames fit immediate-offset forms; larger ones need the
  // offset materialised in rscratch1 first
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3097 #endif
3098 
// tear down the method frame and poll for a safepoint on return
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  // simulator-only hook marking method re-entry (return)
  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // touch the polling page so the VM can stop this thread at a
  // safepoint on method return
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3118 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  // no dedicated pipeline class; use the generic one
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
3140 
3141 //=============================================================================
3142 
3143 // Figure out which register class each belongs in: rc_int, rc_float or
3144 // rc_stack.
3145 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3146 
3147 static enum RC rc_class(OptoReg::Name reg) {
3148 
3149   if (reg == OptoReg::Bad) {
3150     return rc_bad;
3151   }
3152 
3153   // we have 30 int registers * 2 halves
3154   // (rscratch1 and rscratch2 are omitted)
3155 
3156   if (reg < 60) {
3157     return rc_int;
3158   }
3159 
3160   // we have 32 float register * 2 halves
3161   if (reg < 60 + 128) {
3162     return rc_float;
3163   }
3164 
3165   // Between float regs & stack is the flags regs.
3166   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3167 
3168   return rc_stack;
3169 }
3170 
3171 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3172   Compile* C = ra_->C;
3173 
3174   // Get registers to move.
3175   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3176   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3177   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3178   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3179 
3180   enum RC src_hi_rc = rc_class(src_hi);
3181   enum RC src_lo_rc = rc_class(src_lo);
3182   enum RC dst_hi_rc = rc_class(dst_hi);
3183   enum RC dst_lo_rc = rc_class(dst_lo);
3184 
3185   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3186 
3187   if (src_hi != OptoReg::Bad) {
3188     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3189            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3190            "expected aligned-adjacent pairs");
3191   }
3192 
3193   if (src_lo == dst_lo && src_hi == dst_hi) {
3194     return 0;            // Self copy, no move.
3195   }
3196 
3197   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3198               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3199   int src_offset = ra_->reg2offset(src_lo);
3200   int dst_offset = ra_->reg2offset(dst_lo);
3201 
3202   if (bottom_type()->isa_vect() != NULL) {
3203     uint ireg = ideal_reg();
3204     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3205     if (cbuf) {
3206       MacroAssembler _masm(cbuf);
3207       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3208       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3209         // stack->stack
3210         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3211         if (ireg == Op_VecD) {
3212           __ unspill(rscratch1, true, src_offset);
3213           __ spill(rscratch1, true, dst_offset);
3214         } else {
3215           __ spill_copy128(src_offset, dst_offset);
3216         }
3217       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3218         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3219                ireg == Op_VecD ? __ T8B : __ T16B,
3220                as_FloatRegister(Matcher::_regEncode[src_lo]));
3221       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3222         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3223                        ireg == Op_VecD ? __ D : __ Q,
3224                        ra_->reg2offset(dst_lo));
3225       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3226         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3227                        ireg == Op_VecD ? __ D : __ Q,
3228                        ra_->reg2offset(src_lo));
3229       } else {
3230         ShouldNotReachHere();
3231       }
3232     }
3233   } else if (cbuf) {
3234     MacroAssembler _masm(cbuf);
3235     switch (src_lo_rc) {
3236     case rc_int:
3237       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3238         if (is64) {
3239             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3240                    as_Register(Matcher::_regEncode[src_lo]));
3241         } else {
3242             MacroAssembler _masm(cbuf);
3243             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3244                     as_Register(Matcher::_regEncode[src_lo]));
3245         }
3246       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3247         if (is64) {
3248             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3249                      as_Register(Matcher::_regEncode[src_lo]));
3250         } else {
3251             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3252                      as_Register(Matcher::_regEncode[src_lo]));
3253         }
3254       } else {                    // gpr --> stack spill
3255         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3256         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3257       }
3258       break;
3259     case rc_float:
3260       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3261         if (is64) {
3262             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3263                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3264         } else {
3265             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3266                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3267         }
3268       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3269           if (cbuf) {
3270             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3271                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3272         } else {
3273             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3274                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3275         }
3276       } else {                    // fpr --> stack spill
3277         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3278         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3279                  is64 ? __ D : __ S, dst_offset);
3280       }
3281       break;
3282     case rc_stack:
3283       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3284         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3285       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3286         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3287                    is64 ? __ D : __ S, src_offset);
3288       } else {                    // stack --> stack copy
3289         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3290         __ unspill(rscratch1, is64, src_offset);
3291         __ spill(rscratch1, is64, dst_offset);
3292       }
3293       break;
3294     default:
3295       assert(false, "bad rc_class for spill");
3296       ShouldNotReachHere();
3297     }
3298   }
3299 
3300   if (st) {
3301     st->print("spill ");
3302     if (src_lo_rc == rc_stack) {
3303       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3304     } else {
3305       st->print("%s -> ", Matcher::regName[src_lo]);
3306     }
3307     if (dst_lo_rc == rc_stack) {
3308       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3309     } else {
3310       st->print("%s", Matcher::regName[dst_lo]);
3311     }
3312     if (bottom_type()->isa_vect() != NULL) {
3313       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3314     } else {
3315       st->print("\t# spill size = %d", is64 ? 64:32);
3316     }
3317   }
3318 
3319   return 0;
3320 
3321 }
3322 
#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // without a register allocator we can only name the nodes involved
  if (!ra_)
    st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
  else
    implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  // delegate to the shared helper in emit mode (cbuf set, no stream)
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  // variable size; let the shared code measure the emitted code
  return MachNode::size(ra_);
}
3339 
3340 //=============================================================================
3341 
3342 #ifndef PRODUCT
3343 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3344   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3345   int reg = ra_->get_reg_first(this);
3346   st->print("add %s, rsp, #%d]\t# box lock",
3347             Matcher::regName[reg], offset);
3348 }
3349 #endif
3350 
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  // materialize the address of the lock slot: reg = sp + offset
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    // offsets too large for an add immediate are not expected here;
    // size() below assumes a single 4-byte instruction
    ShouldNotReachHere();
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
3368 
3369 //=============================================================================
3370 
#ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
    // NOTE(review): this branch loads an uncompressed klass but the
    // printed annotation still reads "compressed klass"
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

// unverified entry point: verify the receiver's klass matches the
// inline cache before falling through to the verified entry point
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // variable size; let the shared code measure the emitted code
  return MachNode::size(ra_);
}
3406 
3407 // REQUIRED EMIT CODE
3408 
3409 //=============================================================================
3410 
3411 // Emit exception handler code.
3412 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3413 {
3414   // mov rscratch1 #exception_blob_entry_point
3415   // br rscratch1
3416   // Note that the code buffer's insts_mark is always relative to insts.
3417   // That's why we must use the macroassembler to generate a handler.
3418   MacroAssembler _masm(&cbuf);
3419   address base = __ start_a_stub(size_exception_handler());
3420   if (base == NULL) {
3421     ciEnv::current()->record_failure("CodeCache is full");
3422     return 0;  // CodeBuffer::expand failed
3423   }
3424   int offset = __ offset();
3425   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3426   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3427   __ end_a_stub();
3428   return offset;
3429 }
3430 
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 if
// the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the current pc in lr before jumping to the deopt blob,
  // which presumably uses it as the return address -- TODO confirm
  // against SharedRuntime::deopt_blob()->unpack().
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3451 
3452 // REQUIRED MATCHER CODE
3453 
3454 //=============================================================================
3455 
3456 const bool Matcher::match_rule_supported(int opcode) {
3457 
3458   switch (opcode) {
3459   default:
3460     break;
3461   }
3462 
3463   if (!has_match_rule(opcode)) {
3464     return false;
3465   }
3466 
3467   return true;  // Per default match rules are supported.
3468 }
3469 
3470 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3471 
3472   // TODO
3473   // identify extra cases that we might want to provide match rules for
3474   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3475   bool ret_value = match_rule_supported(opcode);
3476   // Add rules here.
3477 
3478   return ret_value;  // Per default match rules are supported.
3479 }
3480 
// This port does not advertise predicated (masked) vector operations.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Register-pressure threshold for float registers; the shared default
// is used unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on AArch64 (no x87-style FPU register-stack offsets);
// Unimplemented() aborts if this is ever reached.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3494 
3495 // Is this branch offset short enough that a short branch can be used?
3496 //
3497 // NOTE: If the platform does not provide any short branch variants, then
3498 //       this method should return false for offset 0.
3499 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3500   // The passed offset is relative to address of the branch.
3501 
3502   return (-32768 <= offset && offset < 32768);
3503 }
3504 
// Can any 64-bit constant be materialized cheaply enough that a single
// long store beats two int stores?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3515 
// Vector width in bytes.
// Returns 0 when vectorization is not worthwhile for element type bt.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  // Cap at 16 bytes (128-bit registers) or MaxVectorSize, whichever is
  // smaller.
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

// Smallest vector length (in elements) the matcher will use, but always
// at least two elements.
const int Matcher::min_vector_size(const BasicType bt) {
//  For the moment limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
    if (size < 2) size = 2;
    return size;
}
3536 
3537 // Vector ideal reg.
3538 const uint Matcher::vector_ideal_reg(int len) {
3539   switch(len) {
3540     case  8: return Op_VecD;
3541     case 16: return Op_VecX;
3542   }
3543   ShouldNotReachHere();
3544   return 0;
3545 }
3546 
// Vector shift counts always live in a 128-bit register, regardless of
// the operand size.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}
3550 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are permitted unless the AlignVector
// flag demands alignment.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
3577 
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only when compressed oops are unshifted (shift == 0).
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3611 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// No implicit-null fixup is required on AArch64; Unimplemented() aborts
// if this is ever reached.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3643 
3644 // Return whether or not this register is ever used as an argument.
3645 // This function is used on startup to build the trampoline stubs in
3646 // generateOptoStub.  Registers not mentioned will be killed by the VM
3647 // call in the trampoline, and arguments in those registers not be
3648 // available to the callee.
3649 bool Matcher::can_be_java_arg(int reg)
3650 {
3651   return
3652     reg ==  R0_num || reg == R0_H_num ||
3653     reg ==  R1_num || reg == R1_H_num ||
3654     reg ==  R2_num || reg == R2_H_num ||
3655     reg ==  R3_num || reg == R3_H_num ||
3656     reg ==  R4_num || reg == R4_H_num ||
3657     reg ==  R5_num || reg == R5_H_num ||
3658     reg ==  R6_num || reg == R6_H_num ||
3659     reg ==  R7_num || reg == R7_H_num ||
3660     reg ==  V0_num || reg == V0_H_num ||
3661     reg ==  V1_num || reg == V1_H_num ||
3662     reg ==  V2_num || reg == V2_H_num ||
3663     reg ==  V3_num || reg == V3_H_num ||
3664     reg ==  V4_num || reg == V4_H_num ||
3665     reg ==  V5_num || reg == V5_H_num ||
3666     reg ==  V6_num || reg == V6_H_num ||
3667     reg ==  V7_num || reg == V7_H_num;
3668 }
3669 
// Any register that can carry a Java argument may also hold a spilled
// one.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No hand-written assembler sequence for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3678 
// Register for DIVI projection of divmodI.  The divmod projections are
// not used on this port; ShouldNotReachHere() aborts if any is called.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved in FP across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3705 
3706 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3707   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3708     Node* u = addp->fast_out(i);
3709     if (u->is_Mem()) {
3710       int opsize = u->as_Mem()->memory_size();
3711       assert(opsize > 0, "unexpected memory operand size");
3712       if (u->as_Mem()->memory_size() != (1<<shift)) {
3713         return false;
3714       }
3715     }
3716   }
3717   return true;
3718 }
3719 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's inputs were pushed for cloning into the
// address, false when the address should be computed into a register.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: base + (index << scale) -- fold the shift into the address
  // only if every memory use accesses exactly 1 << scale bytes.
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    // Also fold an int->long conversion feeding the shift, if it has no
    // other uses.
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // Case 2: base + I2L(index) -- subsume the conversion into the
    // address.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3762 
// No platform-specific reshaping of address expressions is performed.
void Compile::reshape_address(AddPNode* addp) {
}
3765 
3766 // helper for encoding java_to_runtime calls on sim
3767 //
3768 // this is needed to compute the extra arguments required when
3769 // planting a call to the simulator blrt instruction. the TypeFunc
3770 // can be queried to identify the counts for integral, and floating
3771 // arguments and the return type
3772 
3773 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3774 {
3775   int gps = 0;
3776   int fps = 0;
3777   const TypeTuple *domain = tf->domain();
3778   int max = domain->cnt();
3779   for (int i = TypeFunc::Parms; i < max; i++) {
3780     const Type *t = domain->field_at(i);
3781     switch(t->basic_type()) {
3782     case T_FLOAT:
3783     case T_DOUBLE:
3784       fps++;
3785     default:
3786       gps++;
3787     }
3788   }
3789   gpcnt = gps;
3790   fpcnt = fps;
3791   BasicType rt = tf->return_type();
3792   switch (rt) {
3793   case T_VOID:
3794     rtype = MacroAssembler::ret_type_void;
3795     break;
3796   default:
3797     rtype = MacroAssembler::ret_type_integral;
3798     break;
3799   case T_FLOAT:
3800     rtype = MacroAssembler::ret_type_float;
3801     break;
3802   case T_DOUBLE:
3803     rtype = MacroAssembler::ret_type_double;
3804     break;
3805   }
3806 }
3807 
// Emit a volatile memory access INSN of REG at [BASE].  The guarantees
// enforce that only a plain base register is used: no index, scaling or
// displacement is permitted for these accesses.  SCRATCH is accepted
// for interface uniformity but is not used by the macro itself.
// NOTE(review): this expands to two statements (the _masm declaration
// plus a block), so it is only safe where a declaration may appear --
// it is used exclusively at the top of enc_class bodies.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }
3816 
// Member-function-pointer types for the loadStore helpers below:
// integer loads/stores, FP loads/stores, and SIMD vector accesses.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3821 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  // Emits insn on masm for reg at [base, disp] or [base, index, extend].
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // Index was narrowed from int: sign-extend (sxtw) while scaling.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: simple base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // Index register present: displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3852 
  // FP-register variant of loadStore.  Only the scaled I2L operand
  // kinds appear in the sign-extend switch here; presumably the
  // unscaled INDINDEXI2L/I2LN cases handled by the integer variant do
  // not arise for FP accesses -- TODO confirm against the FP operands.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3875 
  // SIMD/vector variant of loadStore; T selects the SIMD register
  // variant.  Vector indexes are always lsl-scaled, never sign-extended.
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
3887 
3888 %}
3889 
3890 
3891 
3892 //----------ENCODING BLOCK-----------------------------------------------------
3893 // This block specifies the encoding classes used by the compiler to
3894 // output byte streams.  Encoding classes are parameterized macros
3895 // used by Machine Instruction Nodes in order to generate the bit
3896 // encoding of the instruction.  Operands specify their base encoding
3897 // interface with the interface keyword.  There are currently
3898 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3899 // COND_INTER.  REG_INTER causes an operand to generate a function
3900 // which returns its register number when queried.  CONST_INTER causes
3901 // an operand to generate a function which returns the value of the
3902 // constant when queried.  MEMORY_INTER causes an operand to generate
3903 // four functions which return the Base Register, the Index Register,
3904 // the Scale Value, and the Offset Value of the operand when queried.
3905 // COND_INTER causes an operand to generate six functions which return
3906 // the encoding code (ie - encoding bits for the instruction)
3907 // associated with each basic boolean condition for a conditional
3908 // instruction.
3909 //
3910 // Instructions specify two basic values for encoding.  Again, a
3911 // function is available to check if the constant displacement is an
3912 // oop. They use the ins_encode keyword to specify their encoding
3913 // classes (which must be a sequence of enc_class names, and their
3914 // parameters, specified in the encoding block), and they use the
3915 // opcode keyword to specify, in order, their primary, secondary, and
3916 // tertiary opcode.  Only the opcode sections which a particular
3917 // instruction needs for encoding need to be specified.
3918 encode %{
3919   // Build emit functions for each basic byte or larger field in the
3920   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3921   // from C++ code in the enc_class source block.  Emit functions will
3922   // live in the main source block for now.  In future, we can
3923   // generalize this by adding a syntax that specifies the sizes of
3924   // fields in an order, so that the adlc can build the emit functions
3925   // automagically
3926 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    // Fails loudly at run time so unfinished rules cannot pass silently.
    __ unimplemented("C2 catch all");
  %}
3932 
  // BEGIN Non-volatile memory access
  //
  // Each enc_class below forwards to a loadStore helper, passing the
  // memory operand's opcode so the helper can choose the index-extend
  // mode (see loadStore above).  Some names appear twice with different
  // destination operand types (iRegI vs iRegL); adlc presumably selects
  // by the declared operand signature -- TODO confirm.

  // Load signed byte into an int register.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed byte into a long register.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned (zero-extended) byte.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed halfword into an int register.
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned (zero-extended) halfword.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed 32-bit word into a long register.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load single-precision float.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load double-precision float.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: S = 32-bit, D = 64-bit, Q = 128-bit SIMD variants.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4036 
  // Store encodings.  The *0 variants store the architectural zero
  // register (zr) and therefore need no source operand.

  // Store byte.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero byte.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store zero byte, preceded by a StoreStore barrier so earlier stores
  // are visible first.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store halfword.
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 32-bit word.
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store 64-bit doubleword.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store single-precision float.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store double-precision float.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: S = 32-bit, D = 64-bit, Q = 128-bit SIMD variants.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4129 
4130   // END Non-volatile memory access
4131 
4132   // volatile loads and stores
4133 
4134   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4135     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4136                  rscratch1, stlrb);
4137   %}
4138 
4139   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4140     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4141                  rscratch1, stlrh);
4142   %}
4143 
4144   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4145     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4146                  rscratch1, stlrw);
4147   %}
4148 
4149 
4150   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4151     Register dst_reg = as_Register($dst$$reg);
4152     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4153              rscratch1, ldarb);
4154     __ sxtbw(dst_reg, dst_reg);
4155   %}
4156 
4157   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4158     Register dst_reg = as_Register($dst$$reg);
4159     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4160              rscratch1, ldarb);
4161     __ sxtb(dst_reg, dst_reg);
4162   %}
4163 
  // Volatile (acquiring) load encodings.  Each uses the MOV_VOLATILE helper,
  // which materializes the effective address into rscratch1 when it cannot
  // be used directly, then emits the given load-acquire instruction.

  // load-acquire byte into a 32-bit register
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire byte into a 64-bit register
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // load-acquire halfword, then sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // load-acquire halfword, then sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // load-acquire halfword into a 32-bit register
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire halfword into a 64-bit register
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // load-acquire 32-bit word
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 32-bit word into a 64-bit register
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // load-acquire 64-bit doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // load-acquire float: load the raw bits into rscratch1, then move them
  // into the FP register
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // load-acquire double: load the raw bits into rscratch1, then move them
  // into the FP register
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4224 
  // Volatile (releasing) store encodings.

  // store-release 64-bit value
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      // copy sp into a storable scratch register first
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // store-release float: move the raw bits into rscratch2, then stlrw
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      // NOTE(review): inner scope presumably avoids clashing with a _masm
      // declared inside MOV_VOLATILE -- confirm against the macro definition
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // store-release double: move the raw bits into rscratch2, then stlr
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      // NOTE(review): inner scope presumably avoids clashing with a _masm
      // declared inside MOV_VOLATILE -- confirm against the macro definition
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4258 
  // synchronized read/update encodings

  // load-acquire exclusive (ldaxr).  ldaxr takes only a base register, so
  // any index/displacement is folded into rscratch1 with lea first.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale): needs two lea steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4289 
  // store-release exclusive (stlxr).  The store status lands in rscratch1
  // (0 means the exclusive store succeeded); the trailing cmpw translates
  // that into condition flags for the surrounding instruct rule.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp + (index << scale): needs two lea steps
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // flags: EQ iff the exclusive store succeeded (status == 0)
    __ cmpw(rscratch1, zr);
  %}
4319 
  // Compare-and-exchange encodings.  All delegate to
  // MacroAssembler::cmpxchg and require a bare base address (no index, no
  // displacement) -- enforced by the guarantee.  The result register is
  // noreg; the outcome is consumed via flags (see aarch64_enc_cset_eq).

  // 64-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS with acquire AND release ordering
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire AND release ordering
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4372 
4373 
  // auxiliary used for CompareAndSwapX to set result register
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    // res := 1 if the preceding compare left EQ set, else 0
    __ cset(res_reg, Assembler::EQ);
  %}
4380 
  // prefetch encodings

  // prefetch for store (PSTL1KEEP), handling the same three addressing
  // shapes as the exclusive-access encodings above
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        // fold the displacement into rscratch1 first
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4401 
  // mov encodings
4403 
  // 32-bit immediate move; zero uses the move-from-zr form
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // 64-bit immediate move; zero uses the move-from-zr form
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4425 
  // Pointer-constant move.  Oop and metadata constants carry relocations;
  // small non-relocated values (below the VM page size) are moved directly,
  // anything else is formed with adrp + add.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      // NULL and 1 are handled by dedicated encodings (mov_p0 / mov_p1)
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // NULL pointer constant
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // pointer constant 1
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // polling page address; page-aligned so adrp alone must suffice
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // card-table byte map base address
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // narrow (compressed) oop constant
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      // NULL is handled by the dedicated mov_n0 encoding
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // narrow NULL constant
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // narrow (compressed) klass constant
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4508 
  // arithmetic encodings

  // 32-bit add/subtract of an immediate.  One encoding serves both ops:
  // the rule's $primary selects subtract by negating the constant, and the
  // sign of the (possibly negated) constant picks addw vs subw so the
  // assembler always sees a non-negative immediate.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract of an immediate; same $primary trick as above
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}

  // 32-bit division via corrected_idivl (want_remainder == false)
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit division via corrected_idivq (want_remainder == false)
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder via corrected_idivl (want_remainder == true)
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder via corrected_idivq (want_remainder == true)
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4570 
  // compare instruction encodings

  // 32-bit register-register compare
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-range immediate; negative constants
  // are compared by adding the negated value (flags end up equivalent)
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate, materialized in rscratch1
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit-range immediate
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case: -val == val, so it must be
    // materialized in a register and compared directly
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate, materialized in rscratch1
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // pointer compare
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // narrow-oop compare (32-bit)
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // pointer null test
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // narrow-oop null test
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4654 
  // unconditional branch to a label
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // conditional branch; the rule's cmpOp operand supplies the condition code
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // conditional branch, unsigned-comparison variant (same emission; the
  // cmpOpU operand encodes the unsigned condition codes)
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4672 
  // Slow-path subtype check: walks the secondary-supers list via
  // check_klass_subtype_slow_path.  Falls through on a hit; the miss label
  // is bound at the end so flags/result reflect the outcome either way.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       // NOTE(review): $primary variant zeroes result on the hit path --
       // confirm which instruct rules set primary before relying on this
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4690 
  // Static Java call.  Runtime wrappers (no _method) get a plain runtime
  // trampoline call; real Java targets get a static/opt-virtual relocation
  // plus an emitted to-interpreter stub.  Either path can fail if the code
  // cache is full, which is reported via ciEnv::record_failure.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Dynamic (virtual/interface) Java call through an inline cache
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4727 
  // Post-call epilog; stack-depth verification is not implemented on AArch64
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4735 
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable via a trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // target is outside the code cache: call through blrt with the
      // argument counts/types derived from the call's type signature
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // pop the breadcrumb slot
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4766 
  // jump to the rethrow stub to re-raise a pending exception
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // method return
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // tail call: jump to the target, leaving the current frame in place
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // tail jump used for exception forwarding
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4792 
  // Fast-path monitor enter (inline lock acquisition).  On exit the flags
  // encode the result: EQ => lock acquired, NE => caller must take the
  // runtime slow path (see the comments at the end of the block).
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this compare forces NE (slow path)
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // LSE: single casal does the compare-and-swap with acquire/release
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC loop: ldaxr/stlxr retry until the store-exclusive succeeds
      // or the loaded mark no longer matches
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        __ cmp(rscratch1, disp_hdr);
      } else {
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4947 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching

  // Fast-path monitor exit (inline lock release), mirroring fast_lock.
  // On exit: flag == EQ => unlocked, flag == NE => runtime slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // LSE: release-only CAS restoring the displaced header
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        // LL/SC loop: ldxr/stlxr retry until success or mark mismatch
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // cmp sets the flags for the instruct rule; cbnz takes the slow path
      // when waiters exist
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5046 
5047 %}
5048 
5049 //----------FRAME--------------------------------------------------------------
5050 // Definition of frame structure and management information.
5051 //
5052 //  S T A C K   L A Y O U T    Allocators stack-slot number
5053 //                             |   (to get allocators register number
5054 //  G  Owned by    |        |  v    add OptoReg::stack0())
5055 //  r   CALLER     |        |
5056 //  o     |        +--------+      pad to even-align allocators stack-slot
5057 //  w     V        |  pad0  |        numbers; owned by CALLER
5058 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5059 //  h     ^        |   in   |  5
5060 //        |        |  args  |  4   Holes in incoming args owned by SELF
5061 //  |     |        |        |  3
5062 //  |     |        +--------+
5063 //  V     |        | old out|      Empty on Intel, window on Sparc
5064 //        |    old |preserve|      Must be even aligned.
5065 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5066 //        |        |   in   |  3   area for Intel ret address
5067 //     Owned by    |preserve|      Empty on Sparc.
5068 //       SELF      +--------+
5069 //        |        |  pad2  |  2   pad to align old SP
5070 //        |        +--------+  1
5071 //        |        | locks  |  0
5072 //        |        +--------+----> OptoReg::stack0(), even aligned
5073 //        |        |  pad1  | 11   pad to align new SP
5074 //        |        +--------+
5075 //        |        |        | 10
5076 //        |        | spills |  9   spills
5077 //        V        |        |  8   (pad0 slot for callee)
5078 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5079 //        ^        |  out   |  7
5080 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5081 //     Owned by    +--------+
5082 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5083 //        |    new |preserve|      Must be even-aligned.
5084 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5085 //        |        |        |
5086 //
5087 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5088 //         known from SELF's arguments and the Java calling convention.
5089 //         Region 6-7 is determined per call site.
5090 // Note 2: If the calling convention leaves holes in the incoming argument
5091 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5097 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5098 //         even aligned with pad0 as needed.
5099 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5100 //           (the latter is true on Intel but is it false on AArch64?)
5101 //         region 6-11 is even aligned; it may be padded out more so that
5102 //         the region from SP to FP meets the minimum stack alignment.
5103 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5104 //         alignment.  Region 11, pad1, may be dynamically extended so that
5105 //         SP meets the minimum alignment.
5106 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  // NOTE(review): R31 is presumably the SP encoding here -- confirm against
  // the reg_def block at the top of this file.
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Tables are indexed by the ideal register opcode (Op_Node .. Op_RegL).
    // lo[] holds the register number for the low half of the returned value;
    // hi[] holds the high half, or OptoReg::Bad when the value fits in a
    // single 32-bit slot (Op_RegN, Op_RegI, Op_RegF).
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5210 
5211 //----------ATTRIBUTES---------------------------------------------------------
5212 //----------Operand Attributes-------------------------------------------------
// Default attribute values; individual operands/instructions may override.
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5228 
5229 //----------OPERANDS-----------------------------------------------------------
5230 // Operand definitions must precede instruction definitions for correct parsing
5231 // in the ADLC because operands constitute user defined types which are used in
5232 // instruction definitions.
5233 
5234 //----------Simple Operands----------------------------------------------------
5235 
// Integer operands 32 bit
// 32 bit immediate (any value)
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift (0..4 inclusive)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4.
// NOTE(review): unlike immIExt there is no lower bound, so negative
// values also match -- confirm this is intended at the use sites.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31 (maximum int shift count)
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63 (maximum long shift count)
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (unsigned byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (unsigned short mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 255 (unsigned byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (unsigned short mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (unsigned int mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 (contiguous low-order ones) with the
// top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 (contiguous low-order ones) with the
// top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5462 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- long-constant variant of immIU12
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate load/store of a 4-byte (shift == 2) access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate load/store of an 8-byte (shift == 3) access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an immediate load/store of a 16-byte (shift == 4) access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long-constant variant of immIOffset
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long-constant variant of immIOffset4
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long-constant variant of immIOffset8
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long-constant variant of immIOffset16
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate (any value)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (matches only the exact byte offset of last_Java_pc within the thread)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5706 
// Pointer operands
// Pointer Immediate (any value)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate -- matches only the VM's polling page address
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base -- matches only the card table's byte_map_base,
// and only when the collector uses a card-table barrier set
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float and Double operands
// Double Immediate (any value)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d (exact bit pattern zero)
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as a floating-point immediate
// (Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (any value)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f (exact bit pattern zero)
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as a floating-point immediate
// (Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate (any value)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5880 
5881 // Integer 32 bit Register Operands
5882 // Integer 32 bitRegister (excludes SP)
5883 operand iRegI()
5884 %{
5885   constraint(ALLOC_IN_RC(any_reg32));
5886   match(RegI);
5887   match(iRegINoSp);
5888   op_cost(0);
5889   format %{ %}
5890   interface(REG_INTER);
5891 %}
5892 
5893 // Integer 32 bit Register not Special
5894 operand iRegINoSp()
5895 %{
5896   constraint(ALLOC_IN_RC(no_special_reg32));
5897   match(RegI);
5898   op_cost(0);
5899   format %{ %}
5900   interface(REG_INTER);
5901 %}
5902 
5903 // Integer 64 bit Register Operands
5904 // Integer 64 bit Register (includes SP)
5905 operand iRegL()
5906 %{
5907   constraint(ALLOC_IN_RC(any_reg));
5908   match(RegL);
5909   match(iRegLNoSp);
5910   op_cost(0);
5911   format %{ %}
5912   interface(REG_INTER);
5913 %}
5914 
5915 // Integer 64 bit Register not Special
5916 operand iRegLNoSp()
5917 %{
5918   constraint(ALLOC_IN_RC(no_special_reg));
5919   match(RegL);
5920   match(iRegL_R0);
5921   format %{ %}
5922   interface(REG_INTER);
5923 %}
5924 
5925 // Pointer Register Operands
5926 // Pointer Register
5927 operand iRegP()
5928 %{
5929   constraint(ALLOC_IN_RC(ptr_reg));
5930   match(RegP);
5931   match(iRegPNoSp);
5932   match(iRegP_R0);
5933   //match(iRegP_R2);
5934   //match(iRegP_R4);
5935   //match(iRegP_R5);
5936   match(thread_RegP);
5937   op_cost(0);
5938   format %{ %}
5939   interface(REG_INTER);
5940 %}
5941 
5942 // Pointer 64 bit Register not Special
5943 operand iRegPNoSp()
5944 %{
5945   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5946   match(RegP);
5947   // match(iRegP);
5948   // match(iRegP_R0);
5949   // match(iRegP_R2);
5950   // match(iRegP_R4);
5951   // match(iRegP_R5);
5952   // match(thread_RegP);
5953   op_cost(0);
5954   format %{ %}
5955   interface(REG_INTER);
5956 %}
5957 
5958 // Pointer 64 bit Register R0 only
5959 operand iRegP_R0()
5960 %{
5961   constraint(ALLOC_IN_RC(r0_reg));
5962   match(RegP);
5963   // match(iRegP);
5964   match(iRegPNoSp);
5965   op_cost(0);
5966   format %{ %}
5967   interface(REG_INTER);
5968 %}
5969 
5970 // Pointer 64 bit Register R1 only
5971 operand iRegP_R1()
5972 %{
5973   constraint(ALLOC_IN_RC(r1_reg));
5974   match(RegP);
5975   // match(iRegP);
5976   match(iRegPNoSp);
5977   op_cost(0);
5978   format %{ %}
5979   interface(REG_INTER);
5980 %}
5981 
5982 // Pointer 64 bit Register R2 only
5983 operand iRegP_R2()
5984 %{
5985   constraint(ALLOC_IN_RC(r2_reg));
5986   match(RegP);
5987   // match(iRegP);
5988   match(iRegPNoSp);
5989   op_cost(0);
5990   format %{ %}
5991   interface(REG_INTER);
5992 %}
5993 
5994 // Pointer 64 bit Register R3 only
5995 operand iRegP_R3()
5996 %{
5997   constraint(ALLOC_IN_RC(r3_reg));
5998   match(RegP);
5999   // match(iRegP);
6000   match(iRegPNoSp);
6001   op_cost(0);
6002   format %{ %}
6003   interface(REG_INTER);
6004 %}
6005 
6006 // Pointer 64 bit Register R4 only
6007 operand iRegP_R4()
6008 %{
6009   constraint(ALLOC_IN_RC(r4_reg));
6010   match(RegP);
6011   // match(iRegP);
6012   match(iRegPNoSp);
6013   op_cost(0);
6014   format %{ %}
6015   interface(REG_INTER);
6016 %}
6017 
6018 // Pointer 64 bit Register R5 only
6019 operand iRegP_R5()
6020 %{
6021   constraint(ALLOC_IN_RC(r5_reg));
6022   match(RegP);
6023   // match(iRegP);
6024   match(iRegPNoSp);
6025   op_cost(0);
6026   format %{ %}
6027   interface(REG_INTER);
6028 %}
6029 
6030 // Pointer 64 bit Register R10 only
6031 operand iRegP_R10()
6032 %{
6033   constraint(ALLOC_IN_RC(r10_reg));
6034   match(RegP);
6035   // match(iRegP);
6036   match(iRegPNoSp);
6037   op_cost(0);
6038   format %{ %}
6039   interface(REG_INTER);
6040 %}
6041 
6042 // Long 64 bit Register R0 only
6043 operand iRegL_R0()
6044 %{
6045   constraint(ALLOC_IN_RC(r0_reg));
6046   match(RegL);
6047   match(iRegLNoSp);
6048   op_cost(0);
6049   format %{ %}
6050   interface(REG_INTER);
6051 %}
6052 
6053 // Long 64 bit Register R2 only
6054 operand iRegL_R2()
6055 %{
6056   constraint(ALLOC_IN_RC(r2_reg));
6057   match(RegL);
6058   match(iRegLNoSp);
6059   op_cost(0);
6060   format %{ %}
6061   interface(REG_INTER);
6062 %}
6063 
6064 // Long 64 bit Register R3 only
6065 operand iRegL_R3()
6066 %{
6067   constraint(ALLOC_IN_RC(r3_reg));
6068   match(RegL);
6069   match(iRegLNoSp);
6070   op_cost(0);
6071   format %{ %}
6072   interface(REG_INTER);
6073 %}
6074 
6075 // Long 64 bit Register R11 only
6076 operand iRegL_R11()
6077 %{
6078   constraint(ALLOC_IN_RC(r11_reg));
6079   match(RegL);
6080   match(iRegLNoSp);
6081   op_cost(0);
6082   format %{ %}
6083   interface(REG_INTER);
6084 %}
6085 
6086 // Pointer 64 bit Register FP only
6087 operand iRegP_FP()
6088 %{
6089   constraint(ALLOC_IN_RC(fp_reg));
6090   match(RegP);
6091   // match(iRegP);
6092   op_cost(0);
6093   format %{ %}
6094   interface(REG_INTER);
6095 %}
6096 
6097 // Register R0 only
6098 operand iRegI_R0()
6099 %{
6100   constraint(ALLOC_IN_RC(int_r0_reg));
6101   match(RegI);
6102   match(iRegINoSp);
6103   op_cost(0);
6104   format %{ %}
6105   interface(REG_INTER);
6106 %}
6107 
6108 // Register R2 only
6109 operand iRegI_R2()
6110 %{
6111   constraint(ALLOC_IN_RC(int_r2_reg));
6112   match(RegI);
6113   match(iRegINoSp);
6114   op_cost(0);
6115   format %{ %}
6116   interface(REG_INTER);
6117 %}
6118 
6119 // Register R3 only
6120 operand iRegI_R3()
6121 %{
6122   constraint(ALLOC_IN_RC(int_r3_reg));
6123   match(RegI);
6124   match(iRegINoSp);
6125   op_cost(0);
6126   format %{ %}
6127   interface(REG_INTER);
6128 %}
6129 
6130 
6131 // Register R4 only
6132 operand iRegI_R4()
6133 %{
6134   constraint(ALLOC_IN_RC(int_r4_reg));
6135   match(RegI);
6136   match(iRegINoSp);
6137   op_cost(0);
6138   format %{ %}
6139   interface(REG_INTER);
6140 %}
6141 
6142 
6143 // Pointer Register Operands
6144 // Narrow Pointer Register
6145 operand iRegN()
6146 %{
6147   constraint(ALLOC_IN_RC(any_reg32));
6148   match(RegN);
6149   match(iRegNNoSp);
6150   op_cost(0);
6151   format %{ %}
6152   interface(REG_INTER);
6153 %}
6154 
6155 operand iRegN_R0()
6156 %{
6157   constraint(ALLOC_IN_RC(r0_reg));
6158   match(iRegN);
6159   op_cost(0);
6160   format %{ %}
6161   interface(REG_INTER);
6162 %}
6163 
6164 operand iRegN_R2()
6165 %{
6166   constraint(ALLOC_IN_RC(r2_reg));
6167   match(iRegN);
6168   op_cost(0);
6169   format %{ %}
6170   interface(REG_INTER);
6171 %}
6172 
6173 operand iRegN_R3()
6174 %{
6175   constraint(ALLOC_IN_RC(r3_reg));
6176   match(iRegN);
6177   op_cost(0);
6178   format %{ %}
6179   interface(REG_INTER);
6180 %}
6181 
6182 // Integer 64 bit Register not Special
6183 operand iRegNNoSp()
6184 %{
6185   constraint(ALLOC_IN_RC(no_special_reg32));
6186   match(RegN);
6187   op_cost(0);
6188   format %{ %}
6189   interface(REG_INTER);
6190 %}
6191 
6192 // heap base register -- used for encoding immN0
6193 
6194 operand iRegIHeapbase()
6195 %{
6196   constraint(ALLOC_IN_RC(heapbase_reg));
6197   match(RegI);
6198   op_cost(0);
6199   format %{ %}
6200   interface(REG_INTER);
6201 %}
6202 
// Float Register
// Float register operands
// Single-precision FP value in the float register class.
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
// Double-precision FP value in the double register class.
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64-bit (D-sized) vector operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q-sized) vector operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6246 
// Double operands pinned to specific FP/SIMD registers v0..v3
// (presumably for fixed-register runtime/stub calling conventions --
// confirm against the instructs that use them).
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6282 
6283 // Flags register, used as output of signed compare instructions
6284 
// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
6287 // that ordered inequality tests use GT, GE, LT or LE none of which
6288 // pass through cases where the result is unordered i.e. one or both
6289 // inputs to the compare is a NaN. this means that the ideal code can
6290 // replace e.g. a GT with an LE and not end up capturing the NaN case
6291 // (where the comparison should always fail). EQ and NE tests are
6292 // always generated in ideal code so that unordered folds into the NE
6293 // case, matching the behaviour of AArch64 NE.
6294 //
6295 // This differs from x86 where the outputs of FP compares use a
6296 // special FP flags registers and where compares based on this
6297 // register are distinguished into ordered inequalities (cmpOpUCF) and
6298 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6299 // to explicitly handle the unordered case in branches. x86 also has
6300 // to include extra CMoveX rules to accept a cmpOpUCF input.
6301 
// Condition flags (NZCV) produced by signed compares (and FP compares,
// per the commentary above).
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
// Allocated from the same int_flags class as rFlagsReg; the distinct
// operand type keeps signed and unsigned compare results separate in
// the match rules.
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6322 
// Special Registers

// Method Register
// Pointer constrained to the method register (inline-cache holder).
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer constrained to the method register (interpreter method oop).
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (comment previously said link_reg -- copy-paste error)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link register (return address)
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6364 
//----------Memory Operands----------------------------------------------------

// Base-register-only addressing: [reg]
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // 0xffffffff == no index register
    scale(0x0);
    disp(0x0);
  %}
%}
6380 
// Base + sign-extended 32-bit index, scaled: [reg, wreg, sxtw #scale].
// The predicate checks the scaled-offset form is usable by every memory
// user of this address.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + 64-bit index, scaled: [reg, xreg, lsl #scale].
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Base + sign-extended 32-bit index, unscaled: [reg, wreg, sxtw].
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base + 64-bit index, unscaled: [reg, xreg].
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6438 
// Base + immediate-offset addressing: [reg, #off].
// The I4/I8/I16 (and L4/L8/L16) variants presumably restrict the
// immediate to offsets valid for 4/8/16-byte accesses -- confirm
// against the immIOffset*/immLoffset* operand definitions.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Same as above with a long immediate offset.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6550 
// Narrow-oop variants of the memory operands above. Each matches an
// address built from (DecodeN reg) and is only legal when
// narrow_oop_shift() == 0, so the narrow value can serve directly as
// the base register.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6655 
6656 
6657 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the anchor pc slot: [thread_reg, #off], where off is the
// fixed pc-slot offset (immL_pc_off, defined elsewhere in this file).
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
6672 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// These operands are created by the matcher only; the "RSP" naming in the
// base() comments is inherited from the x86 version of this file.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6747 
6748 // Operands for expressing Control Flow
6749 // NOTE: Label is a predefined operand which should not be redefined in
6750 //       the AD file. It is generically handled within the ADLC.
6751 
6752 //----------Conditional Branch Operands----------------------------------------
6753 // Comparison Op  - This is the operation of the comparison, and is limited to
6754 //                  the following set of codes:
6755 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6756 //
6757 // Other attributes of the comparison, such as unsignedness, are specified
6758 // by the comparison instruction that sets a condition code flags register.
6759 // That result is represented by a flags operand whose subtype is appropriate
6760 // to the unsignedness (etc.) of the comparison.
6761 //
6762 // Later, the instruction which matches both the Comparison Op (a Bool) and
6763 // the flags (produced by the Cmp) specifies the coding of the comparison op
6764 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6765 
// used for signed integral comparisons and fp comparisons

// The hex values are the AArch64 condition-code encodings
// (EQ=0x0, NE=0x1, GE=0xa, LT=0xb, GT=0xc, LE=0xd, VS=0x6, VC=0x7).
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6784 
// used for unsigned integral comparisons

// Same as cmpOp but maps the orderings onto the unsigned AArch64
// condition codes (HS=0x2, LO=0x3, HI=0x8, LS=0x9).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6803 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to eq/ne tests via the predicate so match rules can emit
// compare-and-branch / test-bit-and-branch forms.
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6827 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to lt/ge tests via the predicate (sign-bit tests).
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6852 
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

// Union of the eq/ne and lt/ge restrictions above, for unsigned users.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6879 
// Special operand allowing long args to int ops to be truncated for free

// Matches a (ConvL2I long) subtree so 32-bit instructions can consume
// the low half of a long register directly, eliding the explicit l2i
// (movw) that would otherwise be planted.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // Terminate with ';' for consistency with every other REG_INTER
  // operand in this file (the semicolon was previously missing here).
  interface(REG_INTER);
%}
6892 
// Memory opclasses for vector loads/stores of 4/8/16 bytes:
// base-only, base+index, or base + suitably-restricted immediate offset.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6896 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
6909 
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but it's not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6924 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style stage names onto the generic S0..S5 stages declared
// by pipe_desc further below (ISS=issue, EX1/EX2=execute, WR=writeback).
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
6934 
6935 // Integer ALU reg operation
6936 pipeline %{
6937 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions (no variable-length encodings)
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6950 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS0/INS1 are the two issue slots; INS01 means "either slot".
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6971 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Scalar FP classes. Suffix _s/_d = single/double precision.
// INS01 = the instruction may issue in either slot; INS0 = slot 0 only
// (see the resources declaration above).

// FP two-source operation, single precision.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-source operation, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source operation, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP one-source operation, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP precision / FP<->integer conversions. Naming: d2f = double to
// float, f2i = float to int register, i2d = int register to double, etc.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP division occupies issue slot 0 only (INS0).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select: reads the flags (cr) as well as both sources.
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move: no register sources.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load (from the constant pool).
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7177 
7178 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7179 %{
7180   single_instruction;
7181   dst    : S5(write);
7182   src1   : S1(read);
7183   src2   : S1(read);
7184   INS01  : ISS;
7185   NEON_FP : S5;
7186 %}
7187 
7188 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7189 %{
7190   single_instruction;
7191   dst    : S5(write);
7192   src1   : S1(read);
7193   src2   : S1(read);
7194   INS0   : ISS;
7195   NEON_FP : S5;
7196 %}
7197 
7198 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7199 %{
7200   single_instruction;
7201   dst    : S5(write);
7202   src1   : S1(read);
7203   src2   : S1(read);
7204   dst    : S1(read);
7205   INS01  : ISS;
7206   NEON_FP : S5;
7207 %}
7208 
7209 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7210 %{
7211   single_instruction;
7212   dst    : S5(write);
7213   src1   : S1(read);
7214   src2   : S1(read);
7215   dst    : S1(read);
7216   INS0   : ISS;
7217   NEON_FP : S5;
7218 %}
7219 
7220 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7221 %{
7222   single_instruction;
7223   dst    : S4(write);
7224   src1   : S2(read);
7225   src2   : S2(read);
7226   INS01  : ISS;
7227   NEON_FP : S4;
7228 %}
7229 
7230 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7231 %{
7232   single_instruction;
7233   dst    : S4(write);
7234   src1   : S2(read);
7235   src2   : S2(read);
7236   INS0   : ISS;
7237   NEON_FP : S4;
7238 %}
7239 
7240 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7241 %{
7242   single_instruction;
7243   dst    : S3(write);
7244   src1   : S2(read);
7245   src2   : S2(read);
7246   INS01  : ISS;
7247   NEON_FP : S3;
7248 %}
7249 
7250 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7251 %{
7252   single_instruction;
7253   dst    : S3(write);
7254   src1   : S2(read);
7255   src2   : S2(read);
7256   INS0   : ISS;
7257   NEON_FP : S3;
7258 %}
7259 
7260 pipe_class vshift64(vecD dst, vecD src, vecX shift)
7261 %{
7262   single_instruction;
7263   dst    : S3(write);
7264   src    : S1(read);
7265   shift  : S1(read);
7266   INS01  : ISS;
7267   NEON_FP : S3;
7268 %}
7269 
7270 pipe_class vshift128(vecX dst, vecX src, vecX shift)
7271 %{
7272   single_instruction;
7273   dst    : S3(write);
7274   src    : S1(read);
7275   shift  : S1(read);
7276   INS0   : ISS;
7277   NEON_FP : S3;
7278 %}
7279 
7280 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
7281 %{
7282   single_instruction;
7283   dst    : S3(write);
7284   src    : S1(read);
7285   INS01  : ISS;
7286   NEON_FP : S3;
7287 %}
7288 
7289 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
7290 %{
7291   single_instruction;
7292   dst    : S3(write);
7293   src    : S1(read);
7294   INS0   : ISS;
7295   NEON_FP : S3;
7296 %}
7297 
7298 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
7299 %{
7300   single_instruction;
7301   dst    : S5(write);
7302   src1   : S1(read);
7303   src2   : S1(read);
7304   INS01  : ISS;
7305   NEON_FP : S5;
7306 %}
7307 
7308 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
7309 %{
7310   single_instruction;
7311   dst    : S5(write);
7312   src1   : S1(read);
7313   src2   : S1(read);
7314   INS0   : ISS;
7315   NEON_FP : S5;
7316 %}
7317 
7318 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7319 %{
7320   single_instruction;
7321   dst    : S5(write);
7322   src1   : S1(read);
7323   src2   : S1(read);
7324   INS0   : ISS;
7325   NEON_FP : S5;
7326 %}
7327 
7328 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7329 %{
7330   single_instruction;
7331   dst    : S5(write);
7332   src1   : S1(read);
7333   src2   : S1(read);
7334   INS0   : ISS;
7335   NEON_FP : S5;
7336 %}
7337 
7338 pipe_class vsqrt_fp128(vecX dst, vecX src)
7339 %{
7340   single_instruction;
7341   dst    : S5(write);
7342   src    : S1(read);
7343   INS0   : ISS;
7344   NEON_FP : S5;
7345 %}
7346 
7347 pipe_class vunop_fp64(vecD dst, vecD src)
7348 %{
7349   single_instruction;
7350   dst    : S5(write);
7351   src    : S1(read);
7352   INS01  : ISS;
7353   NEON_FP : S5;
7354 %}
7355 
7356 pipe_class vunop_fp128(vecX dst, vecX src)
7357 %{
7358   single_instruction;
7359   dst    : S5(write);
7360   src    : S1(read);
7361   INS0   : ISS;
7362   NEON_FP : S5;
7363 %}
7364 
7365 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7366 %{
7367   single_instruction;
7368   dst    : S3(write);
7369   src    : S1(read);
7370   INS01  : ISS;
7371   NEON_FP : S3;
7372 %}
7373 
7374 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7375 %{
7376   single_instruction;
7377   dst    : S3(write);
7378   src    : S1(read);
7379   INS01  : ISS;
7380   NEON_FP : S3;
7381 %}
7382 
7383 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7384 %{
7385   single_instruction;
7386   dst    : S3(write);
7387   src    : S1(read);
7388   INS01  : ISS;
7389   NEON_FP : S3;
7390 %}
7391 
7392 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7393 %{
7394   single_instruction;
7395   dst    : S3(write);
7396   src    : S1(read);
7397   INS01  : ISS;
7398   NEON_FP : S3;
7399 %}
7400 
7401 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7402 %{
7403   single_instruction;
7404   dst    : S3(write);
7405   src    : S1(read);
7406   INS01  : ISS;
7407   NEON_FP : S3;
7408 %}
7409 
// Vector move-immediate into a 64-bit vector; may dual issue (INS01).
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector move-immediate into a 128-bit vector; restricted to slot 0 (INS0).
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7425 
// 64-bit vector load; address consumed at issue, result ready at S5.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7443 
// 64-bit vector store; address consumed at issue, data read at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7452 
// 128-bit vector store; address consumed at issue, data read at S2.
// Fixed: the source operand was declared vecD (copy-paste from the 64-bit
// variant above); a 128-bit store reads a vecX register, matching
// vload_reg_mem128 which uses vecX with a vmem16 address.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7461 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): the header says the result is generated in EX2 and dst is
// written at EX2, but the ALU resource is booked at EX1 — confirm whether
// ALU : EX2 was intended.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7559 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-imm
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7624 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7703 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read); // 'dst' here is the address register, read at issue
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7771 
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7800 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7864 
7865 %}
7866 //----------INSTRUCTIONS-------------------------------------------------------
7867 //
7868 // match      -- States which machine-independent subtree may be replaced
7869 //               by this instruction.
7870 // ins_cost   -- The estimated cost of this instruction is used by instruction
7871 //               selection to identify a minimum cost tree of machine
7872 //               instructions that matches a tree of machine-independent
7873 //               instructions.
7874 // format     -- A string providing the disassembly for this instruction.
7875 //               The value of an instruction's operand may be inserted
7876 //               by referring to it with a '$' prefix.
7877 // opcode     -- Three instruction opcodes may be provided.  These are referred
7878 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
7880 //               indicate the type of machine instruction, while secondary
7881 //               and tertiary are often used for prefix options or addressing
7882 //               modes.
7883 // ins_encode -- A list of encode classes with parameters. The encode class
7884 //               name must have been defined in an 'enc_class' specification
7885 //               in the encode section of the architecture description.
7886 
7887 // ============================================================================
7888 // Memory (Load/Store) Instructions
7889 
7890 // Load Instructions
7891 
7892 // Load Byte (8 bit signed)
7893 instruct loadB(iRegINoSp dst, memory mem)
7894 %{
7895   match(Set dst (LoadB mem));
7896   predicate(!needs_acquiring_load(n));
7897 
7898   ins_cost(4 * INSN_COST);
7899   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7900 
7901   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7902 
7903   ins_pipe(iload_reg_mem);
7904 %}
7905 
7906 // Load Byte (8 bit signed) into long
7907 instruct loadB2L(iRegLNoSp dst, memory mem)
7908 %{
7909   match(Set dst (ConvI2L (LoadB mem)));
7910   predicate(!needs_acquiring_load(n->in(1)));
7911 
7912   ins_cost(4 * INSN_COST);
7913   format %{ "ldrsb  $dst, $mem\t# byte" %}
7914 
7915   ins_encode(aarch64_enc_ldrsb(dst, mem));
7916 
7917   ins_pipe(iload_reg_mem);
7918 %}
7919 
7920 // Load Byte (8 bit unsigned)
7921 instruct loadUB(iRegINoSp dst, memory mem)
7922 %{
7923   match(Set dst (LoadUB mem));
7924   predicate(!needs_acquiring_load(n));
7925 
7926   ins_cost(4 * INSN_COST);
7927   format %{ "ldrbw  $dst, $mem\t# byte" %}
7928 
7929   ins_encode(aarch64_enc_ldrb(dst, mem));
7930 
7931   ins_pipe(iload_reg_mem);
7932 %}
7933 
7934 // Load Byte (8 bit unsigned) into long
7935 instruct loadUB2L(iRegLNoSp dst, memory mem)
7936 %{
7937   match(Set dst (ConvI2L (LoadUB mem)));
7938   predicate(!needs_acquiring_load(n->in(1)));
7939 
7940   ins_cost(4 * INSN_COST);
7941   format %{ "ldrb  $dst, $mem\t# byte" %}
7942 
7943   ins_encode(aarch64_enc_ldrb(dst, mem));
7944 
7945   ins_pipe(iload_reg_mem);
7946 %}
7947 
7948 // Load Short (16 bit signed)
7949 instruct loadS(iRegINoSp dst, memory mem)
7950 %{
7951   match(Set dst (LoadS mem));
7952   predicate(!needs_acquiring_load(n));
7953 
7954   ins_cost(4 * INSN_COST);
7955   format %{ "ldrshw  $dst, $mem\t# short" %}
7956 
7957   ins_encode(aarch64_enc_ldrshw(dst, mem));
7958 
7959   ins_pipe(iload_reg_mem);
7960 %}
7961 
7962 // Load Short (16 bit signed) into long
7963 instruct loadS2L(iRegLNoSp dst, memory mem)
7964 %{
7965   match(Set dst (ConvI2L (LoadS mem)));
7966   predicate(!needs_acquiring_load(n->in(1)));
7967 
7968   ins_cost(4 * INSN_COST);
7969   format %{ "ldrsh  $dst, $mem\t# short" %}
7970 
7971   ins_encode(aarch64_enc_ldrsh(dst, mem));
7972 
7973   ins_pipe(iload_reg_mem);
7974 %}
7975 
7976 // Load Char (16 bit unsigned)
7977 instruct loadUS(iRegINoSp dst, memory mem)
7978 %{
7979   match(Set dst (LoadUS mem));
7980   predicate(!needs_acquiring_load(n));
7981 
7982   ins_cost(4 * INSN_COST);
7983   format %{ "ldrh  $dst, $mem\t# short" %}
7984 
7985   ins_encode(aarch64_enc_ldrh(dst, mem));
7986 
7987   ins_pipe(iload_reg_mem);
7988 %}
7989 
7990 // Load Short/Char (16 bit unsigned) into long
7991 instruct loadUS2L(iRegLNoSp dst, memory mem)
7992 %{
7993   match(Set dst (ConvI2L (LoadUS mem)));
7994   predicate(!needs_acquiring_load(n->in(1)));
7995 
7996   ins_cost(4 * INSN_COST);
7997   format %{ "ldrh  $dst, $mem\t# short" %}
7998 
7999   ins_encode(aarch64_enc_ldrh(dst, mem));
8000 
8001   ins_pipe(iload_reg_mem);
8002 %}
8003 
8004 // Load Integer (32 bit signed)
8005 instruct loadI(iRegINoSp dst, memory mem)
8006 %{
8007   match(Set dst (LoadI mem));
8008   predicate(!needs_acquiring_load(n));
8009 
8010   ins_cost(4 * INSN_COST);
8011   format %{ "ldrw  $dst, $mem\t# int" %}
8012 
8013   ins_encode(aarch64_enc_ldrw(dst, mem));
8014 
8015   ins_pipe(iload_reg_mem);
8016 %}
8017 
8018 // Load Integer (32 bit signed) into long
8019 instruct loadI2L(iRegLNoSp dst, memory mem)
8020 %{
8021   match(Set dst (ConvI2L (LoadI mem)));
8022   predicate(!needs_acquiring_load(n->in(1)));
8023 
8024   ins_cost(4 * INSN_COST);
8025   format %{ "ldrsw  $dst, $mem\t# int" %}
8026 
8027   ins_encode(aarch64_enc_ldrsw(dst, mem));
8028 
8029   ins_pipe(iload_reg_mem);
8030 %}
8031 
8032 // Load Integer (32 bit unsigned) into long
8033 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
8034 %{
8035   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8036   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
8037 
8038   ins_cost(4 * INSN_COST);
8039   format %{ "ldrw  $dst, $mem\t# int" %}
8040 
8041   ins_encode(aarch64_enc_ldrw(dst, mem));
8042 
8043   ins_pipe(iload_reg_mem);
8044 %}
8045 
8046 // Load Long (64 bit signed)
8047 instruct loadL(iRegLNoSp dst, memory mem)
8048 %{
8049   match(Set dst (LoadL mem));
8050   predicate(!needs_acquiring_load(n));
8051 
8052   ins_cost(4 * INSN_COST);
8053   format %{ "ldr  $dst, $mem\t# int" %}
8054 
8055   ins_encode(aarch64_enc_ldr(dst, mem));
8056 
8057   ins_pipe(iload_reg_mem);
8058 %}
8059 
// Load Range (array length)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8156 
8157 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// May expand to several instructions, hence the higher cost.

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8213 
// Load Pointer Constant One
// (used e.g. as a marker value; encoded via aarch64_enc_mov_p1)

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed copy-paste from loadConP0: this loads the constant one,
  // not a NULL pointer.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8227 
// Load Poll Page Constant

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8297 
// Load Packed Float Constant
// immFPacked constants are representable as an fmov immediate, so no
// constant-table load is needed.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    // fmovs takes its immediate as a double
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant (representable as an fmov immediate)

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8341 
// Load Double Constant (general case: from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    // Fixed copy-paste from loadConF: this is a double constant.
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8358 
// Store Instructions

// Store CMS card-mark Immediate
// Matched only when the preceding StoreStore barrier can be elided.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
// Plain stores below are guarded by !needs_releasing_store(n); releasing
// stores are matched by the volatile (stlr) variants further down.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8405 
8406 
// Store Byte immediate zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format: the encoding (aarch64_enc_strb0) stores the zero
  // register, matching the other str*0 instructs; the old text showed a
  // misspelled "rscractch2".
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8419 
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short immediate zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer immediate zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8474 
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // NOTE(review): disassembly tag says "# int" for a 64-bit store —
  // cosmetic only, but "# long" would be clearer.
  format %{ "str  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed) immediate zero
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer immediate zero (NULL)
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8530 
// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed NULL by reusing rheapbase: when both narrow oop and
// narrow klass bases are NULL, rheapbase holds zero and can be stored
// directly, saving a constant materialization.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8559 
// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  match(Set mem (StoreNKlass mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8621 
//  ---------------- volatile loads and stores ----------------

// These variants match acquiring loads (see needs_acquiring_load above)
// and use the ldar family; they are serialized in the pipeline.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}
8700 
8701 // Load Short/Char (16 bit unsigned) into long
8702 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8703 %{
8704   match(Set dst (ConvI2L (LoadUS mem)));
8705 
8706   ins_cost(VOLATILE_REF_COST);
8707   format %{ "ldarh  $dst, $mem\t# short" %}
8708 
8709   ins_encode(aarch64_enc_ldarh(dst, mem));
8710 
8711   ins_pipe(pipe_serial);
8712 %}
8713 
// Load Short/Char (16 bit signed) into long
// Sign-extending load-acquire halfword. The format previously said
// "ldarh" (the zero-extending form) while the encoding is
// aarch64_enc_ldarsh; the format now matches the emitted instruction.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8726 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The immL_32bits mask operand lets the (AndL (ConvI2L ...) 0xFFFFFFFF)
// zero-extension fold into the single zero-extending ldarw.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8752 
// Load Long (64 bit signed)
// 64-bit load-acquire. The format comment previously said "# int";
// corrected to "# long" to match the LoadL ideal node.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8765 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// FP volatile loads go through aarch64_enc_fldars/fldard (acquiring
// load into an FP/SIMD register).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8817 
// The volatile store rules all use the store-release (stlr*) encodings
// against an indirect (base-register, zero-offset) address.

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8843 
// Store Integer
// 32-bit store-release. (Fixed missing space in the match expression --
// "Set mem(StoreI" -> "Set mem (StoreI" -- for consistency with every
// sibling store rule.)

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8857 
// Store Long (64 bit signed)
// 64-bit store-release. The format comment previously said "# int";
// corrected to "# long" to match the StoreL ideal node.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8870 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// FP volatile stores go through aarch64_enc_fstlrs/fstlrd (releasing
// store from an FP/SIMD register).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

//  ---------------- end of volatile loads and stores ----------------
8927 
// Cache line write-back of the line containing $addr.
// Only a plain base-register address is legal here: the asserts require
// no index and zero displacement.
instruct cacheWB(indirect addr)
%{
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted before a sequence of cache write-backs
// (cache_wbsync(true) == pre-sync).
instruct cacheWBPreSync()
%{
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted after a sequence of cache write-backs
// (cache_wbsync(false) == post-sync).
instruct cacheWBPostSync()
%{
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8965 
8966 // ============================================================================
8967 // BSWAP Instructions
8968 
8969 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8970   match(Set dst (ReverseBytesI src));
8971 
8972   ins_cost(INSN_COST);
8973   format %{ "revw  $dst, $src" %}
8974 
8975   ins_encode %{
8976     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8977   %}
8978 
8979   ins_pipe(ialu_reg);
8980 %}
8981 
8982 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8983   match(Set dst (ReverseBytesL src));
8984 
8985   ins_cost(INSN_COST);
8986   format %{ "rev  $dst, $src" %}
8987 
8988   ins_encode %{
8989     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8990   %}
8991 
8992   ins_pipe(ialu_reg);
8993 %}
8994 
8995 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8996   match(Set dst (ReverseBytesUS src));
8997 
8998   ins_cost(INSN_COST);
8999   format %{ "rev16w  $dst, $src" %}
9000 
9001   ins_encode %{
9002     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9003   %}
9004 
9005   ins_pipe(ialu_reg);
9006 %}
9007 
9008 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
9009   match(Set dst (ReverseBytesS src));
9010 
9011   ins_cost(INSN_COST);
9012   format %{ "rev16w  $dst, $src\n\t"
9013             "sbfmw $dst, $dst, #0, #15" %}
9014 
9015   ins_encode %{
9016     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9017     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
9018   %}
9019 
9020   ins_pipe(ialu_reg);
9021 %}
9022 
9023 // ============================================================================
9024 // Zero Count Instructions
9025 
9026 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9027   match(Set dst (CountLeadingZerosI src));
9028 
9029   ins_cost(INSN_COST);
9030   format %{ "clzw  $dst, $src" %}
9031   ins_encode %{
9032     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
9033   %}
9034 
9035   ins_pipe(ialu_reg);
9036 %}
9037 
9038 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
9039   match(Set dst (CountLeadingZerosL src));
9040 
9041   ins_cost(INSN_COST);
9042   format %{ "clz   $dst, $src" %}
9043   ins_encode %{
9044     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
9045   %}
9046 
9047   ins_pipe(ialu_reg);
9048 %}
9049 
9050 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9051   match(Set dst (CountTrailingZerosI src));
9052 
9053   ins_cost(INSN_COST * 2);
9054   format %{ "rbitw  $dst, $src\n\t"
9055             "clzw   $dst, $dst" %}
9056   ins_encode %{
9057     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9058     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9059   %}
9060 
9061   ins_pipe(ialu_reg);
9062 %}
9063 
9064 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9065   match(Set dst (CountTrailingZerosL src));
9066 
9067   ins_cost(INSN_COST * 2);
9068   format %{ "rbit   $dst, $src\n\t"
9069             "clz    $dst, $dst" %}
9070   ins_encode %{
9071     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9072     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9073   %}
9074 
9075   ins_pipe(ialu_reg);
9076 %}
9077 
//---------- Population Count Instructions -------------------------------------
//
// All four rules use the SIMD cnt (per-byte popcount) + addv (sum across
// vector) sequence via an FP/SIMD temporary, then move the scalar result
// back to a general register.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this movw writes back into $src (to clear the top 32
    // bits) but $src is not listed in effect() as modified -- confirm
    // whether this clobber is safe / intended.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// PopCountI with the 32-bit load folded in: ldrs loads straight into the
// FP/SIMD temporary, so no GP-register zero-extension is needed.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// PopCountL with the 64-bit load folded in (ldrd into the SIMD temp).
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9167 
9168 // ============================================================================
9169 // MemBar Instruction
9170 
9171 instruct load_fence() %{
9172   match(LoadFence);
9173   ins_cost(VOLATILE_REF_COST);
9174 
9175   format %{ "load_fence" %}
9176 
9177   ins_encode %{
9178     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9179   %}
9180   ins_pipe(pipe_serial);
9181 %}
9182 
9183 instruct unnecessary_membar_acquire() %{
9184   predicate(unnecessary_acquire(n));
9185   match(MemBarAcquire);
9186   ins_cost(0);
9187 
9188   format %{ "membar_acquire (elided)" %}
9189 
9190   ins_encode %{
9191     __ block_comment("membar_acquire (elided)");
9192   %}
9193 
9194   ins_pipe(pipe_class_empty);
9195 %}
9196 
9197 instruct membar_acquire() %{
9198   match(MemBarAcquire);
9199   ins_cost(VOLATILE_REF_COST);
9200 
9201   format %{ "membar_acquire\n\t"
9202             "dmb ish" %}
9203 
9204   ins_encode %{
9205     __ block_comment("membar_acquire");
9206     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9207   %}
9208 
9209   ins_pipe(pipe_serial);
9210 %}
9211 
9212 
9213 instruct membar_acquire_lock() %{
9214   match(MemBarAcquireLock);
9215   ins_cost(VOLATILE_REF_COST);
9216 
9217   format %{ "membar_acquire_lock (elided)" %}
9218 
9219   ins_encode %{
9220     __ block_comment("membar_acquire_lock (elided)");
9221   %}
9222 
9223   ins_pipe(pipe_serial);
9224 %}
9225 
9226 instruct store_fence() %{
9227   match(StoreFence);
9228   ins_cost(VOLATILE_REF_COST);
9229 
9230   format %{ "store_fence" %}
9231 
9232   ins_encode %{
9233     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9234   %}
9235   ins_pipe(pipe_serial);
9236 %}
9237 
9238 instruct unnecessary_membar_release() %{
9239   predicate(unnecessary_release(n));
9240   match(MemBarRelease);
9241   ins_cost(0);
9242 
9243   format %{ "membar_release (elided)" %}
9244 
9245   ins_encode %{
9246     __ block_comment("membar_release (elided)");
9247   %}
9248   ins_pipe(pipe_serial);
9249 %}
9250 
9251 instruct membar_release() %{
9252   match(MemBarRelease);
9253   ins_cost(VOLATILE_REF_COST);
9254 
9255   format %{ "membar_release\n\t"
9256             "dmb ish" %}
9257 
9258   ins_encode %{
9259     __ block_comment("membar_release");
9260     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9261   %}
9262   ins_pipe(pipe_serial);
9263 %}
9264 
9265 instruct membar_storestore() %{
9266   match(MemBarStoreStore);
9267   ins_cost(VOLATILE_REF_COST);
9268 
9269   format %{ "MEMBAR-store-store" %}
9270 
9271   ins_encode %{
9272     __ membar(Assembler::StoreStore);
9273   %}
9274   ins_pipe(pipe_serial);
9275 %}
9276 
9277 instruct membar_release_lock() %{
9278   match(MemBarReleaseLock);
9279   ins_cost(VOLATILE_REF_COST);
9280 
9281   format %{ "membar_release_lock (elided)" %}
9282 
9283   ins_encode %{
9284     __ block_comment("membar_release_lock (elided)");
9285   %}
9286 
9287   ins_pipe(pipe_serial);
9288 %}
9289 
9290 instruct unnecessary_membar_volatile() %{
9291   predicate(unnecessary_volatile(n));
9292   match(MemBarVolatile);
9293   ins_cost(0);
9294 
9295   format %{ "membar_volatile (elided)" %}
9296 
9297   ins_encode %{
9298     __ block_comment("membar_volatile (elided)");
9299   %}
9300 
9301   ins_pipe(pipe_serial);
9302 %}
9303 
9304 instruct membar_volatile() %{
9305   match(MemBarVolatile);
9306   ins_cost(VOLATILE_REF_COST*100);
9307 
9308   format %{ "membar_volatile\n\t"
9309              "dmb ish"%}
9310 
9311   ins_encode %{
9312     __ block_comment("membar_volatile");
9313     __ membar(Assembler::StoreLoad);
9314   %}
9315 
9316   ins_pipe(pipe_serial);
9317 %}
9318 
9319 // ============================================================================
9320 // Cast/Convert Instructions
9321 
9322 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9323   match(Set dst (CastX2P src));
9324 
9325   ins_cost(INSN_COST);
9326   format %{ "mov $dst, $src\t# long -> ptr" %}
9327 
9328   ins_encode %{
9329     if ($dst$$reg != $src$$reg) {
9330       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9331     }
9332   %}
9333 
9334   ins_pipe(ialu_reg);
9335 %}
9336 
9337 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9338   match(Set dst (CastP2X src));
9339 
9340   ins_cost(INSN_COST);
9341   format %{ "mov $dst, $src\t# ptr -> long" %}
9342 
9343   ins_encode %{
9344     if ($dst$$reg != $src$$reg) {
9345       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9346     }
9347   %}
9348 
9349   ins_pipe(ialu_reg);
9350 %}
9351 
9352 // Convert oop into int for vectors alignment masking
9353 instruct convP2I(iRegINoSp dst, iRegP src) %{
9354   match(Set dst (ConvL2I (CastP2X src)));
9355 
9356   ins_cost(INSN_COST);
9357   format %{ "movw $dst, $src\t# ptr -> int" %}
9358   ins_encode %{
9359     __ movw($dst$$Register, $src$$Register);
9360   %}
9361 
9362   ins_pipe(ialu_reg);
9363 %}
9364 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// Fixed the format string: it previously read "mov dst, $src" -- the
// missing '$' made the disassembly print the literal text "dst", and
// "mov" did not match the movw actually emitted by the encoding.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9380 

// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Non-null variant: src is statically known non-null, so the cheaper
// encode_heap_oop_not_null sequence is used.
// NOTE(review): unlike encodeHeapOop above, cr is declared here but not
// named in any effect() -- confirm whether that is intended.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decompress a narrow oop that may be null.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decompress a narrow oop statically known to be non-null (or constant).
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9435 
// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

// Compress a klass pointer known to be non-null.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a narrow klass known to be non-null. The in-place
// single-register form is used when dst and src were allocated to the
// same register.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9473 
// Type-system-only casts: all three emit no code (size(0), empty
// encoding); they exist so the matcher has a rule for the ideal node.

instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9504 
9505 // ============================================================================
9506 // Atomic operation instructions
9507 //
9508 // Intel and SPARC both implement Ideal Node LoadPLocked and
9509 // Store{PIL}Conditional instructions using a normal load for the
9510 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9511 //
9512 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9513 // pair to lock object allocations from Eden space when not using
9514 // TLABs.
9515 //
9516 // There does not appear to be a Load{IL}Locked Ideal Node and the
9517 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9518 // and to use StoreIConditional only for 32-bit and StoreLConditional
9519 // only for 64-bit.
9520 //
9521 // We implement LoadPLocked and StorePLocked instructions using,
9522 // respectively the AArch64 hw load-exclusive and store-conditional
9523 // instructions. Whereas we must implement each of
9524 // Store{IL}Conditional using a CAS which employs a pair of
9525 // instructions comprising a load-exclusive followed by a
9526 // store-conditional.
9527 
9528 
9529 // Locked-load (linked load) of the current heap-top
9530 // used when updating the eden heap top
9531 // implemented using ldaxr on AArch64
9532 
9533 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9534 %{
9535   match(Set dst (LoadPLocked mem));
9536 
9537   ins_cost(VOLATILE_REF_COST);
9538 
9539   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9540 
9541   ins_encode(aarch64_enc_ldaxr(dst, mem));
9542 
9543   ins_pipe(pipe_serial);
9544 %}
9545 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
// implemented using stlxr on AArch64.

instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Note: oldval is not used by the encoding; stlxr succeeds or fails
  // based on the exclusive monitor set by the paired loadPLocked.
  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9570 

// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread.  We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9591 
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional.  At the time of writing this
// comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9610 
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// All of these emit a cmpxchg* encoding followed by cset_eq to
// materialize the 0/1 success flag into $res; flags are clobbered
// (KILL cr).

instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9724 
9725 // alternative CompareAndSwapX when we are eliding barriers
9726 
// Acquiring versions of CompareAndSwapX, selected when
// needs_acquiring_load_exclusive(n) holds (i.e. when surrounding
// barriers are being elided).  They use the _acq encodings and carry
// the cheaper VOLATILE_REF_COST.
// (Indentation of format/ins_encode normalised to the file's
// two-space convention.)
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// As above, long flavour.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// As above, pointer flavour.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// As above, narrow-oop flavour.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9802 
9803 
9804 // ---------------------------------------------------------------------
9805 
9806 
9807 // BEGIN This section of the file is automatically generated. Do not edit --------------
9808 
9809 // Sundry CAS operations.  Note that release is always true,
9810 // regardless of the memory ordering of the CAS.  This is because we
9811 // need the volatile case to be sequentially consistent but there is
9812 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
9813 // can't check the type of memory ordering here, so we always emit a
9814 // STLXR.
9815 
9816 // This section is generated from aarch64_ad_cas.m4
9817 
9818 
9819 
// NOTE(review): these CompareAndExchange* rules are STRONG CAS forms
// (the encodings pass /*weak*/ false), but their format strings were
// labelled "(…, weak)" — misleading in PrintAssembly output.  The
// ", weak" tags have been dropped.  This section is generated: the
// same fix must be applied to aarch64_ad_cas.m4.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    // Zero-extend $oldval into rscratch2 for the comparison
    // (presumably to match the zero-extended byte loaded by the
    // exclusive load — confirm against MacroAssembler::cmpxchg).
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // The JVM expects the previous value sign-extended as a byte.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    // Zero-extend for comparison, sign-extend the returned short.
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9913 
// Weak CAS forms.  These pass /*weak*/ true to cmpxchg and discard
// the loaded value (result register is noreg); the boolean outcome is
// taken from the flags via csetw ($res <-- 1 on success, 0 on
// failure).
// NOTE(review): the first format line prints "$res = $mem, ..." even
// though the cmpxchg itself writes no result register — slightly
// misleading, but this section is generated (aarch64_ad_cas.m4), so
// any wording fix belongs there.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // Zero-extend the expected byte value for the comparison.
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // Zero-extend the expected short value for the comparison.
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10017 
10018 // END This section of the file is automatically generated. Do not edit --------------
10019 // ---------------------------------------------------------------------
10020 
// Atomic exchange: atomically store $newv to *$mem and return the
// previous memory value in $prev.
// NOTE(review): unlike the CAS rules above, these carry no explicit
// ins_cost — the default cost applies; confirm that is intended.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above, long flavour (64-bit exchange).
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above, narrow-oop flavour (32-bit exchange).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above, pointer flavour (64-bit exchange).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10056 
10057 
// Atomic fetch-and-add family.  Four variants per width: increment in
// a register or as an add/sub immediate, each with a "_no_res" twin
// that matches only when the result is unused
// (n->as_LoadStore()->result_not_used()) and is preferred via its
// slightly lower cost (INSN_COST * 9 vs * 10).

// Long fetch-and-add, register increment; $newval receives the sum.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above but the result is discarded (noreg).
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Long fetch-and-add, immediate increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above but the result is discarded.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int fetch-and-add, register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above but the result is discarded.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Int fetch-and-add, immediate increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// As above but the result is discarded.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10141 
10142 // Manifest a CmpL result in an integer register.
10143 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // csetw gives dst = (src1 != src2) ? 1 : 0; cnegw then negates
    // dst when src1 < src2, yielding -1 / 0 / +1.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10164 
// As cmpL3_reg_reg above, but comparing against an add/sub immediate.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
    // add/sub immediates must be non-negative, so compare against a
    // negative constant by negating it and using adds instead of subs.
    // (Fixed stray one-space indentation of the if below.)
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // dst = (src1 != src2) ? 1 : 0, then negated when src1 < src2.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10189 
10190 // ============================================================================
10191 // Conditional Move Instructions
10192 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10202 
// CMoveI with both arms in registers, signed compare op.  csel
// semantics: $dst <-- $src2 when the condition holds, else $src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// As above, unsigned compare op (cmpOpU).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// Zero on the left: $dst <-- $src when the condition holds, else zr.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the right: $dst <-- zr when the condition holds, else $src.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

// csincw dst, zr, zr, cond computes cond ? 0 : 1 — i.e. cset with
// the negated condition, as the in-line comment below notes.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// As above, unsigned compare op.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10350 
// CMoveL with both arms in registers, signed compare op.  csel
// semantics: $dst <-- $src2 when the condition holds, else $src1.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// As above, unsigned compare op.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Zero on the right: $dst <-- zr when the condition holds, else $src.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the left: $dst <-- $src when the condition holds, else zr.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10448 
// CMoveP with both arms in registers, signed compare op.  csel
// semantics: $dst <-- $src2 when the condition holds, else $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// As above, unsigned compare op.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// Zero on the right: $dst <-- zr when the condition holds, else $src.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Zero on the left: $dst <-- $src when the condition holds, else zr.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// As above, unsigned compare op.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10546 
// CMoveN (compressed pointer) with both arms in registers, signed
// compare op: $dst <-- $src2 when the condition holds, else $src1.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10562 
// Conditional move of compressed pointers (unsigned compare):
// dst = (cmp cr) ? $src2 : $src1.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // Format string fixed: this rule uses cmpOpU/rFlagsRegU, so the compare is
  // unsigned (the old text said "signed").
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10578 
// special cases where one arg is zero

// Compressed-pointer conditional move (signed compare), second input zero:
// dst = (cmp cr) ? zr : $src.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Compressed-pointer conditional move (unsigned compare), second input zero:
// dst = (cmp cr) ? zr : $src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Compressed-pointer conditional move (signed compare), first input zero:
// dst = (cmp cr) ? $src : zr.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Compressed-pointer conditional move (unsigned compare), first input zero:
// dst = (cmp cr) ? $src : zr.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Float conditional move via fcsels (signed compare):
// dst = (cmp cr) ? $src2 : $src1.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Float conditional move via fcsels (unsigned compare):
// dst = (cmp cr) ? $src2 : $src1.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10680 
// Double conditional move via fcseld (signed compare):
// dst = (cmp cr) ? $src2 : $src1.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Format string fixed: this is the CMoveD/fcseld rule, so "double", not "float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10698 
// Double conditional move via fcseld (unsigned compare):
// dst = (cmp cr) ? $src2 : $src1.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // Format string fixed: this is the CMoveD/fcseld rule, so "double", not "float".
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10716 
10717 // ============================================================================
10718 // Arithmetic Instructions
10719 //
10720 
10721 // Integer Addition
10722 
10723 // TODO
10724 // these currently employ operations which do not set CR and hence are
10725 // not flagged as killing CR but we would like to isolate the cases
10726 // where we want to set flags from those where we don't. need to work
10727 // out how to do that.
10728 
// 32-bit integer add, register-register: dst = src1 + src2.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit integer add, register-immediate (shared add/sub encoder,
// opcode 0x0 selects "add").
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// 32-bit add of an immediate to the low word of a long (ConvL2I is free:
// addw only reads the low 32 bits of $src1).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus sign-extended int offset, folded into add's sxtw extension.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus scaled long index, folded into a single lea with LSL.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus scaled, sign-extended int index, folded into lea with sxtw.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Combined ConvI2L + left shift, emitted as a single sbfiz (sign-extending
// bitfield insert). NOTE(review): the MIN presumably clamps the inserted
// field width to the 32 significant source bits — confirm against the
// sbfiz operand constraints in the assembler.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}

// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10881 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add, register-immediate (shared add/sub encoder, opcode 0x0 = add).
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Integer Subtraction
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10944 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract, register-immediate (shared add/sub encoder, opcode 0x1 = sub).
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Format string fixed: space was missing between the mnemonic and $dst.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10959 
// Integer Negation (special case for sub)

// dst = -src (32-bit). The zero operand pins the match to (SubI 0 src).
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// dst = -src (64-bit). The zero operand pins the match to (SubL 0 src).
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Integer Multiply

instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// 32x32 -> 64-bit signed multiply: MulL of two sign-extended ints is
// collapsed into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11040 
// High 64 bits of a 64x64-bit signed multiply (MulHiL) via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Format string fixed: removed the stray trailing comma after $src2.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11056 
11057 // Combined Integer Multiply & Add/Sub
11058 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format string fixed: the encoding emits the 32-bit form maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11074 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format string fixed: the encoding emits the 32-bit form msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11090 
// Combined Long Multiply & Add/Sub

// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src >> 31) >>> 31 extracts the int sign bit; immI_31 operands pin both
// shift amounts to 31, so a single lsrw with a hard-coded 31 suffices.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Rounding step of signed power-of-two int division:
// dst = src + (src >>> 31), i.e. add 1 when src is negative.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}

// Long Divide

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src >> 63) >>> 63 extracts the long sign bit; immI_63 operands pin both
// shift amounts to 63, so a single lsr with a hard-coded 63 suffices.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11182 
// Rounding step of signed power-of-two long division:
// dst = src + (src >>> 63), i.e. add 1 when src is negative.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Format string fixed: show the shifted-register operand, matching the
  // 32-bit sibling div2Round ("addw $dst, $src, LSR $div1").
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11196 
11197 // Integer Remainder
11198 
// Integer remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 via sdivw + msubw (see aarch64_enc_modw).
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Format string fixed: removed the stray, unbalanced "(" after msubw.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11209 
11210 // Long Remainder
11211 
// Long remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 via sdiv + msub (see aarch64_enc_mod).
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Format string fixed: removed the stray, unbalanced "(" after msub and
  // added the missing "\t" after "\n" to match modI's layout.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11222 
// Integer Shifts

// Shift Left Register
// Variable shift: lslvw uses only the low 5 bits of $src2, matching Java
// int shift semantics.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift count is masked to 0..31, matching Java int shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Combined Int Mask and Right Shift (using UBFM)
// TODO

// Long Shifts

// Shift Left Register
// Variable shift: lslv uses only the low 6 bits of $src2, matching Java
// long shift semantics.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Shift count is masked to 0..63, matching Java long shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Same emission as urShiftL_reg_imm, but matches the pointer-to-long cast
// (CastP2X) so the shift applies directly to the raw pointer bits.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11437 
11438 // BEGIN This section of the file is automatically generated. Do not edit --------------
11439 
11440 instruct regL_not_reg(iRegLNoSp dst,
11441                          iRegL src1, immL_M1 m1,
11442                          rFlagsReg cr) %{
11443   match(Set dst (XorL src1 m1));
11444   ins_cost(INSN_COST);
11445   format %{ "eon  $dst, $src1, zr" %}
11446 
11447   ins_encode %{
11448     __ eon(as_Register($dst$$reg),
11449               as_Register($src1$$reg),
11450               zr,
11451               Assembler::LSL, 0);
11452   %}
11453 
11454   ins_pipe(ialu_reg);
11455 %}
11456 instruct regI_not_reg(iRegINoSp dst,
11457                          iRegIorL2I src1, immI_M1 m1,
11458                          rFlagsReg cr) %{
11459   match(Set dst (XorI src1 m1));
11460   ins_cost(INSN_COST);
11461   format %{ "eonw  $dst, $src1, zr" %}
11462 
11463   ins_encode %{
11464     __ eonw(as_Register($dst$$reg),
11465               as_Register($src1$$reg),
11466               zr,
11467               Assembler::LSL, 0);
11468   %}
11469 
11470   ins_pipe(ialu_reg);
11471 %}
11472 
11473 instruct AndI_reg_not_reg(iRegINoSp dst,
11474                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11475                          rFlagsReg cr) %{
11476   match(Set dst (AndI src1 (XorI src2 m1)));
11477   ins_cost(INSN_COST);
11478   format %{ "bicw  $dst, $src1, $src2" %}
11479 
11480   ins_encode %{
11481     __ bicw(as_Register($dst$$reg),
11482               as_Register($src1$$reg),
11483               as_Register($src2$$reg),
11484               Assembler::LSL, 0);
11485   %}
11486 
11487   ins_pipe(ialu_reg_reg);
11488 %}
11489 
11490 instruct AndL_reg_not_reg(iRegLNoSp dst,
11491                          iRegL src1, iRegL src2, immL_M1 m1,
11492                          rFlagsReg cr) %{
11493   match(Set dst (AndL src1 (XorL src2 m1)));
11494   ins_cost(INSN_COST);
11495   format %{ "bic  $dst, $src1, $src2" %}
11496 
11497   ins_encode %{
11498     __ bic(as_Register($dst$$reg),
11499               as_Register($src1$$reg),
11500               as_Register($src2$$reg),
11501               Assembler::LSL, 0);
11502   %}
11503 
11504   ins_pipe(ialu_reg_reg);
11505 %}
11506 
11507 instruct OrI_reg_not_reg(iRegINoSp dst,
11508                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11509                          rFlagsReg cr) %{
11510   match(Set dst (OrI src1 (XorI src2 m1)));
11511   ins_cost(INSN_COST);
11512   format %{ "ornw  $dst, $src1, $src2" %}
11513 
11514   ins_encode %{
11515     __ ornw(as_Register($dst$$reg),
11516               as_Register($src1$$reg),
11517               as_Register($src2$$reg),
11518               Assembler::LSL, 0);
11519   %}
11520 
11521   ins_pipe(ialu_reg_reg);
11522 %}
11523 
11524 instruct OrL_reg_not_reg(iRegLNoSp dst,
11525                          iRegL src1, iRegL src2, immL_M1 m1,
11526                          rFlagsReg cr) %{
11527   match(Set dst (OrL src1 (XorL src2 m1)));
11528   ins_cost(INSN_COST);
11529   format %{ "orn  $dst, $src1, $src2" %}
11530 
11531   ins_encode %{
11532     __ orn(as_Register($dst$$reg),
11533               as_Register($src1$$reg),
11534               as_Register($src2$$reg),
11535               Assembler::LSL, 0);
11536   %}
11537 
11538   ins_pipe(ialu_reg_reg);
11539 %}
11540 
11541 instruct XorI_reg_not_reg(iRegINoSp dst,
11542                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11543                          rFlagsReg cr) %{
11544   match(Set dst (XorI m1 (XorI src2 src1)));
11545   ins_cost(INSN_COST);
11546   format %{ "eonw  $dst, $src1, $src2" %}
11547 
11548   ins_encode %{
11549     __ eonw(as_Register($dst$$reg),
11550               as_Register($src1$$reg),
11551               as_Register($src2$$reg),
11552               Assembler::LSL, 0);
11553   %}
11554 
11555   ins_pipe(ialu_reg_reg);
11556 %}
11557 
11558 instruct XorL_reg_not_reg(iRegLNoSp dst,
11559                          iRegL src1, iRegL src2, immL_M1 m1,
11560                          rFlagsReg cr) %{
11561   match(Set dst (XorL m1 (XorL src2 src1)));
11562   ins_cost(INSN_COST);
11563   format %{ "eon  $dst, $src1, $src2" %}
11564 
11565   ins_encode %{
11566     __ eon(as_Register($dst$$reg),
11567               as_Register($src1$$reg),
11568               as_Register($src2$$reg),
11569               Assembler::LSL, 0);
11570   %}
11571 
11572   ins_pipe(ialu_reg_reg);
11573 %}
11574 
// ---- "and with complemented, shifted operand" (BIC/BICW) ----
// Each rule matches  dst = src1 & ~(src2 shift src3)  where the NOT is
// C2's canonical (Xor ... -1) form, and fuses NOT + shift + AND into a
// single BIC(W) with a shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching both Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 & ~(src2 >>> src3)  =>  bicw ..., LSR
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & ~(src2 >>> src3)  =>  bic ..., LSR
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & ~(src2 >> src3)  =>  bicw ..., ASR
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & ~(src2 >> src3)  =>  bic ..., ASR
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & ~(src2 << src3)  =>  bicw ..., LSL
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & ~(src2 << src3)  =>  bic ..., LSL
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11682 
// ---- "xor with complemented, shifted operand" (EON/EONW) ----
// Each rule matches  dst = -1 ^ ((src2 shift src3) ^ src1), i.e.
// dst = src1 ^ ~(src2 shift src3), and fuses NOT + shift + XOR into a
// single EON(W) with a shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching both Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 ^ ~(src2 >>> src3)  =>  eonw ..., LSR
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 >>> src3)  =>  eon ..., LSR
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ ~(src2 >> src3)  =>  eonw ..., ASR
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 >> src3)  =>  eon ..., ASR
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ ~(src2 << src3)  =>  eonw ..., LSL
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ ~(src2 << src3)  =>  eon ..., LSL
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11790 
// ---- "or with complemented, shifted operand" (ORN/ORNW) ----
// Each rule matches  dst = src1 | ~(src2 shift src3)  where the NOT is
// C2's canonical (Xor ... -1) form, and fuses NOT + shift + OR into a
// single ORN(W) with a shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching both Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 | ~(src2 >>> src3)  =>  ornw ..., LSR
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 >>> src3)  =>  orn ..., LSR
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | ~(src2 >> src3)  =>  ornw ..., ASR
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 >> src3)  =>  orn ..., ASR
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | ~(src2 << src3)  =>  ornw ..., LSL
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | ~(src2 << src3)  =>  orn ..., LSL
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11898 
// ---- "and with shifted operand" (AND/ANDW shifted-register) ----
// Each rule matches  dst = src1 & (src2 shift src3)  and folds the
// shift into the AND's shifted-register operand, saving a separate
// shift instruction.  The shift count is masked to 0x1f (32-bit) /
// 0x3f (64-bit), matching Java shift semantics and the immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 & (src2 >>> src3)  =>  andw ..., LSR
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 >>> src3)  =>  and ..., LSR
// (the MacroAssembler helper is named andr to avoid the C++ keyword)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & (src2 >> src3)  =>  andw ..., ASR
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 >> src3)  =>  and ..., ASR
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 & (src2 << src3)  =>  andw ..., LSL
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 & (src2 << src3)  =>  and ..., LSL
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12012 
// ---- "xor with shifted operand" (EOR/EORW shifted-register) ----
// Each rule matches  dst = src1 ^ (src2 shift src3)  and folds the
// shift into the EOR's shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 ^ (src2 >>> src3)  =>  eorw ..., LSR
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 >>> src3)  =>  eor ..., LSR
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ (src2 >> src3)  =>  eorw ..., ASR
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 >> src3)  =>  eor ..., ASR
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 ^ (src2 << src3)  =>  eorw ..., LSL
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 ^ (src2 << src3)  =>  eor ..., LSL
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12126 
// ---- "or with shifted operand" (ORR/ORRW shifted-register) ----
// Each rule matches  dst = src1 | (src2 shift src3)  and folds the
// shift into the ORR's shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 | (src2 >>> src3)  =>  orrw ..., LSR
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 >>> src3)  =>  orr ..., LSR
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | (src2 >> src3)  =>  orrw ..., ASR
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 >> src3)  =>  orr ..., ASR
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 | (src2 << src3)  =>  orrw ..., LSL
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 | (src2 << src3)  =>  orr ..., LSL
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12240 
// ---- "add with shifted operand" (ADD/ADDW shifted-register) ----
// Each rule matches  dst = src1 + (src2 shift src3)  and folds the
// shift into the ADD's shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 + (src2 >>> src3)  =>  addw ..., LSR
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 >>> src3)  =>  add ..., LSR
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 + (src2 >> src3)  =>  addw ..., ASR
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 >> src3)  =>  add ..., ASR
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 + (src2 << src3)  =>  addw ..., LSL
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 + (src2 << src3)  =>  add ..., LSL
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12354 
// ---- "subtract with shifted operand" (SUB/SUBW shifted-register) ----
// Each rule matches  dst = src1 - (src2 shift src3)  and folds the
// shift into the SUB's shifted-register operand.  The shift count is
// masked to 0x1f (32-bit) / 0x3f (64-bit), matching Java shift
// semantics and the instruction's immediate field.
// NOTE(review): cr is declared but referenced by neither the match rule
// nor the encoding — presumably historical; confirm before removing.

// 32-bit: dst = src1 - (src2 >>> src3)  =>  subw ..., LSR
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 >>> src3)  =>  sub ..., LSR
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 - (src2 >> src3)  =>  subw ..., ASR
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 >> src3)  =>  sub ..., ASR
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 32-bit: dst = src1 - (src2 << src3)  =>  subw ..., LSL
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit: dst = src1 - (src2 << src3)  =>  sub ..., LSL
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12468 
12469 
12470 
// Shift Left followed by arithmetic Shift Right (64-bit).
// (src << lshift) >> rshift is a single signed-bitfield-move (SBFM):
// this idiom appears in narrowing sign-extension sequences emitted by
// the compiler (e.g. for i2b-style conversions in the 32-bit variant).
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFM immr/imms: imms = 63 - lshift keeps the low (64 - lshift)
    // source bits; immr = (rshift - lshift) mod 64 rotates them into
    // place, so the net effect equals (src << lshift) >> rshift.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12493 
// Shift Left followed by arithmetic Shift Right (32-bit).
// (src << lshift) >> rshift is a single signed-bitfield-move (SBFMW);
// this idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // SBFMW immr/imms: imms = 31 - lshift keeps the low (32 - lshift)
    // source bits; immr = (rshift - lshift) mod 32 rotates them into
    // place, so the net effect equals (src << lshift) >> rshift.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12516 
12517 // Shift Left followed by Shift Right.
12518 // This idiom is used by the compiler for the i2b bytecode etc.
12519 instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12520 %{
12521   match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
12522   // Make sure we are not going to exceed what ubfm can do.
12523   predicate((unsigned int)n->in(2)->get_int() <= 63
12524             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12525 
12526   ins_cost(INSN_COST * 2);
12527   format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12528   ins_encode %{
12529     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12530     int s = 63 - lshift;
12531     int r = (rshift - lshift) & 63;
12532     __ ubfm(as_Register($dst$$reg),
12533             as_Register($src$$reg),
12534             r, s);
12535   %}
12536 
12537   ins_pipe(ialu_reg_shift);
12538 %}
12539 
12540 // Shift Left followed by Shift Right.
12541 // This idiom is used by the compiler for the i2b bytecode etc.
12542 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12543 %{
12544   match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
12545   // Make sure we are not going to exceed what ubfmw can do.
12546   predicate((unsigned int)n->in(2)->get_int() <= 31
12547             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12548 
12549   ins_cost(INSN_COST * 2);
12550   format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12551   ins_encode %{
12552     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12553     int s = 31 - lshift;
12554     int r = (rshift - lshift) & 31;
12555     __ ubfmw(as_Register($dst$$reg),
12556             as_Register($src$$reg),
12557             r, s);
12558   %}
12559 
12560   ins_pipe(ialu_reg_shift);
12561 %}
// Bitfield extract with shift & mask

// (src >>> rshift) & mask, where immI_bitmask guarantees
// mask == 2^width - 1, is a single unsigned bitfield extract of
// 'width' bits starting at bit 'rshift'.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask + 1 is a power of two; its log2 is the field width.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Long bitfield extract: (src >>> rshift) & mask, where immL_bitmask
// guarantees mask == 2^width - 1, is a single ubfx of 'width' bits
// starting at bit 'rshift'.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is a long bitmask: use the 64-bit log2 helper, since
    // mask + 1 need not fit in 32 bits.  This matches the helper
    // already used by ubfizL's predicate for the same operand kind.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12594 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The ConvI2L costs nothing extra: ubfx into the 64-bit destination
// already zero-fills the upper bits of the extracted field.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is an int bitmask (immI_bitmask), so mask + 1 fits in
    // 32 bits and exact_log2 is sufficient here.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12612 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // lshift must be a valid 32-bit shift, and the inserted field must
  // still fit in the word: width + lshift <= 32.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask + 1 is a power of two; its log2 is the field width.
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  // lshift must be a valid 64-bit shift, and the inserted field must
  // still fit in the doubleword: width + lshift <= 64.
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is a long bitmask: use the 64-bit log2 helper for
    // consistency with the predicate above -- exact_log2 is not
    // safe for fields wider than 31 bits.
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12651 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// The int field is zero-extended by the ubfiz itself, so the ConvI2L
// is folded away.  The predicate keeps width + lshift <= 32 so the
// field stays within the original int's bits.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is an int bitmask (immI_bitmask); exact_log2 suffices.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12670 
// Rotations

// extr extracts a register-width field from the concatenation
// src1:src2.  The predicate requires the left- and right-shift counts
// to sum to a multiple of the operand width ((lshift + rshift) & 63
// == 0), in which case (src1 << lshift) | (src2 >>> rshift) is exactly
// extr dst, src1, src2, #rshift.  With src1 == src2 this is a rotate.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: shift counts sum to a multiple of 32; emits extrw.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but matching AddL.  NOTE(review): presumably valid
// because the two shifted fields are disjoint under the predicate, so
// add and or produce the same bits -- encoding is identical to extrOrL.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit AddI counterpart of extrAddL.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
12732 
12733 
// rol expander
//
// Rotate-left by a variable amount.  AArch64 has no rol instruction,
// so the shift count is negated (subw from zr -- this clobbers
// rscratch1) and the variable rotate-right rorv is used instead.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
//
// 32-bit variant: rorvw on the negated count.

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the rotate-left idiom (x << s) | (x >>> (64 - s)).
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Matches (x << s) | (x >>> (0 - s)); equivalent because rotate/shift
// counts are interpreted modulo 64.
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom (x << s) | (x >>> (32 - s)).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left idiom with the (0 - s) count form.
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
12801 
// ror expander
//
// Rotate-right by a variable amount maps directly onto rorv, so it is
// a single instruction (cheaper than the rol expander above).

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
//
// 32-bit variant: rorvw.

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the rotate-right idiom (x >>> s) | (x << (64 - s)).
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// Matches (x >>> s) | (x << (0 - s)); equivalent because rotate/shift
// counts are interpreted modulo 64.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom (x >>> s) | (x << (32 - s)).
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right idiom with the (0 - s) count form.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
12867 
// Add/subtract (extended)

// Long add where the second operand is a sign-extended int: folds the
// ConvI2L into the sxtw extended-register form of add.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

// Long subtract of a sign-extended int (sxtw form of sub).
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
12895 
12896 
// Add where the second operand is the extension idiom (x << c) >> c.
// A signed right shift gives a sign extension (c = 16: sxth,
// c = 24: sxtb for ints; c = 32/48/56: sxtw/sxth/sxtb for longs); an
// unsigned right shift gives the zero extension uxtb.  Each folds into
// a single add with an extended register.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
12987 
12988 
// Add/subtract where the second operand is masked with an all-ones
// constant (0xff, 0xffff or 0xffffffff).  The And is a zero extension,
// so each pattern folds into add/sub with a uxtb/uxth/uxtw extended
// register.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13118 
13119 
// Add/subtract where the second operand is a sign-extension idiom
// ((x << c) >> c) that is then left-shifted by a small constant
// (immIExt).  Folds into a single add/sub with an extended register
// and shift, e.g. add dst, src1, src2, sxtb #lshift2.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13249 
13250 
// Long add/sub of a sign-extended int (ConvI2L) shifted left by a
// small constant (immIExt): folds into add/sub with sxtw and shift.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
13276 
13277 
// Add/subtract of a zero-extension (And with an all-ones mask: 0xff,
// 0xffff or 0xffffffff) that is then left-shifted by a small constant
// (immIExt): folds into add/sub with a uxtb/uxth/uxtw extended
// register and shift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13381 
13382 instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
13383 %{
13384   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13385   ins_cost(1.9 * INSN_COST);
13386   format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}
13387 
13388    ins_encode %{
13389      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13390             as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
13391    %}
13392   ins_pipe(ialu_reg_reg_shift);
13393 %}
13394 
13395 instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
13396 %{
13397   match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
13398   ins_cost(1.9 * INSN_COST);
13399   format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}
13400 
13401    ins_encode %{
13402      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
13403             as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
13404    %}
13405   ins_pipe(ialu_reg_reg_shift);
13406 %}
13407 // END This section of the file is automatically generated. Do not edit --------------
13408 
13409 // ============================================================================
13410 // Floating Point Arithmetic Instructions
13411 
// Single-precision FP add: fadds dst, src1, src2.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP add: faddd dst, src1, src2.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision FP subtract: fsubs dst, src1, src2.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP subtract: fsubd dst, src1, src2.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision FP multiply: fmuls dst, src1, src2.
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP multiply: fmuld dst, src1, src2.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13501 
// src1 * src2 + src3
// Fused multiply-add (single); only matched when UseFMA is enabled.
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
// Fused multiply-add (double).
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
// Negated fused multiply-add; negation on src3 and one multiplicand.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13607 
// src1 * src2 - src3
// NOTE(review): the immF0 zero operand is not referenced by the match rule,
// format or encoding -- presumably a leftover from an earlier match pattern;
// confirm before removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): the immD0 zero operand is likewise unused -- see mnsubF above.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13642 
13643 
// Single-precision FP divide: fdivs dst, src1, src2.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision FP divide: fdivd dst, src1, src2 (higher latency than fdivs).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13673 
// Single-precision FP negate: fnegs dst, src.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format fixed to "fnegs": the encoder emits fnegs, and the sibling
  // negD_reg_reg rule already spells out the full mnemonic ("fnegd").
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13687 
// Double-precision FP negate: fnegd dst, src.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// Single-precision FP absolute value: fabss dst, src.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP absolute value: fabsd dst, src.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13727 
// Double-precision square root: fsqrtd dst, src.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: this is a double-precision operation, so it
  // belongs in fp_div_d (it previously used fp_div_s, swapped with sqrtF).
  ins_pipe(fp_div_d);
%}
13740 
// Single-precision square root. C2 has no SqrtF node here, so the float
// sqrt appears as (float)sqrt((double)src); that round-trip pattern is
// matched and emitted as a single fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: single-precision op belongs in fp_div_s
  // (it previously used fp_div_d, swapped with sqrtD).
  ins_pipe(fp_div_s);
%}
13753 
13754 // ============================================================================
13755 // Logical Instructions
13756 
13757 // Integer Logical Instructions
13758 
13759 // And Instructions
13760 
13761 
// 32-bit bitwise AND of two registers: andw dst, src1, src2.
// NOTE(review): the rFlagsReg cr operand has no effect() clause and the
// encoding does not set flags -- presumably harmless, confirm intent.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13776 
// 32-bit bitwise AND with a logical immediate: andw dst, src1, #imm.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Format fixed from "andsw" to "andw": the encoder emits the
  // non-flag-setting andw, so the debug listing showed the wrong mnemonic.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13791 
13792 // Or Instructions
13793 
// 32-bit bitwise OR of two registers: orrw dst, src1, src2.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 32-bit bitwise XOR of two registers: eorw dst, src1, src2.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13855 
13856 // Long Logical Instructions
13857 // TODO
13858 
// 64-bit bitwise AND of two registers: and dst, src1, src2.
// All six format strings below were fixed from "\t# int" to "\t# long";
// these rules operate on 64-bit long values, so the debug listing was
// mislabelled.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise AND with a logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// 64-bit bitwise OR of two registers: orr dst, src1, src2.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise OR with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 64-bit bitwise XOR of two registers: eor dst, src1, src2.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise XOR with a logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13952 
// int -> long sign extension; sbfm with imms=0, immr=31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in big-number (e.g. BigInteger-style) arithmetic:
// (long)i & 0xffffffffL, i.e. an unsigned int -> long zero extension.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// long -> int truncation: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// int -> boolean: dst = (src != 0) ? 1 : 0. Clobbers flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// pointer -> boolean: dst = (src != null) ? 1 : 0. Clobbers flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// double -> float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// float -> double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
14053 
// float -> int, rounding toward zero (fcvtzs, 32-bit form).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// float -> long, rounding toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// int -> float (signed convert, scvtf 32-bit source form).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// long -> float (signed convert).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// double -> int, rounding toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// double -> long, rounding toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// int -> double (signed convert, 32-bit source form).
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// long -> double (signed convert).
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14157 
14158 // stack <-> reg and reg <-> reg shuffles with no conversion
14159 
// Raw-bit move: load the 32 bits of a float stack slot into an int register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw-bit move: load an int stack slot into a float register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move: load the 64 bits of a double stack slot into a long register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Raw-bit move: load a long stack slot into a double register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move: store a float register to an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Raw-bit move: store an int register to a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14267 
// Raw-bit move: store a double register to a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Format operand order fixed from "strd $dst, $src" to "strd $src, $dst":
  // the encoder stores src into the dst slot, and every sibling reg->stack
  // rule (MoveF2I_reg_stack, MoveI2F_reg_stack, MoveL2D_reg_stack) prints
  // "$src, $dst".
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14285 
// Raw-bit move: store a long register to a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Raw-bit register move: float bits -> int register (fmov, no conversion).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Raw-bit register move: int bits -> float register.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Raw-bit register move: double bits -> long register.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Raw-bit register move: long bits -> double register.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14375 
14376 // ============================================================================
14377 // clearing of an array
14378 
// Zero cnt words starting at base; both fixed registers are clobbered by
// the zero_words stub call.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// Constant-length variant, used only when the word count is below the
// block-zeroing threshold so an inline store sequence is profitable.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
14410 
14411 // ============================================================================
14412 // Overflow Math Instructions
14413 
// Overflow checks set only the flags register; cmn (compare-negative)
// computes op1 + op2 and cmp computes op1 - op2, discarding the result.

// int add overflow check: cmnw op1, op2.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// int add overflow check against an add/sub immediate.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// long add overflow check: cmn op1, op2.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// long add overflow check against an add/sub immediate.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// int subtract overflow check: cmpw op1, op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}

// int subtract overflow check against an add/sub immediate.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}

// long subtract overflow check: cmp op1, op2.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
14504 
14505 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14506 %{
14507   match(Set cr (OverflowSubL op1 op2));
14508 
14509   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14510   ins_cost(INSN_COST);
14511   ins_encode %{
14512     __ subs(zr, $op1$$Register, $op2$$constant);
14513   %}
14514 
14515   ins_pipe(icmp_reg_imm);
14516 %}
14517 
14518 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14519 %{
14520   match(Set cr (OverflowSubI zero op1));
14521 
14522   format %{ "cmpw  zr, $op1\t# overflow check int" %}
14523   ins_cost(INSN_COST);
14524   ins_encode %{
14525     __ cmpw(zr, $op1$$Register);
14526   %}
14527 
14528   ins_pipe(icmp_reg_imm);
14529 %}
14530 
14531 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14532 %{
14533   match(Set cr (OverflowSubL zero op1));
14534 
14535   format %{ "cmp   zr, $op1\t# overflow check long" %}
14536   ins_cost(INSN_COST);
14537   ins_encode %{
14538     __ cmp(zr, $op1$$Register);
14539   %}
14540 
14541   ins_pipe(icmp_reg_imm);
14542 %}
14543 
// Int multiply overflow check.  smull produces the full 64-bit product
// in rscratch1; comparing it against its own 32-bit sign extension
// (sxtw) yields NE exactly when the product does not fit in an int.
// The cselw/cmpw tail then turns that NE into the V flag:
// 0x80000000 - 1 overflows, so the final cmpw sets VS iff NE held.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused int multiply-overflow check and branch: skips materializing the
// V flag by branching directly on the NE/EQ outcome of the
// sign-extension compare.  Only valid when the If tests
// overflow/no_overflow (see predicate).
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// Long multiply overflow check: mul gives bits 0..63, smulh bits
// 64..127.  The product fits in a long iff the high half equals the
// sign extension (ASR #63) of the low half; the tail converts NE into
// VS exactly as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused long multiply-overflow check and branch (see
// overflowMulI_reg_branch for the condition mapping).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14633 
14634 // ============================================================================
14635 // Compare Instructions
14636 
14637 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14638 %{
14639   match(Set cr (CmpI op1 op2));
14640 
14641   effect(DEF cr, USE op1, USE op2);
14642 
14643   ins_cost(INSN_COST);
14644   format %{ "cmpw  $op1, $op2" %}
14645 
14646   ins_encode(aarch64_enc_cmpw(op1, op2));
14647 
14648   ins_pipe(icmp_reg_reg);
14649 %}
14650 
14651 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14652 %{
14653   match(Set cr (CmpI op1 zero));
14654 
14655   effect(DEF cr, USE op1);
14656 
14657   ins_cost(INSN_COST);
14658   format %{ "cmpw $op1, 0" %}
14659 
14660   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14661 
14662   ins_pipe(icmp_reg_imm);
14663 %}
14664 
14665 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14666 %{
14667   match(Set cr (CmpI op1 op2));
14668 
14669   effect(DEF cr, USE op1);
14670 
14671   ins_cost(INSN_COST);
14672   format %{ "cmpw  $op1, $op2" %}
14673 
14674   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14675 
14676   ins_pipe(icmp_reg_imm);
14677 %}
14678 
14679 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14680 %{
14681   match(Set cr (CmpI op1 op2));
14682 
14683   effect(DEF cr, USE op1);
14684 
14685   ins_cost(INSN_COST * 2);
14686   format %{ "cmpw  $op1, $op2" %}
14687 
14688   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14689 
14690   ins_pipe(icmp_reg_imm);
14691 %}
14692 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.  The emitted cmpw is identical; only the flags-register class
// (rFlagsRegU) differs so that unsigned condition codes are used.

// Unsigned int compare, register-register.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (may need a
// constant materialization, hence double cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14752 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): the format prints "tst" but the enc class is the
// add/sub-immediate compare (cmp #0) — display only, confirm intended.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (double cost for
// possible constant materialization).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14808 
// Unsigned long compare, register-register.  Same encodings as the
// signed CmpL rules; the rFlagsRegU class routes consumers to unsigned
// condition codes.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned long compare against zero (format prints "tst"; encoding is
// the add/sub-immediate compare, as in compL_reg_immL0).
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned long compare against an arbitrary immediate.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14864 
// Pointer compare, register-register (unsigned flags: addresses compare
// unsigned).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-oop (narrow pointer) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test (compare against the null constant immP0).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-oop null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14920 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Float compare, register-register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the +0.0 immediate form of fcmps.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare, register-register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the +0.0 immediate form of fcmpd.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}

// Three-way float compare (CmpF3): produce -1/0/+1 in an int register,
// with unordered treated as less (-1), via csinvw/csnegw on the fcmps
// flags.  Note the 'done' label is bound but never branched to.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}
15010 
// Three-way double compare (CmpD3): -1/0/+1 with unordered as less,
// same csinvw/csnegw pattern as compF3_reg_reg.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// Three-way float compare against 0.0 (immediate form of fcmps).
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare against 0.0 (immediate form of fcmpd).
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}
15090 
// CmpLTMask: dst = (p < q) ? -1 : 0.  cset gives 0/1, then 0 - dst
// turns the 1 into all-ones.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: arithmetic shift right by 31 replicates the
// sign bit, yielding -1 for negative src and 0 otherwise in one insn.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15127 
15128 // ============================================================================
15129 // Max and Min
15130 
15131 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15132 %{
15133   match(Set dst (MinI src1 src2));
15134 
15135   effect(DEF dst, USE src1, USE src2, KILL cr);
15136   size(8);
15137 
15138   ins_cost(INSN_COST * 3);
15139   format %{
15140     "cmpw $src1 $src2\t signed int\n\t"
15141     "cselw $dst, $src1, $src2 lt\t"
15142   %}
15143 
15144   ins_encode %{
15145     __ cmpw(as_Register($src1$$reg),
15146             as_Register($src2$$reg));
15147     __ cselw(as_Register($dst$$reg),
15148              as_Register($src1$$reg),
15149              as_Register($src2$$reg),
15150              Assembler::LT);
15151   %}
15152 
15153   ins_pipe(ialu_reg_reg);
15154 %}
15155 // FROM HERE
15156 
15157 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
15158 %{
15159   match(Set dst (MaxI src1 src2));
15160 
15161   effect(DEF dst, USE src1, USE src2, KILL cr);
15162   size(8);
15163 
15164   ins_cost(INSN_COST * 3);
15165   format %{
15166     "cmpw $src1 $src2\t signed int\n\t"
15167     "cselw $dst, $src1, $src2 gt\t"
15168   %}
15169 
15170   ins_encode %{
15171     __ cmpw(as_Register($src1$$reg),
15172             as_Register($src2$$reg));
15173     __ cselw(as_Register($dst$$reg),
15174              as_Register($src1$$reg),
15175              as_Register($src2$$reg),
15176              Assembler::GT);
15177   %}
15178 
15179   ins_pipe(ialu_reg_reg);
15180 %}
15181 
15182 // ============================================================================
15183 // Branch Instructions
15184 
15185 // Direct Branch.
15186 instruct branch(label lbl)
15187 %{
15188   match(Goto);
15189 
15190   effect(USE lbl);
15191 
15192   ins_cost(BRANCH_COST);
15193   format %{ "b  $lbl" %}
15194 
15195   ins_encode(aarch64_enc_b(lbl));
15196 
15197   ins_pipe(pipe_branch);
15198 %}
15199 
15200 // Conditional Near Branch
15201 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
15202 %{
15203   // Same match rule as `branchConFar'.
15204   match(If cmp cr);
15205 
15206   effect(USE lbl);
15207 
15208   ins_cost(BRANCH_COST);
15209   // If set to 1 this indicates that the current instruction is a
15210   // short variant of a long branch. This avoids using this
15211   // instruction in first-pass matching. It will then only be used in
15212   // the `Shorten_branches' pass.
15213   // ins_short_branch(1);
15214   format %{ "b$cmp  $lbl" %}
15215 
15216   ins_encode(aarch64_enc_br_con(cmp, lbl));
15217 
15218   ins_pipe(pipe_branch_cond);
15219 %}
15220 
15221 // Conditional Near Branch Unsigned
15222 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
15223 %{
15224   // Same match rule as `branchConFar'.
15225   match(If cmp cr);
15226 
15227   effect(USE lbl);
15228 
15229   ins_cost(BRANCH_COST);
15230   // If set to 1 this indicates that the current instruction is a
15231   // short variant of a long branch. This avoids using this
15232   // instruction in first-pass matching. It will then only be used in
15233   // the `Shorten_branches' pass.
15234   // ins_short_branch(1);
15235   format %{ "b$cmp  $lbl\t# unsigned" %}
15236 
15237   ins_encode(aarch64_enc_br_conU(cmp, lbl));
15238 
15239   ins_pipe(pipe_branch_cond);
15240 %}
15241 
// Make use of CBZ and CBNZ.  These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Compare-int-with-zero and branch: EQ -> cbzw, NE -> cbnzw.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-long-with-zero and branch: EQ -> cbz, NE -> cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-test and branch (64-bit cbz/cbnz).
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-oop null-test and branch (32-bit cbzw/cbnzw).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a DecodeN'd oop: the narrow oop is zero iff the decoded
// pointer is null, so test the narrow register directly with cbzw/cbnzw
// and skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare-with-zero and branch.  For unsigned x, "x <= 0"
// is the same as "x == 0" and "x > 0" the same as "x != 0", so EQ/LS
// map to cbzw and NE/HI to cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare-with-zero and branch (see cmpUI_imm0_branch for
// the EQ/LS condition folding).
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15364 
// Test bit and Branch

// Patterns for short (< 32KiB) variants

// Sign test of a long via its sign bit (63): LT means the bit is set,
// so LT maps to NE (tbnz) and GE to EQ (tbz); tbr dispatches on the
// condition — presumably to tbz/tbnz, confirm in MacroAssembler.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test of an int via bit 31 (same LT->NE / GE->EQ mapping).
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long: (op1 & (1<<k)) ==/!= 0 becomes a direct
// tbz/tbnz on bit k; predicate requires the mask be a power of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of an int (see cmpL_branch_bit).
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15433 
// And far variants: identical to the short patterns above except the
// tbr call passes far=true (for targets out of tbz/tbnz range) and
// ins_short_branch(1) is omitted.

// Far sign test of a long (bit 63).
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far sign test of an int (bit 31).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far single-bit test of a long (power-of-two mask, see predicate).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far single-bit test of an int.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15496 
// Test bits

// Flag-setting bit tests: `CmpX (AndX op1 op2) 0` where only the flags
// are needed collapses to a single tst/tstw (ANDS with the zero
// register), producing cr without a separate AND result register.

// Long & immediate -> flags.  The mask must be encodable as an AArch64
// logical immediate (checked by the predicate).
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Int & immediate -> flags (32-bit logical-immediate encoding check).
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Long & register -> flags; no predicate needed, any register mask works.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Int & register -> flags.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15546 
15547 
// Conditional Far Branch
// Conditional Far Branch Unsigned
// TODO: fixme

// counted loop end branch near
// Conditional back-branch closing a counted loop; condition comes from
// previously-set flags (cr), encoded via the shared aarch64_enc_br_con.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
// Same as above but for unsigned comparisons (cmpOpU / rFlagsRegU).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15585 
15586 // counted loop end branch far
15587 // counted loop end branch far unsigned
15588 // TODO: fixme
15589 
15590 // ============================================================================
15591 // inlined locking and unlocking
15592 
15593 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15594 %{
15595   match(Set cr (FastLock object box));
15596   effect(TEMP tmp, TEMP tmp2);
15597 
15598   // TODO
15599   // identify correct cost
15600   ins_cost(5 * INSN_COST);
15601   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
15602 
15603   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
15604 
15605   ins_pipe(pipe_serial);
15606 %}
15607 
15608 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
15609 %{
15610   match(Set cr (FastUnlock object box));
15611   effect(TEMP tmp, TEMP tmp2);
15612 
15613   ins_cost(5 * INSN_COST);
15614   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
15615 
15616   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
15617 
15618   ins_pipe(pipe_serial);
15619 %}
15620 
15621 
15622 // ============================================================================
15623 // Safepoint Instructions
15624 
15625 // TODO
15626 // provide a near and far version of this code
15627 
15628 instruct safePoint(iRegP poll)
15629 %{
15630   match(SafePoint poll);
15631 
15632   format %{
15633     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
15634   %}
15635   ins_encode %{
15636     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
15637   %}
15638   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
15639 %}
15640 
15641 
15642 // ============================================================================
15643 // Procedure Call/Return Instructions
15644 
15645 // Call Java Static Instruction
15646 
15647 instruct CallStaticJavaDirect(method meth)
15648 %{
15649   match(CallStaticJava);
15650 
15651   effect(USE meth);
15652 
15653   ins_cost(CALL_COST);
15654 
15655   format %{ "call,static $meth \t// ==> " %}
15656 
15657   ins_encode( aarch64_enc_java_static_call(meth),
15658               aarch64_enc_call_epilog );
15659 
15660   ins_pipe(pipe_class_call);
15661 %}
15662 
15663 // TO HERE
15664 
15665 // Call Java Dynamic Instruction
15666 instruct CallDynamicJavaDirect(method meth)
15667 %{
15668   match(CallDynamicJava);
15669 
15670   effect(USE meth);
15671 
15672   ins_cost(CALL_COST);
15673 
15674   format %{ "CALL,dynamic $meth \t// ==> " %}
15675 
15676   ins_encode( aarch64_enc_java_dynamic_call(meth),
15677                aarch64_enc_call_epilog );
15678 
15679   ins_pipe(pipe_class_call);
15680 %}
15681 
15682 // Call Runtime Instruction
15683 
15684 instruct CallRuntimeDirect(method meth)
15685 %{
15686   match(CallRuntime);
15687 
15688   effect(USE meth);
15689 
15690   ins_cost(CALL_COST);
15691 
15692   format %{ "CALL, runtime $meth" %}
15693 
15694   ins_encode( aarch64_enc_java_to_runtime(meth) );
15695 
15696   ins_pipe(pipe_class_call);
15697 %}
15698 
15699 // Call Runtime Instruction
15700 
15701 instruct CallLeafDirect(method meth)
15702 %{
15703   match(CallLeaf);
15704 
15705   effect(USE meth);
15706 
15707   ins_cost(CALL_COST);
15708 
15709   format %{ "CALL, runtime leaf $meth" %}
15710 
15711   ins_encode( aarch64_enc_java_to_runtime(meth) );
15712 
15713   ins_pipe(pipe_class_call);
15714 %}
15715 
15716 // Call Runtime Instruction
15717 
15718 instruct CallLeafNoFPDirect(method meth)
15719 %{
15720   match(CallLeafNoFP);
15721 
15722   effect(USE meth);
15723 
15724   ins_cost(CALL_COST);
15725 
15726   format %{ "CALL, runtime leaf nofp $meth" %}
15727 
15728   ins_encode( aarch64_enc_java_to_runtime(meth) );
15729 
15730   ins_pipe(pipe_class_call);
15731 %}
15732 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Register-indirect jump; method_oop rides along in the inline-cache
// register for the callee's benefit (no code emitted for it here).
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Interprocedural jump carrying an exception oop (pinned to r0) to the
// target; unlike TailCall, the return address is discarded.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
15762 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-size placeholder: merely tells the register allocator the
// exception oop already lives in r0 on entry to the handler.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
15793 
15794 
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
// Matches the ideal Halt node: emit a trapping instruction so reaching
// this point kills the thread with a recognizable immediate.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
15823 
15824 // ============================================================================
15825 // Partial Subtype Check
15826 //
15827 // superklass array for an instance of the superklass.  Set a hidden
15828 // internal cache on a hit (cache is checked with exposed code in
15829 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
15830 // encoding ALSO sets flags.
15831 
15832 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
15833 %{
15834   match(Set result (PartialSubtypeCheck sub super));
15835   effect(KILL cr, KILL temp);
15836 
15837   ins_cost(1100);  // slightly larger than the next version
15838   format %{ "partialSubtypeCheck $result, $sub, $super" %}
15839 
15840   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15841 
15842   opcode(0x1); // Force zero of result reg on hit
15843 
15844   ins_pipe(pipe_class_memory);
15845 %}
15846 
15847 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
15848 %{
15849   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
15850   effect(KILL temp, KILL result);
15851 
15852   ins_cost(1100);  // slightly larger than the next version
15853   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
15854 
15855   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15856 
15857   opcode(0x0); // Don't zero result reg on hit
15858 
15859   ins_pipe(pipe_class_memory);
15860 %}
15861 
// Lexicographic compare of two UTF-16 (non-compact, UU) strings.
// Counts are byte counts; all inputs plus tmp1/tmp2 and flags are
// clobbered.  No vector temps needed for same-encoding compares.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // List both killed temps (was "$tmp1" only), matching the effect()
  // declaration and the UL/LU variants below.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Lexicographic compare of two Latin-1 (compact, LL) strings; same
// register/kill discipline as the UU variant above.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // Same fix as string_compareU: document the tmp2 kill too.
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
15896 
// Mixed-encoding compare: str1 is UTF-16, str2 is Latin-1 (UL).
// Needs three vector temps (V0-V2) for the widening compare loop,
// in addition to the integer temps used by the same-encoding variants.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding compare: str1 is Latin-1, str2 is UTF-16 (LU).
// Mirror of string_compareUL above.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);  // (space added after comma for consistency with UL)
  %}
  ins_pipe(pipe_class_memory);
%}
15936 
// String.indexOf with a variable-length needle, both strings UTF-16.
// The -1 passed as icnt2 tells string_indexof the needle length is in
// a register (cnt2), not a compile-time constant.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, both strings Latin-1 (LL).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, UTF-16 haystack with Latin-1 needle (UL).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15999 
// indexOf with a small compile-time-constant needle length.  The
// constant length (<= 4 chars for UU/LL, exactly 1 for UL — see the
// immI_le_4 / immI_1 operands) is passed as icnt2, letting
// string_indexof specialize the search; fewer temps are needed and the
// cnt2/tmp5/tmp6 register slots are filled with zr.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-needle indexOf, both Latin-1 (LL).
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-needle indexOf, mixed UL; note the needle must be exactly
// one character (immI_1), unlike the <=4 limit of the UU/LL forms.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16062 
// indexOf of a single char value (register ch) in a UTF-16 string;
// delegates to the dedicated string_indexof_char stub routine.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16080 
// String equality for Latin-1 (LL) strings.  The trailing literal (1)
// is the element size in bytes passed to string_equals; counts are in
// bytes (see the comment inside the encoding).
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// String equality for UTF-16 (UU) strings: element size 2.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
16112 
// Arrays.equals for byte[] (LL encoding): trailing literal 1 is the
// element size in bytes handed to arrays_equals.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // "$ary2" was written "ary2" (missing '$'), so ADLC printed the
  // literal text instead of the operand; fixed.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals for char[] (UU encoding): element size 2.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Same missing-'$' fix as array_equalsB.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

// Scan a byte[] for any negative (i.e. non-ASCII) byte; result is
// produced by the has_negatives stub routine.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16157 
// fast char[] to byte[] compression
// Narrow UTF-16 chars to Latin-1 bytes using vector temps V0-V3;
// result reports success/position per char_array_compress's contract.
// NOTE(review): the "KILL R1, R2, R3, R4" text below doesn't obviously
// match the effect() list (temps are V0-V3; R0-R3 hold the operands) —
// verify before relying on it.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Widen Latin-1 bytes to UTF-16 chars.  Universe dummy: the copy has no
// value result, only side effects on dst.
// NOTE(review): format lists only $tmp1,$tmp2 but tmp3/tmp4 are TEMPs
// too — confirm and align if touched.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// encode char[] to byte[] in ISO_8859_1
// Vectorized ISO-8859-1 encode; result is the number of characters
// encoded per encode_iso_array's contract.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16210 
16211 // ============================================================================
16212 // This name is KNOWN by the ADLC and cannot be changed.
16213 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16214 // for this guy.
16215 instruct tlsLoadP(thread_RegP dst)
16216 %{
16217   match(Set dst (ThreadLocal));
16218 
16219   ins_cost(0);
16220 
16221   format %{ " -- \t// $dst=Thread::current(), empty" %}
16222 
16223   size(0);
16224 
16225   ins_encode( /*empty*/ );
16226 
16227   ins_pipe(pipe_class_empty);
16228 %}
16229 
// ====================VECTOR INSTRUCTIONS=====================================

// Vector loads/stores keyed off the LoadVector/StoreVector memory size:
// 4 bytes -> ldrs/strs (S reg), 8 -> ldrd/strd (D reg),
// 16 -> ldrq/strq (Q reg).  vecD covers the 32/64-bit cases, vecX 128.

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16297 
// Broadcast a GP-register byte into all lanes of a 64-bit vector (DUP).
// Also used for 4-lane vectors (packed into the low half of the D reg).
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register byte into all 16 lanes of a 128-bit vector.
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate byte constant (MOVI); the constant is masked
// to 8 bits since only the low byte is replicated per lane.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of the immediate byte broadcast.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16347 
// Broadcast a GP-register short into the 16-bit lanes of a 64-bit vector.
// Covers 2- and 4-lane short vectors.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register short into all 8 halfword lanes of a vecX.
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate short constant; masked to 16 bits per lane.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of the immediate short broadcast.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16397 
// Broadcast a GP-register int into both 32-bit lanes of a vecD.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// Broadcast a GP-register int into all four 32-bit lanes of a vecX.
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Broadcast an immediate int constant; no masking needed since the
// lane is a full 32 bits.
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of the immediate int broadcast.
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16445 
// Broadcast a GP-register long into both 64-bit lanes of a vecX.
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Zero a 128-bit vector by EOR-ing it with itself (no constant load).
// NOTE(review): this matches ReplicateI (not ReplicateL) with a zero
// immediate — presumably the matcher presents a zero long-vector
// replicate this way; the "vector(4I)" format annotation follows suit.
// Confirm against the ideal-graph shape before changing.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16471 
// Broadcast an FP-register float into both 32-bit lanes of a vecD.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

// Broadcast an FP-register float into all four lanes of a vecX.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

// Broadcast an FP-register double into both 64-bit lanes of a vecX.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16510 
16511 // ====================REDUCTION ARITHMETIC====================================
16512 
// Add-reduce a 2 x int vector into a scalar: dst = src1 + src2[0] + src2[1].
// Lanes are extracted to GP registers with UMOV and summed with ADDW;
// both temps are GP registers, clobbered by the expansion.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4 x int vector: a single ADDV sums all four lanes into
// lane 0 of the vector temp, which is then moved out and added to src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16549 
// Multiply-reduce a 2 x int vector: dst = src1 * src2[0] * src2[1].
// There is no integer multiply-across-lanes instruction, so each lane
// is moved to a GP register with UMOV and combined with scalar MULs.
// dst is a TEMP as well since it is written before the last lane is read.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Note: trailing "\n\t" removed from the last format line; it emitted
  // a dangling continuation line in PrintOptoAssembly output.
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16568 
// Multiply-reduce a 4 x int vector. INS copies the upper 64 bits of
// src2 onto the temp's lower half, a 2S MULV folds lanes pairwise
// (tmp[0] = s0*s2, tmp[1] = s1*s3), then the two partial products are
// moved out with UMOV and combined with scalar MULs against src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  // Note: trailing "\n\t" removed from the last format line; it emitted
  // a dangling continuation line in PrintOptoAssembly output.
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16593 
// Add-reduce a 2 x float vector with strictly-ordered scalar FADDS:
// dst = (src1 + src2[0]) + src2[1]. Lanes are brought to lane 0 of the
// vector temp with INS so scalar fadds can consume them in order
// (FP addition is not associative, so ordering is preserved).
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduce a 4 x float vector, again with strictly-ordered scalar
// adds: each lane 1..3 is INS-ed to lane 0 of the temp and accumulated.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16645 
// Multiply-reduce a 2 x float vector with strictly-ordered scalar FMULS:
// dst = (src1 * src2[0]) * src2[1].
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste typo in the disassembly annotation: this is a
  // 2-lane multiply reduction, not "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16665 
// Multiply-reduce a 4 x float vector with strictly-ordered scalar
// FMULS; each lane 1..3 is INS-ed to lane 0 of the temp and multiplied in.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste typo in the disassembly annotation: this is a
  // multiply reduction, not "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16697 
// Add-reduce a 2 x double vector with strictly-ordered scalar FADDD:
// dst = (src1 + src2[0]) + src2[1]. Lane 1 is INS-ed to lane 0 of the
// temp so the scalar add can consume it.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16717 
// Multiply-reduce a 2 x double vector with strictly-ordered scalar
// FMULD: dst = (src1 * src2[0]) * src2[1].
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste typo in the disassembly annotation: this is a
  // multiply reduction, not "add reduction2d".
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16737 
16738 // ====================VECTOR ARITHMETIC=======================================
16739 
16740 // --------------------------------- ADD --------------------------------------
16741 
// Lane-wise byte add, 64-bit vector (covers 4- and 8-lane byte vectors).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise byte add, 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short add, 64-bit vector (covers 2- and 4-lane short vectors).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short add, 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int add, 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int add, 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long add, 128-bit vector.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float add, 64-bit vector.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float add, 128-bit vector.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16869 
// Lane-wise double add, 128-bit vector.
// Added the length predicate for consistency with vsub2D/vmul2D and
// every other vecX rule in this section (AddVD on vecX can only be
// 2 lanes, so this does not change which nodes are matched).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16882 
16883 // --------------------------------- SUB --------------------------------------
16884 
// Lane-wise byte subtract, 64-bit vector (4- and 8-lane byte vectors).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise byte subtract, 128-bit vector.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short subtract, 64-bit vector (2- and 4-lane short vectors).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short subtract, 128-bit vector.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int subtract, 64-bit vector.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int subtract, 128-bit vector.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long subtract, 128-bit vector.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float subtract, 64-bit vector.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float subtract, 128-bit vector.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Lane-wise double subtract, 128-bit vector.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17026 
17027 // --------------------------------- MUL --------------------------------------
17028 
// Lane-wise short multiply, 64-bit vector (2- and 4-lane short vectors).
// Note: no MulVB rules exist — AArch64 MUL lacks a 64-bit-element form
// and byte multiply is not auto-vectorized here.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise short multiply, 128-bit vector.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise int multiply, 64-bit vector.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise int multiply, 128-bit vector.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise float multiply, 64-bit vector.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Lane-wise float multiply, 128-bit vector.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Lane-wise double multiply, 128-bit vector.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17127 
17128 // --------------------------------- MLA --------------------------------------
17129 
17130 instruct vmla4S(vecD dst, vecD src1, vecD src2)
17131 %{
17132   predicate(n->as_Vector()->length() == 2 ||
17133             n->as_Vector()->length() == 4);
17134   match(Set dst (AddVS dst (MulVS src1 src2)));
17135   ins_cost(INSN_COST);
17136   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
17137   ins_encode %{
17138     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
17139             as_FloatRegister($src1$$reg),
17140             as_FloatRegister($src2$$reg));
17141   %}
17142   ins_pipe(vmla64);
17143 %}
17144 
// Integer multiply-accumulate, 8 shorts (128-bit vector): dst += src1 * src2 (mla, T8H).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
17158 
// Integer multiply-accumulate, 2 ints (64-bit vector): dst += src1 * src2 (mla, T2S).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
17172 
// Integer multiply-accumulate, 4 ints (128-bit vector): dst += src1 * src2 (mla, T4S).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
17186 
17187 // dst + src1 * src2
// Fused multiply-add, 2 floats: dst = dst + src1 * src2 (fmla); guarded by UseFMA.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
17200 
17201 // dst + src1 * src2
// Fused multiply-add, 4 floats: dst = dst + src1 * src2 (fmla); guarded by UseFMA.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17214 
17215 // dst + src1 * src2
// Fused multiply-add, 2 doubles: dst = dst + src1 * src2 (fmla); guarded by UseFMA.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17228 
17229 // --------------------------------- MLS --------------------------------------
17230 
// Integer multiply-subtract, 2 or 4 shorts (64-bit vector): dst -= src1 * src2 (mls, T4H).
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
17245 
// Integer multiply-subtract, 8 shorts (128-bit vector): dst -= src1 * src2 (mls, T8H).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
17259 
// Integer multiply-subtract, 2 ints (64-bit vector): dst -= src1 * src2 (mls, T2S).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
17273 
// Integer multiply-subtract, 4 ints (128-bit vector): dst -= src1 * src2 (mls, T4S).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
17287 
17288 // dst - src1 * src2
// Fused multiply-subtract, 2 floats: dst = dst - src1 * src2 (fmls); guarded by UseFMA.
// Two match rules so the negation may appear on either multiplicand.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
17302 
17303 // dst - src1 * src2
// Fused multiply-subtract, 4 floats: dst = dst - src1 * src2 (fmls); guarded by UseFMA.
// Two match rules so the negation may appear on either multiplicand.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17317 
17318 // dst - src1 * src2
// Fused multiply-subtract, 2 doubles: dst = dst - src1 * src2 (fmls); guarded by UseFMA.
// Two match rules so the negation may appear on either multiplicand.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17332 
17333 // --------------------------------- DIV --------------------------------------
17334 
// Vector float divide, 2 floats (64-bit vector): dst = src1 / src2 (fdiv, T2S).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
17348 
// Vector float divide, 4 floats (128-bit vector): dst = src1 / src2 (fdiv, T4S).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17362 
// Vector float divide, 2 doubles (128-bit vector): dst = src1 / src2 (fdiv, T2D).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17376 
17377 // --------------------------------- SQRT -------------------------------------
17378 
// Vector square root, 2 doubles (128-bit vector): dst = sqrt(src) per lane (fsqrt, T2D).
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17390 
17391 // --------------------------------- ABS --------------------------------------
17392 
// Vector absolute value, 2 floats (64-bit vector): dst = |src| per lane (fabs, T2S).
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
17405 
// Vector absolute value, 4 floats (128-bit vector): dst = |src| per lane (fabs, T4S).
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17418 
// Vector absolute value, 2 doubles (128-bit vector): dst = |src| per lane (fabs, T2D).
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17431 
17432 // --------------------------------- NEG --------------------------------------
17433 
// Vector negate, 2 floats (64-bit vector): dst = -src per lane (fneg, T2S).
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
17446 
// Vector negate, 4 floats (128-bit vector): dst = -src per lane (fneg, T4S).
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17459 
// Vector negate, 2 doubles (128-bit vector): dst = -src per lane (fneg, T2D).
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17472 
17473 // --------------------------------- AND --------------------------------------
17474 
// Bitwise AND of 4- or 8-byte vectors: dst = src1 & src2 (andr, T8B); matched on
// byte length so it covers all element types of that width.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17489 
// Bitwise AND of 16-byte vectors: dst = src1 & src2 (andr, T16B).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17503 
17504 // --------------------------------- OR ---------------------------------------
17505 
// Bitwise OR of 4- or 8-byte vectors: dst = src1 | src2 (orr, T8B); matched on
// byte length so it covers all element types of that width.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // The format mnemonic must match the emitted instruction (orr), as in vor16B;
  // it previously said "and", which produced misleading PrintAssembly output.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17520 
// Bitwise OR of 16-byte vectors: dst = src1 | src2 (orr, T16B).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17534 
17535 // --------------------------------- XOR --------------------------------------
17536 
// Bitwise XOR of 4- or 8-byte vectors: dst = src1 ^ src2 (AArch64 eor, T8B);
// matched on byte length so it covers all element types of that width.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17551 
// Bitwise XOR of 16-byte vectors: dst = src1 ^ src2 (AArch64 eor, T16B).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17565 
17566 // ------------------------------ Shift ---------------------------------------
17567 
// Left-shift count: broadcast the GPR count into every byte lane of a vector
// register (dup, T16B) for use by the variable-shift rules below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17576 
17577 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// Right-shift count: broadcast the GPR count, then negate it, because AArch64
// sshl/ushl perform a right shift when given a negative per-lane count.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17587 
// Variable shift, 4 or 8 bytes: sshl handles both left and arithmetic right
// shifts (right shifts are given a negated count by vshiftcntR above).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17602 
// Variable shift, 16 bytes: sshl handles both left and arithmetic right shifts
// (right shifts are given a negated count by vshiftcntR above).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17616 
// Variable logical right shift, 4 or 8 bytes: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17630 
// Variable logical right shift, 16 bytes: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17643 
// Immediate left shift, 4 or 8 bytes. A count >= 8 cannot be encoded in shl's
// immediate field, so the result is zeroed instead (eor dst, src, src).
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17663 
// Immediate left shift, 16 bytes. A count >= 8 cannot be encoded in shl's
// immediate field, so the result is zeroed instead (eor dst, src, src).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17682 
// Immediate arithmetic right shift, 4 or 8 bytes. Counts >= 8 are clamped to 7
// so every lane fills with its sign bit.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17697 
// Immediate arithmetic right shift, 16 bytes. Counts >= 8 are clamped to 7
// so every lane fills with its sign bit.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17711 
// Immediate logical right shift, 4 or 8 bytes. A count >= 8 shifts out every
// bit, so the result is zeroed instead (eor dst, src, src).
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17731 
// Immediate logical right shift, 16 bytes. A count >= 8 shifts out every
// bit, so the result is zeroed instead (eor dst, src, src).
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17750 
// Variable shift, 2 or 4 shorts: sshl handles both left and arithmetic right
// shifts (right shifts are given a negated count by vshiftcntR above).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17765 
// Variable shift, 8 shorts: sshl handles both left and arithmetic right shifts
// (right shifts are given a negated count by vshiftcntR above).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17779 
// Variable logical right shift, 2 or 4 shorts: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17793 
// Variable logical right shift, 8 shorts: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17806 
// Immediate left shift, 2 or 4 shorts. A count >= 16 cannot be encoded in
// shl's immediate field, so the result is zeroed instead (eor dst, src, src).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17826 
// Immediate left shift, 8 shorts. A count >= 16 cannot be encoded in shl's
// immediate field, so the result is zeroed instead (eor dst, src, src).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17845 
// Immediate arithmetic right shift, 2 or 4 shorts. Counts >= 16 are clamped to
// 15 so every lane fills with its sign bit.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17860 
// Immediate arithmetic right shift, 8 shorts. Counts >= 16 are clamped to 15
// so every lane fills with its sign bit.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17874 
// Immediate logical right shift, 2 or 4 shorts. A count >= 16 shifts out every
// bit, so the result is zeroed instead (eor dst, src, src).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17894 
// Immediate logical right shift, 8 shorts. A count >= 16 shifts out every
// bit, so the result is zeroed instead (eor dst, src, src).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17913 
// Variable shift, 2 ints: sshl handles both left and arithmetic right shifts
// (right shifts are given a negated count by vshiftcntR above).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17927 
// Variable shift, 4 ints: sshl handles both left and arithmetic right shifts
// (right shifts are given a negated count by vshiftcntR above).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17941 
// Variable logical right shift, 2 ints: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17954 
// Variable logical right shift, 4 ints: ushl with the negated count
// from vshiftcntR performs the unsigned right shift.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17967 
// Immediate left shift, 2 ints (shl, T2S). No out-of-range guard here:
// 32-bit shift constants are presumably already masked upstream — TODO confirm.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
17980 
// Immediate left shift, 4 ints (shl, T4S). No out-of-range guard here:
// 32-bit shift constants are presumably already masked upstream — TODO confirm.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17993 
// Immediate arithmetic right shift, 2 ints (sshr, T2S).
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18006 
// Immediate arithmetic right shift, 4 ints (sshr, T4S).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18019 
// Immediate logical right shift, 2 ints (ushr, T2S).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
18032 
// Immediate logical right shift, 4 ints (ushr, T4S).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18045 
// Vector shift left of 2 longs by a variable (register) shift count.
// Matches LShiftVL only: SSHL shifts each element left by the *signed*
// per-element value in the shift register, and C2 supplies positive shift
// counts, so SSHL cannot implement RShiftVL directly.  (The previous
// additional `match(Set dst (RShiftVL src shift))` here miscompiled
// variable arithmetic right shifts into left shifts; RShiftVL is now
// handled by vsra2L below, which negates the counts first.)
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Vector arithmetic shift right of 2 longs by a variable shift count.
// SSHL shifts right only when the per-element shift value is negative,
// so negate the (positive) counts into a TEMP register first.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
18059 
// Vector logical shift right of 2 longs by a variable (register) shift count.
// USHL shifts left for positive per-element shift values and right only for
// negative ones; C2 supplies positive counts for URShiftVL, so emitting USHL
// with the raw shift register (as this rule previously did) produced a left
// shift.  Negate the counts into a TEMP register before USHL.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
18072 
// Vector shift left of 2 longs by an immediate count.
// Emits SHL (shift left, immediate) with the 128-bit 2D arrangement.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18085 
// Vector arithmetic shift right of 2 longs by an immediate count.
// Emits SSHR (signed shift right, immediate) with the 128-bit 2D arrangement.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18098 
// Vector logical shift right of 2 longs by an immediate count.
// Emits USHR (unsigned shift right, immediate) with the 128-bit 2D arrangement.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
18111 
18112 //----------PEEPHOLE RULES-----------------------------------------------------
18113 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
18115 //
18116 // peepmatch ( root_instr_name [preceding_instruction]* );
18117 //
18118 // peepconstraint %{
18119 // (instruction_number.operand_name relational_op instruction_number.operand_name
18120 //  [, ...] );
18121 // // instruction numbers are zero-based using left to right order in peepmatch
18122 //
18123 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18124 // // provide an instruction_number.operand_name for each operand that appears
18125 // // in the replacement instruction's match rule
18126 //
18127 // ---------VM FLAGS---------------------------------------------------------
18128 //
18129 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18130 //
18131 // Each peephole rule is given an identifying number starting with zero and
18132 // increasing by one in the order seen by the parser.  An individual peephole
18133 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18134 // on the command-line.
18135 //
18136 // ---------CURRENT LIMITATIONS----------------------------------------------
18137 //
18138 // Only match adjacent instructions in same basic block
18139 // Only equality constraints
18140 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18141 // Only one replacement instruction
18142 //
18143 // ---------EXAMPLE----------------------------------------------------------
18144 //
18145 // // pertinent parts of existing instructions in architecture description
18146 // instruct movI(iRegINoSp dst, iRegI src)
18147 // %{
18148 //   match(Set dst (CopyI src));
18149 // %}
18150 //
18151 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18152 // %{
18153 //   match(Set dst (AddI dst src));
18154 //   effect(KILL cr);
18155 // %}
18156 //
18157 // // Change (inc mov) to lea
18158 // peephole %{
//   // increment preceded by register-register move
18160 //   peepmatch ( incI_iReg movI );
18161 //   // require that the destination register of the increment
18162 //   // match the destination register of the move
18163 //   peepconstraint ( 0.dst == 1.dst );
18164 //   // construct a replacement instruction that sets
18165 //   // the destination to ( move's source register + one )
18166 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18167 // %}
18168 //
18169 
18170 // Implementation no longer uses movX instructions since
18171 // machine-independent system no longer uses CopyX nodes.
18172 //
18173 // peephole
18174 // %{
18175 //   peepmatch (incI_iReg movI);
18176 //   peepconstraint (0.dst == 1.dst);
18177 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18178 // %}
18179 
18180 // peephole
18181 // %{
18182 //   peepmatch (decI_iReg movI);
18183 //   peepconstraint (0.dst == 1.dst);
18184 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18185 // %}
18186 
18187 // peephole
18188 // %{
18189 //   peepmatch (addI_iReg_imm movI);
18190 //   peepconstraint (0.dst == 1.dst);
18191 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18192 // %}
18193 
18194 // peephole
18195 // %{
18196 //   peepmatch (incL_iReg movL);
18197 //   peepconstraint (0.dst == 1.dst);
18198 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18199 // %}
18200 
18201 // peephole
18202 // %{
18203 //   peepmatch (decL_iReg movL);
18204 //   peepconstraint (0.dst == 1.dst);
18205 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18206 // %}
18207 
18208 // peephole
18209 // %{
18210 //   peepmatch (addL_iReg_imm movL);
18211 //   peepconstraint (0.dst == 1.dst);
18212 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18213 // %}
18214 
18215 // peephole
18216 // %{
18217 //   peepmatch (addP_iReg_imm movP);
18218 //   peepconstraint (0.dst == 1.dst);
18219 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18220 // %}
18221 
18222 // // Change load of spilled value to only a spill
18223 // instruct storeI(memory mem, iRegI src)
18224 // %{
18225 //   match(Set mem (StoreI mem src));
18226 // %}
18227 //
18228 // instruct loadI(iRegINoSp dst, memory mem)
18229 // %{
18230 //   match(Set dst (LoadI mem));
18231 // %}
18232 //
18233 
18234 //----------SMARTSPILL RULES---------------------------------------------------
18235 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
18237 
18238 // Local Variables:
18239 // mode: c++
18240 // End: