1 //
   2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
  31 // archtecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
 160 // for Java use float registers v0-v15 are always save on call whereas
 161 // the platform ABI treats v8-v15 as callee save). float registers
 162 // v16-v31 are SOC as per the platform spec
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
 328 // the AArch64 CSPR status flag register is not directly acessible as
 329 // instruction operand. the FPSR status flag register is a system
 330 // register which can be written/read using MSR/MRS but again does not
 331 // appear as an operand (a code identifying the FSPR occurs as an
 332 // immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
 345 alloc_class chunk0(
 346     // volatiles
 347     R10, R10_H,
 348     R11, R11_H,
 349     R12, R12_H,
 350     R13, R13_H,
 351     R14, R14_H,
 352     R15, R15_H,
 353     R16, R16_H,
 354     R17, R17_H,
 355     R18, R18_H,
 356 
 357     // arg registers
 358     R0, R0_H,
 359     R1, R1_H,
 360     R2, R2_H,
 361     R3, R3_H,
 362     R4, R4_H,
 363     R5, R5_H,
 364     R6, R6_H,
 365     R7, R7_H,
 366 
 367     // non-volatiles
 368     R19, R19_H,
 369     R20, R20_H,
 370     R21, R21_H,
 371     R22, R22_H,
 372     R23, R23_H,
 373     R24, R24_H,
 374     R25, R25_H,
 375     R26, R26_H,
 376 
 377     // non-allocatable registers
 378 
 379     R27, R27_H, // heapbase
 380     R28, R28_H, // thread
 381     R29, R29_H, // fp
 382     R30, R30_H, // lr
 383     R31, R31_H, // sp
 384 );
 385 
 386 alloc_class chunk1(
 387 
 388     // no save
 389     V16, V16_H, V16_J, V16_K,
 390     V17, V17_H, V17_J, V17_K,
 391     V18, V18_H, V18_J, V18_K,
 392     V19, V19_H, V19_J, V19_K,
 393     V20, V20_H, V20_J, V20_K,
 394     V21, V21_H, V21_J, V21_K,
 395     V22, V22_H, V22_J, V22_K,
 396     V23, V23_H, V23_J, V23_K,
 397     V24, V24_H, V24_J, V24_K,
 398     V25, V25_H, V25_J, V25_K,
 399     V26, V26_H, V26_J, V26_K,
 400     V27, V27_H, V27_J, V27_K,
 401     V28, V28_H, V28_J, V28_K,
 402     V29, V29_H, V29_J, V29_K,
 403     V30, V30_H, V30_J, V30_K,
 404     V31, V31_H, V31_J, V31_K,
 405 
 406     // arg registers
 407     V0, V0_H, V0_J, V0_K,
 408     V1, V1_H, V1_J, V1_K,
 409     V2, V2_H, V2_J, V2_K,
 410     V3, V3_H, V3_J, V3_K,
 411     V4, V4_H, V4_J, V4_K,
 412     V5, V5_H, V5_J, V5_K,
 413     V6, V6_H, V6_J, V6_K,
 414     V7, V7_H, V7_J, V7_K,
 415 
 416     // non-volatiles
 417     V8, V8_H, V8_J, V8_K,
 418     V9, V9_H, V9_J, V9_K,
 419     V10, V10_H, V10_J, V10_K,
 420     V11, V11_H, V11_J, V11_K,
 421     V12, V12_H, V12_J, V12_K,
 422     V13, V13_H, V13_J, V13_K,
 423     V14, V14_H, V14_J, V14_K,
 424     V15, V15_H, V15_J, V15_K,
 425 );
 426 
 427 alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
 432 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 433 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580  /* R29, */                     // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649  /* R29, R29_H, */              // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
 868 // Class for all 64bit vector registers
 869 reg_class vectord_reg(
 870     V0, V0_H,
 871     V1, V1_H,
 872     V2, V2_H,
 873     V3, V3_H,
 874     V4, V4_H,
 875     V5, V5_H,
 876     V6, V6_H,
 877     V7, V7_H,
 878     V8, V8_H,
 879     V9, V9_H,
 880     V10, V10_H,
 881     V11, V11_H,
 882     V12, V12_H,
 883     V13, V13_H,
 884     V14, V14_H,
 885     V15, V15_H,
 886     V16, V16_H,
 887     V17, V17_H,
 888     V18, V18_H,
 889     V19, V19_H,
 890     V20, V20_H,
 891     V21, V21_H,
 892     V22, V22_H,
 893     V23, V23_H,
 894     V24, V24_H,
 895     V25, V25_H,
 896     V26, V26_H,
 897     V27, V27_H,
 898     V28, V28_H,
 899     V29, V29_H,
 900     V30, V30_H,
 901     V31, V31_H
 902 );
 903 
 904 // Class for all 128bit vector registers
 905 reg_class vectorx_reg(
 906     V0, V0_H, V0_J, V0_K,
 907     V1, V1_H, V1_J, V1_K,
 908     V2, V2_H, V2_J, V2_K,
 909     V3, V3_H, V3_J, V3_K,
 910     V4, V4_H, V4_J, V4_K,
 911     V5, V5_H, V5_J, V5_K,
 912     V6, V6_H, V6_J, V6_K,
 913     V7, V7_H, V7_J, V7_K,
 914     V8, V8_H, V8_J, V8_K,
 915     V9, V9_H, V9_J, V9_K,
 916     V10, V10_H, V10_J, V10_K,
 917     V11, V11_H, V11_J, V11_K,
 918     V12, V12_H, V12_J, V12_K,
 919     V13, V13_H, V13_J, V13_K,
 920     V14, V14_H, V14_J, V14_K,
 921     V15, V15_H, V15_J, V15_K,
 922     V16, V16_H, V16_J, V16_K,
 923     V17, V17_H, V17_J, V17_K,
 924     V18, V18_H, V18_J, V18_K,
 925     V19, V19_H, V19_J, V19_K,
 926     V20, V20_H, V20_J, V20_K,
 927     V21, V21_H, V21_J, V21_K,
 928     V22, V22_H, V22_J, V22_K,
 929     V23, V23_H, V23_J, V23_K,
 930     V24, V24_H, V24_J, V24_K,
 931     V25, V25_H, V25_J, V25_K,
 932     V26, V26_H, V26_J, V26_K,
 933     V27, V27_H, V27_J, V27_K,
 934     V28, V28_H, V28_J, V28_K,
 935     V29, V29_H, V29_J, V29_K,
 936     V30, V30_H, V30_J, V30_K,
 937     V31, V31_H, V31_J, V31_K
 938 );
 939 
 940 // Class for 128 bit register v0
 941 reg_class v0_reg(
 942     V0, V0_H
 943 );
 944 
 945 // Class for 128 bit register v1
 946 reg_class v1_reg(
 947     V1, V1_H
 948 );
 949 
 950 // Class for 128 bit register v2
 951 reg_class v2_reg(
 952     V2, V2_H
 953 );
 954 
 955 // Class for 128 bit register v3
 956 reg_class v3_reg(
 957     V3, V3_H
 958 );
 959 
 960 // Singleton class for condition codes
 961 reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
 983 definitions %{
 984   // The default cost (of a register move instruction).
 985   int_def INSN_COST            (    100,     100);
 986   int_def BRANCH_COST          (    200,     2 * INSN_COST);
 987   int_def CALL_COST            (    200,     2 * INSN_COST);
 988   int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
 989 %}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "opto/addnode.hpp"
1000 
1001 class CallStubImpl {
1002 
1003   //--------------------------------------------------------------
1004   //---<  Used for optimization in Compile::shorten_branches  >---
1005   //--------------------------------------------------------------
1006 
1007  public:
1008   // Size of call trampoline stub.
1009   static uint size_call_trampoline() {
1010     return 0; // no call trampolines on this platform
1011   }
1012 
1013   // number of relocations needed by a call trampoline stub
1014   static uint reloc_call_trampoline() {
1015     return 0; // no call trampolines on this platform
1016   }
1017 };
1018 
1019 class HandlerImpl {
1020 
1021  public:
1022 
1023   static int emit_exception_handler(CodeBuffer &cbuf);
1024   static int emit_deopt_handler(CodeBuffer& cbuf);
1025 
1026   static uint size_exception_handler() {
1027     return MacroAssembler::far_branch_size();
1028   }
1029 
1030   static uint size_deopt_handler() {
1031     // count one adr and one far branch instruction
1032     return 4 * NativeInstruction::instruction_size;
1033   }
1034 };
1035 
1036   // graph traversal helpers
1037 
1038   MemBarNode *parent_membar(const Node *n);
1039   MemBarNode *child_membar(const MemBarNode *n);
1040   bool leading_membar(const MemBarNode *barrier);
1041 
1042   bool is_card_mark_membar(const MemBarNode *barrier);
1043   bool is_CAS(int opcode);
1044 
1045   MemBarNode *leading_to_trailing(MemBarNode *leading);
1046   MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
1047   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1048 
1049   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1050 
1051   bool unnecessary_acquire(const Node *barrier);
1052   bool needs_acquiring_load(const Node *load);
1053 
1054   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1055 
1056   bool unnecessary_release(const Node *barrier);
1057   bool unnecessary_volatile(const Node *barrier);
1058   bool needs_releasing_store(const Node *store);
1059 
1060   // predicate controlling translation of CompareAndSwapX
1061   bool needs_acquiring_load_exclusive(const Node *load);
1062 
1063   // predicate controlling translation of StoreCM
1064   bool unnecessary_storestore(const Node *storecm);
1065 
1066   // predicate controlling addressing modes
1067   bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1068 %}
1069 
1070 source %{
1071 
1072   // Optimizaton of volatile gets and puts
1073   // -------------------------------------
1074   //
1075   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1076   // use to implement volatile reads and writes. For a volatile read
1077   // we simply need
1078   //
1079   //   ldar<x>
1080   //
1081   // and for a volatile write we need
1082   //
1083   //   stlr<x>
1084   //
1085   // Alternatively, we can implement them by pairing a normal
1086   // load/store with a memory barrier. For a volatile read we need
1087   //
1088   //   ldr<x>
1089   //   dmb ishld
1090   //
1091   // for a volatile write
1092   //
1093   //   dmb ish
1094   //   str<x>
1095   //   dmb ish
1096   //
1097   // We can also use ldaxr and stlxr to implement compare and swap CAS
1098   // sequences. These are normally translated to an instruction
1099   // sequence like the following
1100   //
1101   //   dmb      ish
1102   // retry:
1103   //   ldxr<x>   rval raddr
1104   //   cmp       rval rold
1105   //   b.ne done
1106   //   stlxr<x>  rval, rnew, rold
1107   //   cbnz      rval retry
1108   // done:
1109   //   cset      r0, eq
1110   //   dmb ishld
1111   //
1112   // Note that the exclusive store is already using an stlxr
1113   // instruction. That is required to ensure visibility to other
1114   // threads of the exclusive write (assuming it succeeds) before that
1115   // of any subsequent writes.
1116   //
1117   // The following instruction sequence is an improvement on the above
1118   //
1119   // retry:
1120   //   ldaxr<x>  rval raddr
1121   //   cmp       rval rold
1122   //   b.ne done
1123   //   stlxr<x>  rval, rnew, rold
1124   //   cbnz      rval retry
1125   // done:
1126   //   cset      r0, eq
1127   //
1128   // We don't need the leading dmb ish since the stlxr guarantees
1129   // visibility of prior writes in the case that the swap is
1130   // successful. Crucially we don't have to worry about the case where
1131   // the swap is not successful since no valid program should be
1132   // relying on visibility of prior changes by the attempting thread
1133   // in the case where the CAS fails.
1134   //
1135   // Similarly, we don't need the trailing dmb ishld if we substitute
1136   // an ldaxr instruction since that will provide all the guarantees we
1137   // require regarding observation of changes made by other threads
1138   // before any change to the CAS address observed by the load.
1139   //
1140   // In order to generate the desired instruction sequence we need to
1141   // be able to identify specific 'signature' ideal graph node
1142   // sequences which i) occur as a translation of a volatile reads or
1143   // writes or CAS operations and ii) do not occur through any other
1144   // translation or graph transformation. We can then provide
1145   // alternative aldc matching rules which translate these node
1146   // sequences to the desired machine code sequences. Selection of the
1147   // alternative rules can be implemented by predicates which identify
1148   // the relevant node sequences.
1149   //
1150   // The ideal graph generator translates a volatile read to the node
1151   // sequence
1152   //
1153   //   LoadX[mo_acquire]
1154   //   MemBarAcquire
1155   //
1156   // As a special case when using the compressed oops optimization we
1157   // may also see this variant
1158   //
1159   //   LoadN[mo_acquire]
1160   //   DecodeN
1161   //   MemBarAcquire
1162   //
1163   // A volatile write is translated to the node sequence
1164   //
1165   //   MemBarRelease
1166   //   StoreX[mo_release] {CardMark}-optional
1167   //   MemBarVolatile
1168   //
1169   // n.b. the above node patterns are generated with a strict
1170   // 'signature' configuration of input and output dependencies (see
1171   // the predicates below for exact details). The card mark may be as
1172   // simple as a few extra nodes or, in a few GC configurations, may
1173   // include more complex control flow between the leading and
1174   // trailing memory barriers. However, whatever the card mark
1175   // configuration these signatures are unique to translated volatile
1176   // reads/stores -- they will not appear as a result of any other
1177   // bytecode translation or inlining nor as a consequence of
1178   // optimizing transforms.
1179   //
1180   // We also want to catch inlined unsafe volatile gets and puts and
1181   // be able to implement them using either ldar<x>/stlr<x> or some
1182   // combination of ldr<x>/stlr<x> and dmb instructions.
1183   //
1184   // Inlined unsafe volatiles puts manifest as a minor variant of the
1185   // normal volatile put node sequence containing an extra cpuorder
1186   // membar
1187   //
1188   //   MemBarRelease
1189   //   MemBarCPUOrder
1190   //   StoreX[mo_release] {CardMark}-optional
1191   //   MemBarVolatile
1192   //
1193   // n.b. as an aside, the cpuorder membar is not itself subject to
1194   // matching and translation by adlc rules.  However, the rule
1195   // predicates need to detect its presence in order to correctly
1196   // select the desired adlc rules.
1197   //
1198   // Inlined unsafe volatile gets manifest as a somewhat different
1199   // node sequence to a normal volatile get
1200   //
1201   //   MemBarCPUOrder
1202   //        ||       \\
1203   //   MemBarAcquire LoadX[mo_acquire]
1204   //        ||
1205   //   MemBarCPUOrder
1206   //
1207   // In this case the acquire membar does not directly depend on the
1208   // load. However, we can be sure that the load is generated from an
1209   // inlined unsafe volatile get if we see it dependent on this unique
1210   // sequence of membar nodes. Similarly, given an acquire membar we
1211   // can know that it was added because of an inlined unsafe volatile
1212   // get if it is fed and feeds a cpuorder membar and if its feed
1213   // membar also feeds an acquiring load.
1214   //
1215   // Finally an inlined (Unsafe) CAS operation is translated to the
1216   // following ideal graph
1217   //
1218   //   MemBarRelease
1219   //   MemBarCPUOrder
1220   //   CompareAndSwapX {CardMark}-optional
1221   //   MemBarCPUOrder
1222   //   MemBarAcquire
1223   //
1224   // So, where we can identify these volatile read and write
1225   // signatures we can choose to plant either of the above two code
1226   // sequences. For a volatile read we can simply plant a normal
1227   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1228   // also choose to inhibit translation of the MemBarAcquire and
1229   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1230   //
1231   // When we recognise a volatile store signature we can choose to
1232   // plant at a dmb ish as a translation for the MemBarRelease, a
1233   // normal str<x> and then a dmb ish for the MemBarVolatile.
1234   // Alternatively, we can inhibit translation of the MemBarRelease
1235   // and MemBarVolatile and instead plant a simple stlr<x>
1236   // instruction.
1237   //
1238   // when we recognise a CAS signature we can choose to plant a dmb
1239   // ish as a translation for the MemBarRelease, the conventional
1240   // macro-instruction sequence for the CompareAndSwap node (which
1241   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1242   // Alternatively, we can elide generation of the dmb instructions
1243   // and plant the alternative CompareAndSwap macro-instruction
1244   // sequence (which uses ldaxr<x>).
1245   //
1246   // Of course, the above only applies when we see these signature
1247   // configurations. We still want to plant dmb instructions in any
1248   // other cases where we may see a MemBarAcquire, MemBarRelease or
1249   // MemBarVolatile. For example, at the end of a constructor which
1250   // writes final/volatile fields we will see a MemBarRelease
1251   // instruction and this needs a 'dmb ish' lest we risk the
1252   // constructed object being visible without making the
1253   // final/volatile field writes visible.
1254   //
1255   // n.b. the translation rules below which rely on detection of the
1256   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1257   // If we see anything other than the signature configurations we
1258   // always just translate the loads and stores to ldr<x> and str<x>
1259   // and translate acquire, release and volatile membars to the
1260   // relevant dmb instructions.
1261   //
1262 
1263   // graph traversal helpers used for volatile put/get and CAS
1264   // optimization
1265 
1266   // 1) general purpose helpers
1267 
1268   // if node n is linked to a parent MemBarNode by an intervening
1269   // Control and Memory ProjNode return the MemBarNode otherwise return
1270   // NULL.
1271   //
1272   // n may only be a Load or a MemBar.
1273 
1274   MemBarNode *parent_membar(const Node *n)
1275   {
1276     Node *ctl = NULL;
1277     Node *mem = NULL;
1278     Node *membar = NULL;
1279 
1280     if (n->is_Load()) {
1281       ctl = n->lookup(LoadNode::Control);
1282       mem = n->lookup(LoadNode::Memory);
1283     } else if (n->is_MemBar()) {
1284       ctl = n->lookup(TypeFunc::Control);
1285       mem = n->lookup(TypeFunc::Memory);
1286     } else {
1287         return NULL;
1288     }
1289 
1290     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1291       return NULL;
1292     }
1293 
1294     membar = ctl->lookup(0);
1295 
1296     if (!membar || !membar->is_MemBar()) {
1297       return NULL;
1298     }
1299 
1300     if (mem->lookup(0) != membar) {
1301       return NULL;
1302     }
1303 
1304     return membar->as_MemBar();
1305   }
1306 
1307   // if n is linked to a child MemBarNode by intervening Control and
1308   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1309 
1310   MemBarNode *child_membar(const MemBarNode *n)
1311   {
1312     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1313     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1314 
1315     // MemBar needs to have both a Ctl and Mem projection
1316     if (! ctl || ! mem)
1317       return NULL;
1318 
1319     MemBarNode *child = NULL;
1320     Node *x;
1321 
1322     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1323       x = ctl->fast_out(i);
1324       // if we see a membar we keep hold of it. we may also see a new
1325       // arena copy of the original but it will appear later
1326       if (x->is_MemBar()) {
1327           child = x->as_MemBar();
1328           break;
1329       }
1330     }
1331 
1332     if (child == NULL) {
1333       return NULL;
1334     }
1335 
1336     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1337       x = mem->fast_out(i);
1338       // if we see a membar we keep hold of it. we may also see a new
1339       // arena copy of the original but it will appear later
1340       if (x == child) {
1341         return child;
1342       }
1343     }
1344     return NULL;
1345   }
1346 
1347   // helper predicate use to filter candidates for a leading memory
1348   // barrier
1349   //
1350   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1351   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1352 
1353   bool leading_membar(const MemBarNode *barrier)
1354   {
1355     int opcode = barrier->Opcode();
1356     // if this is a release membar we are ok
1357     if (opcode == Op_MemBarRelease) {
1358       return true;
1359     }
1360     // if its a cpuorder membar . . .
1361     if (opcode != Op_MemBarCPUOrder) {
1362       return false;
1363     }
1364     // then the parent has to be a release membar
1365     MemBarNode *parent = parent_membar(barrier);
1366     if (!parent) {
1367       return false;
1368     }
1369     opcode = parent->Opcode();
1370     return opcode == Op_MemBarRelease;
1371   }
1372 
1373   // 2) card mark detection helper
1374 
1375   // helper predicate which can be used to detect a volatile membar
1376   // introduced as part of a conditional card mark sequence either by
1377   // G1 or by CMS when UseCondCardMark is true.
1378   //
1379   // membar can be definitively determined to be part of a card mark
1380   // sequence if and only if all the following hold
1381   //
1382   // i) it is a MemBarVolatile
1383   //
1384   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1385   // true
1386   //
1387   // iii) the node's Mem projection feeds a StoreCM node.
1388 
1389   bool is_card_mark_membar(const MemBarNode *barrier)
1390   {
1391     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1392       return false;
1393     }
1394 
1395     if (barrier->Opcode() != Op_MemBarVolatile) {
1396       return false;
1397     }
1398 
1399     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1400 
1401     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1402       Node *y = mem->fast_out(i);
1403       if (y->Opcode() == Op_StoreCM) {
1404         return true;
1405       }
1406     }
1407 
1408     return false;
1409   }
1410 
1411 
1412   // 3) helper predicates to traverse volatile put or CAS graphs which
1413   // may contain GC barrier subgraphs
1414 
1415   // Preamble
1416   // --------
1417   //
1418   // for volatile writes we can omit generating barriers and employ a
1419   // releasing store when we see a node sequence sequence with a
1420   // leading MemBarRelease and a trailing MemBarVolatile as follows
1421   //
1422   //   MemBarRelease
1423   //  {    ||        } -- optional
1424   //  {MemBarCPUOrder}
1425   //       ||       \\
1426   //       ||     StoreX[mo_release]
1427   //       | \ Bot    / ???
1428   //       | MergeMem
1429   //       | /
1430   //   MemBarVolatile
1431   //
1432   // where
1433   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1434   //  | \ and / indicate further routing of the Ctl and Mem feeds
1435   //
1436   // Note that the memory feed from the CPUOrder membar to the
1437   // MergeMem node is an AliasIdxBot slice while the feed from the
1438   // StoreX is for a slice determined by the type of value being
1439   // written.
1440   //
1441   // the diagram above shows the graph we see for non-object stores.
1442   // for a volatile Object store (StoreN/P) we may see other nodes
1443   // below the leading membar because of the need for a GC pre- or
1444   // post-write barrier.
1445   //
1446   // with most GC configurations we with see this simple variant which
1447   // includes a post-write barrier card mark.
1448   //
1449   //   MemBarRelease______________________________
1450   //         ||    \\               Ctl \        \\
1451   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1452   //         | \ Bot  / oop                 . . .  /
1453   //         | MergeMem
1454   //         | /
1455   //         ||      /
1456   //   MemBarVolatile
1457   //
1458   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1459   // the object address to an int used to compute the card offset) and
1460   // Ctl+Mem to a StoreB node (which does the actual card mark).
1461   //
1462   // n.b. a StoreCM node is only ever used when CMS (with or without
1463   // CondCardMark) or G1 is configured. This abstract instruction
1464   // differs from a normal card mark write (StoreB) because it implies
1465   // a requirement to order visibility of the card mark (StoreCM)
1466   // after that of the object put (StoreP/N) using a StoreStore memory
1467   // barrier. Note that this is /not/ a requirement to order the
1468   // instructions in the generated code (that is already guaranteed by
1469   // the order of memory dependencies). Rather it is a requirement to
1470   // ensure visibility order which only applies on architectures like
1471   // AArch64 which do not implement TSO. This ordering is required for
1472   // both non-volatile and volatile puts.
1473   //
1474   // That implies that we need to translate a StoreCM using the
1475   // sequence
1476   //
1477   //   dmb ishst
1478   //   stlrb
1479   //
1480   // This dmb cannot be omitted even when the associated StoreX or
1481   // CompareAndSwapX is implemented using stlr. However, as described
1482   // below there are circumstances where a specific GC configuration
1483   // requires a stronger barrier in which case it can be omitted.
1484   // 
1485   // With the Serial or Parallel GC using +CondCardMark the card mark
1486   // is performed conditionally on it currently being unmarked in
1487   // which case the volatile put graph looks slightly different
1488   //
1489   //   MemBarRelease____________________________________________
1490   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1491   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1492   //         | \ Bot / oop                          \            |
1493   //         | MergeMem                            . . .      StoreB
1494   //         | /                                                /
1495   //         ||     /
1496   //   MemBarVolatile
1497   //
1498   // It is worth noting at this stage that all the above
1499   // configurations can be uniquely identified by checking that the
1500   // memory flow includes the following subgraph:
1501   //
1502   //   MemBarRelease
1503   //  {MemBarCPUOrder}
1504   //      |  \      . . .
1505   //      |  StoreX[mo_release]  . . .
1506   //  Bot |   / oop
1507   //     MergeMem
1508   //      |
1509   //   MemBarVolatile
1510   //
1511   // This is referred to as a *normal* volatile store subgraph. It can
1512   // easily be detected starting from any candidate MemBarRelease,
1513   // StoreX[mo_release] or MemBarVolatile node.
1514   //
1515   // A small variation on this normal case occurs for an unsafe CAS
1516   // operation. The basic memory flow subgraph for a non-object CAS is
1517   // as follows
1518   //
1519   //   MemBarRelease
1520   //         ||
1521   //   MemBarCPUOrder
1522   //          |     \\   . . .
1523   //          |     CompareAndSwapX
1524   //          |       |
1525   //      Bot |     SCMemProj
1526   //           \     / Bot
1527   //           MergeMem
1528   //           /
1529   //   MemBarCPUOrder
1530   //         ||
1531   //   MemBarAcquire
1532   //
1533   // The same basic variations on this arrangement (mutatis mutandis)
1534   // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1535   // feeds the extra CastP2X, LoadB etc nodes but the above memory
1536   // flow subgraph is still present.
1537   // 
1538   // This is referred to as a *normal* CAS subgraph. It can easily be
1539   // detected starting from any candidate MemBarRelease,
1540   // StoreX[mo_release] or MemBarAcquire node.
1541   //
1542   // The code below uses two helper predicates, leading_to_trailing
1543   // and trailing_to_leading to identify these normal graphs, one
1544   // validating the layout starting from the top membar and searching
1545   // down and the other validating the layout starting from the lower
1546   // membar and searching up.
1547   //
1548   // There are two special case GC configurations when the simple
1549   // normal graphs above may not be generated: when using G1 (which
1550   // always employs a conditional card mark); and when using CMS with
1551   // conditional card marking (+CondCardMark) configured. These GCs
1552   // are both concurrent rather than stop-the world GCs. So they
1553   // introduce extra Ctl+Mem flow into the graph between the leading
1554   // and trailing membar nodes, in particular enforcing stronger
1555   // memory serialisation beween the object put and the corresponding
1556   // conditional card mark. CMS employs a post-write GC barrier while
1557   // G1 employs both a pre- and post-write GC barrier.
1558   //
1559   // The post-write barrier subgraph for these configurations includes
1560   // a MemBarVolatile node -- referred to as a card mark membar --
1561   // which is needed to order the card write (StoreCM) operation in
1562   // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1563   // operations performed by GC threads i.e. a card mark membar
1564   // constitutes a StoreLoad barrier hence must be translated to a dmb
1565   // ish (whether or not it sits inside a volatile store sequence).
1566   //
1567   // Of course, the use of the dmb ish for the card mark membar also
1568   // implies theat the StoreCM which follows can omit the dmb ishst
1569   // instruction. The necessary visibility ordering will already be
1570   // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
1571   // needs to be generated for as part of the StoreCM sequence with GC
1572   // configuration +CMS -CondCardMark.
1573   // 
1574   // Of course all these extra barrier nodes may well be absent --
1575   // they are only inserted for object puts. Their potential presence
1576   // significantly complicates the task of identifying whether a
1577   // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1578   // MemBarAcquire forms part of a volatile put or CAS when using
1579   // these GC configurations (see below) and also complicates the
1580   // decision as to how to translate a MemBarVolatile and StoreCM.
1581   //
1582   // So, thjis means that a card mark MemBarVolatile occurring in the
1583   // post-barrier graph it needs to be distinguished from a normal
1584   // trailing MemBarVolatile. Resolving this is straightforward: a
1585   // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1586   // node and that is a unique marker
1587   //
1588   //      MemBarVolatile (card mark)
1589   //       C |    \     . . .
1590   //         |   StoreCM   . . .
1591   //       . . .
1592   //
1593   // Returning to the task of translating the object put and the
1594   // leading/trailing membar nodes: what do the node graphs look like
1595   // for these 2 special cases? and how can we determine the status of
1596   // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1597   // normal and non-normal cases?
1598   //
1599   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
1600   // which selects conditonal execution based on the value loaded
1601   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1602   // intervening StoreLoad barrier (MemBarVolatile).
1603   //
1604   // So, with CMS we may see a node graph for a volatile object store
1605   // which looks like this
1606   //
1607   //   MemBarRelease
1608   //   MemBarCPUOrder_(leading)____________________
1609   //     C |  | M \       \\               M |   C \
1610   //       |  |    \    StoreN/P[mo_release] |  CastP2X
1611   //       |  | Bot \    / oop      \        |
1612   //       |  |    MergeMem          \      / 
1613   //       |  |      /                |    /
1614   //     MemBarVolatile (card mark)   |   /
1615   //     C |  ||    M |               |  /
1616   //       | LoadB    | Bot       oop | / Bot
1617   //       |   |      |              / /
1618   //       | Cmp      |\            / /
1619   //       | /        | \          / /
1620   //       If         |  \        / /
1621   //       | \        |   \      / /
1622   // IfFalse  IfTrue  |    \    / /
1623   //       \     / \  |    |   / /
1624   //        \   / StoreCM  |  / /
1625   //         \ /      \   /  / /
1626   //        Region     Phi  / /
1627   //          | \   Raw |  / /
1628   //          |  . . .  | / /
1629   //          |       MergeMem
1630   //          |           |
1631   //        MemBarVolatile (trailing)
1632   //
1633   // Notice that there are two MergeMem nodes below the leading
1634   // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1635   // the leading membar and the oopptr Mem slice from the Store into
1636   // the card mark membar. The trailing MergeMem merges the
1637   // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1638   // slice from the StoreCM and an oop slice from the StoreN/P node
1639   // into the trailing membar (n.b. the raw slice proceeds via a Phi
1640   // associated with the If region).
1641   //
1642   // So, in the case of CMS + CondCardMark the volatile object store
1643   // graph still includes a normal volatile store subgraph from the
1644   // leading membar to the trailing membar. However, it also contains
1645   // the same shape memory flow to the card mark membar. The two flows
1646   // can be distinguished by testing whether or not the downstream
1647   // membar is a card mark membar.
1648   //
1649   // The graph for a CAS also varies with CMS + CondCardMark, in
1650   // particular employing a control feed from the CompareAndSwapX node
1651   // through a CmpI and If to the card mark membar and StoreCM which
1652   // updates the associated card. This avoids executing the card mark
1653   // if the CAS fails. However, it can be seen from the diagram below
1654   // that the presence of the barrier does not alter the normal CAS
1655   // memory subgraph where the leading membar feeds a CompareAndSwapX,
1656   // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1657   // MemBarAcquire pair.
1658   //
1659   //   MemBarRelease
1660   //   MemBarCPUOrder__(leading)_______________________
1661   //   C /  M |                        \\            C \
1662   //  . . .   | Bot                CompareAndSwapN/P   CastP2X
1663   //          |                  C /  M |
1664   //          |                 CmpI    |
1665   //          |                  /      |
1666   //          |               . . .     |
1667   //          |              IfTrue     |
1668   //          |              /          |
1669   //       MemBarVolatile (card mark)   |
1670   //        C |  ||    M |              |
1671   //          | LoadB    | Bot   ______/|
1672   //          |   |      |      /       |
1673   //          | Cmp      |     /      SCMemProj
1674   //          | /        |    /         |
1675   //          If         |   /         /
1676   //          | \        |  /         / Bot
1677   //     IfFalse  IfTrue | /         /
1678   //          |   / \   / / prec    /
1679   //   . . .  |  /  StoreCM        /
1680   //        \ | /      | raw      /
1681   //        Region    . . .      /
1682   //           | \              /
1683   //           |   . . .   \    / Bot
1684   //           |        MergeMem
1685   //           |          /
1686   //         MemBarCPUOrder
1687   //         MemBarAcquire (trailing)
1688   //
1689   // This has a slightly different memory subgraph to the one seen
1690   // previously but the core of it has a similar memory flow to the
1691   // CAS normal subgraph:
1692   //
1693   //   MemBarRelease
1694   //   MemBarCPUOrder____
1695   //         |          \      . . .
1696   //         |       CompareAndSwapX  . . .
1697   //         |       C /  M |
1698   //         |      CmpI    |
1699   //         |       /      |
1700   //         |      . .    /
1701   //     Bot |   IfTrue   /
1702   //         |   /       /
1703   //    MemBarVolatile  /
1704   //         | ...     /
1705   //      StoreCM ... /
1706   //         |       / 
1707   //       . . .  SCMemProj
1708   //      Raw \    / Bot
1709   //        MergeMem
1710   //           |
1711   //   MemBarCPUOrder
1712   //   MemBarAcquire
1713   //
1714   // The G1 graph for a volatile object put is a lot more complicated.
1715   // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1716   // which adds the old value to the SATB queue; the releasing store
1717   // itself; and, finally, a post-write graph which performs a card
1718   // mark.
1719   //
1720   // The pre-write graph may be omitted, but only when the put is
1721   // writing to a newly allocated (young gen) object and then only if
1722   // there is a direct memory chain to the Initialize node for the
1723   // object allocation. This will not happen for a volatile put since
1724   // any memory chain passes through the leading membar.
1725   //
1726   // The pre-write graph includes a series of 3 If tests. The outermost
1727   // If tests whether SATB is enabled (no else case). The next If tests
1728   // whether the old value is non-NULL (no else case). The third tests
1729   // whether the SATB queue index is > 0, if so updating the queue. The
1730   // else case for this third If calls out to the runtime to allocate a
1731   // new queue buffer.
1732   //
1733   // So with G1 the pre-write and releasing store subgraph looks like
1734   // this (the nested Ifs are omitted).
1735   //
1736   //  MemBarRelease (leading)____________
1737   //     C |  ||  M \   M \    M \  M \ . . .
1738   //       | LoadB   \  LoadL  LoadN   \
1739   //       | /        \                 \
1740   //       If         |\                 \
1741   //       | \        | \                 \
1742   //  IfFalse  IfTrue |  \                 \
1743   //       |     |    |   \                 |
1744   //       |     If   |   /\                |
1745   //       |     |          \               |
1746   //       |                 \              |
1747   //       |    . . .         \             |
1748   //       | /       | /       |            |
1749   //      Region  Phi[M]       |            |
1750   //       | \       |         |            |
1751   //       |  \_____ | ___     |            |
1752   //     C | C \     |   C \ M |            |
1753   //       | CastP2X | StoreN/P[mo_release] |
1754   //       |         |         |            |
1755   //     C |       M |       M |          M |
1756   //        \        | Raw     | oop       / Bot
1757   //                  . . .
1758   //          (post write subtree elided)
1759   //                    . . .
1760   //             C \         M /
1761   //         MemBarVolatile (trailing)
1762   //
1763   // Note that the three memory feeds into the post-write tree are an
1764   // AliasRawIdx slice associated with the writes in the pre-write
1765   // tree, an oop type slice from the StoreX specific to the type of
1766   // the volatile field and the AliasBotIdx slice emanating from the
1767   // leading membar.
1768   //
1769   // n.b. the LoadB in this subgraph is not the card read -- it's a
1770   // read of the SATB queue active flag.
1771   //
1772   // The CAS graph is once again a variant of the above with a
1773   // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
1774   // value from the CompareAndSwapX node is fed into the post-write
1775   // graph aling with the AliasIdxRaw feed from the pre-barrier and
1776   // the AliasIdxBot feeds from the leading membar and the ScMemProj.
1777   //
1778   //  MemBarRelease (leading)____________
1779   //     C |  ||  M \   M \    M \  M \ . . .
1780   //       | LoadB   \  LoadL  LoadN   \
1781   //       | /        \                 \
1782   //       If         |\                 \
1783   //       | \        | \                 \
1784   //  IfFalse  IfTrue |  \                 \
1785   //       |     |    |   \                 \
1786   //       |     If   |    \                 |
1787   //       |     |          \                |
1788   //       |                 \               |
1789   //       |    . . .         \              |
1790   //       | /       | /       \             |
1791   //      Region  Phi[M]        \            |
1792   //       | \       |           \           |
1793   //       |  \_____ |            |          |
1794   //     C | C \     |            |          |
1795   //       | CastP2X |     CompareAndSwapX   |
1796   //       |         |   res |     |         |
1797   //     C |       M |       |  SCMemProj  M |
1798   //        \        | Raw   |     | Bot    / Bot
1799   //                  . . .
1800   //          (post write subtree elided)
1801   //                    . . .
1802   //             C \         M /
1803   //         MemBarVolatile (trailing)
1804   //
1805   // The G1 post-write subtree is also optional, this time when the
1806   // new value being written is either null or can be identified as a
1807   // newly allocated (young gen) object with no intervening control
1808   // flow. The latter cannot happen but the former may, in which case
1809   // the card mark membar is omitted and the memory feeds from the
1810   // leading membar and the SToreN/P are merged direct into the
1811   // trailing membar as per the normal subgraph. So, the only special
1812   // case which arises is when the post-write subgraph is generated.
1813   //
1814   // The kernel of the post-write G1 subgraph is the card mark itself
1815   // which includes a card mark memory barrier (MemBarVolatile), a
1816   // card test (LoadB), and a conditional update (If feeding a
1817   // StoreCM). These nodes are surrounded by a series of nested Ifs
1818   // which try to avoid doing the card mark. The top level If skips if
1819   // the object reference does not cross regions (i.e. it tests if
1820   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1821   // need not be recorded. The next If, which skips on a NULL value,
1822   // may be absent (it is not generated if the type of value is >=
1823   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1824   // checking if card_val != young).  n.b. although this test requires
1825   // a pre-read of the card it can safely be done before the StoreLoad
1826   // barrier. However that does not bypass the need to reread the card
1827   // after the barrier.
1828   //
1829   //                (pre-write subtree elided)
1830   //        . . .                  . . .    . . .  . . .
1831   //        C |               M |    M |    M |
1832   //       Region            Phi[M] StoreN    |
1833   //          |            Raw  |  oop |  Bot |
1834   //         / \_______         |\     |\     |\
1835   //      C / C \      . . .    | \    | \    | \
1836   //       If   CastP2X . . .   |  \   |  \   |  \
1837   //       / \                  |   \  |   \  |   \
1838   //      /   \                 |    \ |    \ |    \
1839   // IfFalse IfTrue             |      |      |     \
1840   //   |       |                 \     |     /       |
1841   //   |       If                 \    | \  /   \    |
1842   //   |      / \                  \   |   /     \   |
1843   //   |     /   \                  \  |  / \     |  |
1844   //   | IfFalse IfTrue           MergeMem   \    |  |
1845   //   |  . . .    / \                 |      \   |  |
1846   //   |          /   \                |       |  |  |
1847   //   |     IfFalse IfTrue            |       |  |  |
1848   //   |      . . .    |               |       |  |  |
1849   //   |               If             /        |  |  |
1850   //   |               / \           /         |  |  |
1851   //   |              /   \         /          |  |  |
1852   //   |         IfFalse IfTrue    /           |  |  |
1853   //   |           . . .   |      /            |  |  |
1854   //   |                    \    /             |  |  |
1855   //   |                     \  /              |  |  |
1856   //   |         MemBarVolatile__(card mark  ) |  |  |
1857   //   |              ||   C |     \           |  |  |
1858   //   |             LoadB   If     |         /   |  |
1859   //   |                    / \ Raw |        /   /  /
1860   //   |                   . . .    |       /   /  /
1861   //   |                        \   |      /   /  /
1862   //   |                        StoreCM   /   /  /
1863   //   |                           |     /   /  /
1864   //   |                            . . .   /  /
1865   //   |                                   /  /
1866   //   |   . . .                          /  /
1867   //   |    |             | /            /  /
1868   //   |    |           Phi[M] /        /  /
1869   //   |    |             |   /        /  /
1870   //   |    |             |  /        /  /
1871   //   |  Region  . . .  Phi[M]      /  /
1872   //   |    |             |         /  /
1873   //    \   |             |        /  /
1874   //     \  | . . .       |       /  /
1875   //      \ |             |      /  /
1876   //      Region         Phi[M] /  /
1877   //        |               \  /  /
1878   //         \             MergeMem
1879   //          \            /
1880   //          MemBarVolatile
1881   //
1882   // As with CMS + CondCardMark the first MergeMem merges the
1883   // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1884   // slice from the Store into the card mark membar. However, in this
1885   // case it may also merge an AliasRawIdx mem slice from the pre
1886   // barrier write.
1887   //
1888   // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1889   // leading membar with an oop slice from the StoreN and an
1890   // AliasRawIdx slice from the post barrier writes. In this case the
1891   // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1892   // which combine feeds from the If regions in the post barrier
1893   // subgraph.
1894   //
1895   // So, for G1 the same characteristic subgraph arises as for CMS +
1896   // CondCardMark. There is a normal subgraph feeding the card mark
1897   // membar and a normal subgraph feeding the trailing membar.
1898   //
1899   // The CAS graph when using G1GC also includes an optional
1900   // post-write subgraph. It is very similar to the above graph except
1901   // for a few details.
1902   // 
1903   // - The control flow is gated by an additonal If which tests the
1904   // result from the CompareAndSwapX node
1905   // 
1906   //  - The MergeMem which feeds the card mark membar only merges the
1907   // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1908   // slice from the pre-barrier. It does not merge the SCMemProj
1909   // AliasIdxBot slice. So, this subgraph does not look like the
1910   // normal CAS subgraph.
1911   //
1912   // - The MergeMem which feeds the trailing membar merges the
1913   // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1914   // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1915   // has two AliasIdxBot input slices. However, this subgraph does
1916   // still look like the normal CAS subgraph.
1917   //
1918   // So, the upshot is:
1919   //
1920   // In all cases a volatile put graph will include a *normal*
1921   // volatile store subgraph betwen the leading membar and the
1922   // trailing membar. It may also include a normal volatile store
1923   // subgraph betwen the leading membar and the card mark membar.
1924   //
1925   // In all cases a CAS graph will contain a unique normal CAS graph
1926   // feeding the trailing membar.
1927   //
1928   // In all cases where there is a card mark membar (either as part of
1929   // a volatile object put or CAS) it will be fed by a MergeMem whose
1930   // AliasIdxBot slice feed will be a leading membar.
1931   //
1932   // The predicates controlling generation of instructions for store
1933   // and barrier nodes employ a few simple helper functions (described
1934   // below) which identify the presence or absence of all these
1935   // subgraph configurations and provide a means of traversing from
1936   // one node in the subgraph to another.
1937 
1938   // is_CAS(int opcode)
1939   //
1940   // return true if opcode is one of the possible CompareAndSwapX
1941   // values otherwise false.
1942 
1943   bool is_CAS(int opcode)
1944   {
1945     return (opcode == Op_CompareAndSwapI ||
1946             opcode == Op_CompareAndSwapL ||
1947             opcode == Op_CompareAndSwapN ||
1948             opcode == Op_CompareAndSwapP);
1949   }
1950 
1951   // leading_to_trailing
1952   //
1953   //graph traversal helper which detects the normal case Mem feed from
1954   // a release membar (or, optionally, its cpuorder child) to a
1955   // dependent volatile membar i.e. it ensures that one or other of
1956   // the following Mem flow subgraph is present.
1957   //
1958   //   MemBarRelease {leading}
1959   //   {MemBarCPUOrder} {optional}
1960   //     Bot |  \      . . .
1961   //         |  StoreN/P[mo_release]  . . .
1962   //         |   /
1963   //        MergeMem
1964   //         |
1965   //   MemBarVolatile {not card mark}
1966   //
1967   //   MemBarRelease {leading}
1968   //   {MemBarCPUOrder} {optional}
1969   //      |       \      . . .
1970   //      |     CompareAndSwapX  . . .
1971   //               |
1972   //     . . .    SCMemProj
1973   //           \   |
1974   //      |    MergeMem
1975   //      |       /
1976   //    MemBarCPUOrder
1977   //    MemBarAcquire {trailing}
1978   //
1979   // the predicate needs to be capable of distinguishing the following
1980   // volatile put graph which may arises when a GC post barrier
1981   // inserts a card mark membar
1982   //
1983   //   MemBarRelease {leading}
1984   //   {MemBarCPUOrder}__
1985   //     Bot |   \       \
1986   //         |   StoreN/P \
1987   //         |    / \     |
1988   //        MergeMem \    |
1989   //         |        \   |
1990   //   MemBarVolatile  \  |
1991   //    {card mark}     \ |
1992   //                  MergeMem
1993   //                      |
1994   // {not card mark} MemBarVolatile
1995   //
1996   // if the correct configuration is present returns the trailing
1997   // membar otherwise NULL.
1998   //
1999   // the input membar is expected to be either a cpuorder membar or a
2000   // release membar. in the latter case it should not have a cpu membar
2001   // child.
2002   //
2003   // the returned value may be a card mark or trailing membar
2004   //
2005 
2006   MemBarNode *leading_to_trailing(MemBarNode *leading)
2007   {
2008     assert((leading->Opcode() == Op_MemBarRelease ||
2009             leading->Opcode() == Op_MemBarCPUOrder),
2010            "expecting a volatile or cpuroder membar!");
2011 
2012     // check the mem flow
2013     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2014 
2015     if (!mem) {
2016       return NULL;
2017     }
2018 
2019     Node *x = NULL;
2020     StoreNode * st = NULL;
2021     LoadStoreNode *cas = NULL;
2022     MergeMemNode *mm = NULL;
2023     MergeMemNode *mm2 = NULL;
2024 
2025     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2026       x = mem->fast_out(i);
2027       if (x->is_MergeMem()) {
2028         if (mm != NULL) {
2029           if (mm2 != NULL) {
2030           // should not see more than 2 merge mems
2031             return NULL;
2032           } else {
2033             mm2 = x->as_MergeMem();
2034           }
2035         } else {
2036           mm = x->as_MergeMem();
2037         }
2038       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2039         // two releasing stores/CAS nodes is one too many
2040         if (st != NULL || cas != NULL) {
2041           return NULL;
2042         }
2043         st = x->as_Store();
2044       } else if (is_CAS(x->Opcode())) {
2045         if (st != NULL || cas != NULL) {
2046           return NULL;
2047         }
2048         cas = x->as_LoadStore();
2049       }
2050     }
2051 
2052     // must have a store or a cas
2053     if (!st && !cas) {
2054       return NULL;
2055     }
2056 
2057     // must have at least one merge if we also have st
2058     if (st && !mm) {
2059       return NULL;
2060     }
2061 
2062     if (cas) {
2063       Node *y = NULL;
2064       // look for an SCMemProj
2065       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2066         x = cas->fast_out(i);
2067         if (x->is_Proj()) {
2068           y = x;
2069           break;
2070         }
2071       }
2072       if (y == NULL) {
2073         return NULL;
2074       }
2075       // the proj must feed a MergeMem
2076       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2077         x = y->fast_out(i);
2078         if (x->is_MergeMem()) {
2079           mm = x->as_MergeMem();
2080           break;
2081         }
2082       }
2083       if (mm == NULL) {
2084         return NULL;
2085       }
2086       MemBarNode *mbar = NULL;
2087       // ensure the merge feeds a trailing membar cpuorder + acquire pair
2088       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2089         x = mm->fast_out(i);
2090         if (x->is_MemBar()) {
2091           int opcode = x->Opcode();
2092           if (opcode == Op_MemBarCPUOrder) {
2093             MemBarNode *z =  x->as_MemBar();
2094             z = child_membar(z);
2095             if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
2096               mbar = z;
2097             }
2098           }
2099           break;
2100         }
2101       }
2102       return mbar;
2103     } else {
2104       Node *y = NULL;
2105       // ensure the store feeds the first mergemem;
2106       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2107         if (st->fast_out(i) == mm) {
2108           y = st;
2109           break;
2110         }
2111       }
2112       if (y == NULL) {
2113         return NULL;
2114       }
2115       if (mm2 != NULL) {
2116         // ensure the store feeds the second mergemem;
2117         y = NULL;
2118         for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2119           if (st->fast_out(i) == mm2) {
2120             y = st;
2121           }
2122         }
2123         if (y == NULL) {
2124           return NULL;
2125         }
2126       }
2127 
2128       MemBarNode *mbar = NULL;
2129       // ensure the first mergemem feeds a volatile membar
2130       for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2131         x = mm->fast_out(i);
2132         if (x->is_MemBar()) {
2133           int opcode = x->Opcode();
2134           if (opcode == Op_MemBarVolatile) {
2135             mbar = x->as_MemBar();
2136           }
2137           break;
2138         }
2139       }
2140       if (mm2 == NULL) {
2141         // this is our only option for a trailing membar
2142         return mbar;
2143       }
2144       // ensure the second mergemem feeds a volatile membar
2145       MemBarNode *mbar2 = NULL;
2146       for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
2147         x = mm2->fast_out(i);
2148         if (x->is_MemBar()) {
2149           int opcode = x->Opcode();
2150           if (opcode == Op_MemBarVolatile) {
2151             mbar2 = x->as_MemBar();
2152           }
2153           break;
2154         }
2155       }
2156       // if we have two merge mems we must have two volatile membars
2157       if (mbar == NULL || mbar2 == NULL) {
2158         return NULL;
2159       }
2160       // return the trailing membar
2161       if (is_card_mark_membar(mbar2)) {
2162         return mbar;
2163       } else {
2164         if (is_card_mark_membar(mbar)) {
2165           return mbar2;
2166         } else {
2167           return NULL;
2168         }
2169       }
2170     }
2171   }
2172 
2173   // trailing_to_leading
2174   //
2175   // graph traversal helper which detects the normal case Mem feed
2176   // from a trailing membar to a preceding release membar (optionally
2177   // its cpuorder child) i.e. it ensures that one or other of the
2178   // following Mem flow subgraphs is present.
2179   //
2180   //   MemBarRelease {leading}
2181   //   MemBarCPUOrder {optional}
2182   //    | Bot |  \      . . .
2183   //    |     |  StoreN/P[mo_release]  . . .
2184   //    |     |   /
2185   //    |    MergeMem
2186   //    |     |
2187   //   MemBarVolatile {not card mark}
2188   //
2189   //   MemBarRelease {leading}
2190   //   MemBarCPUOrder {optional}
2191   //      |       \      . . .
2192   //      |     CompareAndSwapX  . . .
2193   //               |
2194   //     . . .    SCMemProj
2195   //           \   |
2196   //      |    MergeMem
2197   //      |       |
2198   //    MemBarCPUOrder
2199   //    MemBarAcquire {trailing}
2200   //
2201   // this predicate checks for the same flow as the previous predicate
2202   // but starting from the bottom rather than the top.
2203   //
2204   // if the configuration is present returns the cpuorder member for
2205   // preference or when absent the release membar otherwise NULL.
2206   //
2207   // n.b. the input membar is expected to be a MemBarVolatile or
2208   // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2209   // mark membar.
2210 
2211   MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2212   {
2213     // input must be a volatile membar
2214     assert((barrier->Opcode() == Op_MemBarVolatile ||
2215             barrier->Opcode() == Op_MemBarAcquire),
2216            "expecting a volatile or an acquire membar");
2217 
2218     assert((barrier->Opcode() != Op_MemBarVolatile) ||
2219            !is_card_mark_membar(barrier),
2220            "not expecting a card mark membar");
2221     Node *x;
2222     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2223 
2224     // if we have an acquire membar then it must be fed via a CPUOrder
2225     // membar
2226 
2227     if (is_cas) {
2228       // skip to parent barrier which must be a cpuorder
2229       x = parent_membar(barrier);
2230       if (x->Opcode() != Op_MemBarCPUOrder)
2231         return NULL;
2232     } else {
2233       // start from the supplied barrier
2234       x = (Node *)barrier;
2235     }
2236 
2237     // the Mem feed to the membar should be a merge
2238     x = x ->in(TypeFunc::Memory);
2239     if (!x->is_MergeMem())
2240       return NULL;
2241 
2242     MergeMemNode *mm = x->as_MergeMem();
2243 
2244     if (is_cas) {
2245       // the merge should be fed from the CAS via an SCMemProj node
2246       x = NULL;
2247       for (uint idx = 1; idx < mm->req(); idx++) {
2248         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2249           x = mm->in(idx);
2250           break;
2251         }
2252       }
2253       if (x == NULL) {
2254         return NULL;
2255       }
2256       // check for a CAS feeding this proj
2257       x = x->in(0);
2258       int opcode = x->Opcode();
2259       if (!is_CAS(opcode)) {
2260         return NULL;
2261       }
2262       // the CAS should get its mem feed from the leading membar
2263       x = x->in(MemNode::Memory);
2264     } else {
2265       // the merge should get its Bottom mem feed from the leading membar
2266       x = mm->in(Compile::AliasIdxBot);
2267     }
2268 
2269     // ensure this is a non control projection
2270     if (!x->is_Proj() || x->is_CFG()) {
2271       return NULL;
2272     }
2273     // if it is fed by a membar that's the one we want
2274     x = x->in(0);
2275 
2276     if (!x->is_MemBar()) {
2277       return NULL;
2278     }
2279 
2280     MemBarNode *leading = x->as_MemBar();
2281     // reject invalid candidates
2282     if (!leading_membar(leading)) {
2283       return NULL;
2284     }
2285 
2286     // ok, we have a leading membar, now for the sanity clauses
2287 
2288     // the leading membar must feed Mem to a releasing store or CAS
2289     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2290     StoreNode *st = NULL;
2291     LoadStoreNode *cas = NULL;
2292     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2293       x = mem->fast_out(i);
2294       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2295         // two stores or CASes is one too many
2296         if (st != NULL || cas != NULL) {
2297           return NULL;
2298         }
2299         st = x->as_Store();
2300       } else if (is_CAS(x->Opcode())) {
2301         if (st != NULL || cas != NULL) {
2302           return NULL;
2303         }
2304         cas = x->as_LoadStore();
2305       }
2306     }
2307 
2308     // we should not have both a store and a cas
2309     if (st == NULL & cas == NULL) {
2310       return NULL;
2311     }
2312 
2313     if (st == NULL) {
2314       // nothing more to check
2315       return leading;
2316     } else {
2317       // we should not have a store if we started from an acquire
2318       if (is_cas) {
2319         return NULL;
2320       }
2321 
2322       // the store should feed the merge we used to get here
2323       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2324         if (st->fast_out(i) == mm) {
2325           return leading;
2326         }
2327       }
2328     }
2329 
2330     return NULL;
2331   }
2332 
2333   // card_mark_to_leading
2334   //
2335   // graph traversal helper which traverses from a card mark volatile
2336   // membar to a leading membar i.e. it ensures that the following Mem
2337   // flow subgraph is present.
2338   //
2339   //    MemBarRelease {leading}
2340   //   {MemBarCPUOrder} {optional}
2341   //         |   . . .
2342   //     Bot |   /
2343   //      MergeMem
2344   //         |
2345   //     MemBarVolatile (card mark)
2346   //        |     \
2347   //      . . .   StoreCM
2348   //
2349   // if the configuration is present returns the cpuorder member for
2350   // preference or when absent the release membar otherwise NULL.
2351   //
2352   // n.b. the input membar is expected to be a MemBarVolatile amd must
2353   // be a card mark membar.
2354 
2355   MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2356   {
2357     // input must be a card mark volatile membar
2358     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2359 
2360     // the Mem feed to the membar should be a merge
2361     Node *x = barrier->in(TypeFunc::Memory);
2362     if (!x->is_MergeMem()) {
2363       return NULL;
2364     }
2365 
2366     MergeMemNode *mm = x->as_MergeMem();
2367 
2368     x = mm->in(Compile::AliasIdxBot);
2369 
2370     if (!x->is_MemBar()) {
2371       return NULL;
2372     }
2373 
2374     MemBarNode *leading = x->as_MemBar();
2375 
2376     if (leading_membar(leading)) {
2377       return leading;
2378     }
2379 
2380     return NULL;
2381   }
2382 
2383 bool unnecessary_acquire(const Node *barrier)
2384 {
2385   assert(barrier->is_MemBar(), "expecting a membar");
2386 
2387   if (UseBarriersForVolatile) {
2388     // we need to plant a dmb
2389     return false;
2390   }
2391 
2392   // a volatile read derived from bytecode (or also from an inlined
2393   // SHA field read via LibraryCallKit::load_field_from_object)
2394   // manifests as a LoadX[mo_acquire] followed by an acquire membar
2395   // with a bogus read dependency on it's preceding load. so in those
2396   // cases we will find the load node at the PARMS offset of the
2397   // acquire membar.  n.b. there may be an intervening DecodeN node.
2398   //
2399   // a volatile load derived from an inlined unsafe field access
2400   // manifests as a cpuorder membar with Ctl and Mem projections
2401   // feeding both an acquire membar and a LoadX[mo_acquire]. The
2402   // acquire then feeds another cpuorder membar via Ctl and Mem
2403   // projections. The load has no output dependency on these trailing
2404   // membars because subsequent nodes inserted into the graph take
2405   // their control feed from the final membar cpuorder meaning they
2406   // are all ordered after the load.
2407 
2408   Node *x = barrier->lookup(TypeFunc::Parms);
2409   if (x) {
2410     // we are starting from an acquire and it has a fake dependency
2411     //
2412     // need to check for
2413     //
2414     //   LoadX[mo_acquire]
2415     //   {  |1   }
2416     //   {DecodeN}
2417     //      |Parms
2418     //   MemBarAcquire*
2419     //
2420     // where * tags node we were passed
2421     // and |k means input k
2422     if (x->is_DecodeNarrowPtr()) {
2423       x = x->in(1);
2424     }
2425 
2426     return (x->is_Load() && x->as_Load()->is_acquire());
2427   }
2428 
2429   // now check for an unsafe volatile get
2430 
2431   // need to check for
2432   //
2433   //   MemBarCPUOrder
2434   //        ||       \\
2435   //   MemBarAcquire* LoadX[mo_acquire]
2436   //        ||
2437   //   MemBarCPUOrder
2438   //
2439   // where * tags node we were passed
2440   // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes
2441 
2442   // check for a parent MemBarCPUOrder
2443   ProjNode *ctl;
2444   ProjNode *mem;
2445   MemBarNode *parent = parent_membar(barrier);
2446   if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
2447     return false;
2448   ctl = parent->proj_out(TypeFunc::Control);
2449   mem = parent->proj_out(TypeFunc::Memory);
2450   if (!ctl || !mem) {
2451     return false;
2452   }
2453   // ensure the proj nodes both feed a LoadX[mo_acquire]
2454   LoadNode *ld = NULL;
2455   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
2456     x = ctl->fast_out(i);
2457     // if we see a load we keep hold of it and stop searching
2458     if (x->is_Load()) {
2459       ld = x->as_Load();
2460       break;
2461     }
2462   }
2463   // it must be an acquiring load
2464   if (ld && ld->is_acquire()) {
2465 
2466     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2467       x = mem->fast_out(i);
2468       // if we see the same load we drop it and stop searching
2469       if (x == ld) {
2470         ld = NULL;
2471         break;
2472       }
2473     }
2474     // we must have dropped the load
2475     if (ld == NULL) {
2476       // check for a child cpuorder membar
2477       MemBarNode *child  = child_membar(barrier->as_MemBar());
2478       if (child && child->Opcode() == Op_MemBarCPUOrder)
2479         return true;
2480     }
2481   }
2482 
2483   // final option for unnecessary mebar is that it is a trailing node
2484   // belonging to a CAS
2485 
2486   MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
2487 
2488   return leading != NULL;
2489 }
2490 
2491 bool needs_acquiring_load(const Node *n)
2492 {
2493   assert(n->is_Load(), "expecting a load");
2494   if (UseBarriersForVolatile) {
2495     // we use a normal load and a dmb
2496     return false;
2497   }
2498 
2499   LoadNode *ld = n->as_Load();
2500 
2501   if (!ld->is_acquire()) {
2502     return false;
2503   }
2504 
2505   // check if this load is feeding an acquire membar
2506   //
2507   //   LoadX[mo_acquire]
2508   //   {  |1   }
2509   //   {DecodeN}
2510   //      |Parms
2511   //   MemBarAcquire*
2512   //
2513   // where * tags node we were passed
2514   // and |k means input k
2515 
2516   Node *start = ld;
2517   Node *mbacq = NULL;
2518 
2519   // if we hit a DecodeNarrowPtr we reset the start node and restart
2520   // the search through the outputs
2521  restart:
2522 
2523   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2524     Node *x = start->fast_out(i);
2525     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2526       mbacq = x;
2527     } else if (!mbacq &&
2528                (x->is_DecodeNarrowPtr() ||
2529                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2530       start = x;
2531       goto restart;
2532     }
2533   }
2534 
2535   if (mbacq) {
2536     return true;
2537   }
2538 
2539   // now check for an unsafe volatile get
2540 
2541   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2542   //
2543   //     MemBarCPUOrder
2544   //        ||       \\
2545   //   MemBarAcquire* LoadX[mo_acquire]
2546   //        ||
2547   //   MemBarCPUOrder
2548 
2549   MemBarNode *membar;
2550 
2551   membar = parent_membar(ld);
2552 
2553   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2554     return false;
2555   }
2556 
2557   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2558 
2559   membar = child_membar(membar);
2560 
2561   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2562     return false;
2563   }
2564 
2565   membar = child_membar(membar);
2566 
2567   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2568     return false;
2569   }
2570 
2571   return true;
2572 }
2573 
2574 bool unnecessary_release(const Node *n)
2575 {
2576   assert((n->is_MemBar() &&
2577           n->Opcode() == Op_MemBarRelease),
2578          "expecting a release membar");
2579 
2580   if (UseBarriersForVolatile) {
2581     // we need to plant a dmb
2582     return false;
2583   }
2584 
2585   // if there is a dependent CPUOrder barrier then use that as the
2586   // leading
2587 
2588   MemBarNode *barrier = n->as_MemBar();
2589   // check for an intervening cpuorder membar
2590   MemBarNode *b = child_membar(barrier);
2591   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2592     // ok, so start the check from the dependent cpuorder barrier
2593     barrier = b;
2594   }
2595 
2596   // must start with a normal feed
2597   MemBarNode *trailing = leading_to_trailing(barrier);
2598 
2599   return (trailing != NULL);
2600 }
2601 
2602 bool unnecessary_volatile(const Node *n)
2603 {
2604   // assert n->is_MemBar();
2605   if (UseBarriersForVolatile) {
2606     // we need to plant a dmb
2607     return false;
2608   }
2609 
2610   MemBarNode *mbvol = n->as_MemBar();
2611 
2612   // first we check if this is part of a card mark. if so then we have
2613   // to generate a StoreLoad barrier
2614 
2615   if (is_card_mark_membar(mbvol)) {
2616       return false;
2617   }
2618 
2619   // ok, if it's not a card mark then we still need to check if it is
2620   // a trailing membar of a volatile put graph.
2621 
2622   return (trailing_to_leading(mbvol) != NULL);
2623 }
2624 
2625 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2626 
2627 bool needs_releasing_store(const Node *n)
2628 {
2629   // assert n->is_Store();
2630   if (UseBarriersForVolatile) {
2631     // we use a normal store and dmb combination
2632     return false;
2633   }
2634 
2635   StoreNode *st = n->as_Store();
2636 
2637   // the store must be marked as releasing
2638   if (!st->is_release()) {
2639     return false;
2640   }
2641 
2642   // the store must be fed by a membar
2643 
2644   Node *x = st->lookup(StoreNode::Memory);
2645 
2646   if (! x || !x->is_Proj()) {
2647     return false;
2648   }
2649 
2650   ProjNode *proj = x->as_Proj();
2651 
2652   x = proj->lookup(0);
2653 
2654   if (!x || !x->is_MemBar()) {
2655     return false;
2656   }
2657 
2658   MemBarNode *barrier = x->as_MemBar();
2659 
2660   // if the barrier is a release membar or a cpuorder mmebar fed by a
2661   // release membar then we need to check whether that forms part of a
2662   // volatile put graph.
2663 
2664   // reject invalid candidates
2665   if (!leading_membar(barrier)) {
2666     return false;
2667   }
2668 
2669   // does this lead a normal subgraph?
2670   MemBarNode *trailing = leading_to_trailing(barrier);
2671 
2672   return (trailing != NULL);
2673 }
2674 
2675 // predicate controlling translation of CAS
2676 //
2677 // returns true if CAS needs to use an acquiring load otherwise false
2678 
2679 bool needs_acquiring_load_exclusive(const Node *n)
2680 {
2681   assert(is_CAS(n->Opcode()), "expecting a compare and swap");
2682   if (UseBarriersForVolatile) {
2683     return false;
2684   }
2685 
2686   // CAS nodes only ought to turn up in inlined unsafe CAS operations
2687 #ifdef ASSERT
2688   LoadStoreNode *st = n->as_LoadStore();
2689 
2690   // the store must be fed by a membar
2691 
2692   Node *x = st->lookup(StoreNode::Memory);
2693 
2694   assert (x && x->is_Proj(), "CAS not fed by memory proj!");
2695 
2696   ProjNode *proj = x->as_Proj();
2697 
2698   x = proj->lookup(0);
2699 
2700   assert (x && x->is_MemBar(), "CAS not fed by membar!");
2701 
2702   MemBarNode *barrier = x->as_MemBar();
2703 
2704   // the barrier must be a cpuorder mmebar fed by a release membar
2705 
2706   assert(barrier->Opcode() == Op_MemBarCPUOrder,
2707          "CAS not fed by cpuorder membar!");
2708 
2709   MemBarNode *b = parent_membar(barrier);
2710   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
2711           "CAS not fed by cpuorder+release membar pair!");
2712 
2713   // does this lead a normal subgraph?
2714   MemBarNode *mbar = leading_to_trailing(barrier);
2715 
2716   assert(mbar != NULL, "CAS not embedded in normal graph!");
2717 
2718   assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
2719 #endif // ASSERT
2720   // so we can just return true here
2721   return true;
2722 }
2723 
2724 // predicate controlling translation of StoreCM
2725 //
2726 // returns true if a StoreStore must precede the card write otherwise
2727 // false
2728 
2729 bool unnecessary_storestore(const Node *storecm)
2730 {
2731   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
2732 
2733   // we only ever need to generate a dmb ishst between an object put
2734   // and the associated card mark when we are using CMS without
2735   // conditional card marking. Any other occurence will happen when
2736   // performing a card mark using CMS with conditional card marking or
2737   // G1. In those cases the preceding MamBarVolatile will be
2738   // translated to a dmb ish which guarantes visibility of the
2739   // preceding StoreN/P before this StoreCM
2740 
2741   if (!UseConcMarkSweepGC || UseCondCardMark) {
2742     return true;
2743   }
2744 
2745   // if we are implementing volatile puts using barriers then we must
2746   // insert the dmb ishst
2747 
2748   if (UseBarriersForVolatile) {
2749     return false;
2750   }
2751 
2752   // we must be using CMS with conditional card marking so we ahve to
2753   // generate the StoreStore
2754 
2755   return false;
2756 }
2757 
2758 
2759 #define __ _masm.
2760 
2761 // advance declarations for helper functions to convert register
2762 // indices to register objects
2763 
2764 // the ad file has to provide implementations of certain methods
2765 // expected by the generic code
2766 //
2767 // REQUIRED FUNCTIONALITY
2768 
2769 //=============================================================================
2770 
2771 // !!!!! Special hack to get all types of calls to specify the byte offset
2772 //       from the start of the call to the point where the return address
2773 //       will point.
2774 
2775 int MachCallStaticJavaNode::ret_addr_offset()
2776 {
2777   // call should be a simple bl
2778   int off = 4;
2779   return off;
2780 }
2781 
2782 int MachCallDynamicJavaNode::ret_addr_offset()
2783 {
2784   return 16; // movz, movk, movk, bl
2785 }
2786 
2787 int MachCallRuntimeNode::ret_addr_offset() {
2788   // for generated stubs the call will be
2789   //   far_call(addr)
2790   // for real runtime callouts it will be six instructions
2791   // see aarch64_enc_java_to_runtime
2792   //   adr(rscratch2, retaddr)
2793   //   lea(rscratch1, RuntimeAddress(addr)
2794   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2795   //   blrt rscratch1
2796   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2797   if (cb) {
2798     return MacroAssembler::far_branch_size();
2799   } else {
2800     return 6 * NativeInstruction::instruction_size;
2801   }
2802 }
2803 
2804 // Indicate if the safepoint node needs the polling page as an input
2805 
2806 // the shared code plants the oop data at the start of the generated
2807 // code for the safepoint node and that needs ot be at the load
2808 // instruction itself. so we cannot plant a mov of the safepoint poll
2809 // address followed by a load. setting this to true means the mov is
2810 // scheduled as a prior instruction. that's better for scheduling
2811 // anyway.
2812 
2813 bool SafePointNode::needs_polling_address_input()
2814 {
2815   return true;
2816 }
2817 
2818 //=============================================================================
2819 
2820 #ifndef PRODUCT
2821 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2822   st->print("BREAKPOINT");
2823 }
2824 #endif
2825 
2826 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2827   MacroAssembler _masm(&cbuf);
2828   __ brk(0);
2829 }
2830 
2831 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2832   return MachNode::size(ra_);
2833 }
2834 
2835 //=============================================================================
2836 
2837 #ifndef PRODUCT
2838   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2839     st->print("nop \t# %d bytes pad for loops and calls", _count);
2840   }
2841 #endif
2842 
2843   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2844     MacroAssembler _masm(&cbuf);
2845     for (int i = 0; i < _count; i++) {
2846       __ nop();
2847     }
2848   }
2849 
2850   uint MachNopNode::size(PhaseRegAlloc*) const {
2851     return _count * NativeInstruction::instruction_size;
2852   }
2853 
2854 //=============================================================================
2855 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
2856 
2857 int Compile::ConstantTable::calculate_table_base_offset() const {
2858   return 0;  // absolute addressing, no offset
2859 }
2860 
2861 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
2862 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
2863   ShouldNotReachHere();
2864 }
2865 
2866 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
2867   // Empty encoding
2868 }
2869 
2870 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
2871   return 0;
2872 }
2873 
2874 #ifndef PRODUCT
2875 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
2876   st->print("-- \t// MachConstantBaseNode (empty encoding)");
2877 }
2878 #endif
2879 
2880 #ifndef PRODUCT
2881 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2882   Compile* C = ra_->C;
2883 
2884   int framesize = C->frame_slots() << LogBytesPerInt;
2885 
2886   if (C->need_stack_bang(framesize))
2887     st->print("# stack bang size=%d\n\t", framesize);
2888 
2889   if (framesize < ((1 << 9) + 2 * wordSize)) {
2890     st->print("sub  sp, sp, #%d\n\t", framesize);
2891     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
2892     if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
2893   } else {
2894     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
2895     if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
2896     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2897     st->print("sub  sp, sp, rscratch1");
2898   }
2899 }
2900 #endif
2901 
2902 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2903   Compile* C = ra_->C;
2904   MacroAssembler _masm(&cbuf);
2905 
2906   // n.b. frame size includes space for return pc and rfp
2907   const long framesize = C->frame_size_in_bytes();
2908   assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
2909 
2910   // insert a nop at the start of the prolog so we can patch in a
2911   // branch if we need to invalidate the method later
2912   __ nop();
2913 
2914   int bangsize = C->bang_size_in_bytes();
2915   if (C->need_stack_bang(bangsize) && UseStackBanging)
2916     __ generate_stack_overflow_check(bangsize);
2917 
2918   __ build_frame(framesize);
2919 
2920   if (NotifySimulator) {
2921     __ notify(Assembler::method_entry);
2922   }
2923 
2924   if (VerifyStackAtCalls) {
2925     Unimplemented();
2926   }
2927 
2928   C->set_frame_complete(cbuf.insts_size());
2929 
2930   if (C->has_mach_constant_base_node()) {
2931     // NOTE: We set the table base offset here because users might be
2932     // emitted before MachConstantBaseNode.
2933     Compile::ConstantTable& constant_table = C->constant_table();
2934     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
2935   }
2936 }
2937 
2938 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
2939 {
2940   return MachNode::size(ra_); // too many variables; just compute it
2941                               // the hard way
2942 }
2943 
2944 int MachPrologNode::reloc() const
2945 {
2946   return 0;
2947 }
2948 
2949 //=============================================================================
2950 
2951 #ifndef PRODUCT
2952 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2953   Compile* C = ra_->C;
2954   int framesize = C->frame_slots() << LogBytesPerInt;
2955 
2956   st->print("# pop frame %d\n\t",framesize);
2957 
2958   if (framesize == 0) {
2959     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2960   } else if (framesize < ((1 << 9) + 2 * wordSize)) {
2961     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
2962     st->print("add  sp, sp, #%d\n\t", framesize);
2963   } else {
2964     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
2965     st->print("add  sp, sp, rscratch1\n\t");
2966     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
2967   }
2968 
2969   if (do_polling() && C->is_method_compilation()) {
2970     st->print("# touch polling page\n\t");
2971     st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
2972     st->print("ldr zr, [rscratch1]");
2973   }
2974 }
2975 #endif
2976 
2977 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2978   Compile* C = ra_->C;
2979   MacroAssembler _masm(&cbuf);
2980   int framesize = C->frame_slots() << LogBytesPerInt;
2981 
2982   __ remove_frame(framesize);
2983 
2984   if (NotifySimulator) {
2985     __ notify(Assembler::method_reentry);
2986   }
2987 
2988   if (do_polling() && C->is_method_compilation()) {
2989     __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
2990   }
2991 }
2992 
2993 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
2994   // Variable size. Determine dynamically.
2995   return MachNode::size(ra_);
2996 }
2997 
2998 int MachEpilogNode::reloc() const {
2999   // Return number of relocatable values contained in this instruction.
3000   return 1; // 1 for polling page.
3001 }
3002 
3003 const Pipeline * MachEpilogNode::pipeline() const {
3004   return MachNode::pipeline_class();
3005 }
3006 
3007 // This method seems to be obsolete. It is declared in machnode.hpp
3008 // and defined in all *.ad files, but it is never called. Should we
3009 // get rid of it?
3010 int MachEpilogNode::safepoint_offset() const {
3011   assert(do_polling(), "no return for this epilog node");
3012   return 4;
3013 }
3014 
3015 //=============================================================================
3016 
3017 // Figure out which register class each belongs in: rc_int, rc_float or
3018 // rc_stack.
3019 enum RC { rc_bad, rc_int, rc_float, rc_stack };
3020 
3021 static enum RC rc_class(OptoReg::Name reg) {
3022 
3023   if (reg == OptoReg::Bad) {
3024     return rc_bad;
3025   }
3026 
3027   // we have 30 int registers * 2 halves
3028   // (rscratch1 and rscratch2 are omitted)
3029 
3030   if (reg < 60) {
3031     return rc_int;
3032   }
3033 
3034   // we have 32 float register * 2 halves
3035   if (reg < 60 + 128) {
3036     return rc_float;
3037   }
3038 
3039   // Between float regs & stack is the flags regs.
3040   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3041 
3042   return rc_stack;
3043 }
3044 
3045 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3046   Compile* C = ra_->C;
3047 
3048   // Get registers to move.
3049   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3050   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3051   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3052   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3053 
3054   enum RC src_hi_rc = rc_class(src_hi);
3055   enum RC src_lo_rc = rc_class(src_lo);
3056   enum RC dst_hi_rc = rc_class(dst_hi);
3057   enum RC dst_lo_rc = rc_class(dst_lo);
3058 
3059   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3060 
3061   if (src_hi != OptoReg::Bad) {
3062     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3063            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3064            "expected aligned-adjacent pairs");
3065   }
3066 
3067   if (src_lo == dst_lo && src_hi == dst_hi) {
3068     return 0;            // Self copy, no move.
3069   }
3070 
3071   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3072               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3073   int src_offset = ra_->reg2offset(src_lo);
3074   int dst_offset = ra_->reg2offset(dst_lo);
3075 
3076   if (bottom_type()->isa_vect() != NULL) {
3077     uint ireg = ideal_reg();
3078     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3079     if (cbuf) {
3080       MacroAssembler _masm(cbuf);
3081       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3082       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3083         // stack->stack
3084         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3085         if (ireg == Op_VecD) {
3086           __ unspill(rscratch1, true, src_offset);
3087           __ spill(rscratch1, true, dst_offset);
3088         } else {
3089           __ spill_copy128(src_offset, dst_offset);
3090         }
3091       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3092         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3093                ireg == Op_VecD ? __ T8B : __ T16B,
3094                as_FloatRegister(Matcher::_regEncode[src_lo]));
3095       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3096         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3097                        ireg == Op_VecD ? __ D : __ Q,
3098                        ra_->reg2offset(dst_lo));
3099       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3100         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3101                        ireg == Op_VecD ? __ D : __ Q,
3102                        ra_->reg2offset(src_lo));
3103       } else {
3104         ShouldNotReachHere();
3105       }
3106     }
3107   } else if (cbuf) {
3108     MacroAssembler _masm(cbuf);
3109     switch (src_lo_rc) {
3110     case rc_int:
3111       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3112         if (is64) {
3113             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3114                    as_Register(Matcher::_regEncode[src_lo]));
3115         } else {
3116             MacroAssembler _masm(cbuf);
3117             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3118                     as_Register(Matcher::_regEncode[src_lo]));
3119         }
3120       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3121         if (is64) {
3122             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3123                      as_Register(Matcher::_regEncode[src_lo]));
3124         } else {
3125             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3126                      as_Register(Matcher::_regEncode[src_lo]));
3127         }
3128       } else {                    // gpr --> stack spill
3129         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3130         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3131       }
3132       break;
3133     case rc_float:
3134       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3135         if (is64) {
3136             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3137                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3138         } else {
3139             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3140                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3141         }
3142       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3143           if (cbuf) {
3144             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3145                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3146         } else {
3147             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3148                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3149         }
3150       } else {                    // fpr --> stack spill
3151         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3152         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3153                  is64 ? __ D : __ S, dst_offset);
3154       }
3155       break;
3156     case rc_stack:
3157       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3158         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3159       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3160         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3161                    is64 ? __ D : __ S, src_offset);
3162       } else {                    // stack --> stack copy
3163         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3164         __ unspill(rscratch1, is64, src_offset);
3165         __ spill(rscratch1, is64, dst_offset);
3166       }
3167       break;
3168     default:
3169       assert(false, "bad rc_class for spill");
3170       ShouldNotReachHere();
3171     }
3172   }
3173 
3174   if (st) {
3175     st->print("spill ");
3176     if (src_lo_rc == rc_stack) {
3177       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3178     } else {
3179       st->print("%s -> ", Matcher::regName[src_lo]);
3180     }
3181     if (dst_lo_rc == rc_stack) {
3182       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3183     } else {
3184       st->print("%s", Matcher::regName[dst_lo]);
3185     }
3186     if (bottom_type()->isa_vect() != NULL) {
3187       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3188     } else {
3189       st->print("\t# spill size = %d", is64 ? 64:32);
3190     }
3191   }
3192 
3193   return 0;
3194 
3195 }
3196 
3197 #ifndef PRODUCT
3198 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3199   if (!ra_)
3200     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3201   else
3202     implementation(NULL, ra_, false, st);
3203 }
3204 #endif
3205 
3206 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3207   implementation(&cbuf, ra_, false, NULL);
3208 }
3209 
3210 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3211   return MachNode::size(ra_);
3212 }
3213 
3214 //=============================================================================
3215 
3216 #ifndef PRODUCT
3217 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3218   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3219   int reg = ra_->get_reg_first(this);
3220   st->print("add %s, rsp, #%d]\t# box lock",
3221             Matcher::regName[reg], offset);
3222 }
3223 #endif
3224 
3225 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3226   MacroAssembler _masm(&cbuf);
3227 
3228   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3229   int reg    = ra_->get_encode(this);
3230 
3231   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
3232     __ add(as_Register(reg), sp, offset);
3233   } else {
3234     ShouldNotReachHere();
3235   }
3236 }
3237 
3238 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3239   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3240   return 4;
3241 }
3242 
3243 //=============================================================================
3244 
3245 #ifndef PRODUCT
3246 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
3247 {
3248   st->print_cr("# MachUEPNode");
3249   if (UseCompressedClassPointers) {
3250     st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3251     if (Universe::narrow_klass_shift() != 0) {
3252       st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
3253     }
3254   } else {
3255    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
3256   }
3257   st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
3258   st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
3259 }
3260 #endif
3261 
3262 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
3263 {
3264   // This is the unverified entry point.
3265   MacroAssembler _masm(&cbuf);
3266 
3267   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
3268   Label skip;
3269   // TODO
3270   // can we avoid this skip and still use a reloc?
3271   __ br(Assembler::EQ, skip);
3272   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
3273   __ bind(skip);
3274 }
3275 
3276 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3277 {
3278   return MachNode::size(ra_);
3279 }
3280 
3281 // REQUIRED EMIT CODE
3282 
3283 //=============================================================================
3284 
3285 // Emit exception handler code.
3286 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
3287 {
3288   // mov rscratch1 #exception_blob_entry_point
3289   // br rscratch1
3290   // Note that the code buffer's insts_mark is always relative to insts.
3291   // That's why we must use the macroassembler to generate a handler.
3292   MacroAssembler _masm(&cbuf);
3293   address base = __ start_a_stub(size_exception_handler());
3294   if (base == NULL) {
3295     ciEnv::current()->record_failure("CodeCache is full");
3296     return 0;  // CodeBuffer::expand failed
3297   }
3298   int offset = __ offset();
3299   __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
3300   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
3301   __ end_a_stub();
3302   return offset;
3303 }
3304 
3305 // Emit deopt handler code.
3306 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
3307 {
3308   // Note that the code buffer's insts_mark is always relative to insts.
3309   // That's why we must use the macroassembler to generate a handler.
3310   MacroAssembler _masm(&cbuf);
3311   address base = __ start_a_stub(size_deopt_handler());
3312   if (base == NULL) {
3313     ciEnv::current()->record_failure("CodeCache is full");
3314     return 0;  // CodeBuffer::expand failed
3315   }
3316   int offset = __ offset();
3317 
3318   __ adr(lr, __ pc());
3319   __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
3320 
3321   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
3322   __ end_a_stub();
3323   return offset;
3324 }
3325 
3326 // REQUIRED MATCHER CODE
3327 
3328 //=============================================================================
3329 
3330 const bool Matcher::match_rule_supported(int opcode) {
3331 
3332   switch (opcode) {
3333   case Op_StrComp:
3334     if (CompactStrings)  return false;
3335     break;
3336   default:
3337     break;
3338   }
3339 
3340   if (!has_match_rule(opcode)) {
3341     return false;
3342   }
3343 
3344   return true;  // Per default match rules are supported.
3345 }
3346 
3347 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3348 
3349   // TODO
3350   // identify extra cases that we might want to provide match rules for
3351   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3352   bool ret_value = match_rule_supported(opcode);
3353   // Add rules here.
3354 
3355   return ret_value;  // Per default match rules are supported.
3356 }
3357 
3358 const bool Matcher::has_predicated_vectors(void) {
3359   return false;
3360 }
3361 
3362 const int Matcher::float_pressure(int default_pressure_threshold) {
3363   return default_pressure_threshold;
3364 }
3365 
3366 int Matcher::regnum_to_fpu_offset(int regnum)
3367 {
3368   Unimplemented();
3369   return 0;
3370 }
3371 
3372 // Is this branch offset short enough that a short branch can be used?
3373 //
3374 // NOTE: If the platform does not provide any short branch variants, then
3375 //       this method should return false for offset 0.
3376 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3377   // The passed offset is relative to address of the branch.
3378 
3379   return (-32768 <= offset && offset < 32768);
3380 }
3381 
3382 const bool Matcher::isSimpleConstant64(jlong value) {
3383   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
3384   // Probably always true, even if a temp register is required.
3385   return true;
3386 }
3387 
3388 // true just means we have fast l2f conversion
3389 const bool Matcher::convL2FSupported(void) {
3390   return true;
3391 }
3392 
3393 // Vector width in bytes.
3394 const int Matcher::vector_width_in_bytes(BasicType bt) {
3395   int size = MIN2(16,(int)MaxVectorSize);
3396   // Minimum 2 values in vector
3397   if (size < 2*type2aelembytes(bt)) size = 0;
3398   // But never < 4
3399   if (size < 4) size = 0;
3400   return size;
3401 }
3402 
3403 // Limits on vector size (number of elements) loaded into vector.
3404 const int Matcher::max_vector_size(const BasicType bt) {
3405   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3406 }
3407 const int Matcher::min_vector_size(const BasicType bt) {
3408 //  For the moment limit the vector size to 8 bytes
3409     int size = 8 / type2aelembytes(bt);
3410     if (size < 2) size = 2;
3411     return size;
3412 }
3413 
3414 // Vector ideal reg.
3415 const Opcodes Matcher::vector_ideal_reg(int len) {
3416   switch(len) {
3417     case  8: return Opcodes::Op_VecD;
3418     case 16: return Opcodes::Op_VecX;
3419   }
3420   ShouldNotReachHere();
3421   return 0;
3422 }
3423 
3424 const Opcodes Matcher::vector_shift_count_ideal_reg(int size) {
3425   return Opcodes::Op_VecX;
3426 }
3427 
3428 // AES support not yet implemented
3429 const bool Matcher::pass_original_key_for_aes() {
3430   return false;
3431 }
3432 
3433 // x86 supports misaligned vectors store/load.
3434 const bool Matcher::misaligned_vectors_ok() {
3435   return !AlignVector; // can be changed by flag
3436 }
3437 
3438 // false => size gets scaled to BytesPerLong, ok.
3439 const bool Matcher::init_array_count_is_in_bytes = false;
3440 
3441 // Use conditional move (CMOVL)
3442 const int Matcher::long_cmove_cost() {
3443   // long cmoves are no more expensive than int cmoves
3444   return 0;
3445 }
3446 
3447 const int Matcher::float_cmove_cost() {
3448   // float cmoves are no more expensive than int cmoves
3449   return 0;
3450 }
3451 
3452 // Does the CPU require late expand (see block.cpp for description of late expand)?
3453 const bool Matcher::require_postalloc_expand = false;
3454 
3455 // Do we need to mask the count passed to shift instructions or does
3456 // the cpu only look at the lower 5/6 bits anyway?
3457 const bool Matcher::need_masked_shift_count = false;
3458 
3459 // This affects two different things:
3460 //  - how Decode nodes are matched
3461 //  - how ImplicitNullCheck opportunities are recognized
3462 // If true, the matcher will try to remove all Decodes and match them
3463 // (as operands) into nodes. NullChecks are not prepared to deal with
3464 // Decodes by final_graph_reshaping().
3465 // If false, final_graph_reshaping() forces the decode behind the Cmp
3466 // for a NullCheck. The matcher matches the Decode node into a register.
3467 // Implicit_null_check optimization moves the Decode along with the
3468 // memory operation back up before the NullCheck.
3469 bool Matcher::narrow_oop_use_complex_address() {
3470   return Universe::narrow_oop_shift() == 0;
3471 }
3472 
3473 bool Matcher::narrow_klass_use_complex_address() {
3474 // TODO
3475 // decide whether we need to set this to true
3476   return false;
3477 }
3478 
3479 // Is it better to copy float constants, or load them directly from
3480 // memory?  Intel can load a float constant from a direct address,
3481 // requiring no extra registers.  Most RISCs will have to materialize
3482 // an address into a register first, so they would do better to copy
3483 // the constant from stack.
3484 const bool Matcher::rematerialize_float_constants = false;
3485 
3486 // If CPU can load and store mis-aligned doubles directly then no
3487 // fixup is needed.  Else we split the double into 2 integer pieces
3488 // and move it piece-by-piece.  Only happens when passing doubles into
3489 // C code as the Java calling convention forces doubles to be aligned.
3490 const bool Matcher::misaligned_doubles_ok = true;
3491 
3492 // No-op on amd64
3493 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
3494   Unimplemented();
3495 }
3496 
3497 // Advertise here if the CPU requires explicit rounding operations to
3498 // implement the UseStrictFP mode.
3499 const bool Matcher::strict_fp_requires_explicit_rounding = false;
3500 
3501 // Are floats converted to double when stored to stack during
3502 // deoptimization?
3503 bool Matcher::float_in_double() { return true; }
3504 
3505 // Do ints take an entire long register or just half?
3506 // The relevant question is how the int is callee-saved:
3507 // the whole long is written but de-opt'ing will have to extract
3508 // the relevant 32 bits.
3509 const bool Matcher::int_in_long = true;
3510 
3511 // Return whether or not this register is ever used as an argument.
3512 // This function is used on startup to build the trampoline stubs in
3513 // generateOptoStub.  Registers not mentioned will be killed by the VM
3514 // call in the trampoline, and arguments in those registers not be
3515 // available to the callee.
3516 bool Matcher::can_be_java_arg(int reg)
3517 {
3518   return
3519     reg ==  R0_num || reg == R0_H_num ||
3520     reg ==  R1_num || reg == R1_H_num ||
3521     reg ==  R2_num || reg == R2_H_num ||
3522     reg ==  R3_num || reg == R3_H_num ||
3523     reg ==  R4_num || reg == R4_H_num ||
3524     reg ==  R5_num || reg == R5_H_num ||
3525     reg ==  R6_num || reg == R6_H_num ||
3526     reg ==  R7_num || reg == R7_H_num ||
3527     reg ==  V0_num || reg == V0_H_num ||
3528     reg ==  V1_num || reg == V1_H_num ||
3529     reg ==  V2_num || reg == V2_H_num ||
3530     reg ==  V3_num || reg == V3_H_num ||
3531     reg ==  V4_num || reg == V4_H_num ||
3532     reg ==  V5_num || reg == V5_H_num ||
3533     reg ==  V6_num || reg == V6_H_num ||
3534     reg ==  V7_num || reg == V7_H_num;
3535 }
3536 
3537 bool Matcher::is_spillable_arg(int reg)
3538 {
3539   return can_be_java_arg(reg);
3540 }
3541 
3542 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
3543   return false;
3544 }
3545 
3546 RegMask Matcher::divI_proj_mask() {
3547   ShouldNotReachHere();
3548   return RegMask();
3549 }
3550 
3551 // Register for MODI projection of divmodI.
3552 RegMask Matcher::modI_proj_mask() {
3553   ShouldNotReachHere();
3554   return RegMask();
3555 }
3556 
3557 // Register for DIVL projection of divmodL.
3558 RegMask Matcher::divL_proj_mask() {
3559   ShouldNotReachHere();
3560   return RegMask();
3561 }
3562 
3563 // Register for MODL projection of divmodL.
3564 RegMask Matcher::modL_proj_mask() {
3565   ShouldNotReachHere();
3566   return RegMask();
3567 }
3568 
3569 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
3570   return FP_REG_mask();
3571 }
3572 
3573 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3574   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3575     Node* u = addp->fast_out(i);
3576     if (u->is_Mem()) {
3577       int opsize = u->as_Mem()->memory_size();
3578       assert(opsize > 0, "unexpected memory operand size");
3579       if (u->as_Mem()->memory_size() != (1<<shift)) {
3580         return false;
3581       }
3582     }
3583   }
3584   return true;
3585 }
3586 
3587 const bool Matcher::convi2l_type_required = false;
3588 
3589 // Should the Matcher clone shifts on addressing modes, expecting them
3590 // to be subsumed into complex addressing expressions or compute them
3591 // into registers?
3592 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
3593   if (clone_base_plus_offset_address(m, mstack, address_visited)) {
3594     return true;
3595   }
3596 
3597   Node *off = m->in(AddPNode::Offset);
3598   if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
3599       size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
3600       // Are there other uses besides address expressions?
3601       !is_visited(off)) {
3602     address_visited.set(off->_idx); // Flag as address_visited
3603     mstack.push(off->in(2), Visit);
3604     Node *conv = off->in(1);
3605     if (conv->Opcode() == Op_ConvI2L &&
3606         // Are there other uses besides address expressions?
3607         !is_visited(conv)) {
3608       address_visited.set(conv->_idx); // Flag as address_visited
3609       mstack.push(conv->in(1), Pre_Visit);
3610     } else {
3611       mstack.push(conv, Pre_Visit);
3612     }
3613     address_visited.test_set(m->_idx); // Flag as address_visited
3614     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3615     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3616     return true;
3617   } else if (off->Opcode() == Op_ConvI2L &&
3618              // Are there other uses besides address expressions?
3619              !is_visited(off)) {
3620     address_visited.test_set(m->_idx); // Flag as address_visited
3621     address_visited.set(off->_idx); // Flag as address_visited
3622     mstack.push(off->in(1), Pre_Visit);
3623     mstack.push(m->in(AddPNode::Address), Pre_Visit);
3624     mstack.push(m->in(AddPNode::Base), Pre_Visit);
3625     return true;
3626   }
3627   return false;
3628 }
3629 
3630 // Transform:
3631 // (AddP base (AddP base address (LShiftL index con)) offset)
3632 // into:
3633 // (AddP base (AddP base offset) (LShiftL index con))
3634 // to take full advantage of ARM's addressing modes
3635 void Compile::reshape_address(AddPNode* addp) {
3636   Node *addr = addp->in(AddPNode::Address);
3637   if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
3638     const AddPNode *addp2 = addr->as_AddP();
3639     if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
3640          addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
3641          size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
3642         addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
3643 
3644       // Any use that can't embed the address computation?
3645       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3646         Node* u = addp->fast_out(i);
3647         if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
3648           return;
3649         }
3650       }
3651       
3652       Node* off = addp->in(AddPNode::Offset);
3653       Node* addr2 = addp2->in(AddPNode::Address);
3654       Node* base = addp->in(AddPNode::Base);
3655       
3656       Node* new_addr = NULL;
3657       // Check whether the graph already has the new AddP we need
3658       // before we create one (no GVN available here).
3659       for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
3660         Node* u = addr2->fast_out(i);
3661         if (u->is_AddP() &&
3662             u->in(AddPNode::Base) == base &&
3663             u->in(AddPNode::Address) == addr2 &&
3664             u->in(AddPNode::Offset) == off) {
3665           new_addr = u;
3666           break;
3667         }
3668       }
3669       
3670       if (new_addr == NULL) {
3671         new_addr = new AddPNode(base, addr2, off);
3672       }
3673       Node* new_off = addp2->in(AddPNode::Offset);
3674       addp->set_req(AddPNode::Address, new_addr);
3675       if (addr->outcnt() == 0) {
3676         addr->disconnect_inputs(NULL, this);
3677       }
3678       addp->set_req(AddPNode::Offset, new_off);
3679       if (off->outcnt() == 0) {
3680         off->disconnect_inputs(NULL, this);
3681       }
3682     }
3683   }
3684 }
3685 
3686 // helper for encoding java_to_runtime calls on sim
3687 //
3688 // this is needed to compute the extra arguments required when
3689 // planting a call to the simulator blrt instruction. the TypeFunc
3690 // can be queried to identify the counts for integral, and floating
3691 // arguments and the return type
3692 
3693 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3694 {
3695   int gps = 0;
3696   int fps = 0;
3697   const TypeTuple *domain = tf->domain();
3698   int max = domain->cnt();
3699   for (int i = TypeFunc::Parms; i < max; i++) {
3700     const Type *t = domain->field_at(i);
3701     switch(t->basic_type()) {
3702     case T_FLOAT:
3703     case T_DOUBLE:
3704       fps++;
3705     default:
3706       gps++;
3707     }
3708   }
3709   gpcnt = gps;
3710   fpcnt = fps;
3711   BasicType rt = tf->return_type();
3712   switch (rt) {
3713   case T_VOID:
3714     rtype = MacroAssembler::ret_type_void;
3715     break;
3716   default:
3717     rtype = MacroAssembler::ret_type_integral;
3718     break;
3719   case T_FLOAT:
3720     rtype = MacroAssembler::ret_type_float;
3721     break;
3722   case T_DOUBLE:
3723     rtype = MacroAssembler::ret_type_double;
3724     break;
3725   }
3726 }
3727 
3728 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
3729   MacroAssembler _masm(&cbuf);                                          \
3730   {                                                                     \
3731     guarantee(INDEX == -1, "mode not permitted for volatile");          \
3732     guarantee(DISP == 0, "mode not permitted for volatile");            \
3733     guarantee(SCALE == 0, "mode not permitted for volatile");           \
3734     __ INSN(REG, as_Register(BASE));                                    \
3735   }
3736 
3737 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
3738 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
3739 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
3740                                   MacroAssembler::SIMD_RegVariant T, const Address &adr);
3741 
3742   // Used for all non-volatile memory accesses.  The use of
3743   // $mem->opcode() to discover whether this pattern uses sign-extended
3744   // offsets is something of a kludge.
3745   static void loadStore(MacroAssembler masm, mem_insn insn,
3746                          Register reg, int opcode,
3747                          Register base, int index, int size, int disp)
3748   {
3749     Address::extend scale;
3750 
3751     // Hooboy, this is fugly.  We need a way to communicate to the
3752     // encoder that the index needs to be sign extended, so we have to
3753     // enumerate all the cases.
3754     switch (opcode) {
3755     case INDINDEXSCALEDI2L:
3756     case INDINDEXSCALEDI2LN:
3757     case INDINDEXI2L:
3758     case INDINDEXI2LN:
3759       scale = Address::sxtw(size);
3760       break;
3761     default:
3762       scale = Address::lsl(size);
3763     }
3764 
3765     if (index == -1) {
3766       (masm.*insn)(reg, Address(base, disp));
3767     } else {
3768       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3769       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3770     }
3771   }
3772 
3773   static void loadStore(MacroAssembler masm, mem_float_insn insn,
3774                          FloatRegister reg, int opcode,
3775                          Register base, int index, int size, int disp)
3776   {
3777     Address::extend scale;
3778 
3779     switch (opcode) {
3780     case INDINDEXSCALEDI2L:
3781     case INDINDEXSCALEDI2LN:
3782       scale = Address::sxtw(size);
3783       break;
3784     default:
3785       scale = Address::lsl(size);
3786     }
3787 
3788      if (index == -1) {
3789       (masm.*insn)(reg, Address(base, disp));
3790     } else {
3791       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3792       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3793     }
3794   }
3795 
3796   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
3797                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3798                          int opcode, Register base, int index, int size, int disp)
3799   {
3800     if (index == -1) {
3801       (masm.*insn)(reg, T, Address(base, disp));
3802     } else {
3803       assert(disp == 0, "unsupported address mode");
3804       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3805     }
3806   }
3807 
3808 %}
3809 
3810 
3811 
3812 //----------ENCODING BLOCK-----------------------------------------------------
3813 // This block specifies the encoding classes used by the compiler to
3814 // output byte streams.  Encoding classes are parameterized macros
3815 // used by Machine Instruction Nodes in order to generate the bit
3816 // encoding of the instruction.  Operands specify their base encoding
3817 // interface with the interface keyword.  There are currently
3818 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3819 // COND_INTER.  REG_INTER causes an operand to generate a function
3820 // which returns its register number when queried.  CONST_INTER causes
3821 // an operand to generate a function which returns the value of the
3822 // constant when queried.  MEMORY_INTER causes an operand to generate
3823 // four functions which return the Base Register, the Index Register,
3824 // the Scale Value, and the Offset Value of the operand when queried.
3825 // COND_INTER causes an operand to generate six functions which return
3826 // the encoding code (ie - encoding bits for the instruction)
3827 // associated with each basic boolean condition for a conditional
3828 // instruction.
3829 //
3830 // Instructions specify two basic values for encoding.  Again, a
3831 // function is available to check if the constant displacement is an
3832 // oop. They use the ins_encode keyword to specify their encoding
3833 // classes (which must be a sequence of enc_class names, and their
3834 // parameters, specified in the encoding block), and they use the
3835 // opcode keyword to specify, in order, their primary, secondary, and
3836 // tertiary opcode.  Only the opcode sections which a particular
3837 // instruction needs for encoding need to be specified.
3838 encode %{
3839   // Build emit functions for each basic byte or larger field in the
3840   // intel encoding scheme (opcode, rm, sib, immediate), and call them
3841   // from C++ code in the enc_class source block.  Emit functions will
3842   // live in the main source block for now.  In future, we can
3843   // generalize this by adding a syntax that specifies the sizes of
3844   // fields in an order, so that the adlc can build the emit functions
3845   // automagically
3846 
3847   // catch all for unimplemented encodings
3848   enc_class enc_unimplemented %{
3849     MacroAssembler _masm(&cbuf);
3850     __ unimplemented("C2 catch all");
3851   %}
3852 
3853   // BEGIN Non-volatile memory access
3854 
3855   enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
3856     Register dst_reg = as_Register($dst$$reg);
3857     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
3858                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3859   %}
3860 
3861   enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
3862     Register dst_reg = as_Register($dst$$reg);
3863     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
3864                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3865   %}
3866 
3867   enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
3868     Register dst_reg = as_Register($dst$$reg);
3869     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3870                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3871   %}
3872 
3873   enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
3874     Register dst_reg = as_Register($dst$$reg);
3875     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
3876                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3877   %}
3878 
3879   enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
3880     Register dst_reg = as_Register($dst$$reg);
3881     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
3882                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3883   %}
3884 
3885   enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
3886     Register dst_reg = as_Register($dst$$reg);
3887     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
3888                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3889   %}
3890 
3891   enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
3892     Register dst_reg = as_Register($dst$$reg);
3893     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3894                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3895   %}
3896 
3897   enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
3898     Register dst_reg = as_Register($dst$$reg);
3899     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
3900                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3901   %}
3902 
3903   enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
3904     Register dst_reg = as_Register($dst$$reg);
3905     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3906                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3907   %}
3908 
3909   enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
3910     Register dst_reg = as_Register($dst$$reg);
3911     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
3912                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3913   %}
3914 
3915   enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
3916     Register dst_reg = as_Register($dst$$reg);
3917     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
3918                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3919   %}
3920 
3921   enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
3922     Register dst_reg = as_Register($dst$$reg);
3923     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
3924                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3925   %}
3926 
3927   enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
3928     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3929     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
3930                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3931   %}
3932 
3933   enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
3934     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3935     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
3936                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3937   %}
3938 
3939   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
3940     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3941     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
3942        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3943   %}
3944 
3945   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
3946     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3947     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
3948        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3949   %}
3950 
3951   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
3952     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
3953     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
3954        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3955   %}
3956 
3957   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
3958     Register src_reg = as_Register($src$$reg);
3959     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
3960                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3961   %}
3962 
3963   enc_class aarch64_enc_strb0(memory mem) %{
3964     MacroAssembler _masm(&cbuf);
3965     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3966                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3967   %}
3968 
3969   enc_class aarch64_enc_strb0_ordered(memory mem) %{
3970     MacroAssembler _masm(&cbuf);
3971     __ membar(Assembler::StoreStore);
3972     loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
3973                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3974   %}
3975 
3976   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
3977     Register src_reg = as_Register($src$$reg);
3978     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
3979                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3980   %}
3981 
3982   enc_class aarch64_enc_strh0(memory mem) %{
3983     MacroAssembler _masm(&cbuf);
3984     loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
3985                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3986   %}
3987 
3988   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
3989     Register src_reg = as_Register($src$$reg);
3990     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
3991                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3992   %}
3993 
3994   enc_class aarch64_enc_strw0(memory mem) %{
3995     MacroAssembler _masm(&cbuf);
3996     loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
3997                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
3998   %}
3999 
4000   enc_class aarch64_enc_str(iRegL src, memory mem) %{
4001     Register src_reg = as_Register($src$$reg);
4002     // we sometimes get asked to store the stack pointer into the
4003     // current thread -- we cannot do that directly on AArch64
4004     if (src_reg == r31_sp) {
4005       MacroAssembler _masm(&cbuf);
4006       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4007       __ mov(rscratch2, sp);
4008       src_reg = rscratch2;
4009     }
4010     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
4011                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4012   %}
4013 
4014   enc_class aarch64_enc_str0(memory mem) %{
4015     MacroAssembler _masm(&cbuf);
4016     loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
4017                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4018   %}
4019 
4020   enc_class aarch64_enc_strs(vRegF src, memory mem) %{
4021     FloatRegister src_reg = as_FloatRegister($src$$reg);
4022     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
4023                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4024   %}
4025 
4026   enc_class aarch64_enc_strd(vRegD src, memory mem) %{
4027     FloatRegister src_reg = as_FloatRegister($src$$reg);
4028     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
4029                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4030   %}
4031 
4032   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
4033     FloatRegister src_reg = as_FloatRegister($src$$reg);
4034     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
4035        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4036   %}
4037 
4038   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
4039     FloatRegister src_reg = as_FloatRegister($src$$reg);
4040     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
4041        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4042   %}
4043 
4044   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
4045     FloatRegister src_reg = as_FloatRegister($src$$reg);
4046     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
4047        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4048   %}
4049 
4050   // END Non-volatile memory access
4051 
4052   // volatile loads and stores
4053 
4054   enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
4055     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4056                  rscratch1, stlrb);
4057   %}
4058 
4059   enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
4060     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4061                  rscratch1, stlrh);
4062   %}
4063 
4064   enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
4065     MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4066                  rscratch1, stlrw);
4067   %}
4068 
4069 
4070   enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
4071     Register dst_reg = as_Register($dst$$reg);
4072     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4073              rscratch1, ldarb);
4074     __ sxtbw(dst_reg, dst_reg);
4075   %}
4076 
4077   enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
4078     Register dst_reg = as_Register($dst$$reg);
4079     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4080              rscratch1, ldarb);
4081     __ sxtb(dst_reg, dst_reg);
4082   %}
4083 
4084   enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
4085     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4086              rscratch1, ldarb);
4087   %}
4088 
4089   enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
4090     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4091              rscratch1, ldarb);
4092   %}
4093 
4094   enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
4095     Register dst_reg = as_Register($dst$$reg);
4096     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4097              rscratch1, ldarh);
4098     __ sxthw(dst_reg, dst_reg);
4099   %}
4100 
4101   enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
4102     Register dst_reg = as_Register($dst$$reg);
4103     MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4104              rscratch1, ldarh);
4105     __ sxth(dst_reg, dst_reg);
4106   %}
4107 
4108   enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
4109     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4110              rscratch1, ldarh);
4111   %}
4112 
4113   enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
4114     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4115              rscratch1, ldarh);
4116   %}
4117 
4118   enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
4119     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4120              rscratch1, ldarw);
4121   %}
4122 
4123   enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
4124     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4125              rscratch1, ldarw);
4126   %}
4127 
4128   enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
4129     MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4130              rscratch1, ldar);
4131   %}
4132 
4133   enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
4134     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4135              rscratch1, ldarw);
4136     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
4137   %}
4138 
4139   enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
4140     MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4141              rscratch1, ldar);
4142     __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
4143   %}
4144 
4145   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4146     Register src_reg = as_Register($src$$reg);
4147     // we sometimes get asked to store the stack pointer into the
4148     // current thread -- we cannot do that directly on AArch64
4149     if (src_reg == r31_sp) {
4150         MacroAssembler _masm(&cbuf);
4151       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4152       __ mov(rscratch2, sp);
4153       src_reg = rscratch2;
4154     }
4155     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4156                  rscratch1, stlr);
4157   %}
4158 
4159   enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
4160     {
4161       MacroAssembler _masm(&cbuf);
4162       FloatRegister src_reg = as_FloatRegister($src$$reg);
4163       __ fmovs(rscratch2, src_reg);
4164     }
4165     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4166                  rscratch1, stlrw);
4167   %}
4168 
4169   enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
4170     {
4171       MacroAssembler _masm(&cbuf);
4172       FloatRegister src_reg = as_FloatRegister($src$$reg);
4173       __ fmovd(rscratch2, src_reg);
4174     }
4175     MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4176                  rscratch1, stlr);
4177   %}
4178 
4179   // synchronized read/update encodings
4180 
4181   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
4182     MacroAssembler _masm(&cbuf);
4183     Register dst_reg = as_Register($dst$$reg);
4184     Register base = as_Register($mem$$base);
4185     int index = $mem$$index;
4186     int scale = $mem$$scale;
4187     int disp = $mem$$disp;
4188     if (index == -1) {
4189        if (disp != 0) {
4190         __ lea(rscratch1, Address(base, disp));
4191         __ ldaxr(dst_reg, rscratch1);
4192       } else {
4193         // TODO
4194         // should we ever get anything other than this case?
4195         __ ldaxr(dst_reg, base);
4196       }
4197     } else {
4198       Register index_reg = as_Register(index);
4199       if (disp == 0) {
4200         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
4201         __ ldaxr(dst_reg, rscratch1);
4202       } else {
4203         __ lea(rscratch1, Address(base, disp));
4204         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
4205         __ ldaxr(dst_reg, rscratch1);
4206       }
4207     }
4208   %}
4209 
4210   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
4211     MacroAssembler _masm(&cbuf);
4212     Register src_reg = as_Register($src$$reg);
4213     Register base = as_Register($mem$$base);
4214     int index = $mem$$index;
4215     int scale = $mem$$scale;
4216     int disp = $mem$$disp;
4217     if (index == -1) {
4218        if (disp != 0) {
4219         __ lea(rscratch2, Address(base, disp));
4220         __ stlxr(rscratch1, src_reg, rscratch2);
4221       } else {
4222         // TODO
4223         // should we ever get anything other than this case?
4224         __ stlxr(rscratch1, src_reg, base);
4225       }
4226     } else {
4227       Register index_reg = as_Register(index);
4228       if (disp == 0) {
4229         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
4230         __ stlxr(rscratch1, src_reg, rscratch2);
4231       } else {
4232         __ lea(rscratch2, Address(base, disp));
4233         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
4234         __ stlxr(rscratch1, src_reg, rscratch2);
4235       }
4236     }
4237     __ cmpw(rscratch1, zr);
4238   %}
4239 
4240   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4241     MacroAssembler _masm(&cbuf);
4242     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4243     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4244                Assembler::xword, /*acquire*/ false, /*release*/ true);
4245   %}
4246 
4247   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4248     MacroAssembler _masm(&cbuf);
4249     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4250     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4251                Assembler::word, /*acquire*/ false, /*release*/ true);
4252   %}
4253 
4254 
4255   // The only difference between aarch64_enc_cmpxchg and
4256   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
4257   // CompareAndSwap sequence to serve as a barrier on acquiring a
4258   // lock.
4259   enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
4260     MacroAssembler _masm(&cbuf);
4261     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4262     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4263                Assembler::xword, /*acquire*/ true, /*release*/ true);
4264   %}
4265 
4266   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4267     MacroAssembler _masm(&cbuf);
4268     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4269     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4270                Assembler::word, /*acquire*/ true, /*release*/ true);
4271   %}
4272 
4273 
4274   // auxiliary used for CompareAndSwapX to set result register
4275   enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
4276     MacroAssembler _masm(&cbuf);
4277     Register res_reg = as_Register($res$$reg);
4278     __ cset(res_reg, Assembler::EQ);
4279   %}
4280 
4281   // prefetch encodings
4282 
4283   enc_class aarch64_enc_prefetchw(memory mem) %{
4284     MacroAssembler _masm(&cbuf);
4285     Register base = as_Register($mem$$base);
4286     int index = $mem$$index;
4287     int scale = $mem$$scale;
4288     int disp = $mem$$disp;
4289     if (index == -1) {
4290       __ prfm(Address(base, disp), PSTL1KEEP);
4291     } else {
4292       Register index_reg = as_Register(index);
4293       if (disp == 0) {
4294         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
4295       } else {
4296         __ lea(rscratch1, Address(base, disp));
4297         __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
4298       }
4299     }
4300   %}
4301 
4302   /// mov envcodings
4303 
4304   enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
4305     MacroAssembler _masm(&cbuf);
4306     u_int32_t con = (u_int32_t)$src$$constant;
4307     Register dst_reg = as_Register($dst$$reg);
4308     if (con == 0) {
4309       __ movw(dst_reg, zr);
4310     } else {
4311       __ movw(dst_reg, con);
4312     }
4313   %}
4314 
4315   enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
4316     MacroAssembler _masm(&cbuf);
4317     Register dst_reg = as_Register($dst$$reg);
4318     u_int64_t con = (u_int64_t)$src$$constant;
4319     if (con == 0) {
4320       __ mov(dst_reg, zr);
4321     } else {
4322       __ mov(dst_reg, con);
4323     }
4324   %}
4325 
4326   enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
4327     MacroAssembler _masm(&cbuf);
4328     Register dst_reg = as_Register($dst$$reg);
4329     address con = (address)$src$$constant;
4330     if (con == NULL || con == (address)1) {
4331       ShouldNotReachHere();
4332     } else {
4333       relocInfo::relocType rtype = $src->constant_reloc();
4334       if (rtype == relocInfo::oop_type) {
4335         __ movoop(dst_reg, (jobject)con, /*immediate*/true);
4336       } else if (rtype == relocInfo::metadata_type) {
4337         __ mov_metadata(dst_reg, (Metadata*)con);
4338       } else {
4339         assert(rtype == relocInfo::none, "unexpected reloc type");
4340         if (con < (address)(uintptr_t)os::vm_page_size()) {
4341           __ mov(dst_reg, con);
4342         } else {
4343           unsigned long offset;
4344           __ adrp(dst_reg, con, offset);
4345           __ add(dst_reg, dst_reg, offset);
4346         }
4347       }
4348     }
4349   %}
4350 
4351   enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
4352     MacroAssembler _masm(&cbuf);
4353     Register dst_reg = as_Register($dst$$reg);
4354     __ mov(dst_reg, zr);
4355   %}
4356 
4357   enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
4358     MacroAssembler _masm(&cbuf);
4359     Register dst_reg = as_Register($dst$$reg);
4360     __ mov(dst_reg, (u_int64_t)1);
4361   %}
4362 
4363   enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
4364     MacroAssembler _masm(&cbuf);
4365     address page = (address)$src$$constant;
4366     Register dst_reg = as_Register($dst$$reg);
4367     unsigned long off;
4368     __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
4369     assert(off == 0, "assumed offset == 0");
4370   %}
4371 
4372   enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
4373     MacroAssembler _masm(&cbuf);
4374     __ load_byte_map_base($dst$$Register);
4375   %}
4376 
4377   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
4378     MacroAssembler _masm(&cbuf);
4379     Register dst_reg = as_Register($dst$$reg);
4380     address con = (address)$src$$constant;
4381     if (con == NULL) {
4382       ShouldNotReachHere();
4383     } else {
4384       relocInfo::relocType rtype = $src->constant_reloc();
4385       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
4386       __ set_narrow_oop(dst_reg, (jobject)con);
4387     }
4388   %}
4389 
4390   enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
4391     MacroAssembler _masm(&cbuf);
4392     Register dst_reg = as_Register($dst$$reg);
4393     __ mov(dst_reg, zr);
4394   %}
4395 
4396   enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
4397     MacroAssembler _masm(&cbuf);
4398     Register dst_reg = as_Register($dst$$reg);
4399     address con = (address)$src$$constant;
4400     if (con == NULL) {
4401       ShouldNotReachHere();
4402     } else {
4403       relocInfo::relocType rtype = $src->constant_reloc();
4404       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
4405       __ set_narrow_klass(dst_reg, (Klass *)con);
4406     }
4407   %}
4408 
4409   // arithmetic encodings
4410 
4411   enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
4412     MacroAssembler _masm(&cbuf);
4413     Register dst_reg = as_Register($dst$$reg);
4414     Register src_reg = as_Register($src1$$reg);
4415     int32_t con = (int32_t)$src2$$constant;
4416     // add has primary == 0, subtract has primary == 1
4417     if ($primary) { con = -con; }
4418     if (con < 0) {
4419       __ subw(dst_reg, src_reg, -con);
4420     } else {
4421       __ addw(dst_reg, src_reg, con);
4422     }
4423   %}
4424 
4425   enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
4426     MacroAssembler _masm(&cbuf);
4427     Register dst_reg = as_Register($dst$$reg);
4428     Register src_reg = as_Register($src1$$reg);
4429     int32_t con = (int32_t)$src2$$constant;
4430     // add has primary == 0, subtract has primary == 1
4431     if ($primary) { con = -con; }
4432     if (con < 0) {
4433       __ sub(dst_reg, src_reg, -con);
4434     } else {
4435       __ add(dst_reg, src_reg, con);
4436     }
4437   %}
4438 
4439   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4440     MacroAssembler _masm(&cbuf);
4441    Register dst_reg = as_Register($dst$$reg);
4442    Register src1_reg = as_Register($src1$$reg);
4443    Register src2_reg = as_Register($src2$$reg);
4444     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4445   %}
4446 
4447   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4448     MacroAssembler _masm(&cbuf);
4449    Register dst_reg = as_Register($dst$$reg);
4450    Register src1_reg = as_Register($src1$$reg);
4451    Register src2_reg = as_Register($src2$$reg);
4452     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4453   %}
4454 
4455   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4456     MacroAssembler _masm(&cbuf);
4457    Register dst_reg = as_Register($dst$$reg);
4458    Register src1_reg = as_Register($src1$$reg);
4459    Register src2_reg = as_Register($src2$$reg);
4460     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4461   %}
4462 
4463   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4464     MacroAssembler _masm(&cbuf);
4465    Register dst_reg = as_Register($dst$$reg);
4466    Register src1_reg = as_Register($src1$$reg);
4467    Register src2_reg = as_Register($src2$$reg);
4468     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4469   %}
4470 
4471   // compare instruction encodings
4472 
4473   enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
4474     MacroAssembler _masm(&cbuf);
4475     Register reg1 = as_Register($src1$$reg);
4476     Register reg2 = as_Register($src2$$reg);
4477     __ cmpw(reg1, reg2);
4478   %}
4479 
4480   enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
4481     MacroAssembler _masm(&cbuf);
4482     Register reg = as_Register($src1$$reg);
4483     int32_t val = $src2$$constant;
4484     if (val >= 0) {
4485       __ subsw(zr, reg, val);
4486     } else {
4487       __ addsw(zr, reg, -val);
4488     }
4489   %}
4490 
4491   enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
4492     MacroAssembler _masm(&cbuf);
4493     Register reg1 = as_Register($src1$$reg);
4494     u_int32_t val = (u_int32_t)$src2$$constant;
4495     __ movw(rscratch1, val);
4496     __ cmpw(reg1, rscratch1);
4497   %}
4498 
4499   enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
4500     MacroAssembler _masm(&cbuf);
4501     Register reg1 = as_Register($src1$$reg);
4502     Register reg2 = as_Register($src2$$reg);
4503     __ cmp(reg1, reg2);
4504   %}
4505 
4506   enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
4507     MacroAssembler _masm(&cbuf);
4508     Register reg = as_Register($src1$$reg);
4509     int64_t val = $src2$$constant;
4510     if (val >= 0) {
4511       __ subs(zr, reg, val);
4512     } else if (val != -val) {
4513       __ adds(zr, reg, -val);
4514     } else {
4515     // aargh, Long.MIN_VALUE is a special case
4516       __ orr(rscratch1, zr, (u_int64_t)val);
4517       __ subs(zr, reg, rscratch1);
4518     }
4519   %}
4520 
4521   enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
4522     MacroAssembler _masm(&cbuf);
4523     Register reg1 = as_Register($src1$$reg);
4524     u_int64_t val = (u_int64_t)$src2$$constant;
4525     __ mov(rscratch1, val);
4526     __ cmp(reg1, rscratch1);
4527   %}
4528 
4529   enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
4530     MacroAssembler _masm(&cbuf);
4531     Register reg1 = as_Register($src1$$reg);
4532     Register reg2 = as_Register($src2$$reg);
4533     __ cmp(reg1, reg2);
4534   %}
4535 
4536   enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
4537     MacroAssembler _masm(&cbuf);
4538     Register reg1 = as_Register($src1$$reg);
4539     Register reg2 = as_Register($src2$$reg);
4540     __ cmpw(reg1, reg2);
4541   %}
4542 
4543   enc_class aarch64_enc_testp(iRegP src) %{
4544     MacroAssembler _masm(&cbuf);
4545     Register reg = as_Register($src$$reg);
4546     __ cmp(reg, zr);
4547   %}
4548 
4549   enc_class aarch64_enc_testn(iRegN src) %{
4550     MacroAssembler _masm(&cbuf);
4551     Register reg = as_Register($src$$reg);
4552     __ cmpw(reg, zr);
4553   %}
4554 
4555   enc_class aarch64_enc_b(label lbl) %{
4556     MacroAssembler _masm(&cbuf);
4557     Label *L = $lbl$$label;
4558     __ b(*L);
4559   %}
4560 
4561   enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
4562     MacroAssembler _masm(&cbuf);
4563     Label *L = $lbl$$label;
4564     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4565   %}
4566 
4567   enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
4568     MacroAssembler _masm(&cbuf);
4569     Label *L = $lbl$$label;
4570     __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
4571   %}
4572 
4573   enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
4574   %{
4575      Register sub_reg = as_Register($sub$$reg);
4576      Register super_reg = as_Register($super$$reg);
4577      Register temp_reg = as_Register($temp$$reg);
4578      Register result_reg = as_Register($result$$reg);
4579 
4580      Label miss;
4581      MacroAssembler _masm(&cbuf);
4582      __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
4583                                      NULL, &miss,
4584                                      /*set_cond_codes:*/ true);
4585      if ($primary) {
4586        __ mov(result_reg, zr);
4587      }
4588      __ bind(miss);
4589   %}
4590 
4591   enc_class aarch64_enc_java_static_call(method meth) %{
4592     MacroAssembler _masm(&cbuf);
4593 
4594     address addr = (address)$meth$$method;
4595     address call;
4596     if (!_method) {
4597       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
4598       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
4599     } else {
4600       int method_index = resolved_method_index(cbuf);
4601       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4602                                                   : static_call_Relocation::spec(method_index);
4603       call = __ trampoline_call(Address(addr, rspec), &cbuf);
4604 
4605       // Emit stub for static call
4606       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
4607       if (stub == NULL) {
4608         ciEnv::current()->record_failure("CodeCache is full");
4609         return;
4610       }
4611     }
4612     if (call == NULL) {
4613       ciEnv::current()->record_failure("CodeCache is full");
4614       return;
4615     }
4616   %}
4617 
4618   enc_class aarch64_enc_java_dynamic_call(method meth) %{
4619     MacroAssembler _masm(&cbuf);
4620     int method_index = resolved_method_index(cbuf);
4621     address call = __ ic_call((address)$meth$$method, method_index);
4622     if (call == NULL) {
4623       ciEnv::current()->record_failure("CodeCache is full");
4624       return;
4625     }
4626   %}
4627 
4628   enc_class aarch64_enc_call_epilog() %{
4629     MacroAssembler _masm(&cbuf);
4630     if (VerifyStackAtCalls) {
4631       // Check that stack depth is unchanged: find majik cookie on stack
4632       __ call_Unimplemented();
4633     }
4634   %}
4635 
4636   enc_class aarch64_enc_java_to_runtime(method meth) %{
4637     MacroAssembler _masm(&cbuf);
4638 
4639     // some calls to generated routines (arraycopy code) are scheduled
4640     // by C2 as runtime calls. if so we can call them using a br (they
4641     // will be in a reachable segment) otherwise we have to use a blrt
4642     // which loads the absolute address into a register.
4643     address entry = (address)$meth$$method;
4644     CodeBlob *cb = CodeCache::find_blob(entry);
4645     if (cb) {
4646       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
4647       if (call == NULL) {
4648         ciEnv::current()->record_failure("CodeCache is full");
4649         return;
4650       }
4651     } else {
4652       int gpcnt;
4653       int fpcnt;
4654       int rtype;
4655       getCallInfo(tf(), gpcnt, fpcnt, rtype);
4656       Label retaddr;
4657       __ adr(rscratch2, retaddr);
4658       __ lea(rscratch1, RuntimeAddress(entry));
4659       // Leave a breadcrumb for JavaThread::pd_last_frame().
4660       __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
4661       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
4662       __ bind(retaddr);
4663       __ add(sp, sp, 2 * wordSize);
4664     }
4665   %}
4666 
4667   enc_class aarch64_enc_rethrow() %{
4668     MacroAssembler _masm(&cbuf);
4669     __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
4670   %}
4671 
4672   enc_class aarch64_enc_ret() %{
4673     MacroAssembler _masm(&cbuf);
4674     __ ret(lr);
4675   %}
4676 
4677   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
4678     MacroAssembler _masm(&cbuf);
4679     Register target_reg = as_Register($jump_target$$reg);
4680     __ br(target_reg);
4681   %}
4682 
4683   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
4684     MacroAssembler _masm(&cbuf);
4685     Register target_reg = as_Register($jump_target$$reg);
4686     // exception oop should be in r0
4687     // ret addr has been popped into lr
4688     // callee expects it in r3
4689     __ mov(r3, lr);
4690     __ br(target_reg);
4691   %}
4692 
4693   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4694     MacroAssembler _masm(&cbuf);
4695     Register oop = as_Register($object$$reg);
4696     Register box = as_Register($box$$reg);
4697     Register disp_hdr = as_Register($tmp$$reg);
4698     Register tmp = as_Register($tmp2$$reg);
4699     Label cont;
4700     Label object_has_monitor;
4701     Label cas_failed;
4702 
4703     assert_different_registers(oop, box, tmp, disp_hdr);
4704 
4705     // Load markOop from object into displaced_header.
4706     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4707 
4708     // Always do locking in runtime.
4709     if (EmitSync & 0x01) {
4710       __ cmp(oop, zr);
4711       return;
4712     }
4713 
4714     if (UseBiasedLocking && !UseOptoBiasInlining) {
4715       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4716     }
4717 
4718     // Handle existing monitor
4719     if ((EmitSync & 0x02) == 0) {
4720       // we can use AArch64's bit test and branch here but
4721       // markoopDesc does not define a bit index just the bit value
4722       // so assert in case the bit pos changes
4723 #     define __monitor_value_log2 1
4724       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4725       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4726 #     undef __monitor_value_log2
4727     }
4728 
4729     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4730     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4731 
4732     // Load Compare Value application register.
4733 
4734     // Initialize the box. (Must happen before we update the object mark!)
4735     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4736 
4737     // Compare object markOop with mark and if equal exchange scratch1
4738     // with object markOop.
4739     if (UseLSE) {
4740       __ mov(tmp, disp_hdr);
4741       __ casal(Assembler::xword, tmp, box, oop);
4742       __ cmp(tmp, disp_hdr);
4743       __ br(Assembler::EQ, cont);
4744     } else {
4745       Label retry_load;
4746       if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4747         __ prfm(Address(oop), PSTL1STRM);
4748       __ bind(retry_load);
4749       __ ldaxr(tmp, oop);
4750       __ cmp(tmp, disp_hdr);
4751       __ br(Assembler::NE, cas_failed);
4752       // use stlxr to ensure update is immediately visible
4753       __ stlxr(tmp, box, oop);
4754       __ cbzw(tmp, cont);
4755       __ b(retry_load);
4756     }
4757 
4758     // Formerly:
4759     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4760     //               /*newv=*/box,
4761     //               /*addr=*/oop,
4762     //               /*tmp=*/tmp,
4763     //               cont,
4764     //               /*fail*/NULL);
4765 
4766     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4767 
4768     // If the compare-and-exchange succeeded, then we found an unlocked
4769     // object, will have now locked it will continue at label cont
4770 
4771     __ bind(cas_failed);
4772     // We did not see an unlocked object so try the fast recursive case.
4773 
4774     // Check if the owner is self by comparing the value in the
4775     // markOop of object (disp_hdr) with the stack pointer.
4776     __ mov(rscratch1, sp);
4777     __ sub(disp_hdr, disp_hdr, rscratch1);
4778     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
4779     // If condition is true we are cont and hence we can store 0 as the
4780     // displaced header in the box, which indicates that it is a recursive lock.
4781     __ ands(tmp/*==0?*/, disp_hdr, tmp);
4782     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4783 
4784     // Handle existing monitor.
4785     if ((EmitSync & 0x02) == 0) {
4786       __ b(cont);
4787 
4788       __ bind(object_has_monitor);
4789       // The object's monitor m is unlocked iff m->owner == NULL,
4790       // otherwise m->owner may contain a thread or a stack address.
4791       //
4792       // Try to CAS m->owner from NULL to current thread.
4793       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
4794       __ mov(disp_hdr, zr);
4795 
4796       if (UseLSE) {
4797         __ mov(rscratch1, disp_hdr);
4798         __ casal(Assembler::xword, rscratch1, rthread, tmp);
4799         __ cmp(rscratch1, disp_hdr);
4800       } else {
4801         Label retry_load, fail;
4802         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4803           __ prfm(Address(tmp), PSTL1STRM);
4804         __ bind(retry_load);
4805         __ ldaxr(rscratch1, tmp);
4806         __ cmp(disp_hdr, rscratch1);
4807         __ br(Assembler::NE, fail);
4808         // use stlxr to ensure update is immediately visible
4809         __ stlxr(rscratch1, rthread, tmp);
4810         __ cbnzw(rscratch1, retry_load);
4811         __ bind(fail);
4812       }
4813 
4814       // Label next;
4815       // __ cmpxchgptr(/*oldv=*/disp_hdr,
4816       //               /*newv=*/rthread,
4817       //               /*addr=*/tmp,
4818       //               /*tmp=*/rscratch1,
4819       //               /*succeed*/next,
4820       //               /*fail*/NULL);
4821       // __ bind(next);
4822 
4823       // store a non-null value into the box.
4824       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4825 
4826       // PPC port checks the following invariants
4827       // #ifdef ASSERT
4828       // bne(flag, cont);
4829       // We have acquired the monitor, check some invariants.
4830       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
4831       // Invariant 1: _recursions should be 0.
4832       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
4833       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
4834       //                        "monitor->_recursions should be 0", -1);
4835       // Invariant 2: OwnerIsThread shouldn't be 0.
4836       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
4837       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
4838       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
4839       // #endif
4840     }
4841 
4842     __ bind(cont);
4843     // flag == EQ indicates success
4844     // flag == NE indicates failure
4845 
4846   %}
4847 
4848   // TODO
4849   // reimplement this with custom cmpxchgptr code
4850   // which avoids some of the unnecessary branching
4851   enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4852     MacroAssembler _masm(&cbuf);
4853     Register oop = as_Register($object$$reg);
4854     Register box = as_Register($box$$reg);
4855     Register disp_hdr = as_Register($tmp$$reg);
4856     Register tmp = as_Register($tmp2$$reg);
4857     Label cont;
4858     Label object_has_monitor;
4859     Label cas_failed;
4860 
4861     assert_different_registers(oop, box, tmp, disp_hdr);
4862 
4863     // Always do locking in runtime.
4864     if (EmitSync & 0x01) {
4865       __ cmp(oop, zr); // Oop can't be 0 here => always false.
4866       return;
4867     }
4868 
4869     if (UseBiasedLocking && !UseOptoBiasInlining) {
4870       __ biased_locking_exit(oop, tmp, cont);
4871     }
4872 
4873     // Find the lock address and load the displaced header from the stack.
4874     __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4875 
4876     // If the displaced header is 0, we have a recursive unlock.
4877     __ cmp(disp_hdr, zr);
4878     __ br(Assembler::EQ, cont);
4879 
4880 
4881     // Handle existing monitor.
4882     if ((EmitSync & 0x02) == 0) {
4883       __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
4884       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
4885     }
4886 
4887     // Check if it is still a light weight lock, this is is true if we
4888     // see the stack address of the basicLock in the markOop of the
4889     // object.
4890 
4891       if (UseLSE) {
4892         __ mov(tmp, box);
4893         __ casl(Assembler::xword, tmp, disp_hdr, oop);
4894         __ cmp(tmp, box);
4895       } else {
4896         Label retry_load;
4897         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4898           __ prfm(Address(oop), PSTL1STRM);
4899         __ bind(retry_load);
4900         __ ldxr(tmp, oop);
4901         __ cmp(box, tmp);
4902         __ br(Assembler::NE, cas_failed);
4903         // use stlxr to ensure update is immediately visible
4904         __ stlxr(tmp, disp_hdr, oop);
4905         __ cbzw(tmp, cont);
4906         __ b(retry_load);
4907       }
4908 
4909     // __ cmpxchgptr(/*compare_value=*/box,
4910     //               /*exchange_value=*/disp_hdr,
4911     //               /*where=*/oop,
4912     //               /*result=*/tmp,
4913     //               cont,
4914     //               /*cas_failed*/NULL);
4915     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4916 
4917     __ bind(cas_failed);
4918 
4919     // Handle existing monitor.
4920     if ((EmitSync & 0x02) == 0) {
4921       __ b(cont);
4922 
4923       __ bind(object_has_monitor);
4924       __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
4925       __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4926       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
4927       __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
4928       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
4929       __ cmp(rscratch1, zr);
4930       __ br(Assembler::NE, cont);
4931 
4932       __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
4933       __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
4934       __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
4935       __ cmp(rscratch1, zr);
4936       __ cbnz(rscratch1, cont);
4937       // need a release store here
4938       __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
4939       __ stlr(rscratch1, tmp); // rscratch1 is zero
4940     }
4941 
4942     __ bind(cont);
4943     // flag == EQ indicates success
4944     // flag == NE indicates failure
4945   %}
4946 
4947 %}
4948 
4949 //----------FRAME--------------------------------------------------------------
4950 // Definition of frame structure and management information.
4951 //
4952 //  S T A C K   L A Y O U T    Allocators stack-slot number
4953 //                             |   (to get allocators register number
4954 //  G  Owned by    |        |  v    add OptoReg::stack0())
4955 //  r   CALLER     |        |
4956 //  o     |        +--------+      pad to even-align allocators stack-slot
4957 //  w     V        |  pad0  |        numbers; owned by CALLER
4958 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4959 //  h     ^        |   in   |  5
4960 //        |        |  args  |  4   Holes in incoming args owned by SELF
4961 //  |     |        |        |  3
4962 //  |     |        +--------+
4963 //  V     |        | old out|      Empty on Intel, window on Sparc
4964 //        |    old |preserve|      Must be even aligned.
4965 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4966 //        |        |   in   |  3   area for Intel ret address
4967 //     Owned by    |preserve|      Empty on Sparc.
4968 //       SELF      +--------+
4969 //        |        |  pad2  |  2   pad to align old SP
4970 //        |        +--------+  1
4971 //        |        | locks  |  0
4972 //        |        +--------+----> OptoReg::stack0(), even aligned
4973 //        |        |  pad1  | 11   pad to align new SP
4974 //        |        +--------+
4975 //        |        |        | 10
4976 //        |        | spills |  9   spills
4977 //        V        |        |  8   (pad0 slot for callee)
4978 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4979 //        ^        |  out   |  7
4980 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4981 //     Owned by    +--------+
4982 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4983 //        |    new |preserve|      Must be even-aligned.
4984 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4985 //        |        |        |
4986 //
4987 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4988 //         known from SELF's arguments and the Java calling convention.
4989 //         Region 6-7 is determined per call site.
4990 // Note 2: If the calling convention leaves holes in the incoming argument
4991 //         area, those holes are owned by SELF.  Holes in the outgoing area
4992 //         are owned by the CALLEE.  Holes should not be nessecary in the
4993 //         incoming area, as the Java calling convention is completely under
4994 //         the control of the AD file.  Doubles can be sorted and packed to
4995 //         avoid holes.  Holes in the outgoing arguments may be nessecary for
4996 //         varargs C calling conventions.
4997 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4998 //         even aligned with pad0 as needed.
4999 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5000 //           (the latter is true on Intel but is it false on AArch64?)
5001 //         region 6-11 is even aligned; it may be padded out more so that
5002 //         the region from SP to FP meets the minimum stack alignment.
5003 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5004 //         alignment.  Region 11, pad1, may be dynamically extended so that
5005 //         SP meets the minimum alignment.
5006 
5007 frame %{
5008   // What direction does stack grow in (assumed to be same for C & Java)
5009   stack_direction(TOWARDS_LOW);
5010 
5011   // These three registers define part of the calling convention
5012   // between compiled code and the interpreter.
5013 
5014   // Inline Cache Register or methodOop for I2C.
5015   inline_cache_reg(R12);
5016 
5017   // Method Oop Register when calling interpreter.
5018   interpreter_method_oop_reg(R12);
5019 
5020   // Number of stack slots consumed by locking an object
5021   sync_stack_slots(2);
5022 
5023   // Compiled code's Frame Pointer
5024   frame_pointer(R31);
5025 
5026   // Interpreter stores its frame pointer in a register which is
5027   // stored to the stack by I2CAdaptors.
5028   // I2CAdaptors convert from interpreted java to compiled java.
5029   interpreter_frame_pointer(R29);
5030 
5031   // Stack alignment requirement
5032   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
5033 
5034   // Number of stack slots between incoming argument block and the start of
5035   // a new frame.  The PROLOG must add this many slots to the stack.  The
5036   // EPILOG must remove this many slots. aarch64 needs two slots for
5037   // return address and fp.
5038   // TODO think this is correct but check
5039   in_preserve_stack_slots(4);
5040 
5041   // Number of outgoing stack slots killed above the out_preserve_stack_slots
5042   // for calls to C.  Supports the var-args backing area for register parms.
5043   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
5044 
5045   // The after-PROLOG location of the return address.  Location of
5046   // return address specifies a type (REG or STACK) and a number
5047   // representing the register number (i.e. - use a register name) or
5048   // stack slot.
5049   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
5050   // Otherwise, it is above the locks and verification slot and alignment word
5051   // TODO this may well be correct but need to check why that - 2 is there
5052   // ppc port uses 0 but we definitely need to allow for fixed_slots
5053   // which folds in the space used for monitors
5054   return_addr(STACK - 2 +
5055               round_to((Compile::current()->in_preserve_stack_slots() +
5056                         Compile::current()->fixed_slots()),
5057                        stack_alignment_in_slots()));
5058 
5059   // Body of function which returns an integer array locating
5060   // arguments either in registers or in stack slots.  Passed an array
5061   // of ideal registers called "sig" and a "length" count.  Stack-slot
5062   // offsets are based on outgoing arguments, i.e. a CALLER setting up
5063   // arguments for a CALLEE.  Incoming stack arguments are
5064   // automatically biased by the preserve_stack_slots field above.
5065 
5066   calling_convention
5067   %{
5068     // No difference between ingoing/outgoing just pass false
5069     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
5070   %}
5071 
5072   c_calling_convention
5073   %{
5074     // This is obviously always outgoing
5075     (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
5076   %}
5077 
5078   // Location of compiled Java return values.  Same as C for now.
5079   return_value
5080   %{
5081     // TODO do we allow ideal_reg == Op_RegN???
5082     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
5083            "only return normal values");
5084 
5085     static const int lo[Op_RegL + 1] = { // enum name
5086       0,                                 // Op_Node
5087       0,                                 // Op_Set
5088       R0_num,                            // Op_RegN
5089       R0_num,                            // Op_RegI
5090       R0_num,                            // Op_RegP
5091       V0_num,                            // Op_RegF
5092       V0_num,                            // Op_RegD
5093       R0_num                             // Op_RegL
5094     };
5095 
5096     static const int hi[Op_RegL + 1] = { // enum name
5097       0,                                 // Op_Node
5098       0,                                 // Op_Set
5099       OptoReg::Bad,                       // Op_RegN
5100       OptoReg::Bad,                      // Op_RegI
5101       R0_H_num,                          // Op_RegP
5102       OptoReg::Bad,                      // Op_RegF
5103       V0_H_num,                          // Op_RegD
5104       R0_H_num                           // Op_RegL
5105     };
5106 
5107     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
5108   %}
5109 %}
5110 
5111 //----------ATTRIBUTES---------------------------------------------------------
5112 //----------Operand Attributes-------------------------------------------------
5113 op_attrib op_cost(1);        // Required cost attribute
5114 
5115 //----------Instruction Attributes---------------------------------------------
5116 ins_attrib ins_cost(INSN_COST); // Required cost attribute
5117 ins_attrib ins_size(32);        // Required size attribute (in bits)
5118 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5119                                 // a non-matching short branch variant
5120                                 // of some long branch?
5121 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5122                                 // be a power of 2) specifies the
5123                                 // alignment that some part of the
5124                                 // instruction (not necessarily the
5125                                 // start) requires.  If > 1, a
5126                                 // compute_padding() function must be
5127                                 // provided for the instruction
5128 
5129 //----------OPERANDS-----------------------------------------------------------
5130 // Operand definitions must precede instruction definitions for correct parsing
5131 // in the ADLC because operands constitute user defined types which are used in
5132 // instruction definitions.
5133 
5134 //----------Simple Operands----------------------------------------------------
5135 
5136 // Integer operands 32 bit
5137 // 32 bit immediate
5138 operand immI()
5139 %{
5140   match(ConI);
5141 
5142   op_cost(0);
5143   format %{ %}
5144   interface(CONST_INTER);
5145 %}
5146 
5147 // 32 bit zero
5148 operand immI0()
5149 %{
5150   predicate(n->get_int() == 0);
5151   match(ConI);
5152 
5153   op_cost(0);
5154   format %{ %}
5155   interface(CONST_INTER);
5156 %}
5157 
5158 // 32 bit unit increment
5159 operand immI_1()
5160 %{
5161   predicate(n->get_int() == 1);
5162   match(ConI);
5163 
5164   op_cost(0);
5165   format %{ %}
5166   interface(CONST_INTER);
5167 %}
5168 
5169 // 32 bit unit decrement
5170 operand immI_M1()
5171 %{
5172   predicate(n->get_int() == -1);
5173   match(ConI);
5174 
5175   op_cost(0);
5176   format %{ %}
5177   interface(CONST_INTER);
5178 %}
5179 
5180 operand immI_le_4()
5181 %{
5182   predicate(n->get_int() <= 4);
5183   match(ConI);
5184 
5185   op_cost(0);
5186   format %{ %}
5187   interface(CONST_INTER);
5188 %}
5189 
5190 operand immI_31()
5191 %{
5192   predicate(n->get_int() == 31);
5193   match(ConI);
5194 
5195   op_cost(0);
5196   format %{ %}
5197   interface(CONST_INTER);
5198 %}
5199 
5200 operand immI_8()
5201 %{
5202   predicate(n->get_int() == 8);
5203   match(ConI);
5204 
5205   op_cost(0);
5206   format %{ %}
5207   interface(CONST_INTER);
5208 %}
5209 
5210 operand immI_16()
5211 %{
5212   predicate(n->get_int() == 16);
5213   match(ConI);
5214 
5215   op_cost(0);
5216   format %{ %}
5217   interface(CONST_INTER);
5218 %}
5219 
5220 operand immI_24()
5221 %{
5222   predicate(n->get_int() == 24);
5223   match(ConI);
5224 
5225   op_cost(0);
5226   format %{ %}
5227   interface(CONST_INTER);
5228 %}
5229 
5230 operand immI_32()
5231 %{
5232   predicate(n->get_int() == 32);
5233   match(ConI);
5234 
5235   op_cost(0);
5236   format %{ %}
5237   interface(CONST_INTER);
5238 %}
5239 
5240 operand immI_48()
5241 %{
5242   predicate(n->get_int() == 48);
5243   match(ConI);
5244 
5245   op_cost(0);
5246   format %{ %}
5247   interface(CONST_INTER);
5248 %}
5249 
5250 operand immI_56()
5251 %{
5252   predicate(n->get_int() == 56);
5253   match(ConI);
5254 
5255   op_cost(0);
5256   format %{ %}
5257   interface(CONST_INTER);
5258 %}
5259 
5260 operand immI_64()
5261 %{
5262   predicate(n->get_int() == 64);
5263   match(ConI);
5264 
5265   op_cost(0);
5266   format %{ %}
5267   interface(CONST_INTER);
5268 %}
5269 
5270 operand immI_255()
5271 %{
5272   predicate(n->get_int() == 255);
5273   match(ConI);
5274 
5275   op_cost(0);
5276   format %{ %}
5277   interface(CONST_INTER);
5278 %}
5279 
5280 operand immI_65535()
5281 %{
5282   predicate(n->get_int() == 65535);
5283   match(ConI);
5284 
5285   op_cost(0);
5286   format %{ %}
5287   interface(CONST_INTER);
5288 %}
5289 
5290 operand immL_63()
5291 %{
5292   predicate(n->get_int() == 63);
5293   match(ConI);
5294 
5295   op_cost(0);
5296   format %{ %}
5297   interface(CONST_INTER);
5298 %}
5299 
5300 operand immL_255()
5301 %{
5302   predicate(n->get_int() == 255);
5303   match(ConI);
5304 
5305   op_cost(0);
5306   format %{ %}
5307   interface(CONST_INTER);
5308 %}
5309 
5310 operand immL_65535()
5311 %{
5312   predicate(n->get_long() == 65535L);
5313   match(ConL);
5314 
5315   op_cost(0);
5316   format %{ %}
5317   interface(CONST_INTER);
5318 %}
5319 
5320 operand immL_4294967295()
5321 %{
5322   predicate(n->get_long() == 4294967295L);
5323   match(ConL);
5324 
5325   op_cost(0);
5326   format %{ %}
5327   interface(CONST_INTER);
5328 %}
5329 
5330 operand immL_bitmask()
5331 %{
5332   predicate(((n->get_long() & 0xc000000000000000l) == 0)
5333             && is_power_of_2(n->get_long() + 1));
5334   match(ConL);
5335 
5336   op_cost(0);
5337   format %{ %}
5338   interface(CONST_INTER);
5339 %}
5340 
5341 operand immI_bitmask()
5342 %{
5343   predicate(((n->get_int() & 0xc0000000) == 0)
5344             && is_power_of_2(n->get_int() + 1));
5345   match(ConI);
5346 
5347   op_cost(0);
5348   format %{ %}
5349   interface(CONST_INTER);
5350 %}
5351 
5352 // Scale values for scaled offset addressing modes (up to long but not quad)
5353 operand immIScale()
5354 %{
5355   predicate(0 <= n->get_int() && (n->get_int() <= 3));
5356   match(ConI);
5357 
5358   op_cost(0);
5359   format %{ %}
5360   interface(CONST_INTER);
5361 %}
5362 
5363 // 26 bit signed offset -- for pc-relative branches
5364 operand immI26()
5365 %{
5366   predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
5367   match(ConI);
5368 
5369   op_cost(0);
5370   format %{ %}
5371   interface(CONST_INTER);
5372 %}
5373 
5374 // 19 bit signed offset -- for pc-relative loads
5375 operand immI19()
5376 %{
5377   predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
5378   match(ConI);
5379 
5380   op_cost(0);
5381   format %{ %}
5382   interface(CONST_INTER);
5383 %}
5384 
5385 // 12 bit unsigned offset -- for base plus immediate loads
5386 operand immIU12()
5387 %{
5388   predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
5389   match(ConI);
5390 
5391   op_cost(0);
5392   format %{ %}
5393   interface(CONST_INTER);
5394 %}
5395 
5396 operand immLU12()
5397 %{
5398   predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
5399   match(ConL);
5400 
5401   op_cost(0);
5402   format %{ %}
5403   interface(CONST_INTER);
5404 %}
5405 
5406 // Offset for scaled or unscaled immediate loads and stores
5407 operand immIOffset()
5408 %{
5409   predicate(Address::offset_ok_for_immed(n->get_int()));
5410   match(ConI);
5411 
5412   op_cost(0);
5413   format %{ %}
5414   interface(CONST_INTER);
5415 %}
5416 
5417 operand immIOffset4()
5418 %{
5419   predicate(Address::offset_ok_for_immed(n->get_int(), 2));
5420   match(ConI);
5421 
5422   op_cost(0);
5423   format %{ %}
5424   interface(CONST_INTER);
5425 %}
5426 
5427 operand immIOffset8()
5428 %{
5429   predicate(Address::offset_ok_for_immed(n->get_int(), 3));
5430   match(ConI);
5431 
5432   op_cost(0);
5433   format %{ %}
5434   interface(CONST_INTER);
5435 %}
5436 
5437 operand immIOffset16()
5438 %{
5439   predicate(Address::offset_ok_for_immed(n->get_int(), 4));
5440   match(ConI);
5441 
5442   op_cost(0);
5443   format %{ %}
5444   interface(CONST_INTER);
5445 %}
5446 
5447 operand immLoffset()
5448 %{
5449   predicate(Address::offset_ok_for_immed(n->get_long()));
5450   match(ConL);
5451 
5452   op_cost(0);
5453   format %{ %}
5454   interface(CONST_INTER);
5455 %}
5456 
5457 operand immLoffset4()
5458 %{
5459   predicate(Address::offset_ok_for_immed(n->get_long(), 2));
5460   match(ConL);
5461 
5462   op_cost(0);
5463   format %{ %}
5464   interface(CONST_INTER);
5465 %}
5466 
5467 operand immLoffset8()
5468 %{
5469   predicate(Address::offset_ok_for_immed(n->get_long(), 3));
5470   match(ConL);
5471 
5472   op_cost(0);
5473   format %{ %}
5474   interface(CONST_INTER);
5475 %}
5476 
5477 operand immLoffset16()
5478 %{
5479   predicate(Address::offset_ok_for_immed(n->get_long(), 4));
5480   match(ConL);
5481 
5482   op_cost(0);
5483   format %{ %}
5484   interface(CONST_INTER);
5485 %}
5486 
5487 // 32 bit integer valid for add sub immediate
5488 operand immIAddSub()
5489 %{
5490   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
5491   match(ConI);
5492   op_cost(0);
5493   format %{ %}
5494   interface(CONST_INTER);
5495 %}
5496 
5497 // 32 bit unsigned integer valid for logical immediate
5498 // TODO -- check this is right when e.g the mask is 0x80000000
5499 operand immILog()
5500 %{
5501   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
5502   match(ConI);
5503 
5504   op_cost(0);
5505   format %{ %}
5506   interface(CONST_INTER);
5507 %}
5508 
5509 // Integer operands 64 bit
5510 // 64 bit immediate
5511 operand immL()
5512 %{
5513   match(ConL);
5514 
5515   op_cost(0);
5516   format %{ %}
5517   interface(CONST_INTER);
5518 %}
5519 
5520 // 64 bit zero
5521 operand immL0()
5522 %{
5523   predicate(n->get_long() == 0);
5524   match(ConL);
5525 
5526   op_cost(0);
5527   format %{ %}
5528   interface(CONST_INTER);
5529 %}
5530 
5531 // 64 bit unit increment
5532 operand immL_1()
5533 %{
5534   predicate(n->get_long() == 1);
5535   match(ConL);
5536 
5537   op_cost(0);
5538   format %{ %}
5539   interface(CONST_INTER);
5540 %}
5541 
5542 // 64 bit unit decrement
5543 operand immL_M1()
5544 %{
5545   predicate(n->get_long() == -1);
5546   match(ConL);
5547 
5548   op_cost(0);
5549   format %{ %}
5550   interface(CONST_INTER);
5551 %}
5552 
5553 // 32 bit offset of pc in thread anchor
5554 
5555 operand immL_pc_off()
5556 %{
5557   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
5558                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
5559   match(ConL);
5560 
5561   op_cost(0);
5562   format %{ %}
5563   interface(CONST_INTER);
5564 %}
5565 
5566 // 64 bit integer valid for add sub immediate
5567 operand immLAddSub()
5568 %{
5569   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
5570   match(ConL);
5571   op_cost(0);
5572   format %{ %}
5573   interface(CONST_INTER);
5574 %}
5575 
5576 // 64 bit integer valid for logical immediate
5577 operand immLLog()
5578 %{
5579   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
5580   match(ConL);
5581   op_cost(0);
5582   format %{ %}
5583   interface(CONST_INTER);
5584 %}
5585 
5586 // Long Immediate: low 32-bit mask
5587 operand immL_32bits()
5588 %{
5589   predicate(n->get_long() == 0xFFFFFFFFL);
5590   match(ConL);
5591   op_cost(0);
5592   format %{ %}
5593   interface(CONST_INTER);
5594 %}
5595 
5596 // Pointer operands
5597 // Pointer Immediate
5598 operand immP()
5599 %{
5600   match(ConP);
5601 
5602   op_cost(0);
5603   format %{ %}
5604   interface(CONST_INTER);
5605 %}
5606 
5607 // NULL Pointer Immediate
5608 operand immP0()
5609 %{
5610   predicate(n->get_ptr() == 0);
5611   match(ConP);
5612 
5613   op_cost(0);
5614   format %{ %}
5615   interface(CONST_INTER);
5616 %}
5617 
5618 // Pointer Immediate One
5619 // this is used in object initialization (initial object header)
5620 operand immP_1()
5621 %{
5622   predicate(n->get_ptr() == 1);
5623   match(ConP);
5624 
5625   op_cost(0);
5626   format %{ %}
5627   interface(CONST_INTER);
5628 %}
5629 
5630 // Polling Page Pointer Immediate
5631 operand immPollPage()
5632 %{
5633   predicate((address)n->get_ptr() == os::get_polling_page());
5634   match(ConP);
5635 
5636   op_cost(0);
5637   format %{ %}
5638   interface(CONST_INTER);
5639 %}
5640 
5641 // Card Table Byte Map Base
5642 operand immByteMapBase()
5643 %{
5644   // Get base of card map
5645   predicate((jbyte*)n->get_ptr() ==
5646         ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
5647   match(ConP);
5648 
5649   op_cost(0);
5650   format %{ %}
5651   interface(CONST_INTER);
5652 %}
5653 
5654 // Pointer Immediate Minus One
5655 // this is used when we want to write the current PC to the thread anchor
5656 operand immP_M1()
5657 %{
5658   predicate(n->get_ptr() == -1);
5659   match(ConP);
5660 
5661   op_cost(0);
5662   format %{ %}
5663   interface(CONST_INTER);
5664 %}
5665 
5666 // Pointer Immediate Minus Two
5667 // this is used when we want to write the current PC to the thread anchor
5668 operand immP_M2()
5669 %{
5670   predicate(n->get_ptr() == -2);
5671   match(ConP);
5672 
5673   op_cost(0);
5674   format %{ %}
5675   interface(CONST_INTER);
5676 %}
5677 
5678 // Float and Double operands
5679 // Double Immediate
5680 operand immD()
5681 %{
5682   match(ConD);
5683   op_cost(0);
5684   format %{ %}
5685   interface(CONST_INTER);
5686 %}
5687 
5688 // Double Immediate: +0.0d
5689 operand immD0()
5690 %{
5691   predicate(jlong_cast(n->getd()) == 0);
5692   match(ConD);
5693 
5694   op_cost(0);
5695   format %{ %}
5696   interface(CONST_INTER);
5697 %}
5698 
5699 // constant 'double +0.0'.
5700 operand immDPacked()
5701 %{
5702   predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
5703   match(ConD);
5704   op_cost(0);
5705   format %{ %}
5706   interface(CONST_INTER);
5707 %}
5708 
5709 // Float Immediate
5710 operand immF()
5711 %{
5712   match(ConF);
5713   op_cost(0);
5714   format %{ %}
5715   interface(CONST_INTER);
5716 %}
5717 
5718 // Float Immediate: +0.0f.
5719 operand immF0()
5720 %{
5721   predicate(jint_cast(n->getf()) == 0);
5722   match(ConF);
5723 
5724   op_cost(0);
5725   format %{ %}
5726   interface(CONST_INTER);
5727 %}
5728 
5729 //
5730 operand immFPacked()
5731 %{
5732   predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
5733   match(ConF);
5734   op_cost(0);
5735   format %{ %}
5736   interface(CONST_INTER);
5737 %}
5738 
5739 // Narrow pointer operands
5740 // Narrow Pointer Immediate
5741 operand immN()
5742 %{
5743   match(ConN);
5744 
5745   op_cost(0);
5746   format %{ %}
5747   interface(CONST_INTER);
5748 %}
5749 
5750 // Narrow NULL Pointer Immediate
5751 operand immN0()
5752 %{
5753   predicate(n->get_narrowcon() == 0);
5754   match(ConN);
5755 
5756   op_cost(0);
5757   format %{ %}
5758   interface(CONST_INTER);
5759 %}
5760 
5761 operand immNKlass()
5762 %{
5763   match(ConNKlass);
5764 
5765   op_cost(0);
5766   format %{ %}
5767   interface(CONST_INTER);
5768 %}
5769 
5770 // Integer 32 bit Register Operands
5771 // Integer 32 bitRegister (excludes SP)
5772 operand iRegI()
5773 %{
5774   constraint(ALLOC_IN_RC(any_reg32));
5775   match(RegI);
5776   match(iRegINoSp);
5777   op_cost(0);
5778   format %{ %}
5779   interface(REG_INTER);
5780 %}
5781 
5782 // Integer 32 bit Register not Special
5783 operand iRegINoSp()
5784 %{
5785   constraint(ALLOC_IN_RC(no_special_reg32));
5786   match(RegI);
5787   op_cost(0);
5788   format %{ %}
5789   interface(REG_INTER);
5790 %}
5791 
5792 // Integer 64 bit Register Operands
5793 // Integer 64 bit Register (includes SP)
5794 operand iRegL()
5795 %{
5796   constraint(ALLOC_IN_RC(any_reg));
5797   match(RegL);
5798   match(iRegLNoSp);
5799   op_cost(0);
5800   format %{ %}
5801   interface(REG_INTER);
5802 %}
5803 
5804 // Integer 64 bit Register not Special
5805 operand iRegLNoSp()
5806 %{
5807   constraint(ALLOC_IN_RC(no_special_reg));
5808   match(RegL);
5809   format %{ %}
5810   interface(REG_INTER);
5811 %}
5812 
5813 // Pointer Register Operands
5814 // Pointer Register
5815 operand iRegP()
5816 %{
5817   constraint(ALLOC_IN_RC(ptr_reg));
5818   match(RegP);
5819   match(iRegPNoSp);
5820   match(iRegP_R0);
5821   //match(iRegP_R2);
5822   //match(iRegP_R4);
5823   //match(iRegP_R5);
5824   match(thread_RegP);
5825   op_cost(0);
5826   format %{ %}
5827   interface(REG_INTER);
5828 %}
5829 
5830 // Pointer 64 bit Register not Special
5831 operand iRegPNoSp()
5832 %{
5833   constraint(ALLOC_IN_RC(no_special_ptr_reg));
5834   match(RegP);
5835   // match(iRegP);
5836   // match(iRegP_R0);
5837   // match(iRegP_R2);
5838   // match(iRegP_R4);
5839   // match(iRegP_R5);
5840   // match(thread_RegP);
5841   op_cost(0);
5842   format %{ %}
5843   interface(REG_INTER);
5844 %}
5845 
5846 // Pointer 64 bit Register R0 only
5847 operand iRegP_R0()
5848 %{
5849   constraint(ALLOC_IN_RC(r0_reg));
5850   match(RegP);
5851   // match(iRegP);
5852   match(iRegPNoSp);
5853   op_cost(0);
5854   format %{ %}
5855   interface(REG_INTER);
5856 %}
5857 
5858 // Pointer 64 bit Register R1 only
5859 operand iRegP_R1()
5860 %{
5861   constraint(ALLOC_IN_RC(r1_reg));
5862   match(RegP);
5863   // match(iRegP);
5864   match(iRegPNoSp);
5865   op_cost(0);
5866   format %{ %}
5867   interface(REG_INTER);
5868 %}
5869 
5870 // Pointer 64 bit Register R2 only
5871 operand iRegP_R2()
5872 %{
5873   constraint(ALLOC_IN_RC(r2_reg));
5874   match(RegP);
5875   // match(iRegP);
5876   match(iRegPNoSp);
5877   op_cost(0);
5878   format %{ %}
5879   interface(REG_INTER);
5880 %}
5881 
5882 // Pointer 64 bit Register R3 only
5883 operand iRegP_R3()
5884 %{
5885   constraint(ALLOC_IN_RC(r3_reg));
5886   match(RegP);
5887   // match(iRegP);
5888   match(iRegPNoSp);
5889   op_cost(0);
5890   format %{ %}
5891   interface(REG_INTER);
5892 %}
5893 
5894 // Pointer 64 bit Register R4 only
5895 operand iRegP_R4()
5896 %{
5897   constraint(ALLOC_IN_RC(r4_reg));
5898   match(RegP);
5899   // match(iRegP);
5900   match(iRegPNoSp);
5901   op_cost(0);
5902   format %{ %}
5903   interface(REG_INTER);
5904 %}
5905 
5906 // Pointer 64 bit Register R5 only
5907 operand iRegP_R5()
5908 %{
5909   constraint(ALLOC_IN_RC(r5_reg));
5910   match(RegP);
5911   // match(iRegP);
5912   match(iRegPNoSp);
5913   op_cost(0);
5914   format %{ %}
5915   interface(REG_INTER);
5916 %}
5917 
5918 // Pointer 64 bit Register R10 only
5919 operand iRegP_R10()
5920 %{
5921   constraint(ALLOC_IN_RC(r10_reg));
5922   match(RegP);
5923   // match(iRegP);
5924   match(iRegPNoSp);
5925   op_cost(0);
5926   format %{ %}
5927   interface(REG_INTER);
5928 %}
5929 
5930 // Long 64 bit Register R11 only
5931 operand iRegL_R11()
5932 %{
5933   constraint(ALLOC_IN_RC(r11_reg));
5934   match(RegL);
5935   match(iRegLNoSp);
5936   op_cost(0);
5937   format %{ %}
5938   interface(REG_INTER);
5939 %}
5940 
5941 // Pointer 64 bit Register FP only
5942 operand iRegP_FP()
5943 %{
5944   constraint(ALLOC_IN_RC(fp_reg));
5945   match(RegP);
5946   // match(iRegP);
5947   op_cost(0);
5948   format %{ %}
5949   interface(REG_INTER);
5950 %}
5951 
5952 // Register R0 only
5953 operand iRegI_R0()
5954 %{
5955   constraint(ALLOC_IN_RC(int_r0_reg));
5956   match(RegI);
5957   match(iRegINoSp);
5958   op_cost(0);
5959   format %{ %}
5960   interface(REG_INTER);
5961 %}
5962 
5963 // Register R2 only
5964 operand iRegI_R2()
5965 %{
5966   constraint(ALLOC_IN_RC(int_r2_reg));
5967   match(RegI);
5968   match(iRegINoSp);
5969   op_cost(0);
5970   format %{ %}
5971   interface(REG_INTER);
5972 %}
5973 
5974 // Register R3 only
5975 operand iRegI_R3()
5976 %{
5977   constraint(ALLOC_IN_RC(int_r3_reg));
5978   match(RegI);
5979   match(iRegINoSp);
5980   op_cost(0);
5981   format %{ %}
5982   interface(REG_INTER);
5983 %}
5984 
5985 
5986 // Register R2 only
5987 operand iRegI_R4()
5988 %{
5989   constraint(ALLOC_IN_RC(int_r4_reg));
5990   match(RegI);
5991   match(iRegINoSp);
5992   op_cost(0);
5993   format %{ %}
5994   interface(REG_INTER);
5995 %}
5996 
5997 
5998 // Pointer Register Operands
5999 // Narrow Pointer Register
6000 operand iRegN()
6001 %{
6002   constraint(ALLOC_IN_RC(any_reg32));
6003   match(RegN);
6004   match(iRegNNoSp);
6005   op_cost(0);
6006   format %{ %}
6007   interface(REG_INTER);
6008 %}
6009 
6010 // Integer 64 bit Register not Special
6011 operand iRegNNoSp()
6012 %{
6013   constraint(ALLOC_IN_RC(no_special_reg32));
6014   match(RegN);
6015   op_cost(0);
6016   format %{ %}
6017   interface(REG_INTER);
6018 %}
6019 
6020 // heap base register -- used for encoding immN0
6021 
6022 operand iRegIHeapbase()
6023 %{
6024   constraint(ALLOC_IN_RC(heapbase_reg));
6025   match(RegI);
6026   op_cost(0);
6027   format %{ %}
6028   interface(REG_INTER);
6029 %}
6030 
6031 // Float Register
6032 // Float register operands
6033 operand vRegF()
6034 %{
6035   constraint(ALLOC_IN_RC(float_reg));
6036   match(RegF);
6037 
6038   op_cost(0);
6039   format %{ %}
6040   interface(REG_INTER);
6041 %}
6042 
6043 // Double Register
6044 // Double register operands
6045 operand vRegD()
6046 %{
6047   constraint(ALLOC_IN_RC(double_reg));
6048   match(RegD);
6049 
6050   op_cost(0);
6051   format %{ %}
6052   interface(REG_INTER);
6053 %}
6054 
6055 operand vecD()
6056 %{
6057   constraint(ALLOC_IN_RC(vectord_reg));
6058   match(VecD);
6059 
6060   op_cost(0);
6061   format %{ %}
6062   interface(REG_INTER);
6063 %}
6064 
6065 operand vecX()
6066 %{
6067   constraint(ALLOC_IN_RC(vectorx_reg));
6068   match(VecX);
6069 
6070   op_cost(0);
6071   format %{ %}
6072   interface(REG_INTER);
6073 %}
6074 
6075 operand vRegD_V0()
6076 %{
6077   constraint(ALLOC_IN_RC(v0_reg));
6078   match(RegD);
6079   op_cost(0);
6080   format %{ %}
6081   interface(REG_INTER);
6082 %}
6083 
6084 operand vRegD_V1()
6085 %{
6086   constraint(ALLOC_IN_RC(v1_reg));
6087   match(RegD);
6088   op_cost(0);
6089   format %{ %}
6090   interface(REG_INTER);
6091 %}
6092 
6093 operand vRegD_V2()
6094 %{
6095   constraint(ALLOC_IN_RC(v2_reg));
6096   match(RegD);
6097   op_cost(0);
6098   format %{ %}
6099   interface(REG_INTER);
6100 %}
6101 
6102 operand vRegD_V3()
6103 %{
6104   constraint(ALLOC_IN_RC(v3_reg));
6105   match(RegD);
6106   op_cost(0);
6107   format %{ %}
6108   interface(REG_INTER);
6109 %}
6110 
6111 // Flags register, used as output of signed compare instructions
6112 
6113 // note that on AArch64 we also use this register as the output for
6114 // for floating point compare instructions (CmpF CmpD). this ensures
6115 // that ordered inequality tests use GT, GE, LT or LE none of which
6116 // pass through cases where the result is unordered i.e. one or both
6117 // inputs to the compare is a NaN. this means that the ideal code can
6118 // replace e.g. a GT with an LE and not end up capturing the NaN case
6119 // (where the comparison should always fail). EQ and NE tests are
6120 // always generated in ideal code so that unordered folds into the NE
6121 // case, matching the behaviour of AArch64 NE.
6122 //
6123 // This differs from x86 where the outputs of FP compares use a
6124 // special FP flags registers and where compares based on this
6125 // register are distinguished into ordered inequalities (cmpOpUCF) and
6126 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6127 // to explicitly handle the unordered case in branches. x86 also has
6128 // to include extra CMoveX rules to accept a cmpOpUCF input.
6129 
6130 operand rFlagsReg()
6131 %{
6132   constraint(ALLOC_IN_RC(int_flags));
6133   match(RegFlags);
6134 
6135   op_cost(0);
6136   format %{ "RFLAGS" %}
6137   interface(REG_INTER);
6138 %}
6139 
6140 // Flags register, used as output of unsigned compare instructions
6141 operand rFlagsRegU()
6142 %{
6143   constraint(ALLOC_IN_RC(int_flags));
6144   match(RegFlags);
6145 
6146   op_cost(0);
6147   format %{ "RFLAGSU" %}
6148   interface(REG_INTER);
6149 %}
6150 
6151 // Special Registers
6152 
6153 // Method Register
6154 operand inline_cache_RegP(iRegP reg)
6155 %{
6156   constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
6157   match(reg);
6158   match(iRegPNoSp);
6159   op_cost(0);
6160   format %{ %}
6161   interface(REG_INTER);
6162 %}
6163 
6164 operand interpreter_method_oop_RegP(iRegP reg)
6165 %{
6166   constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
6167   match(reg);
6168   match(iRegPNoSp);
6169   op_cost(0);
6170   format %{ %}
6171   interface(REG_INTER);
6172 %}
6173 
6174 // Thread Register
6175 operand thread_RegP(iRegP reg)
6176 %{
6177   constraint(ALLOC_IN_RC(thread_reg)); // link_reg
6178   match(reg);
6179   op_cost(0);
6180   format %{ %}
6181   interface(REG_INTER);
6182 %}
6183 
6184 operand lr_RegP(iRegP reg)
6185 %{
6186   constraint(ALLOC_IN_RC(lr_reg)); // link_reg
6187   match(reg);
6188   op_cost(0);
6189   format %{ %}
6190   interface(REG_INTER);
6191 %}
6192 
6193 //----------Memory Operands----------------------------------------------------
6194 
6195 operand indirect(iRegP reg)
6196 %{
6197   constraint(ALLOC_IN_RC(ptr_reg));
6198   match(reg);
6199   op_cost(0);
6200   format %{ "[$reg]" %}
6201   interface(MEMORY_INTER) %{
6202     base($reg);
6203     index(0xffffffff);
6204     scale(0x0);
6205     disp(0x0);
6206   %}
6207 %}
6208 
6209 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
6210 %{
6211   constraint(ALLOC_IN_RC(ptr_reg));
6212   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6213   match(AddP reg (LShiftL (ConvI2L ireg) scale));
6214   op_cost(0);
6215   format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
6216   interface(MEMORY_INTER) %{
6217     base($reg);
6218     index($ireg);
6219     scale($scale);
6220     disp(0x0);
6221   %}
6222 %}
6223 
6224 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
6225 %{
6226   constraint(ALLOC_IN_RC(ptr_reg));
6227   predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6228   match(AddP reg (LShiftL lreg scale));
6229   op_cost(0);
6230   format %{ "$reg, $lreg lsl($scale)" %}
6231   interface(MEMORY_INTER) %{
6232     base($reg);
6233     index($lreg);
6234     scale($scale);
6235     disp(0x0);
6236   %}
6237 %}
6238 
6239 operand indIndexI2L(iRegP reg, iRegI ireg)
6240 %{
6241   constraint(ALLOC_IN_RC(ptr_reg));
6242   match(AddP reg (ConvI2L ireg));
6243   op_cost(0);
6244   format %{ "$reg, $ireg, 0, I2L" %}
6245   interface(MEMORY_INTER) %{
6246     base($reg);
6247     index($ireg);
6248     scale(0x0);
6249     disp(0x0);
6250   %}
6251 %}
6252 
6253 operand indIndex(iRegP reg, iRegL lreg)
6254 %{
6255   constraint(ALLOC_IN_RC(ptr_reg));
6256   match(AddP reg lreg);
6257   op_cost(0);
6258   format %{ "$reg, $lreg" %}
6259   interface(MEMORY_INTER) %{
6260     base($reg);
6261     index($lreg);
6262     scale(0x0);
6263     disp(0x0);
6264   %}
6265 %}
6266 
6267 operand indOffI(iRegP reg, immIOffset off)
6268 %{
6269   constraint(ALLOC_IN_RC(ptr_reg));
6270   match(AddP reg off);
6271   op_cost(0);
6272   format %{ "[$reg, $off]" %}
6273   interface(MEMORY_INTER) %{
6274     base($reg);
6275     index(0xffffffff);
6276     scale(0x0);
6277     disp($off);
6278   %}
6279 %}
6280 
6281 operand indOffI4(iRegP reg, immIOffset4 off)
6282 %{
6283   constraint(ALLOC_IN_RC(ptr_reg));
6284   match(AddP reg off);
6285   op_cost(0);
6286   format %{ "[$reg, $off]" %}
6287   interface(MEMORY_INTER) %{
6288     base($reg);
6289     index(0xffffffff);
6290     scale(0x0);
6291     disp($off);
6292   %}
6293 %}
6294 
6295 operand indOffI8(iRegP reg, immIOffset8 off)
6296 %{
6297   constraint(ALLOC_IN_RC(ptr_reg));
6298   match(AddP reg off);
6299   op_cost(0);
6300   format %{ "[$reg, $off]" %}
6301   interface(MEMORY_INTER) %{
6302     base($reg);
6303     index(0xffffffff);
6304     scale(0x0);
6305     disp($off);
6306   %}
6307 %}
6308 
6309 operand indOffI16(iRegP reg, immIOffset16 off)
6310 %{
6311   constraint(ALLOC_IN_RC(ptr_reg));
6312   match(AddP reg off);
6313   op_cost(0);
6314   format %{ "[$reg, $off]" %}
6315   interface(MEMORY_INTER) %{
6316     base($reg);
6317     index(0xffffffff);
6318     scale(0x0);
6319     disp($off);
6320   %}
6321 %}
6322 
6323 operand indOffL(iRegP reg, immLoffset off)
6324 %{
6325   constraint(ALLOC_IN_RC(ptr_reg));
6326   match(AddP reg off);
6327   op_cost(0);
6328   format %{ "[$reg, $off]" %}
6329   interface(MEMORY_INTER) %{
6330     base($reg);
6331     index(0xffffffff);
6332     scale(0x0);
6333     disp($off);
6334   %}
6335 %}
6336 
6337 operand indOffL4(iRegP reg, immLoffset4 off)
6338 %{
6339   constraint(ALLOC_IN_RC(ptr_reg));
6340   match(AddP reg off);
6341   op_cost(0);
6342   format %{ "[$reg, $off]" %}
6343   interface(MEMORY_INTER) %{
6344     base($reg);
6345     index(0xffffffff);
6346     scale(0x0);
6347     disp($off);
6348   %}
6349 %}
6350 
6351 operand indOffL8(iRegP reg, immLoffset8 off)
6352 %{
6353   constraint(ALLOC_IN_RC(ptr_reg));
6354   match(AddP reg off);
6355   op_cost(0);
6356   format %{ "[$reg, $off]" %}
6357   interface(MEMORY_INTER) %{
6358     base($reg);
6359     index(0xffffffff);
6360     scale(0x0);
6361     disp($off);
6362   %}
6363 %}
6364 
6365 operand indOffL16(iRegP reg, immLoffset16 off)
6366 %{
6367   constraint(ALLOC_IN_RC(ptr_reg));
6368   match(AddP reg off);
6369   op_cost(0);
6370   format %{ "[$reg, $off]" %}
6371   interface(MEMORY_INTER) %{
6372     base($reg);
6373     index(0xffffffff);
6374     scale(0x0);
6375     disp($off);
6376   %}
6377 %}
6378 
6379 operand indirectN(iRegN reg)
6380 %{
6381   predicate(Universe::narrow_oop_shift() == 0);
6382   constraint(ALLOC_IN_RC(ptr_reg));
6383   match(DecodeN reg);
6384   op_cost(0);
6385   format %{ "[$reg]\t# narrow" %}
6386   interface(MEMORY_INTER) %{
6387     base($reg);
6388     index(0xffffffff);
6389     scale(0x0);
6390     disp(0x0);
6391   %}
6392 %}
6393 
6394 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
6395 %{
6396   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6397   constraint(ALLOC_IN_RC(ptr_reg));
6398   match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
6399   op_cost(0);
6400   format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
6401   interface(MEMORY_INTER) %{
6402     base($reg);
6403     index($ireg);
6404     scale($scale);
6405     disp(0x0);
6406   %}
6407 %}
6408 
6409 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
6410 %{
6411   predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
6412   constraint(ALLOC_IN_RC(ptr_reg));
6413   match(AddP (DecodeN reg) (LShiftL lreg scale));
6414   op_cost(0);
6415   format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
6416   interface(MEMORY_INTER) %{
6417     base($reg);
6418     index($lreg);
6419     scale($scale);
6420     disp(0x0);
6421   %}
6422 %}
6423 
6424 operand indIndexI2LN(iRegN reg, iRegI ireg)
6425 %{
6426   predicate(Universe::narrow_oop_shift() == 0);
6427   constraint(ALLOC_IN_RC(ptr_reg));
6428   match(AddP (DecodeN reg) (ConvI2L ireg));
6429   op_cost(0);
6430   format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
6431   interface(MEMORY_INTER) %{
6432     base($reg);
6433     index($ireg);
6434     scale(0x0);
6435     disp(0x0);
6436   %}
6437 %}
6438 
6439 operand indIndexN(iRegN reg, iRegL lreg)
6440 %{
6441   predicate(Universe::narrow_oop_shift() == 0);
6442   constraint(ALLOC_IN_RC(ptr_reg));
6443   match(AddP (DecodeN reg) lreg);
6444   op_cost(0);
6445   format %{ "$reg, $lreg\t# narrow" %}
6446   interface(MEMORY_INTER) %{
6447     base($reg);
6448     index($lreg);
6449     scale(0x0);
6450     disp(0x0);
6451   %}
6452 %}
6453 
6454 operand indOffIN(iRegN reg, immIOffset off)
6455 %{
6456   predicate(Universe::narrow_oop_shift() == 0);
6457   constraint(ALLOC_IN_RC(ptr_reg));
6458   match(AddP (DecodeN reg) off);
6459   op_cost(0);
6460   format %{ "[$reg, $off]\t# narrow" %}
6461   interface(MEMORY_INTER) %{
6462     base($reg);
6463     index(0xffffffff);
6464     scale(0x0);
6465     disp($off);
6466   %}
6467 %}
6468 
6469 operand indOffLN(iRegN reg, immLoffset off)
6470 %{
6471   predicate(Universe::narrow_oop_shift() == 0);
6472   constraint(ALLOC_IN_RC(ptr_reg));
6473   match(AddP (DecodeN reg) off);
6474   op_cost(0);
6475   format %{ "[$reg, $off]\t# narrow" %}
6476   interface(MEMORY_INTER) %{
6477     base($reg);
6478     index(0xffffffff);
6479     scale(0x0);
6480     disp($off);
6481   %}
6482 %}
6483 
6484 
6485 
6486 // AArch64 opto stubs need to write to the pc slot in the thread anchor
6487 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
6488 %{
6489   constraint(ALLOC_IN_RC(ptr_reg));
6490   match(AddP reg off);
6491   op_cost(0);
6492   format %{ "[$reg, $off]" %}
6493   interface(MEMORY_INTER) %{
6494     base($reg);
6495     index(0xffffffff);
6496     scale(0x0);
6497     disp($off);
6498   %}
6499 %}
6500 
6501 //----------Special Memory Operands--------------------------------------------
6502 // Stack Slot Operand - This operand is used for loading and storing temporary
6503 //                      values on the stack where a match requires a value to
6504 //                      flow through memory.
6505 operand stackSlotP(sRegP reg)
6506 %{
6507   constraint(ALLOC_IN_RC(stack_slots));
6508   op_cost(100);
6509   // No match rule because this operand is only generated in matching
6510   // match(RegP);
6511   format %{ "[$reg]" %}
6512   interface(MEMORY_INTER) %{
6513     base(0x1e);  // RSP
6514     index(0x0);  // No Index
6515     scale(0x0);  // No Scale
6516     disp($reg);  // Stack Offset
6517   %}
6518 %}
6519 
6520 operand stackSlotI(sRegI reg)
6521 %{
6522   constraint(ALLOC_IN_RC(stack_slots));
6523   // No match rule because this operand is only generated in matching
6524   // match(RegI);
6525   format %{ "[$reg]" %}
6526   interface(MEMORY_INTER) %{
6527     base(0x1e);  // RSP
6528     index(0x0);  // No Index
6529     scale(0x0);  // No Scale
6530     disp($reg);  // Stack Offset
6531   %}
6532 %}
6533 
6534 operand stackSlotF(sRegF reg)
6535 %{
6536   constraint(ALLOC_IN_RC(stack_slots));
6537   // No match rule because this operand is only generated in matching
6538   // match(RegF);
6539   format %{ "[$reg]" %}
6540   interface(MEMORY_INTER) %{
6541     base(0x1e);  // RSP
6542     index(0x0);  // No Index
6543     scale(0x0);  // No Scale
6544     disp($reg);  // Stack Offset
6545   %}
6546 %}
6547 
6548 operand stackSlotD(sRegD reg)
6549 %{
6550   constraint(ALLOC_IN_RC(stack_slots));
6551   // No match rule because this operand is only generated in matching
6552   // match(RegD);
6553   format %{ "[$reg]" %}
6554   interface(MEMORY_INTER) %{
6555     base(0x1e);  // RSP
6556     index(0x0);  // No Index
6557     scale(0x0);  // No Scale
6558     disp($reg);  // Stack Offset
6559   %}
6560 %}
6561 
6562 operand stackSlotL(sRegL reg)
6563 %{
6564   constraint(ALLOC_IN_RC(stack_slots));
6565   // No match rule because this operand is only generated in matching
6566   // match(RegL);
6567   format %{ "[$reg]" %}
6568   interface(MEMORY_INTER) %{
6569     base(0x1e);  // RSP
6570     index(0x0);  // No Index
6571     scale(0x0);  // No Scale
6572     disp($reg);  // Stack Offset
6573   %}
6574 %}
6575 
6576 // Operands for expressing Control Flow
6577 // NOTE: Label is a predefined operand which should not be redefined in
6578 //       the AD file. It is generically handled within the ADLC.
6579 
6580 //----------Conditional Branch Operands----------------------------------------
6581 // Comparison Op  - This is the operation of the comparison, and is limited to
6582 //                  the following set of codes:
6583 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6584 //
6585 // Other attributes of the comparison, such as unsignedness, are specified
6586 // by the comparison instruction that sets a condition code flags register.
6587 // That result is represented by a flags operand whose subtype is appropriate
6588 // to the unsignedness (etc.) of the comparison.
6589 //
6590 // Later, the instruction which matches both the Comparison Op (a Bool) and
6591 // the flags (produced by the Cmp) specifies the coding of the comparison op
6592 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6593 
6594 // used for signed integral comparisons and fp comparisons
6595 
6596 operand cmpOp()
6597 %{
6598   match(Bool);
6599 
6600   format %{ "" %}
6601   interface(COND_INTER) %{
6602     equal(0x0, "eq");
6603     not_equal(0x1, "ne");
6604     less(0xb, "lt");
6605     greater_equal(0xa, "ge");
6606     less_equal(0xd, "le");
6607     greater(0xc, "gt");
6608     overflow(0x6, "vs");
6609     no_overflow(0x7, "vc");
6610   %}
6611 %}
6612 
6613 // used for unsigned integral comparisons
6614 
6615 operand cmpOpU()
6616 %{
6617   match(Bool);
6618 
6619   format %{ "" %}
6620   interface(COND_INTER) %{
6621     equal(0x0, "eq");
6622     not_equal(0x1, "ne");
6623     less(0x3, "lo");
6624     greater_equal(0x2, "hs");
6625     less_equal(0x9, "ls");
6626     greater(0x8, "hi");
6627     overflow(0x6, "vs");
6628     no_overflow(0x7, "vc");
6629   %}
6630 %}
6631 
6632 // used for certain integral comparisons which can be
6633 // converted to cbxx or tbxx instructions
6634 
6635 operand cmpOpEqNe()
6636 %{
6637   match(Bool);
6638   match(CmpOp);
6639   op_cost(0);
6640   predicate(n->as_Bool()->_test._test == BoolTest::ne
6641             || n->as_Bool()->_test._test == BoolTest::eq);
6642 
6643   format %{ "" %}
6644   interface(COND_INTER) %{
6645     equal(0x0, "eq");
6646     not_equal(0x1, "ne");
6647     less(0xb, "lt");
6648     greater_equal(0xa, "ge");
6649     less_equal(0xd, "le");
6650     greater(0xc, "gt");
6651     overflow(0x6, "vs");
6652     no_overflow(0x7, "vc");
6653   %}
6654 %}
6655 
6656 // used for certain integral comparisons which can be
6657 // converted to cbxx or tbxx instructions
6658 
6659 operand cmpOpLtGe()
6660 %{
6661   match(Bool);
6662   match(CmpOp);
6663   op_cost(0);
6664 
6665   predicate(n->as_Bool()->_test._test == BoolTest::lt
6666             || n->as_Bool()->_test._test == BoolTest::ge);
6667 
6668   format %{ "" %}
6669   interface(COND_INTER) %{
6670     equal(0x0, "eq");
6671     not_equal(0x1, "ne");
6672     less(0xb, "lt");
6673     greater_equal(0xa, "ge");
6674     less_equal(0xd, "le");
6675     greater(0xc, "gt");
6676     overflow(0x6, "vs");
6677     no_overflow(0x7, "vc");
6678   %}
6679 %}
6680 
6681 // used for certain unsigned integral comparisons which can be
6682 // converted to cbxx or tbxx instructions
6683 
6684 operand cmpOpUEqNeLtGe()
6685 %{
6686   match(Bool);
6687   match(CmpOp);
6688   op_cost(0);
6689 
6690   predicate(n->as_Bool()->_test._test == BoolTest::eq
6691             || n->as_Bool()->_test._test == BoolTest::ne
6692             || n->as_Bool()->_test._test == BoolTest::lt
6693             || n->as_Bool()->_test._test == BoolTest::ge);
6694 
6695   format %{ "" %}
6696   interface(COND_INTER) %{
6697     equal(0x0, "eq");
6698     not_equal(0x1, "ne");
6699     less(0xb, "lt");
6700     greater_equal(0xa, "ge");
6701     less_equal(0xd, "le");
6702     greater(0xc, "gt");
6703     overflow(0x6, "vs");
6704     no_overflow(0x7, "vc");
6705   %}
6706 %}
6707 
6708 // Special operand allowing long args to int ops to be truncated for free
6709 
6710 operand iRegL2I(iRegL reg) %{
6711 
6712   op_cost(0);
6713 
6714   match(ConvL2I reg);
6715 
6716   format %{ "l2i($reg)" %}
6717 
6718   interface(REG_INTER)
6719 %}
6720 
6721 opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
6722 opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
6723 opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6724 
6725 //----------OPERAND CLASSES----------------------------------------------------
6726 // Operand Classes are groups of operands that are used as to simplify
6727 // instruction definitions by not requiring the AD writer to specify
6728 // separate instructions for every form of operand when the
6729 // instruction accepts multiple operand types with the same basic
6730 // encoding and format. The classic case of this is memory operands.
6731 
6732 // memory is used to define read/write location for load/store
6733 // instruction defs. we can turn a memory op into an Address
6734 
6735 opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
6736                indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
6737 
6738 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
6739 // operations. it allows the src to be either an iRegI or a (ConvL2I
6740 // iRegL). in the latter case the l2i normally planted for a ConvL2I
6741 // can be elided because the 32-bit instruction will just employ the
6742 // lower 32 bits anyway.
6743 //
6744 // n.b. this does not elide all L2I conversions. if the truncated
6745 // value is consumed by more than one operation then the ConvL2I
6746 // cannot be bundled into the consuming nodes so an l2i gets planted
6747 // (actually a movw $dst $src) and the downstream instructions consume
6748 // the result of the l2i as an iRegI input. That's a shame since the
6749 // movw is actually redundant but its not too costly.
6750 
6751 opclass iRegIorL2I(iRegI, iRegL2I);
6752 
6753 //----------PIPELINE-----------------------------------------------------------
6754 // Rules which define the behavior of the target architectures pipeline.
6755 
6756 // For specific pipelines, eg A53, define the stages of that pipeline
6757 //pipe_desc(ISS, EX1, EX2, WR);
6758 #define ISS S0
6759 #define EX1 S1
6760 #define EX2 S2
6761 #define WR  S3
6762 
6763 // Integer ALU reg operation
6764 pipeline %{
6765 
6766 attributes %{
6767   // ARM instructions are of fixed length
6768   fixed_size_instructions;        // Fixed size instructions TODO does
6769   max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
6770   // ARM instructions come in 32-bit word units
6771   instruction_unit_size = 4;         // An instruction is 4 bytes long
6772   instruction_fetch_unit_size = 64;  // The processor fetches one line
6773   instruction_fetch_units = 1;       // of 64 bytes
6774 
6775   // List of nop instructions
6776   nops( MachNop );
6777 %}
6778 
6779 // We don't use an actual pipeline model so don't care about resources
6780 // or description. we do use pipeline classes to introduce fixed
6781 // latencies
6782 
6783 //----------RESOURCES----------------------------------------------------------
6784 // Resources are the functional units available to the machine
6785 
6786 resources( INS0, INS1, INS01 = INS0 | INS1,
6787            ALU0, ALU1, ALU = ALU0 | ALU1,
6788            MAC,
6789            DIV,
6790            BRANCH,
6791            LDST,
6792            NEON_FP);
6793 
6794 //----------PIPELINE DESCRIPTION-----------------------------------------------
6795 // Pipeline Description specifies the stages in the machine's pipeline
6796 
6797 // Define the pipeline as a generic 6 stage pipeline
6798 pipe_desc(S0, S1, S2, S3, S4, S5);
6799 
6800 //----------PIPELINE CLASSES---------------------------------------------------
6801 // Pipeline Classes describe the stages in which input and output are
6802 // referenced by the hardware pipeline.
6803 
6804 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
6805 %{
6806   single_instruction;
6807   src1   : S1(read);
6808   src2   : S2(read);
6809   dst    : S5(write);
6810   INS01  : ISS;
6811   NEON_FP : S5;
6812 %}
6813 
6814 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
6815 %{
6816   single_instruction;
6817   src1   : S1(read);
6818   src2   : S2(read);
6819   dst    : S5(write);
6820   INS01  : ISS;
6821   NEON_FP : S5;
6822 %}
6823 
6824 pipe_class fp_uop_s(vRegF dst, vRegF src)
6825 %{
6826   single_instruction;
6827   src    : S1(read);
6828   dst    : S5(write);
6829   INS01  : ISS;
6830   NEON_FP : S5;
6831 %}
6832 
6833 pipe_class fp_uop_d(vRegD dst, vRegD src)
6834 %{
6835   single_instruction;
6836   src    : S1(read);
6837   dst    : S5(write);
6838   INS01  : ISS;
6839   NEON_FP : S5;
6840 %}
6841 
6842 pipe_class fp_d2f(vRegF dst, vRegD src)
6843 %{
6844   single_instruction;
6845   src    : S1(read);
6846   dst    : S5(write);
6847   INS01  : ISS;
6848   NEON_FP : S5;
6849 %}
6850 
6851 pipe_class fp_f2d(vRegD dst, vRegF src)
6852 %{
6853   single_instruction;
6854   src    : S1(read);
6855   dst    : S5(write);
6856   INS01  : ISS;
6857   NEON_FP : S5;
6858 %}
6859 
6860 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
6861 %{
6862   single_instruction;
6863   src    : S1(read);
6864   dst    : S5(write);
6865   INS01  : ISS;
6866   NEON_FP : S5;
6867 %}
6868 
6869 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
6870 %{
6871   single_instruction;
6872   src    : S1(read);
6873   dst    : S5(write);
6874   INS01  : ISS;
6875   NEON_FP : S5;
6876 %}
6877 
6878 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
6879 %{
6880   single_instruction;
6881   src    : S1(read);
6882   dst    : S5(write);
6883   INS01  : ISS;
6884   NEON_FP : S5;
6885 %}
6886 
6887 pipe_class fp_l2f(vRegF dst, iRegL src)
6888 %{
6889   single_instruction;
6890   src    : S1(read);
6891   dst    : S5(write);
6892   INS01  : ISS;
6893   NEON_FP : S5;
6894 %}
6895 
6896 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
6897 %{
6898   single_instruction;
6899   src    : S1(read);
6900   dst    : S5(write);
6901   INS01  : ISS;
6902   NEON_FP : S5;
6903 %}
6904 
6905 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
6906 %{
6907   single_instruction;
6908   src    : S1(read);
6909   dst    : S5(write);
6910   INS01  : ISS;
6911   NEON_FP : S5;
6912 %}
6913 
6914 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
6915 %{
6916   single_instruction;
6917   src    : S1(read);
6918   dst    : S5(write);
6919   INS01  : ISS;
6920   NEON_FP : S5;
6921 %}
6922 
6923 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
6924 %{
6925   single_instruction;
6926   src    : S1(read);
6927   dst    : S5(write);
6928   INS01  : ISS;
6929   NEON_FP : S5;
6930 %}
6931 
6932 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
6933 %{
6934   single_instruction;
6935   src1   : S1(read);
6936   src2   : S2(read);
6937   dst    : S5(write);
6938   INS0   : ISS;
6939   NEON_FP : S5;
6940 %}
6941 
6942 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
6943 %{
6944   single_instruction;
6945   src1   : S1(read);
6946   src2   : S2(read);
6947   dst    : S5(write);
6948   INS0   : ISS;
6949   NEON_FP : S5;
6950 %}
6951 
6952 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
6953 %{
6954   single_instruction;
6955   cr     : S1(read);
6956   src1   : S1(read);
6957   src2   : S1(read);
6958   dst    : S3(write);
6959   INS01  : ISS;
6960   NEON_FP : S3;
6961 %}
6962 
6963 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
6964 %{
6965   single_instruction;
6966   cr     : S1(read);
6967   src1   : S1(read);
6968   src2   : S1(read);
6969   dst    : S3(write);
6970   INS01  : ISS;
6971   NEON_FP : S3;
6972 %}
6973 
6974 pipe_class fp_imm_s(vRegF dst)
6975 %{
6976   single_instruction;
6977   dst    : S3(write);
6978   INS01  : ISS;
6979   NEON_FP : S3;
6980 %}
6981 
6982 pipe_class fp_imm_d(vRegD dst)
6983 %{
6984   single_instruction;
6985   dst    : S3(write);
6986   INS01  : ISS;
6987   NEON_FP : S3;
6988 %}
6989 
6990 pipe_class fp_load_constant_s(vRegF dst)
6991 %{
6992   single_instruction;
6993   dst    : S4(write);
6994   INS01  : ISS;
6995   NEON_FP : S4;
6996 %}
6997 
6998 pipe_class fp_load_constant_d(vRegD dst)
6999 %{
7000   single_instruction;
7001   dst    : S4(write);
7002   INS01  : ISS;
7003   NEON_FP : S4;
7004 %}
7005 
7006 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
7007 %{
7008   single_instruction;
7009   dst    : S5(write);
7010   src1   : S1(read);
7011   src2   : S1(read);
7012   INS01  : ISS;
7013   NEON_FP : S5;
7014 %}
7015 
7016 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
7017 %{
7018   single_instruction;
7019   dst    : S5(write);
7020   src1   : S1(read);
7021   src2   : S1(read);
7022   INS0   : ISS;
7023   NEON_FP : S5;
7024 %}
7025 
7026 pipe_class vmla64(vecD dst, vecD src1, vecD src2)
7027 %{
7028   single_instruction;
7029   dst    : S5(write);
7030   src1   : S1(read);
7031   src2   : S1(read);
7032   dst    : S1(read);
7033   INS01  : ISS;
7034   NEON_FP : S5;
7035 %}
7036 
7037 pipe_class vmla128(vecX dst, vecX src1, vecX src2)
7038 %{
7039   single_instruction;
7040   dst    : S5(write);
7041   src1   : S1(read);
7042   src2   : S1(read);
7043   dst    : S1(read);
7044   INS0   : ISS;
7045   NEON_FP : S5;
7046 %}
7047 
7048 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
7049 %{
7050   single_instruction;
7051   dst    : S4(write);
7052   src1   : S2(read);
7053   src2   : S2(read);
7054   INS01  : ISS;
7055   NEON_FP : S4;
7056 %}
7057 
7058 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
7059 %{
7060   single_instruction;
7061   dst    : S4(write);
7062   src1   : S2(read);
7063   src2   : S2(read);
7064   INS0   : ISS;
7065   NEON_FP : S4;
7066 %}
7067 
7068 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
7069 %{
7070   single_instruction;
7071   dst    : S3(write);
7072   src1   : S2(read);
7073   src2   : S2(read);
7074   INS01  : ISS;
7075   NEON_FP : S3;
7076 %}
7077 
7078 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
7079 %{
7080   single_instruction;
7081   dst    : S3(write);
7082   src1   : S2(read);
7083   src2   : S2(read);
7084   INS0   : ISS;
7085   NEON_FP : S3;
7086 %}
7087 
7088 pipe_class vshift64(vecD dst, vecD src, vecX shift)
7089 %{
7090   single_instruction;
7091   dst    : S3(write);
7092   src    : S1(read);
7093   shift  : S1(read);
7094   INS01  : ISS;
7095   NEON_FP : S3;
7096 %}
7097 
7098 pipe_class vshift128(vecX dst, vecX src, vecX shift)
7099 %{
7100   single_instruction;
7101   dst    : S3(write);
7102   src    : S1(read);
7103   shift  : S1(read);
7104   INS0   : ISS;
7105   NEON_FP : S3;
7106 %}
7107 
7108 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
7109 %{
7110   single_instruction;
7111   dst    : S3(write);
7112   src    : S1(read);
7113   INS01  : ISS;
7114   NEON_FP : S3;
7115 %}
7116 
7117 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
7118 %{
7119   single_instruction;
7120   dst    : S3(write);
7121   src    : S1(read);
7122   INS0   : ISS;
7123   NEON_FP : S3;
7124 %}
7125 
7126 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
7127 %{
7128   single_instruction;
7129   dst    : S5(write);
7130   src1   : S1(read);
7131   src2   : S1(read);
7132   INS01  : ISS;
7133   NEON_FP : S5;
7134 %}
7135 
7136 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
7137 %{
7138   single_instruction;
7139   dst    : S5(write);
7140   src1   : S1(read);
7141   src2   : S1(read);
7142   INS0   : ISS;
7143   NEON_FP : S5;
7144 %}
7145 
7146 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
7147 %{
7148   single_instruction;
7149   dst    : S5(write);
7150   src1   : S1(read);
7151   src2   : S1(read);
7152   INS0   : ISS;
7153   NEON_FP : S5;
7154 %}
7155 
7156 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
7157 %{
7158   single_instruction;
7159   dst    : S5(write);
7160   src1   : S1(read);
7161   src2   : S1(read);
7162   INS0   : ISS;
7163   NEON_FP : S5;
7164 %}
7165 
7166 pipe_class vsqrt_fp128(vecX dst, vecX src)
7167 %{
7168   single_instruction;
7169   dst    : S5(write);
7170   src    : S1(read);
7171   INS0   : ISS;
7172   NEON_FP : S5;
7173 %}
7174 
7175 pipe_class vunop_fp64(vecD dst, vecD src)
7176 %{
7177   single_instruction;
7178   dst    : S5(write);
7179   src    : S1(read);
7180   INS01  : ISS;
7181   NEON_FP : S5;
7182 %}
7183 
7184 pipe_class vunop_fp128(vecX dst, vecX src)
7185 %{
7186   single_instruction;
7187   dst    : S5(write);
7188   src    : S1(read);
7189   INS0   : ISS;
7190   NEON_FP : S5;
7191 %}
7192 
7193 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
7194 %{
7195   single_instruction;
7196   dst    : S3(write);
7197   src    : S1(read);
7198   INS01  : ISS;
7199   NEON_FP : S3;
7200 %}
7201 
7202 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
7203 %{
7204   single_instruction;
7205   dst    : S3(write);
7206   src    : S1(read);
7207   INS01  : ISS;
7208   NEON_FP : S3;
7209 %}
7210 
7211 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
7212 %{
7213   single_instruction;
7214   dst    : S3(write);
7215   src    : S1(read);
7216   INS01  : ISS;
7217   NEON_FP : S3;
7218 %}
7219 
7220 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
7221 %{
7222   single_instruction;
7223   dst    : S3(write);
7224   src    : S1(read);
7225   INS01  : ISS;
7226   NEON_FP : S3;
7227 %}
7228 
7229 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
7230 %{
7231   single_instruction;
7232   dst    : S3(write);
7233   src    : S1(read);
7234   INS01  : ISS;
7235   NEON_FP : S3;
7236 %}
7237 
7238 pipe_class vmovi_reg_imm64(vecD dst)
7239 %{
7240   single_instruction;
7241   dst    : S3(write);
7242   INS01  : ISS;
7243   NEON_FP : S3;
7244 %}
7245 
7246 pipe_class vmovi_reg_imm128(vecX dst)
7247 %{
7248   single_instruction;
7249   dst    : S3(write);
7250   INS0   : ISS;
7251   NEON_FP : S3;
7252 %}
7253 
7254 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
7255 %{
7256   single_instruction;
7257   dst    : S5(write);
7258   mem    : ISS(read);
7259   INS01  : ISS;
7260   NEON_FP : S3;
7261 %}
7262 
7263 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
7264 %{
7265   single_instruction;
7266   dst    : S5(write);
7267   mem    : ISS(read);
7268   INS01  : ISS;
7269   NEON_FP : S3;
7270 %}
7271 
7272 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
7273 %{
7274   single_instruction;
7275   mem    : ISS(read);
7276   src    : S2(read);
7277   INS01  : ISS;
7278   NEON_FP : S3;
7279 %}
7280 
7281 pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
7282 %{
7283   single_instruction;
7284   mem    : ISS(read);
7285   src    : S2(read);
7286   INS01  : ISS;
7287   NEON_FP : S3;
7288 %}
7289 
7290 //------- Integer ALU operations --------------------------
7291 
7292 // Integer ALU reg-reg operation
7293 // Operands needed in EX1, result generated in EX2
7294 // Eg.  ADD     x0, x1, x2
7295 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7296 %{
7297   single_instruction;
7298   dst    : EX2(write);
7299   src1   : EX1(read);
7300   src2   : EX1(read);
7301   INS01  : ISS; // Dual issue as instruction 0 or 1
7302   ALU    : EX2;
7303 %}
7304 
7305 // Integer ALU reg-reg operation with constant shift
7306 // Shifted register must be available in LATE_ISS instead of EX1
7307 // Eg.  ADD     x0, x1, x2, LSL #2
7308 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
7309 %{
7310   single_instruction;
7311   dst    : EX2(write);
7312   src1   : EX1(read);
7313   src2   : ISS(read);
7314   INS01  : ISS;
7315   ALU    : EX2;
7316 %}
7317 
7318 // Integer ALU reg operation with constant shift
7319 // Eg.  LSL     x0, x1, #shift
7320 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
7321 %{
7322   single_instruction;
7323   dst    : EX2(write);
7324   src1   : ISS(read);
7325   INS01  : ISS;
7326   ALU    : EX2;
7327 %}
7328 
7329 // Integer ALU reg-reg operation with variable shift
7330 // Both operands must be available in LATE_ISS instead of EX1
7331 // Result is available in EX1 instead of EX2
7332 // Eg.  LSLV    x0, x1, x2
7333 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
7334 %{
7335   single_instruction;
7336   dst    : EX1(write);
7337   src1   : ISS(read);
7338   src2   : ISS(read);
7339   INS01  : ISS;
7340   ALU    : EX1;
7341 %}
7342 
7343 // Integer ALU reg-reg operation with extract
7344 // As for _vshift above, but result generated in EX2
7345 // Eg.  EXTR    x0, x1, x2, #N
7346 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
7347 %{
7348   single_instruction;
7349   dst    : EX2(write);
7350   src1   : ISS(read);
7351   src2   : ISS(read);
7352   INS1   : ISS; // Can only dual issue as Instruction 1
7353   ALU    : EX1;
7354 %}
7355 
7356 // Integer ALU reg operation
7357 // Eg.  NEG     x0, x1
7358 pipe_class ialu_reg(iRegI dst, iRegI src)
7359 %{
7360   single_instruction;
7361   dst    : EX2(write);
7362   src    : EX1(read);
7363   INS01  : ISS;
7364   ALU    : EX2;
7365 %}
7366 
7367 // Integer ALU reg mmediate operation
7368 // Eg.  ADD     x0, x1, #N
7369 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
7370 %{
7371   single_instruction;
7372   dst    : EX2(write);
7373   src1   : EX1(read);
7374   INS01  : ISS;
7375   ALU    : EX2;
7376 %}
7377 
7378 // Integer ALU immediate operation (no source operands)
7379 // Eg.  MOV     x0, #N
7380 pipe_class ialu_imm(iRegI dst)
7381 %{
7382   single_instruction;
7383   dst    : EX1(write);
7384   INS01  : ISS;
7385   ALU    : EX1;
7386 %}
7387 
7388 //------- Compare operation -------------------------------
7389 
7390 // Compare reg-reg
7391 // Eg.  CMP     x0, x1
7392 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
7393 %{
7394   single_instruction;
7395 //  fixed_latency(16);
7396   cr     : EX2(write);
7397   op1    : EX1(read);
7398   op2    : EX1(read);
7399   INS01  : ISS;
7400   ALU    : EX2;
7401 %}
7402 
7403 // Compare reg-reg
7404 // Eg.  CMP     x0, #N
7405 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
7406 %{
7407   single_instruction;
7408 //  fixed_latency(16);
7409   cr     : EX2(write);
7410   op1    : EX1(read);
7411   INS01  : ISS;
7412   ALU    : EX2;
7413 %}
7414 
7415 //------- Conditional instructions ------------------------
7416 
7417 // Conditional no operands
7418 // Eg.  CSINC   x0, zr, zr, <cond>
7419 pipe_class icond_none(iRegI dst, rFlagsReg cr)
7420 %{
7421   single_instruction;
7422   cr     : EX1(read);
7423   dst    : EX2(write);
7424   INS01  : ISS;
7425   ALU    : EX2;
7426 %}
7427 
7428 // Conditional 2 operand
7429 // EG.  CSEL    X0, X1, X2, <cond>
7430 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
7431 %{
7432   single_instruction;
7433   cr     : EX1(read);
7434   src1   : EX1(read);
7435   src2   : EX1(read);
7436   dst    : EX2(write);
7437   INS01  : ISS;
7438   ALU    : EX2;
7439 %}
7440 
7441 // Conditional 2 operand
7442 // EG.  CSEL    X0, X1, X2, <cond>
7443 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
7444 %{
7445   single_instruction;
7446   cr     : EX1(read);
7447   src    : EX1(read);
7448   dst    : EX2(write);
7449   INS01  : ISS;
7450   ALU    : EX2;
7451 %}
7452 
7453 //------- Multiply pipeline operations --------------------
7454 
7455 // Multiply reg-reg
7456 // Eg.  MUL     w0, w1, w2
7457 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7458 %{
7459   single_instruction;
7460   dst    : WR(write);
7461   src1   : ISS(read);
7462   src2   : ISS(read);
7463   INS01  : ISS;
7464   MAC    : WR;
7465 %}
7466 
7467 // Multiply accumulate
7468 // Eg.  MADD    w0, w1, w2, w3
7469 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7470 %{
7471   single_instruction;
7472   dst    : WR(write);
7473   src1   : ISS(read);
7474   src2   : ISS(read);
7475   src3   : ISS(read);
7476   INS01  : ISS;
7477   MAC    : WR;
7478 %}
7479 
7480 // Eg.  MUL     w0, w1, w2
7481 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7482 %{
7483   single_instruction;
7484   fixed_latency(3); // Maximum latency for 64 bit mul
7485   dst    : WR(write);
7486   src1   : ISS(read);
7487   src2   : ISS(read);
7488   INS01  : ISS;
7489   MAC    : WR;
7490 %}
7491 
7492 // Multiply accumulate
7493 // Eg.  MADD    w0, w1, w2, w3
7494 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
7495 %{
7496   single_instruction;
7497   fixed_latency(3); // Maximum latency for 64 bit mul
7498   dst    : WR(write);
7499   src1   : ISS(read);
7500   src2   : ISS(read);
7501   src3   : ISS(read);
7502   INS01  : ISS;
7503   MAC    : WR;
7504 %}
7505 
7506 //------- Divide pipeline operations --------------------
7507 
7508 // Eg.  SDIV    w0, w1, w2
7509 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7510 %{
7511   single_instruction;
7512   fixed_latency(8); // Maximum latency for 32 bit divide
7513   dst    : WR(write);
7514   src1   : ISS(read);
7515   src2   : ISS(read);
7516   INS0   : ISS; // Can only dual issue as instruction 0
7517   DIV    : WR;
7518 %}
7519 
7520 // Eg.  SDIV    x0, x1, x2
7521 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
7522 %{
7523   single_instruction;
7524   fixed_latency(16); // Maximum latency for 64 bit divide
7525   dst    : WR(write);
7526   src1   : ISS(read);
7527   src2   : ISS(read);
7528   INS0   : ISS; // Can only dual issue as instruction 0
7529   DIV    : WR;
7530 %}
7531 
7532 //------- Load pipeline operations ------------------------
7533 
7534 // Load - prefetch
7535 // Eg.  PFRM    <mem>
7536 pipe_class iload_prefetch(memory mem)
7537 %{
7538   single_instruction;
7539   mem    : ISS(read);
7540   INS01  : ISS;
7541   LDST   : WR;
7542 %}
7543 
7544 // Load - reg, mem
7545 // Eg.  LDR     x0, <mem>
7546 pipe_class iload_reg_mem(iRegI dst, memory mem)
7547 %{
7548   single_instruction;
7549   dst    : WR(write);
7550   mem    : ISS(read);
7551   INS01  : ISS;
7552   LDST   : WR;
7553 %}
7554 
7555 // Load - reg, reg
7556 // Eg.  LDR     x0, [sp, x1]
7557 pipe_class iload_reg_reg(iRegI dst, iRegI src)
7558 %{
7559   single_instruction;
7560   dst    : WR(write);
7561   src    : ISS(read);
7562   INS01  : ISS;
7563   LDST   : WR;
7564 %}
7565 
7566 //------- Store pipeline operations -----------------------
7567 
7568 // Store - zr, mem
7569 // Eg.  STR     zr, <mem>
7570 pipe_class istore_mem(memory mem)
7571 %{
7572   single_instruction;
7573   mem    : ISS(read);
7574   INS01  : ISS;
7575   LDST   : WR;
7576 %}
7577 
7578 // Store - reg, mem
7579 // Eg.  STR     x0, <mem>
7580 pipe_class istore_reg_mem(iRegI src, memory mem)
7581 %{
7582   single_instruction;
7583   mem    : ISS(read);
7584   src    : EX2(read);
7585   INS01  : ISS;
7586   LDST   : WR;
7587 %}
7588 
7589 // Store - reg, reg
7590 // Eg. STR      x0, [sp, x1]
7591 pipe_class istore_reg_reg(iRegI dst, iRegI src)
7592 %{
7593   single_instruction;
7594   dst    : ISS(read);
7595   src    : EX2(read);
7596   INS01  : ISS;
7597   LDST   : WR;
7598 %}
7599 
7600 //------- Store pipeline operations -----------------------
7601 
7602 // Branch
7603 pipe_class pipe_branch()
7604 %{
7605   single_instruction;
7606   INS01  : ISS;
7607   BRANCH : EX1;
7608 %}
7609 
7610 // Conditional branch
7611 pipe_class pipe_branch_cond(rFlagsReg cr)
7612 %{
7613   single_instruction;
7614   cr     : EX1(read);
7615   INS01  : ISS;
7616   BRANCH : EX1;
7617 %}
7618 
7619 // Compare & Branch
7620 // EG.  CBZ/CBNZ
7621 pipe_class pipe_cmp_branch(iRegI op1)
7622 %{
7623   single_instruction;
7624   op1    : EX1(read);
7625   INS01  : ISS;
7626   BRANCH : EX1;
7627 %}
7628 
7629 //------- Synchronisation operations ----------------------
7630 
7631 // Any operation requiring serialization.
7632 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
7633 pipe_class pipe_serial()
7634 %{
7635   single_instruction;
7636   force_serialization;
7637   fixed_latency(16);
7638   INS01  : ISS(2); // Cannot dual issue with any other instruction
7639   LDST   : WR;
7640 %}
7641 
7642 // Generic big/slow expanded idiom - also serialized
7643 pipe_class pipe_slow()
7644 %{
7645   instruction_count(10);
7646   multiple_bundles;
7647   force_serialization;
7648   fixed_latency(16);
7649   INS01  : ISS(2); // Cannot dual issue with any other instruction
7650   LDST   : WR;
7651 %}
7652 
7653 // Empty pipeline class
7654 pipe_class pipe_class_empty()
7655 %{
7656   single_instruction;
7657   fixed_latency(0);
7658 %}
7659 
7660 // Default pipeline class.
7661 pipe_class pipe_class_default()
7662 %{
7663   single_instruction;
7664   fixed_latency(2);
7665 %}
7666 
7667 // Pipeline class for compares.
7668 pipe_class pipe_class_compare()
7669 %{
7670   single_instruction;
7671   fixed_latency(16);
7672 %}
7673 
7674 // Pipeline class for memory operations.
7675 pipe_class pipe_class_memory()
7676 %{
7677   single_instruction;
7678   fixed_latency(16);
7679 %}
7680 
7681 // Pipeline class for call.
7682 pipe_class pipe_class_call()
7683 %{
7684   single_instruction;
7685   fixed_latency(100);
7686 %}
7687 
7688 // Define the class for the Nop node.
7689 define %{
7690    MachNop = pipe_class_empty;
7691 %}
7692 
7693 %}
7694 //----------INSTRUCTIONS-------------------------------------------------------
7695 //
7696 // match      -- States which machine-independent subtree may be replaced
7697 //               by this instruction.
7698 // ins_cost   -- The estimated cost of this instruction is used by instruction
7699 //               selection to identify a minimum cost tree of machine
7700 //               instructions that matches a tree of machine-independent
7701 //               instructions.
7702 // format     -- A string providing the disassembly for this instruction.
7703 //               The value of an instruction's operand may be inserted
7704 //               by referring to it with a '$' prefix.
7705 // opcode     -- Three instruction opcodes may be provided.  These are referred
7706 //               to within an encode class as $primary, $secondary, and $tertiary
7707 //               rrspectively.  The primary opcode is commonly used to
7708 //               indicate the type of machine instruction, while secondary
7709 //               and tertiary are often used for prefix options or addressing
7710 //               modes.
7711 // ins_encode -- A list of encode classes with parameters. The encode class
7712 //               name must have been defined in an 'enc_class' specification
7713 //               in the encode section of the architecture description.
7714 
7715 // ============================================================================
7716 // Memory (Load/Store) Instructions
7717 
7718 // Load Instructions
7719 
7720 // Load Byte (8 bit signed)
7721 instruct loadB(iRegINoSp dst, memory mem)
7722 %{
7723   match(Set dst (LoadB mem));
7724   predicate(!needs_acquiring_load(n));
7725 
7726   ins_cost(4 * INSN_COST);
7727   format %{ "ldrsbw  $dst, $mem\t# byte" %}
7728 
7729   ins_encode(aarch64_enc_ldrsbw(dst, mem));
7730 
7731   ins_pipe(iload_reg_mem);
7732 %}
7733 
7734 // Load Byte (8 bit signed) into long
7735 instruct loadB2L(iRegLNoSp dst, memory mem)
7736 %{
7737   match(Set dst (ConvI2L (LoadB mem)));
7738   predicate(!needs_acquiring_load(n->in(1)));
7739 
7740   ins_cost(4 * INSN_COST);
7741   format %{ "ldrsb  $dst, $mem\t# byte" %}
7742 
7743   ins_encode(aarch64_enc_ldrsb(dst, mem));
7744 
7745   ins_pipe(iload_reg_mem);
7746 %}
7747 
7748 // Load Byte (8 bit unsigned)
7749 instruct loadUB(iRegINoSp dst, memory mem)
7750 %{
7751   match(Set dst (LoadUB mem));
7752   predicate(!needs_acquiring_load(n));
7753 
7754   ins_cost(4 * INSN_COST);
7755   format %{ "ldrbw  $dst, $mem\t# byte" %}
7756 
7757   ins_encode(aarch64_enc_ldrb(dst, mem));
7758 
7759   ins_pipe(iload_reg_mem);
7760 %}
7761 
7762 // Load Byte (8 bit unsigned) into long
7763 instruct loadUB2L(iRegLNoSp dst, memory mem)
7764 %{
7765   match(Set dst (ConvI2L (LoadUB mem)));
7766   predicate(!needs_acquiring_load(n->in(1)));
7767 
7768   ins_cost(4 * INSN_COST);
7769   format %{ "ldrb  $dst, $mem\t# byte" %}
7770 
7771   ins_encode(aarch64_enc_ldrb(dst, mem));
7772 
7773   ins_pipe(iload_reg_mem);
7774 %}
7775 
7776 // Load Short (16 bit signed)
7777 instruct loadS(iRegINoSp dst, memory mem)
7778 %{
7779   match(Set dst (LoadS mem));
7780   predicate(!needs_acquiring_load(n));
7781 
7782   ins_cost(4 * INSN_COST);
7783   format %{ "ldrshw  $dst, $mem\t# short" %}
7784 
7785   ins_encode(aarch64_enc_ldrshw(dst, mem));
7786 
7787   ins_pipe(iload_reg_mem);
7788 %}
7789 
7790 // Load Short (16 bit signed) into long
7791 instruct loadS2L(iRegLNoSp dst, memory mem)
7792 %{
7793   match(Set dst (ConvI2L (LoadS mem)));
7794   predicate(!needs_acquiring_load(n->in(1)));
7795 
7796   ins_cost(4 * INSN_COST);
7797   format %{ "ldrsh  $dst, $mem\t# short" %}
7798 
7799   ins_encode(aarch64_enc_ldrsh(dst, mem));
7800 
7801   ins_pipe(iload_reg_mem);
7802 %}
7803 
7804 // Load Char (16 bit unsigned)
7805 instruct loadUS(iRegINoSp dst, memory mem)
7806 %{
7807   match(Set dst (LoadUS mem));
7808   predicate(!needs_acquiring_load(n));
7809 
7810   ins_cost(4 * INSN_COST);
7811   format %{ "ldrh  $dst, $mem\t# short" %}
7812 
7813   ins_encode(aarch64_enc_ldrh(dst, mem));
7814 
7815   ins_pipe(iload_reg_mem);
7816 %}
7817 
7818 // Load Short/Char (16 bit unsigned) into long
7819 instruct loadUS2L(iRegLNoSp dst, memory mem)
7820 %{
7821   match(Set dst (ConvI2L (LoadUS mem)));
7822   predicate(!needs_acquiring_load(n->in(1)));
7823 
7824   ins_cost(4 * INSN_COST);
7825   format %{ "ldrh  $dst, $mem\t# short" %}
7826 
7827   ins_encode(aarch64_enc_ldrh(dst, mem));
7828 
7829   ins_pipe(iload_reg_mem);
7830 %}
7831 
7832 // Load Integer (32 bit signed)
7833 instruct loadI(iRegINoSp dst, memory mem)
7834 %{
7835   match(Set dst (LoadI mem));
7836   predicate(!needs_acquiring_load(n));
7837 
7838   ins_cost(4 * INSN_COST);
7839   format %{ "ldrw  $dst, $mem\t# int" %}
7840 
7841   ins_encode(aarch64_enc_ldrw(dst, mem));
7842 
7843   ins_pipe(iload_reg_mem);
7844 %}
7845 
7846 // Load Integer (32 bit signed) into long
7847 instruct loadI2L(iRegLNoSp dst, memory mem)
7848 %{
7849   match(Set dst (ConvI2L (LoadI mem)));
7850   predicate(!needs_acquiring_load(n->in(1)));
7851 
7852   ins_cost(4 * INSN_COST);
7853   format %{ "ldrsw  $dst, $mem\t# int" %}
7854 
7855   ins_encode(aarch64_enc_ldrsw(dst, mem));
7856 
7857   ins_pipe(iload_reg_mem);
7858 %}
7859 
7860 // Load Integer (32 bit unsigned) into long
7861 instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
7862 %{
7863   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7864   predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));
7865 
7866   ins_cost(4 * INSN_COST);
7867   format %{ "ldrw  $dst, $mem\t# int" %}
7868 
7869   ins_encode(aarch64_enc_ldrw(dst, mem));
7870 
7871   ins_pipe(iload_reg_mem);
7872 %}
7873 
7874 // Load Long (64 bit signed)
7875 instruct loadL(iRegLNoSp dst, memory mem)
7876 %{
7877   match(Set dst (LoadL mem));
7878   predicate(!needs_acquiring_load(n));
7879 
7880   ins_cost(4 * INSN_COST);
7881   format %{ "ldr  $dst, $mem\t# int" %}
7882 
7883   ins_encode(aarch64_enc_ldr(dst, mem));
7884 
7885   ins_pipe(iload_reg_mem);
7886 %}
7887 
7888 // Load Range
7889 instruct loadRange(iRegINoSp dst, memory mem)
7890 %{
7891   match(Set dst (LoadRange mem));
7892 
7893   ins_cost(4 * INSN_COST);
7894   format %{ "ldrw  $dst, $mem\t# range" %}
7895 
7896   ins_encode(aarch64_enc_ldrw(dst, mem));
7897 
7898   ins_pipe(iload_reg_mem);
7899 %}
7900 
7901 // Load Pointer
7902 instruct loadP(iRegPNoSp dst, memory mem)
7903 %{
7904   match(Set dst (LoadP mem));
7905   predicate(!needs_acquiring_load(n));
7906 
7907   ins_cost(4 * INSN_COST);
7908   format %{ "ldr  $dst, $mem\t# ptr" %}
7909 
7910   ins_encode(aarch64_enc_ldr(dst, mem));
7911 
7912   ins_pipe(iload_reg_mem);
7913 %}
7914 
7915 // Load Compressed Pointer
7916 instruct loadN(iRegNNoSp dst, memory mem)
7917 %{
7918   match(Set dst (LoadN mem));
7919   predicate(!needs_acquiring_load(n));
7920 
7921   ins_cost(4 * INSN_COST);
7922   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
7923 
7924   ins_encode(aarch64_enc_ldrw(dst, mem));
7925 
7926   ins_pipe(iload_reg_mem);
7927 %}
7928 
7929 // Load Klass Pointer
7930 instruct loadKlass(iRegPNoSp dst, memory mem)
7931 %{
7932   match(Set dst (LoadKlass mem));
7933   predicate(!needs_acquiring_load(n));
7934 
7935   ins_cost(4 * INSN_COST);
7936   format %{ "ldr  $dst, $mem\t# class" %}
7937 
7938   ins_encode(aarch64_enc_ldr(dst, mem));
7939 
7940   ins_pipe(iload_reg_mem);
7941 %}
7942 
7943 // Load Narrow Klass Pointer
7944 instruct loadNKlass(iRegNNoSp dst, memory mem)
7945 %{
7946   match(Set dst (LoadNKlass mem));
7947   predicate(!needs_acquiring_load(n));
7948 
7949   ins_cost(4 * INSN_COST);
7950   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
7951 
7952   ins_encode(aarch64_enc_ldrw(dst, mem));
7953 
7954   ins_pipe(iload_reg_mem);
7955 %}
7956 
7957 // Load Float
7958 instruct loadF(vRegF dst, memory mem)
7959 %{
7960   match(Set dst (LoadF mem));
7961   predicate(!needs_acquiring_load(n));
7962 
7963   ins_cost(4 * INSN_COST);
7964   format %{ "ldrs  $dst, $mem\t# float" %}
7965 
7966   ins_encode( aarch64_enc_ldrs(dst, mem) );
7967 
7968   ins_pipe(pipe_class_memory);
7969 %}
7970 
7971 // Load Double
7972 instruct loadD(vRegD dst, memory mem)
7973 %{
7974   match(Set dst (LoadD mem));
7975   predicate(!needs_acquiring_load(n));
7976 
7977   ins_cost(4 * INSN_COST);
7978   format %{ "ldrd  $dst, $mem\t# double" %}
7979 
7980   ins_encode( aarch64_enc_ldrd(dst, mem) );
7981 
7982   ins_pipe(pipe_class_memory);
7983 %}
7984 
7985 
7986 // Load Int Constant
7987 instruct loadConI(iRegINoSp dst, immI src)
7988 %{
7989   match(Set dst src);
7990 
7991   ins_cost(INSN_COST);
7992   format %{ "mov $dst, $src\t# int" %}
7993 
7994   ins_encode( aarch64_enc_movw_imm(dst, src) );
7995 
7996   ins_pipe(ialu_imm);
7997 %}
7998 
7999 // Load Long Constant
8000 instruct loadConL(iRegLNoSp dst, immL src)
8001 %{
8002   match(Set dst src);
8003 
8004   ins_cost(INSN_COST);
8005   format %{ "mov $dst, $src\t# long" %}
8006 
8007   ins_encode( aarch64_enc_mov_imm(dst, src) );
8008 
8009   ins_pipe(ialu_imm);
8010 %}
8011 
8012 // Load Pointer Constant
8013 
8014 instruct loadConP(iRegPNoSp dst, immP con)
8015 %{
8016   match(Set dst con);
8017 
8018   ins_cost(INSN_COST * 4);
8019   format %{
8020     "mov  $dst, $con\t# ptr\n\t"
8021   %}
8022 
8023   ins_encode(aarch64_enc_mov_p(dst, con));
8024 
8025   ins_pipe(ialu_imm);
8026 %}
8027 
8028 // Load Null Pointer Constant
8029 
8030 instruct loadConP0(iRegPNoSp dst, immP0 con)
8031 %{
8032   match(Set dst con);
8033 
8034   ins_cost(INSN_COST);
8035   format %{ "mov  $dst, $con\t# NULL ptr" %}
8036 
8037   ins_encode(aarch64_enc_mov_p0(dst, con));
8038 
8039   ins_pipe(ialu_imm);
8040 %}
8041 
8042 // Load Pointer Constant One
8043 
8044 instruct loadConP1(iRegPNoSp dst, immP_1 con)
8045 %{
8046   match(Set dst con);
8047 
8048   ins_cost(INSN_COST);
8049   format %{ "mov  $dst, $con\t# NULL ptr" %}
8050 
8051   ins_encode(aarch64_enc_mov_p1(dst, con));
8052 
8053   ins_pipe(ialu_imm);
8054 %}
8055 
8056 // Load Poll Page Constant
8057 
8058 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
8059 %{
8060   match(Set dst con);
8061 
8062   ins_cost(INSN_COST);
8063   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
8064 
8065   ins_encode(aarch64_enc_mov_poll_page(dst, con));
8066 
8067   ins_pipe(ialu_imm);
8068 %}
8069 
8070 // Load Byte Map Base Constant
8071 
8072 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
8073 %{
8074   match(Set dst con);
8075 
8076   ins_cost(INSN_COST);
8077   format %{ "adr  $dst, $con\t# Byte Map Base" %}
8078 
8079   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
8080 
8081   ins_pipe(ialu_imm);
8082 %}
8083 
8084 // Load Narrow Pointer Constant
8085 
8086 instruct loadConN(iRegNNoSp dst, immN con)
8087 %{
8088   match(Set dst con);
8089 
8090   ins_cost(INSN_COST * 4);
8091   format %{ "mov  $dst, $con\t# compressed ptr" %}
8092 
8093   ins_encode(aarch64_enc_mov_n(dst, con));
8094 
8095   ins_pipe(ialu_imm);
8096 %}
8097 
8098 // Load Narrow Null Pointer Constant
8099 
8100 instruct loadConN0(iRegNNoSp dst, immN0 con)
8101 %{
8102   match(Set dst con);
8103 
8104   ins_cost(INSN_COST);
8105   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
8106 
8107   ins_encode(aarch64_enc_mov_n0(dst, con));
8108 
8109   ins_pipe(ialu_imm);
8110 %}
8111 
8112 // Load Narrow Klass Constant
8113 
8114 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
8115 %{
8116   match(Set dst con);
8117 
8118   ins_cost(INSN_COST);
8119   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
8120 
8121   ins_encode(aarch64_enc_mov_nk(dst, con));
8122 
8123   ins_pipe(ialu_imm);
8124 %}
8125 
8126 // Load Packed Float Constant
8127 
8128 instruct loadConF_packed(vRegF dst, immFPacked con) %{
8129   match(Set dst con);
8130   ins_cost(INSN_COST * 4);
8131   format %{ "fmovs  $dst, $con"%}
8132   ins_encode %{
8133     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
8134   %}
8135 
8136   ins_pipe(fp_imm_s);
8137 %}
8138 
8139 // Load Float Constant
8140 
8141 instruct loadConF(vRegF dst, immF con) %{
8142   match(Set dst con);
8143 
8144   ins_cost(INSN_COST * 4);
8145 
8146   format %{
8147     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8148   %}
8149 
8150   ins_encode %{
8151     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
8152   %}
8153 
8154   ins_pipe(fp_load_constant_s);
8155 %}
8156 
8157 // Load Packed Double Constant
8158 
8159 instruct loadConD_packed(vRegD dst, immDPacked con) %{
8160   match(Set dst con);
8161   ins_cost(INSN_COST);
8162   format %{ "fmovd  $dst, $con"%}
8163   ins_encode %{
8164     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
8165   %}
8166 
8167   ins_pipe(fp_imm_d);
8168 %}
8169 
8170 // Load Double Constant
8171 
8172 instruct loadConD(vRegD dst, immD con) %{
8173   match(Set dst con);
8174 
8175   ins_cost(INSN_COST * 5);
8176   format %{
8177     "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
8178   %}
8179 
8180   ins_encode %{
8181     __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
8182   %}
8183 
8184   ins_pipe(fp_load_constant_d);
8185 %}
8186 
8187 // Store Instructions
8188 
8189 // Store CMS card-mark Immediate
8190 instruct storeimmCM0(immI0 zero, memory mem)
8191 %{
8192   match(Set mem (StoreCM mem zero));
8193   predicate(unnecessary_storestore(n));
8194 
8195   ins_cost(INSN_COST);
8196   format %{ "strb zr, $mem\t# byte" %}
8197 
8198   ins_encode(aarch64_enc_strb0(mem));
8199 
8200   ins_pipe(istore_mem);
8201 %}
8202 
8203 // Store CMS card-mark Immediate with intervening StoreStore
8204 // needed when using CMS with no conditional card marking
8205 instruct storeimmCM0_ordered(immI0 zero, memory mem)
8206 %{
8207   match(Set mem (StoreCM mem zero));
8208 
8209   ins_cost(INSN_COST * 2);
8210   format %{ "dmb ishst"
8211       "\n\tstrb zr, $mem\t# byte" %}
8212 
8213   ins_encode(aarch64_enc_strb0_ordered(mem));
8214 
8215   ins_pipe(istore_mem);
8216 %}
8217 
8218 // Store Byte
8219 instruct storeB(iRegIorL2I src, memory mem)
8220 %{
8221   match(Set mem (StoreB mem src));
8222   predicate(!needs_releasing_store(n));
8223 
8224   ins_cost(INSN_COST);
8225   format %{ "strb  $src, $mem\t# byte" %}
8226 
8227   ins_encode(aarch64_enc_strb(src, mem));
8228 
8229   ins_pipe(istore_reg_mem);
8230 %}
8231 
8232 
8233 instruct storeimmB0(immI0 zero, memory mem)
8234 %{
8235   match(Set mem (StoreB mem zero));
8236   predicate(!needs_releasing_store(n));
8237 
8238   ins_cost(INSN_COST);
8239   format %{ "strb rscractch2, $mem\t# byte" %}
8240 
8241   ins_encode(aarch64_enc_strb0(mem));
8242 
8243   ins_pipe(istore_mem);
8244 %}
8245 
8246 // Store Char/Short
8247 instruct storeC(iRegIorL2I src, memory mem)
8248 %{
8249   match(Set mem (StoreC mem src));
8250   predicate(!needs_releasing_store(n));
8251 
8252   ins_cost(INSN_COST);
8253   format %{ "strh  $src, $mem\t# short" %}
8254 
8255   ins_encode(aarch64_enc_strh(src, mem));
8256 
8257   ins_pipe(istore_reg_mem);
8258 %}
8259 
8260 instruct storeimmC0(immI0 zero, memory mem)
8261 %{
8262   match(Set mem (StoreC mem zero));
8263   predicate(!needs_releasing_store(n));
8264 
8265   ins_cost(INSN_COST);
8266   format %{ "strh  zr, $mem\t# short" %}
8267 
8268   ins_encode(aarch64_enc_strh0(mem));
8269 
8270   ins_pipe(istore_mem);
8271 %}
8272 
8273 // Store Integer
8274 
8275 instruct storeI(iRegIorL2I src, memory mem)
8276 %{
8277   match(Set mem(StoreI mem src));
8278   predicate(!needs_releasing_store(n));
8279 
8280   ins_cost(INSN_COST);
8281   format %{ "strw  $src, $mem\t# int" %}
8282 
8283   ins_encode(aarch64_enc_strw(src, mem));
8284 
8285   ins_pipe(istore_reg_mem);
8286 %}
8287 
8288 instruct storeimmI0(immI0 zero, memory mem)
8289 %{
8290   match(Set mem(StoreI mem zero));
8291   predicate(!needs_releasing_store(n));
8292 
8293   ins_cost(INSN_COST);
8294   format %{ "strw  zr, $mem\t# int" %}
8295 
8296   ins_encode(aarch64_enc_strw0(mem));
8297 
8298   ins_pipe(istore_mem);
8299 %}
8300 
8301 // Store Long (64 bit signed)
8302 instruct storeL(iRegL src, memory mem)
8303 %{
8304   match(Set mem (StoreL mem src));
8305   predicate(!needs_releasing_store(n));
8306 
8307   ins_cost(INSN_COST);
8308   format %{ "str  $src, $mem\t# int" %}
8309 
8310   ins_encode(aarch64_enc_str(src, mem));
8311 
8312   ins_pipe(istore_reg_mem);
8313 %}
8314 
8315 // Store Long (64 bit signed)
8316 instruct storeimmL0(immL0 zero, memory mem)
8317 %{
8318   match(Set mem (StoreL mem zero));
8319   predicate(!needs_releasing_store(n));
8320 
8321   ins_cost(INSN_COST);
8322   format %{ "str  zr, $mem\t# int" %}
8323 
8324   ins_encode(aarch64_enc_str0(mem));
8325 
8326   ins_pipe(istore_mem);
8327 %}
8328 
8329 // Store Pointer
8330 instruct storeP(iRegP src, memory mem)
8331 %{
8332   match(Set mem (StoreP mem src));
8333   predicate(!needs_releasing_store(n));
8334 
8335   ins_cost(INSN_COST);
8336   format %{ "str  $src, $mem\t# ptr" %}
8337 
8338   ins_encode(aarch64_enc_str(src, mem));
8339 
8340   ins_pipe(istore_reg_mem);
8341 %}
8342 
8343 // Store Pointer
8344 instruct storeimmP0(immP0 zero, memory mem)
8345 %{
8346   match(Set mem (StoreP mem zero));
8347   predicate(!needs_releasing_store(n));
8348 
8349   ins_cost(INSN_COST);
8350   format %{ "str zr, $mem\t# ptr" %}
8351 
8352   ins_encode(aarch64_enc_str0(mem));
8353 
8354   ins_pipe(istore_mem);
8355 %}
8356 
8357 // Store Compressed Pointer
8358 instruct storeN(iRegN src, memory mem)
8359 %{
8360   match(Set mem (StoreN mem src));
8361   predicate(!needs_releasing_store(n));
8362 
8363   ins_cost(INSN_COST);
8364   format %{ "strw  $src, $mem\t# compressed ptr" %}
8365 
8366   ins_encode(aarch64_enc_strw(src, mem));
8367 
8368   ins_pipe(istore_reg_mem);
8369 %}
8370 
8371 instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
8372 %{
8373   match(Set mem (StoreN mem zero));
8374   predicate(Universe::narrow_oop_base() == NULL &&
8375             Universe::narrow_klass_base() == NULL &&
8376             (!needs_releasing_store(n)));
8377 
8378   ins_cost(INSN_COST);
8379   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
8380 
8381   ins_encode(aarch64_enc_strw(heapbase, mem));
8382 
8383   ins_pipe(istore_reg_mem);
8384 %}
8385 
8386 // Store Float
8387 instruct storeF(vRegF src, memory mem)
8388 %{
8389   match(Set mem (StoreF mem src));
8390   predicate(!needs_releasing_store(n));
8391 
8392   ins_cost(INSN_COST);
8393   format %{ "strs  $src, $mem\t# float" %}
8394 
8395   ins_encode( aarch64_enc_strs(src, mem) );
8396 
8397   ins_pipe(pipe_class_memory);
8398 %}
8399 
8400 // TODO
8401 // implement storeImmF0 and storeFImmPacked
8402 
8403 // Store Double
8404 instruct storeD(vRegD src, memory mem)
8405 %{
8406   match(Set mem (StoreD mem src));
8407   predicate(!needs_releasing_store(n));
8408 
8409   ins_cost(INSN_COST);
8410   format %{ "strd  $src, $mem\t# double" %}
8411 
8412   ins_encode( aarch64_enc_strd(src, mem) );
8413 
8414   ins_pipe(pipe_class_memory);
8415 %}
8416 
8417 // Store Compressed Klass Pointer
8418 instruct storeNKlass(iRegN src, memory mem)
8419 %{
8420   predicate(!needs_releasing_store(n));
8421   match(Set mem (StoreNKlass mem src));
8422 
8423   ins_cost(INSN_COST);
8424   format %{ "strw  $src, $mem\t# compressed klass ptr" %}
8425 
8426   ins_encode(aarch64_enc_strw(src, mem));
8427 
8428   ins_pipe(istore_reg_mem);
8429 %}
8430 
8431 // TODO
8432 // implement storeImmD0 and storeDImmPacked
8433 
8434 // prefetch instructions
8435 // Must be safe to execute with invalid address (cannot fault).
8436 
8437 instruct prefetchalloc( memory mem ) %{
8438   match(PrefetchAllocation mem);
8439 
8440   ins_cost(INSN_COST);
8441   format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}
8442 
8443   ins_encode( aarch64_enc_prefetchw(mem) );
8444 
8445   ins_pipe(iload_prefetch);
8446 %}
8447 
8448 //  ---------------- volatile loads and stores ----------------
8449 
8450 // Load Byte (8 bit signed)
8451 instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8452 %{
8453   match(Set dst (LoadB mem));
8454 
8455   ins_cost(VOLATILE_REF_COST);
8456   format %{ "ldarsb  $dst, $mem\t# byte" %}
8457 
8458   ins_encode(aarch64_enc_ldarsb(dst, mem));
8459 
8460   ins_pipe(pipe_serial);
8461 %}
8462 
8463 // Load Byte (8 bit signed) into long
8464 instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8465 %{
8466   match(Set dst (ConvI2L (LoadB mem)));
8467 
8468   ins_cost(VOLATILE_REF_COST);
8469   format %{ "ldarsb  $dst, $mem\t# byte" %}
8470 
8471   ins_encode(aarch64_enc_ldarsb(dst, mem));
8472 
8473   ins_pipe(pipe_serial);
8474 %}
8475 
8476 // Load Byte (8 bit unsigned)
8477 instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8478 %{
8479   match(Set dst (LoadUB mem));
8480 
8481   ins_cost(VOLATILE_REF_COST);
8482   format %{ "ldarb  $dst, $mem\t# byte" %}
8483 
8484   ins_encode(aarch64_enc_ldarb(dst, mem));
8485 
8486   ins_pipe(pipe_serial);
8487 %}
8488 
8489 // Load Byte (8 bit unsigned) into long
8490 instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8491 %{
8492   match(Set dst (ConvI2L (LoadUB mem)));
8493 
8494   ins_cost(VOLATILE_REF_COST);
8495   format %{ "ldarb  $dst, $mem\t# byte" %}
8496 
8497   ins_encode(aarch64_enc_ldarb(dst, mem));
8498 
8499   ins_pipe(pipe_serial);
8500 %}
8501 
8502 // Load Short (16 bit signed)
8503 instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8504 %{
8505   match(Set dst (LoadS mem));
8506 
8507   ins_cost(VOLATILE_REF_COST);
8508   format %{ "ldarshw  $dst, $mem\t# short" %}
8509 
8510   ins_encode(aarch64_enc_ldarshw(dst, mem));
8511 
8512   ins_pipe(pipe_serial);
8513 %}
8514 
8515 instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8516 %{
8517   match(Set dst (LoadUS mem));
8518 
8519   ins_cost(VOLATILE_REF_COST);
8520   format %{ "ldarhw  $dst, $mem\t# short" %}
8521 
8522   ins_encode(aarch64_enc_ldarhw(dst, mem));
8523 
8524   ins_pipe(pipe_serial);
8525 %}
8526 
8527 // Load Short/Char (16 bit unsigned) into long
8528 instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8529 %{
8530   match(Set dst (ConvI2L (LoadUS mem)));
8531 
8532   ins_cost(VOLATILE_REF_COST);
8533   format %{ "ldarh  $dst, $mem\t# short" %}
8534 
8535   ins_encode(aarch64_enc_ldarh(dst, mem));
8536 
8537   ins_pipe(pipe_serial);
8538 %}
8539 
8540 // Load Short/Char (16 bit signed) into long
8541 instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8542 %{
8543   match(Set dst (ConvI2L (LoadS mem)));
8544 
8545   ins_cost(VOLATILE_REF_COST);
8546   format %{ "ldarh  $dst, $mem\t# short" %}
8547 
8548   ins_encode(aarch64_enc_ldarsh(dst, mem));
8549 
8550   ins_pipe(pipe_serial);
8551 %}
8552 
8553 // Load Integer (32 bit signed)
8554 instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
8555 %{
8556   match(Set dst (LoadI mem));
8557 
8558   ins_cost(VOLATILE_REF_COST);
8559   format %{ "ldarw  $dst, $mem\t# int" %}
8560 
8561   ins_encode(aarch64_enc_ldarw(dst, mem));
8562 
8563   ins_pipe(pipe_serial);
8564 %}
8565 
8566 // Load Integer (32 bit unsigned) into long
8567 instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
8568 %{
8569   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
8570 
8571   ins_cost(VOLATILE_REF_COST);
8572   format %{ "ldarw  $dst, $mem\t# int" %}
8573 
8574   ins_encode(aarch64_enc_ldarw(dst, mem));
8575 
8576   ins_pipe(pipe_serial);
8577 %}
8578 
8579 // Load Long (64 bit signed)
8580 instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
8581 %{
8582   match(Set dst (LoadL mem));
8583 
8584   ins_cost(VOLATILE_REF_COST);
8585   format %{ "ldar  $dst, $mem\t# int" %}
8586 
8587   ins_encode(aarch64_enc_ldar(dst, mem));
8588 
8589   ins_pipe(pipe_serial);
8590 %}
8591 
8592 // Load Pointer
8593 instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
8594 %{
8595   match(Set dst (LoadP mem));
8596 
8597   ins_cost(VOLATILE_REF_COST);
8598   format %{ "ldar  $dst, $mem\t# ptr" %}
8599 
8600   ins_encode(aarch64_enc_ldar(dst, mem));
8601 
8602   ins_pipe(pipe_serial);
8603 %}
8604 
8605 // Load Compressed Pointer
8606 instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
8607 %{
8608   match(Set dst (LoadN mem));
8609 
8610   ins_cost(VOLATILE_REF_COST);
8611   format %{ "ldarw  $dst, $mem\t# compressed ptr" %}
8612 
8613   ins_encode(aarch64_enc_ldarw(dst, mem));
8614 
8615   ins_pipe(pipe_serial);
8616 %}
8617 
8618 // Load Float
8619 instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
8620 %{
8621   match(Set dst (LoadF mem));
8622 
8623   ins_cost(VOLATILE_REF_COST);
8624   format %{ "ldars  $dst, $mem\t# float" %}
8625 
8626   ins_encode( aarch64_enc_fldars(dst, mem) );
8627 
8628   ins_pipe(pipe_serial);
8629 %}
8630 
8631 // Load Double
8632 instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
8633 %{
8634   match(Set dst (LoadD mem));
8635 
8636   ins_cost(VOLATILE_REF_COST);
8637   format %{ "ldard  $dst, $mem\t# double" %}
8638 
8639   ins_encode( aarch64_enc_fldard(dst, mem) );
8640 
8641   ins_pipe(pipe_serial);
8642 %}
8643 
8644 // Store Byte
8645 instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8646 %{
8647   match(Set mem (StoreB mem src));
8648 
8649   ins_cost(VOLATILE_REF_COST);
8650   format %{ "stlrb  $src, $mem\t# byte" %}
8651 
8652   ins_encode(aarch64_enc_stlrb(src, mem));
8653 
8654   ins_pipe(pipe_class_memory);
8655 %}
8656 
8657 // Store Char/Short
8658 instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8659 %{
8660   match(Set mem (StoreC mem src));
8661 
8662   ins_cost(VOLATILE_REF_COST);
8663   format %{ "stlrh  $src, $mem\t# short" %}
8664 
8665   ins_encode(aarch64_enc_stlrh(src, mem));
8666 
8667   ins_pipe(pipe_class_memory);
8668 %}
8669 
8670 // Store Integer
8671 
8672 instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
8673 %{
8674   match(Set mem(StoreI mem src));
8675 
8676   ins_cost(VOLATILE_REF_COST);
8677   format %{ "stlrw  $src, $mem\t# int" %}
8678 
8679   ins_encode(aarch64_enc_stlrw(src, mem));
8680 
8681   ins_pipe(pipe_class_memory);
8682 %}
8683 
8684 // Store Long (64 bit signed)
8685 instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
8686 %{
8687   match(Set mem (StoreL mem src));
8688 
8689   ins_cost(VOLATILE_REF_COST);
8690   format %{ "stlr  $src, $mem\t# int" %}
8691 
8692   ins_encode(aarch64_enc_stlr(src, mem));
8693 
8694   ins_pipe(pipe_class_memory);
8695 %}
8696 
8697 // Store Pointer
8698 instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
8699 %{
8700   match(Set mem (StoreP mem src));
8701 
8702   ins_cost(VOLATILE_REF_COST);
8703   format %{ "stlr  $src, $mem\t# ptr" %}
8704 
8705   ins_encode(aarch64_enc_stlr(src, mem));
8706 
8707   ins_pipe(pipe_class_memory);
8708 %}
8709 
8710 // Store Compressed Pointer
8711 instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
8712 %{
8713   match(Set mem (StoreN mem src));
8714 
8715   ins_cost(VOLATILE_REF_COST);
8716   format %{ "stlrw  $src, $mem\t# compressed ptr" %}
8717 
8718   ins_encode(aarch64_enc_stlrw(src, mem));
8719 
8720   ins_pipe(pipe_class_memory);
8721 %}
8722 
8723 // Store Float
8724 instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
8725 %{
8726   match(Set mem (StoreF mem src));
8727 
8728   ins_cost(VOLATILE_REF_COST);
8729   format %{ "stlrs  $src, $mem\t# float" %}
8730 
8731   ins_encode( aarch64_enc_fstlrs(src, mem) );
8732 
8733   ins_pipe(pipe_class_memory);
8734 %}
8735 
8736 // TODO
8737 // implement storeImmF0 and storeFImmPacked
8738 
8739 // Store Double
8740 instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
8741 %{
8742   match(Set mem (StoreD mem src));
8743 
8744   ins_cost(VOLATILE_REF_COST);
8745   format %{ "stlrd  $src, $mem\t# double" %}
8746 
8747   ins_encode( aarch64_enc_fstlrd(src, mem) );
8748 
8749   ins_pipe(pipe_class_memory);
8750 %}
8751 
8752 //  ---------------- end of volatile loads and stores ----------------
8753 
8754 // ============================================================================
8755 // BSWAP Instructions
8756 
8757 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
8758   match(Set dst (ReverseBytesI src));
8759 
8760   ins_cost(INSN_COST);
8761   format %{ "revw  $dst, $src" %}
8762 
8763   ins_encode %{
8764     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
8765   %}
8766 
8767   ins_pipe(ialu_reg);
8768 %}
8769 
8770 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
8771   match(Set dst (ReverseBytesL src));
8772 
8773   ins_cost(INSN_COST);
8774   format %{ "rev  $dst, $src" %}
8775 
8776   ins_encode %{
8777     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
8778   %}
8779 
8780   ins_pipe(ialu_reg);
8781 %}
8782 
8783 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
8784   match(Set dst (ReverseBytesUS src));
8785 
8786   ins_cost(INSN_COST);
8787   format %{ "rev16w  $dst, $src" %}
8788 
8789   ins_encode %{
8790     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8791   %}
8792 
8793   ins_pipe(ialu_reg);
8794 %}
8795 
8796 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
8797   match(Set dst (ReverseBytesS src));
8798 
8799   ins_cost(INSN_COST);
8800   format %{ "rev16w  $dst, $src\n\t"
8801             "sbfmw $dst, $dst, #0, #15" %}
8802 
8803   ins_encode %{
8804     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
8805     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
8806   %}
8807 
8808   ins_pipe(ialu_reg);
8809 %}
8810 
8811 // ============================================================================
8812 // Zero Count Instructions
8813 
8814 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8815   match(Set dst (CountLeadingZerosI src));
8816 
8817   ins_cost(INSN_COST);
8818   format %{ "clzw  $dst, $src" %}
8819   ins_encode %{
8820     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8821   %}
8822 
8823   ins_pipe(ialu_reg);
8824 %}
8825 
8826 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8827   match(Set dst (CountLeadingZerosL src));
8828 
8829   ins_cost(INSN_COST);
8830   format %{ "clz   $dst, $src" %}
8831   ins_encode %{
8832     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8833   %}
8834 
8835   ins_pipe(ialu_reg);
8836 %}
8837 
8838 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8839   match(Set dst (CountTrailingZerosI src));
8840 
8841   ins_cost(INSN_COST * 2);
8842   format %{ "rbitw  $dst, $src\n\t"
8843             "clzw   $dst, $dst" %}
8844   ins_encode %{
8845     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8846     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8847   %}
8848 
8849   ins_pipe(ialu_reg);
8850 %}
8851 
8852 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8853   match(Set dst (CountTrailingZerosL src));
8854 
8855   ins_cost(INSN_COST * 2);
8856   format %{ "rbit   $dst, $src\n\t"
8857             "clz    $dst, $dst" %}
8858   ins_encode %{
8859     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8860     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8861   %}
8862 
8863   ins_pipe(ialu_reg);
8864 %}
8865 
8866 //---------- Population Count Instructions -------------------------------------
8867 //
8868 
8869 instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
8870   predicate(UsePopCountInstruction);
8871   match(Set dst (PopCountI src));
8872   effect(TEMP tmp);
8873   ins_cost(INSN_COST * 13);
8874 
8875   format %{ "movw   $src, $src\n\t"
8876             "mov    $tmp, $src\t# vector (1D)\n\t"
8877             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8878             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8879             "mov    $dst, $tmp\t# vector (1D)" %}
8880   ins_encode %{
8881     __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
8882     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8883     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8884     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8885     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8886   %}
8887 
8888   ins_pipe(pipe_class_default);
8889 %}
8890 
8891 instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
8892   predicate(UsePopCountInstruction);
8893   match(Set dst (PopCountI (LoadI mem)));
8894   effect(TEMP tmp);
8895   ins_cost(INSN_COST * 13);
8896 
8897   format %{ "ldrs   $tmp, $mem\n\t"
8898             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8899             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8900             "mov    $dst, $tmp\t# vector (1D)" %}
8901   ins_encode %{
8902     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8903     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
8904                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8905     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8906     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8907     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8908   %}
8909 
8910   ins_pipe(pipe_class_default);
8911 %}
8912 
8913 // Note: Long.bitCount(long) returns an int.
8914 instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
8915   predicate(UsePopCountInstruction);
8916   match(Set dst (PopCountL src));
8917   effect(TEMP tmp);
8918   ins_cost(INSN_COST * 13);
8919 
8920   format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
8921             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8922             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8923             "mov    $dst, $tmp\t# vector (1D)" %}
8924   ins_encode %{
8925     __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
8926     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8927     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8928     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8929   %}
8930 
8931   ins_pipe(pipe_class_default);
8932 %}
8933 
8934 instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
8935   predicate(UsePopCountInstruction);
8936   match(Set dst (PopCountL (LoadL mem)));
8937   effect(TEMP tmp);
8938   ins_cost(INSN_COST * 13);
8939 
8940   format %{ "ldrd   $tmp, $mem\n\t"
8941             "cnt    $tmp, $tmp\t# vector (8B)\n\t"
8942             "addv   $tmp, $tmp\t# vector (8B)\n\t"
8943             "mov    $dst, $tmp\t# vector (1D)" %}
8944   ins_encode %{
8945     FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
8946     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
8947                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
8948     __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8949     __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
8950     __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
8951   %}
8952 
8953   ins_pipe(pipe_class_default);
8954 %}
8955 
8956 // ============================================================================
8957 // MemBar Instruction
8958 
8959 instruct load_fence() %{
8960   match(LoadFence);
8961   ins_cost(VOLATILE_REF_COST);
8962 
8963   format %{ "load_fence" %}
8964 
8965   ins_encode %{
8966     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8967   %}
8968   ins_pipe(pipe_serial);
8969 %}
8970 
8971 instruct unnecessary_membar_acquire() %{
8972   predicate(unnecessary_acquire(n));
8973   match(MemBarAcquire);
8974   ins_cost(0);
8975 
8976   format %{ "membar_acquire (elided)" %}
8977 
8978   ins_encode %{
8979     __ block_comment("membar_acquire (elided)");
8980   %}
8981 
8982   ins_pipe(pipe_class_empty);
8983 %}
8984 
8985 instruct membar_acquire() %{
8986   match(MemBarAcquire);
8987   ins_cost(VOLATILE_REF_COST);
8988 
8989   format %{ "membar_acquire" %}
8990 
8991   ins_encode %{
8992     __ block_comment("membar_acquire");
8993     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
8994   %}
8995 
8996   ins_pipe(pipe_serial);
8997 %}
8998 
8999 
9000 instruct membar_acquire_lock() %{
9001   match(MemBarAcquireLock);
9002   ins_cost(VOLATILE_REF_COST);
9003 
9004   format %{ "membar_acquire_lock (elided)" %}
9005 
9006   ins_encode %{
9007     __ block_comment("membar_acquire_lock (elided)");
9008   %}
9009 
9010   ins_pipe(pipe_serial);
9011 %}
9012 
9013 instruct store_fence() %{
9014   match(StoreFence);
9015   ins_cost(VOLATILE_REF_COST);
9016 
9017   format %{ "store_fence" %}
9018 
9019   ins_encode %{
9020     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9021   %}
9022   ins_pipe(pipe_serial);
9023 %}
9024 
9025 instruct unnecessary_membar_release() %{
9026   predicate(unnecessary_release(n));
9027   match(MemBarRelease);
9028   ins_cost(0);
9029 
9030   format %{ "membar_release (elided)" %}
9031 
9032   ins_encode %{
9033     __ block_comment("membar_release (elided)");
9034   %}
9035   ins_pipe(pipe_serial);
9036 %}
9037 
9038 instruct membar_release() %{
9039   match(MemBarRelease);
9040   ins_cost(VOLATILE_REF_COST);
9041 
9042   format %{ "membar_release" %}
9043 
9044   ins_encode %{
9045     __ block_comment("membar_release");
9046     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9047   %}
9048   ins_pipe(pipe_serial);
9049 %}
9050 
9051 instruct membar_storestore() %{
9052   match(MemBarStoreStore);
9053   ins_cost(VOLATILE_REF_COST);
9054 
9055   format %{ "MEMBAR-store-store" %}
9056 
9057   ins_encode %{
9058     __ membar(Assembler::StoreStore);
9059   %}
9060   ins_pipe(pipe_serial);
9061 %}
9062 
9063 instruct membar_release_lock() %{
9064   match(MemBarReleaseLock);
9065   ins_cost(VOLATILE_REF_COST);
9066 
9067   format %{ "membar_release_lock (elided)" %}
9068 
9069   ins_encode %{
9070     __ block_comment("membar_release_lock (elided)");
9071   %}
9072 
9073   ins_pipe(pipe_serial);
9074 %}
9075 
9076 instruct unnecessary_membar_volatile() %{
9077   predicate(unnecessary_volatile(n));
9078   match(MemBarVolatile);
9079   ins_cost(0);
9080 
9081   format %{ "membar_volatile (elided)" %}
9082 
9083   ins_encode %{
9084     __ block_comment("membar_volatile (elided)");
9085   %}
9086 
9087   ins_pipe(pipe_serial);
9088 %}
9089 
9090 instruct membar_volatile() %{
9091   match(MemBarVolatile);
9092   ins_cost(VOLATILE_REF_COST*100);
9093 
9094   format %{ "membar_volatile" %}
9095 
9096   ins_encode %{
9097     __ block_comment("membar_volatile");
9098     __ membar(Assembler::StoreLoad);
9099   %}
9100 
9101   ins_pipe(pipe_serial);
9102 %}
9103 
9104 // ============================================================================
9105 // Cast/Convert Instructions
9106 
9107 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9108   match(Set dst (CastX2P src));
9109 
9110   ins_cost(INSN_COST);
9111   format %{ "mov $dst, $src\t# long -> ptr" %}
9112 
9113   ins_encode %{
9114     if ($dst$$reg != $src$$reg) {
9115       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9116     }
9117   %}
9118 
9119   ins_pipe(ialu_reg);
9120 %}
9121 
9122 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9123   match(Set dst (CastP2X src));
9124 
9125   ins_cost(INSN_COST);
9126   format %{ "mov $dst, $src\t# ptr -> long" %}
9127 
9128   ins_encode %{
9129     if ($dst$$reg != $src$$reg) {
9130       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9131     }
9132   %}
9133 
9134   ins_pipe(ialu_reg);
9135 %}
9136 
9137 // Convert oop into int for vectors alignment masking
9138 instruct convP2I(iRegINoSp dst, iRegP src) %{
9139   match(Set dst (ConvL2I (CastP2X src)));
9140 
9141   ins_cost(INSN_COST);
9142   format %{ "movw $dst, $src\t# ptr -> int" %}
9143   ins_encode %{
9144     __ movw($dst$$Register, $src$$Register);
9145   %}
9146 
9147   ins_pipe(ialu_reg);
9148 %}
9149 
9150 // Convert compressed oop into int for vectors alignment masking
9151 // in case of 32bit oops (heap < 4Gb).
9152 instruct convN2I(iRegINoSp dst, iRegN src)
9153 %{
9154   predicate(Universe::narrow_oop_shift() == 0);
9155   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9156 
9157   ins_cost(INSN_COST);
9158   format %{ "mov dst, $src\t# compressed ptr -> int" %}
9159   ins_encode %{
9160     __ movw($dst$$Register, $src$$Register);
9161   %}
9162 
9163   ins_pipe(ialu_reg);
9164 %}
9165 
9166 
9167 // Convert oop pointer into compressed form
9168 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9169   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9170   match(Set dst (EncodeP src));
9171   effect(KILL cr);
9172   ins_cost(INSN_COST * 3);
9173   format %{ "encode_heap_oop $dst, $src" %}
9174   ins_encode %{
9175     Register s = $src$$Register;
9176     Register d = $dst$$Register;
9177     __ encode_heap_oop(d, s);
9178   %}
9179   ins_pipe(ialu_reg);
9180 %}
9181 
9182 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9183   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9184   match(Set dst (EncodeP src));
9185   ins_cost(INSN_COST * 3);
9186   format %{ "encode_heap_oop_not_null $dst, $src" %}
9187   ins_encode %{
9188     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9189   %}
9190   ins_pipe(ialu_reg);
9191 %}
9192 
9193 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9194   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9195             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9196   match(Set dst (DecodeN src));
9197   ins_cost(INSN_COST * 3);
9198   format %{ "decode_heap_oop $dst, $src" %}
9199   ins_encode %{
9200     Register s = $src$$Register;
9201     Register d = $dst$$Register;
9202     __ decode_heap_oop(d, s);
9203   %}
9204   ins_pipe(ialu_reg);
9205 %}
9206 
9207 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9208   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9209             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9210   match(Set dst (DecodeN src));
9211   ins_cost(INSN_COST * 3);
9212   format %{ "decode_heap_oop_not_null $dst, $src" %}
9213   ins_encode %{
9214     Register s = $src$$Register;
9215     Register d = $dst$$Register;
9216     __ decode_heap_oop_not_null(d, s);
9217   %}
9218   ins_pipe(ialu_reg);
9219 %}
9220 
9221 // n.b. AArch64 implementations of encode_klass_not_null and
9222 // decode_klass_not_null do not modify the flags register so, unlike
9223 // Intel, we don't kill CR as a side effect here
9224 
9225 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
9226   match(Set dst (EncodePKlass src));
9227 
9228   ins_cost(INSN_COST * 3);
9229   format %{ "encode_klass_not_null $dst,$src" %}
9230 
9231   ins_encode %{
9232     Register src_reg = as_Register($src$$reg);
9233     Register dst_reg = as_Register($dst$$reg);
9234     __ encode_klass_not_null(dst_reg, src_reg);
9235   %}
9236 
9237    ins_pipe(ialu_reg);
9238 %}
9239 
9240 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
9241   match(Set dst (DecodeNKlass src));
9242 
9243   ins_cost(INSN_COST * 3);
9244   format %{ "decode_klass_not_null $dst,$src" %}
9245 
9246   ins_encode %{
9247     Register src_reg = as_Register($src$$reg);
9248     Register dst_reg = as_Register($dst$$reg);
9249     if (dst_reg != src_reg) {
9250       __ decode_klass_not_null(dst_reg, src_reg);
9251     } else {
9252       __ decode_klass_not_null(dst_reg);
9253     }
9254   %}
9255 
9256    ins_pipe(ialu_reg);
9257 %}
9258 
9259 instruct checkCastPP(iRegPNoSp dst)
9260 %{
9261   match(Set dst (CheckCastPP dst));
9262 
9263   size(0);
9264   format %{ "# checkcastPP of $dst" %}
9265   ins_encode(/* empty encoding */);
9266   ins_pipe(pipe_class_empty);
9267 %}
9268 
9269 instruct castPP(iRegPNoSp dst)
9270 %{
9271   match(Set dst (CastPP dst));
9272 
9273   size(0);
9274   format %{ "# castPP of $dst" %}
9275   ins_encode(/* empty encoding */);
9276   ins_pipe(pipe_class_empty);
9277 %}
9278 
9279 instruct castII(iRegI dst)
9280 %{
9281   match(Set dst (CastII dst));
9282 
9283   size(0);
9284   format %{ "# castII of $dst" %}
9285   ins_encode(/* empty encoding */);
9286   ins_cost(0);
9287   ins_pipe(pipe_class_empty);
9288 %}
9289 
9290 // ============================================================================
9291 // Atomic operation instructions
9292 //
9293 // Intel and SPARC both implement Ideal Node LoadPLocked and
9294 // Store{PIL}Conditional instructions using a normal load for the
9295 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9296 //
9297 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9298 // pair to lock object allocations from Eden space when not using
9299 // TLABs.
9300 //
9301 // There does not appear to be a Load{IL}Locked Ideal Node and the
9302 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9303 // and to use StoreIConditional only for 32-bit and StoreLConditional
9304 // only for 64-bit.
9305 //
9306 // We implement LoadPLocked and StorePLocked instructions using,
9307 // respectively the AArch64 hw load-exclusive and store-conditional
9308 // instructions. Whereas we must implement each of
9309 // Store{IL}Conditional using a CAS which employs a pair of
9310 // instructions comprising a load-exclusive followed by a
9311 // store-conditional.
9312 
9313 
9314 // Locked-load (linked load) of the current heap-top
9315 // used when updating the eden heap top
9316 // implemented using ldaxr on AArch64
9317 
9318 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9319 %{
9320   match(Set dst (LoadPLocked mem));
9321 
9322   ins_cost(VOLATILE_REF_COST);
9323 
9324   format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9325 
9326   ins_encode(aarch64_enc_ldaxr(dst, mem));
9327 
9328   ins_pipe(pipe_serial);
9329 %}
9330 
9331 // Conditional-store of the updated heap-top.
9332 // Used during allocation of the shared heap.
9333 // Sets flag (EQ) on success.
9334 // implemented using stlxr on AArch64.
9335 
9336 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9337 %{
9338   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9339 
9340   ins_cost(VOLATILE_REF_COST);
9341 
9342  // TODO
9343  // do we need to do a store-conditional release or can we just use a
9344  // plain store-conditional?
9345 
9346   format %{
9347     "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9348     "cmpw rscratch1, zr\t# EQ on successful write"
9349   %}
9350 
9351   ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9352 
9353   ins_pipe(pipe_serial);
9354 %}
9355 
9356 
9357 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9358 // when attempting to rebias a lock towards the current thread.  We
9359 // must use the acquire form of cmpxchg in order to guarantee acquire
9360 // semantics in this case.
9361 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9362 %{
9363   match(Set cr (StoreLConditional mem (Binary oldval newval)));
9364 
9365   ins_cost(VOLATILE_REF_COST);
9366 
9367   format %{
9368     "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9369     "cmpw rscratch1, zr\t# EQ on successful write"
9370   %}
9371 
9372   ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9373 
9374   ins_pipe(pipe_slow);
9375 %}
9376 
9377 // storeIConditional also has acquire semantics, for no better reason
9378 // than matching storeLConditional.  At the time of writing this
9379 // comment storeIConditional was not used anywhere by AArch64.
9380 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9381 %{
9382   match(Set cr (StoreIConditional mem (Binary oldval newval)));
9383 
9384   ins_cost(VOLATILE_REF_COST);
9385 
9386   format %{
9387     "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9388     "cmpw rscratch1, zr\t# EQ on successful write"
9389   %}
9390 
9391   ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9392 
9393   ins_pipe(pipe_slow);
9394 %}
9395 
9396 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9397 // can't match them
9398 
9399 // standard CompareAndSwapX when we are using barriers
9400 // these have higher priority than the rules selected by a predicate
9401 
9402 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9403 
9404   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9405   ins_cost(2 * VOLATILE_REF_COST);
9406 
9407   effect(KILL cr);
9408 
9409  format %{
9410     "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9411     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9412  %}
9413 
9414  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9415             aarch64_enc_cset_eq(res));
9416 
9417   ins_pipe(pipe_slow);
9418 %}
9419 
9420 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9421 
9422   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9423   ins_cost(2 * VOLATILE_REF_COST);
9424 
9425   effect(KILL cr);
9426 
9427  format %{
9428     "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9429     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9430  %}
9431 
9432  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9433             aarch64_enc_cset_eq(res));
9434 
9435   ins_pipe(pipe_slow);
9436 %}
9437 
9438 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9439 
9440   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9441   ins_cost(2 * VOLATILE_REF_COST);
9442 
9443   effect(KILL cr);
9444 
9445  format %{
9446     "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9447     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9448  %}
9449 
9450  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
9451             aarch64_enc_cset_eq(res));
9452 
9453   ins_pipe(pipe_slow);
9454 %}
9455 
9456 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9457 
9458   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9459   ins_cost(2 * VOLATILE_REF_COST);
9460 
9461   effect(KILL cr);
9462 
9463  format %{
9464     "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9465     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9466  %}
9467 
9468  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
9469             aarch64_enc_cset_eq(res));
9470 
9471   ins_pipe(pipe_slow);
9472 %}
9473 
9474 // alternative CompareAndSwapX when we are eliding barriers
9475 
9476 instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
9477 
9478   predicate(needs_acquiring_load_exclusive(n));
9479   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
9480   ins_cost(VOLATILE_REF_COST);
9481 
9482   effect(KILL cr);
9483 
9484  format %{
9485     "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
9486     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9487  %}
9488 
9489  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9490             aarch64_enc_cset_eq(res));
9491 
9492   ins_pipe(pipe_slow);
9493 %}
9494 
9495 instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
9496 
9497   predicate(needs_acquiring_load_exclusive(n));
9498   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
9499   ins_cost(VOLATILE_REF_COST);
9500 
9501   effect(KILL cr);
9502 
9503  format %{
9504     "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
9505     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9506  %}
9507 
9508  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9509             aarch64_enc_cset_eq(res));
9510 
9511   ins_pipe(pipe_slow);
9512 %}
9513 
9514 instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
9515 
9516   predicate(needs_acquiring_load_exclusive(n));
9517   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
9518   ins_cost(VOLATILE_REF_COST);
9519 
9520   effect(KILL cr);
9521 
9522  format %{
9523     "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
9524     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9525  %}
9526 
9527  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
9528             aarch64_enc_cset_eq(res));
9529 
9530   ins_pipe(pipe_slow);
9531 %}
9532 
9533 instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
9534 
9535   predicate(needs_acquiring_load_exclusive(n));
9536   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
9537   ins_cost(VOLATILE_REF_COST);
9538 
9539   effect(KILL cr);
9540 
9541  format %{
9542     "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
9543     "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
9544  %}
9545 
9546  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
9547             aarch64_enc_cset_eq(res));
9548 
9549   ins_pipe(pipe_slow);
9550 %}
9551 
9552 
9553 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
9554   match(Set prev (GetAndSetI mem newv));
9555   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
9556   ins_encode %{
9557     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
9558   %}
9559   ins_pipe(pipe_serial);
9560 %}
9561 
9562 instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
9563   match(Set prev (GetAndSetL mem newv));
9564   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
9565   ins_encode %{
9566     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
9567   %}
9568   ins_pipe(pipe_serial);
9569 %}
9570 
9571 instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
9572   match(Set prev (GetAndSetN mem newv));
9573   format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
9574   ins_encode %{
9575     __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
9576   %}
9577   ins_pipe(pipe_serial);
9578 %}
9579 
9580 instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
9581   match(Set prev (GetAndSetP mem newv));
9582   format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
9583   ins_encode %{
9584     __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
9585   %}
9586   ins_pipe(pipe_serial);
9587 %}
9588 
9589 
9590 instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
9591   match(Set newval (GetAndAddL mem incr));
9592   ins_cost(INSN_COST * 10);
9593   format %{ "get_and_addL $newval, [$mem], $incr" %}
9594   ins_encode %{
9595     __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
9596   %}
9597   ins_pipe(pipe_serial);
9598 %}
9599 
9600 instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
9601   predicate(n->as_LoadStore()->result_not_used());
9602   match(Set dummy (GetAndAddL mem incr));
9603   ins_cost(INSN_COST * 9);
9604   format %{ "get_and_addL [$mem], $incr" %}
9605   ins_encode %{
9606     __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
9607   %}
9608   ins_pipe(pipe_serial);
9609 %}
9610 
9611 instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
9612   match(Set newval (GetAndAddL mem incr));
9613   ins_cost(INSN_COST * 10);
9614   format %{ "get_and_addL $newval, [$mem], $incr" %}
9615   ins_encode %{
9616     __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
9617   %}
9618   ins_pipe(pipe_serial);
9619 %}
9620 
9621 instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
9622   predicate(n->as_LoadStore()->result_not_used());
9623   match(Set dummy (GetAndAddL mem incr));
9624   ins_cost(INSN_COST * 9);
9625   format %{ "get_and_addL [$mem], $incr" %}
9626   ins_encode %{
9627     __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
9628   %}
9629   ins_pipe(pipe_serial);
9630 %}
9631 
9632 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
9633   match(Set newval (GetAndAddI mem incr));
9634   ins_cost(INSN_COST * 10);
9635   format %{ "get_and_addI $newval, [$mem], $incr" %}
9636   ins_encode %{
9637     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
9638   %}
9639   ins_pipe(pipe_serial);
9640 %}
9641 
9642 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
9643   predicate(n->as_LoadStore()->result_not_used());
9644   match(Set dummy (GetAndAddI mem incr));
9645   ins_cost(INSN_COST * 9);
9646   format %{ "get_and_addI [$mem], $incr" %}
9647   ins_encode %{
9648     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
9649   %}
9650   ins_pipe(pipe_serial);
9651 %}
9652 
9653 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
9654   match(Set newval (GetAndAddI mem incr));
9655   ins_cost(INSN_COST * 10);
9656   format %{ "get_and_addI $newval, [$mem], $incr" %}
9657   ins_encode %{
9658     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
9659   %}
9660   ins_pipe(pipe_serial);
9661 %}
9662 
9663 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
9664   predicate(n->as_LoadStore()->result_not_used());
9665   match(Set dummy (GetAndAddI mem incr));
9666   ins_cost(INSN_COST * 9);
9667   format %{ "get_and_addI [$mem], $incr" %}
9668   ins_encode %{
9669     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
9670   %}
9671   ins_pipe(pipe_serial);
9672 %}
9673 
9674 // Manifest a CmpL result in an integer register.
9675 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
9676 instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
9677 %{
9678   match(Set dst (CmpL3 src1 src2));
9679   effect(KILL flags);
9680 
9681   ins_cost(INSN_COST * 6);
9682   format %{
9683       "cmp $src1, $src2"
9684       "csetw $dst, ne"
9685       "cnegw $dst, lt"
9686   %}
9687   // format %{ "CmpL3 $dst, $src1, $src2" %}
9688   ins_encode %{
9689     __ cmp($src1$$Register, $src2$$Register);
9690     __ csetw($dst$$Register, Assembler::NE);
9691     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
9692   %}
9693 
9694   ins_pipe(pipe_class_default);
9695 %}
9696 
9697 instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
9698 %{
9699   match(Set dst (CmpL3 src1 src2));
9700   effect(KILL flags);
9701 
9702   ins_cost(INSN_COST * 6);
9703   format %{
9704       "cmp $src1, $src2"
9705       "csetw $dst, ne"
9706       "cnegw $dst, lt"
9707   %}
9708   ins_encode %{
9709     int32_t con = (int32_t)$src2$$constant;
9710      if (con < 0) {
9711       __ adds(zr, $src1$$Register, -con);
9712     } else {
9713       __ subs(zr, $src1$$Register, con);
9714     }
9715     __ csetw($dst$$Register, Assembler::NE);
9716     __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
9717   %}
9718 
9719   ins_pipe(pipe_class_default);
9720 %}
9721 
9722 // ============================================================================
9723 // Conditional Move Instructions
9724 
9725 // n.b. we have identical rules for both a signed compare op (cmpOp)
9726 // and an unsigned compare op (cmpOpU). it would be nice if we could
9727 // define an op class which merged both inputs and use it to type the
9728 // argument to a single rule. unfortunatelyt his fails because the
9729 // opclass does not live up to the COND_INTER interface of its
9730 // component operands. When the generic code tries to negate the
9731 // operand it ends up running the generci Machoper::negate method
9732 // which throws a ShouldNotHappen. So, we have to provide two flavours
9733 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9734 
9735 instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9736   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
9737 
9738   ins_cost(INSN_COST * 2);
9739   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}
9740 
9741   ins_encode %{
9742     __ cselw(as_Register($dst$$reg),
9743              as_Register($src2$$reg),
9744              as_Register($src1$$reg),
9745              (Assembler::Condition)$cmp$$cmpcode);
9746   %}
9747 
9748   ins_pipe(icond_reg_reg);
9749 %}
9750 
9751 instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
9752   match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));
9753 
9754   ins_cost(INSN_COST * 2);
9755   format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}
9756 
9757   ins_encode %{
9758     __ cselw(as_Register($dst$$reg),
9759              as_Register($src2$$reg),
9760              as_Register($src1$$reg),
9761              (Assembler::Condition)$cmp$$cmpcode);
9762   %}
9763 
9764   ins_pipe(icond_reg_reg);
9765 %}
9766 
9767 // special cases where one arg is zero
9768 
9769 // n.b. this is selected in preference to the rule above because it
9770 // avoids loading constant 0 into a source register
9771 
9772 // TODO
9773 // we ought only to be able to cull one of these variants as the ideal
9774 // transforms ought always to order the zero consistently (to left/right?)
9775 
9776 instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
9777   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
9778 
9779   ins_cost(INSN_COST * 2);
9780   format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}
9781 
9782   ins_encode %{
9783     __ cselw(as_Register($dst$$reg),
9784              as_Register($src$$reg),
9785              zr,
9786              (Assembler::Condition)$cmp$$cmpcode);
9787   %}
9788 
9789   ins_pipe(icond_reg);
9790 %}
9791 
9792 instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
9793   match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));
9794 
9795   ins_cost(INSN_COST * 2);
9796   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}
9797 
9798   ins_encode %{
9799     __ cselw(as_Register($dst$$reg),
9800              as_Register($src$$reg),
9801              zr,
9802              (Assembler::Condition)$cmp$$cmpcode);
9803   %}
9804 
9805   ins_pipe(icond_reg);
9806 %}
9807 
9808 instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
9809   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
9810 
9811   ins_cost(INSN_COST * 2);
9812   format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}
9813 
9814   ins_encode %{
9815     __ cselw(as_Register($dst$$reg),
9816              zr,
9817              as_Register($src$$reg),
9818              (Assembler::Condition)$cmp$$cmpcode);
9819   %}
9820 
9821   ins_pipe(icond_reg);
9822 %}
9823 
9824 instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
9825   match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));
9826 
9827   ins_cost(INSN_COST * 2);
9828   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}
9829 
9830   ins_encode %{
9831     __ cselw(as_Register($dst$$reg),
9832              zr,
9833              as_Register($src$$reg),
9834              (Assembler::Condition)$cmp$$cmpcode);
9835   %}
9836 
9837   ins_pipe(icond_reg);
9838 %}
9839 
9840 // special case for creating a boolean 0 or 1
9841 
9842 // n.b. this is selected in preference to the rule above because it
9843 // avoids loading constants 0 and 1 into a source register
9844 
9845 instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
9846   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
9847 
9848   ins_cost(INSN_COST * 2);
9849   format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}
9850 
9851   ins_encode %{
9852     // equivalently
9853     // cset(as_Register($dst$$reg),
9854     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
9855     __ csincw(as_Register($dst$$reg),
9856              zr,
9857              zr,
9858              (Assembler::Condition)$cmp$$cmpcode);
9859   %}
9860 
9861   ins_pipe(icond_none);
9862 %}
9863 
9864 instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
9865   match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));
9866 
9867   ins_cost(INSN_COST * 2);
9868   format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}
9869 
9870   ins_encode %{
9871     // equivalently
9872     // cset(as_Register($dst$$reg),
9873     //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
9874     __ csincw(as_Register($dst$$reg),
9875              zr,
9876              zr,
9877              (Assembler::Condition)$cmp$$cmpcode);
9878   %}
9879 
9880   ins_pipe(icond_none);
9881 %}
9882 
9883 instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
9884   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
9885 
9886   ins_cost(INSN_COST * 2);
9887   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}
9888 
9889   ins_encode %{
9890     __ csel(as_Register($dst$$reg),
9891             as_Register($src2$$reg),
9892             as_Register($src1$$reg),
9893             (Assembler::Condition)$cmp$$cmpcode);
9894   %}
9895 
9896   ins_pipe(icond_reg_reg);
9897 %}
9898 
9899 instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
9900   match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));
9901 
9902   ins_cost(INSN_COST * 2);
9903   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}
9904 
9905   ins_encode %{
9906     __ csel(as_Register($dst$$reg),
9907             as_Register($src2$$reg),
9908             as_Register($src1$$reg),
9909             (Assembler::Condition)$cmp$$cmpcode);
9910   %}
9911 
9912   ins_pipe(icond_reg_reg);
9913 %}
9914 
9915 // special cases where one arg is zero
9916 
9917 instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
9918   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
9919 
9920   ins_cost(INSN_COST * 2);
9921   format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}
9922 
9923   ins_encode %{
9924     __ csel(as_Register($dst$$reg),
9925             zr,
9926             as_Register($src$$reg),
9927             (Assembler::Condition)$cmp$$cmpcode);
9928   %}
9929 
9930   ins_pipe(icond_reg);
9931 %}
9932 
9933 instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
9934   match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));
9935 
9936   ins_cost(INSN_COST * 2);
9937   format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}
9938 
9939   ins_encode %{
9940     __ csel(as_Register($dst$$reg),
9941             zr,
9942             as_Register($src$$reg),
9943             (Assembler::Condition)$cmp$$cmpcode);
9944   %}
9945 
9946   ins_pipe(icond_reg);
9947 %}
9948 
9949 instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
9950   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
9951 
9952   ins_cost(INSN_COST * 2);
9953   format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}
9954 
9955   ins_encode %{
9956     __ csel(as_Register($dst$$reg),
9957             as_Register($src$$reg),
9958             zr,
9959             (Assembler::Condition)$cmp$$cmpcode);
9960   %}
9961 
9962   ins_pipe(icond_reg);
9963 %}
9964 
9965 instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
9966   match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));
9967 
9968   ins_cost(INSN_COST * 2);
9969   format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}
9970 
9971   ins_encode %{
9972     __ csel(as_Register($dst$$reg),
9973             as_Register($src$$reg),
9974             zr,
9975             (Assembler::Condition)$cmp$$cmpcode);
9976   %}
9977 
9978   ins_pipe(icond_reg);
9979 %}
9980 
9981 instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
9982   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
9983 
9984   ins_cost(INSN_COST * 2);
9985   format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}
9986 
9987   ins_encode %{
9988     __ csel(as_Register($dst$$reg),
9989             as_Register($src2$$reg),
9990             as_Register($src1$$reg),
9991             (Assembler::Condition)$cmp$$cmpcode);
9992   %}
9993 
9994   ins_pipe(icond_reg_reg);
9995 %}
9996 
9997 instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
9998   match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));
9999 
10000   ins_cost(INSN_COST * 2);
10001   format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}
10002 
10003   ins_encode %{
10004     __ csel(as_Register($dst$$reg),
10005             as_Register($src2$$reg),
10006             as_Register($src1$$reg),
10007             (Assembler::Condition)$cmp$$cmpcode);
10008   %}
10009 
10010   ins_pipe(icond_reg_reg);
10011 %}
10012 
10013 // special cases where one arg is zero
10014 
10015 instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10016   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10017 
10018   ins_cost(INSN_COST * 2);
10019   format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}
10020 
10021   ins_encode %{
10022     __ csel(as_Register($dst$$reg),
10023             zr,
10024             as_Register($src$$reg),
10025             (Assembler::Condition)$cmp$$cmpcode);
10026   %}
10027 
10028   ins_pipe(icond_reg);
10029 %}
10030 
10031 instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
10032   match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));
10033 
10034   ins_cost(INSN_COST * 2);
10035   format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}
10036 
10037   ins_encode %{
10038     __ csel(as_Register($dst$$reg),
10039             zr,
10040             as_Register($src$$reg),
10041             (Assembler::Condition)$cmp$$cmpcode);
10042   %}
10043 
10044   ins_pipe(icond_reg);
10045 %}
10046 
10047 instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10048   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10049 
10050   ins_cost(INSN_COST * 2);
10051   format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}
10052 
10053   ins_encode %{
10054     __ csel(as_Register($dst$$reg),
10055             as_Register($src$$reg),
10056             zr,
10057             (Assembler::Condition)$cmp$$cmpcode);
10058   %}
10059 
10060   ins_pipe(icond_reg);
10061 %}
10062 
10063 instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
10064   match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));
10065 
10066   ins_cost(INSN_COST * 2);
10067   format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}
10068 
10069   ins_encode %{
10070     __ csel(as_Register($dst$$reg),
10071             as_Register($src$$reg),
10072             zr,
10073             (Assembler::Condition)$cmp$$cmpcode);
10074   %}
10075 
10076   ins_pipe(icond_reg);
10077 %}
10078 
10079 instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10080   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10081 
10082   ins_cost(INSN_COST * 2);
10083   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
10084 
10085   ins_encode %{
10086     __ cselw(as_Register($dst$$reg),
10087              as_Register($src2$$reg),
10088              as_Register($src1$$reg),
10089              (Assembler::Condition)$cmp$$cmpcode);
10090   %}
10091 
10092   ins_pipe(icond_reg_reg);
10093 %}
10094 
10095 instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
10096   match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));
10097 
10098   ins_cost(INSN_COST * 2);
10099   format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}
10100 
10101   ins_encode %{
10102     __ cselw(as_Register($dst$$reg),
10103              as_Register($src2$$reg),
10104              as_Register($src1$$reg),
10105              (Assembler::Condition)$cmp$$cmpcode);
10106   %}
10107 
10108   ins_pipe(icond_reg_reg);
10109 %}
10110 
10111 // special cases where one arg is zero
10112 
10113 instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10114   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10115 
10116   ins_cost(INSN_COST * 2);
10117   format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}
10118 
10119   ins_encode %{
10120     __ cselw(as_Register($dst$$reg),
10121              zr,
10122              as_Register($src$$reg),
10123              (Assembler::Condition)$cmp$$cmpcode);
10124   %}
10125 
10126   ins_pipe(icond_reg);
10127 %}
10128 
10129 instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
10130   match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));
10131 
10132   ins_cost(INSN_COST * 2);
10133   format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}
10134 
10135   ins_encode %{
10136     __ cselw(as_Register($dst$$reg),
10137              zr,
10138              as_Register($src$$reg),
10139              (Assembler::Condition)$cmp$$cmpcode);
10140   %}
10141 
10142   ins_pipe(icond_reg);
10143 %}
10144 
10145 instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10146   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10147 
10148   ins_cost(INSN_COST * 2);
10149   format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}
10150 
10151   ins_encode %{
10152     __ cselw(as_Register($dst$$reg),
10153              as_Register($src$$reg),
10154              zr,
10155              (Assembler::Condition)$cmp$$cmpcode);
10156   %}
10157 
10158   ins_pipe(icond_reg);
10159 %}
10160 
10161 instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
10162   match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));
10163 
10164   ins_cost(INSN_COST * 2);
10165   format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}
10166 
10167   ins_encode %{
10168     __ cselw(as_Register($dst$$reg),
10169              as_Register($src$$reg),
10170              zr,
10171              (Assembler::Condition)$cmp$$cmpcode);
10172   %}
10173 
10174   ins_pipe(icond_reg);
10175 %}
10176 
10177 instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
10178 %{
10179   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10180 
10181   ins_cost(INSN_COST * 3);
10182 
10183   format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
10184   ins_encode %{
10185     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10186     __ fcsels(as_FloatRegister($dst$$reg),
10187               as_FloatRegister($src2$$reg),
10188               as_FloatRegister($src1$$reg),
10189               cond);
10190   %}
10191 
10192   ins_pipe(fp_cond_reg_reg_s);
10193 %}
10194 
10195 instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
10196 %{
10197   match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));
10198 
10199   ins_cost(INSN_COST * 3);
10200 
10201   format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
10202   ins_encode %{
10203     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10204     __ fcsels(as_FloatRegister($dst$$reg),
10205               as_FloatRegister($src2$$reg),
10206               as_FloatRegister($src1$$reg),
10207               cond);
10208   %}
10209 
10210   ins_pipe(fp_cond_reg_reg_s);
10211 %}
10212 
10213 instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
10214 %{
10215   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10216 
10217   ins_cost(INSN_COST * 3);
10218 
10219   format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
10220   ins_encode %{
10221     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10222     __ fcseld(as_FloatRegister($dst$$reg),
10223               as_FloatRegister($src2$$reg),
10224               as_FloatRegister($src1$$reg),
10225               cond);
10226   %}
10227 
10228   ins_pipe(fp_cond_reg_reg_d);
10229 %}
10230 
10231 instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
10232 %{
10233   match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));
10234 
10235   ins_cost(INSN_COST * 3);
10236 
10237   format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
10238   ins_encode %{
10239     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
10240     __ fcseld(as_FloatRegister($dst$$reg),
10241               as_FloatRegister($src2$$reg),
10242               as_FloatRegister($src1$$reg),
10243               cond);
10244   %}
10245 
10246   ins_pipe(fp_cond_reg_reg_d);
10247 %}
10248 
10249 // ============================================================================
10250 // Arithmetic Instructions
10251 //
10252 
10253 // Integer Addition
10254 
10255 // TODO
10256 // these currently employ operations which do not set CR and hence are
10257 // not flagged as killing CR but we would like to isolate the cases
10258 // where we want to set flags from those where we don't. need to work
10259 // out how to do that.
10260 
10261 instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10262   match(Set dst (AddI src1 src2));
10263 
10264   ins_cost(INSN_COST);
10265   format %{ "addw  $dst, $src1, $src2" %}
10266 
10267   ins_encode %{
10268     __ addw(as_Register($dst$$reg),
10269             as_Register($src1$$reg),
10270             as_Register($src2$$reg));
10271   %}
10272 
10273   ins_pipe(ialu_reg_reg);
10274 %}
10275 
10276 instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10277   match(Set dst (AddI src1 src2));
10278 
10279   ins_cost(INSN_COST);
10280   format %{ "addw $dst, $src1, $src2" %}
10281 
10282   // use opcode to indicate that this is an add not a sub
10283   opcode(0x0);
10284 
10285   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10286 
10287   ins_pipe(ialu_reg_imm);
10288 %}
10289 
10290 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
10291   match(Set dst (AddI (ConvL2I src1) src2));
10292 
10293   ins_cost(INSN_COST);
10294   format %{ "addw $dst, $src1, $src2" %}
10295 
10296   // use opcode to indicate that this is an add not a sub
10297   opcode(0x0);
10298 
10299   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10300 
10301   ins_pipe(ialu_reg_imm);
10302 %}
10303 
10304 // Pointer Addition
10305 instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
10306   match(Set dst (AddP src1 src2));
10307 
10308   ins_cost(INSN_COST);
10309   format %{ "add $dst, $src1, $src2\t# ptr" %}
10310 
10311   ins_encode %{
10312     __ add(as_Register($dst$$reg),
10313            as_Register($src1$$reg),
10314            as_Register($src2$$reg));
10315   %}
10316 
10317   ins_pipe(ialu_reg_reg);
10318 %}
10319 
10320 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
10321   match(Set dst (AddP src1 (ConvI2L src2)));
10322 
10323   ins_cost(1.9 * INSN_COST);
10324   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
10325 
10326   ins_encode %{
10327     __ add(as_Register($dst$$reg),
10328            as_Register($src1$$reg),
10329            as_Register($src2$$reg), ext::sxtw);
10330   %}
10331 
10332   ins_pipe(ialu_reg_reg);
10333 %}
10334 
10335 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
10336   match(Set dst (AddP src1 (LShiftL src2 scale)));
10337 
10338   ins_cost(1.9 * INSN_COST);
10339   format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}
10340 
10341   ins_encode %{
10342     __ lea(as_Register($dst$$reg),
10343            Address(as_Register($src1$$reg), as_Register($src2$$reg),
10344                    Address::lsl($scale$$constant)));
10345   %}
10346 
10347   ins_pipe(ialu_reg_reg_shift);
10348 %}
10349 
10350 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
10351   match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
10352 
10353   ins_cost(1.9 * INSN_COST);
10354   format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}
10355 
10356   ins_encode %{
10357     __ lea(as_Register($dst$$reg),
10358            Address(as_Register($src1$$reg), as_Register($src2$$reg),
10359                    Address::sxtw($scale$$constant)));
10360   %}
10361 
10362   ins_pipe(ialu_reg_reg_shift);
10363 %}
10364 
10365 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
10366   match(Set dst (LShiftL (ConvI2L src) scale));
10367 
10368   ins_cost(INSN_COST);
10369   format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}
10370 
10371   ins_encode %{
10372     __ sbfiz(as_Register($dst$$reg),
10373           as_Register($src$$reg),
10374           $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
10375   %}
10376 
10377   ins_pipe(ialu_reg_shift);
10378 %}
10379 
10380 // Pointer Immediate Addition
10381 // n.b. this needs to be more expensive than using an indirect memory
10382 // operand
10383 instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
10384   match(Set dst (AddP src1 src2));
10385 
10386   ins_cost(INSN_COST);
10387   format %{ "add $dst, $src1, $src2\t# ptr" %}
10388 
10389   // use opcode to indicate that this is an add not a sub
10390   opcode(0x0);
10391 
10392   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10393 
10394   ins_pipe(ialu_reg_imm);
10395 %}
10396 
10397 // Long Addition
10398 instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10399 
10400   match(Set dst (AddL src1 src2));
10401 
10402   ins_cost(INSN_COST);
10403   format %{ "add  $dst, $src1, $src2" %}
10404 
10405   ins_encode %{
10406     __ add(as_Register($dst$$reg),
10407            as_Register($src1$$reg),
10408            as_Register($src2$$reg));
10409   %}
10410 
10411   ins_pipe(ialu_reg_reg);
10412 %}
10413 
10414 // No constant pool entries requiredLong Immediate Addition.
10415 instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10416   match(Set dst (AddL src1 src2));
10417 
10418   ins_cost(INSN_COST);
10419   format %{ "add $dst, $src1, $src2" %}
10420 
10421   // use opcode to indicate that this is an add not a sub
10422   opcode(0x0);
10423 
10424   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10425 
10426   ins_pipe(ialu_reg_imm);
10427 %}
10428 
10429 // Integer Subtraction
10430 instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10431   match(Set dst (SubI src1 src2));
10432 
10433   ins_cost(INSN_COST);
10434   format %{ "subw  $dst, $src1, $src2" %}
10435 
10436   ins_encode %{
10437     __ subw(as_Register($dst$$reg),
10438             as_Register($src1$$reg),
10439             as_Register($src2$$reg));
10440   %}
10441 
10442   ins_pipe(ialu_reg_reg);
10443 %}
10444 
10445 // Immediate Subtraction
10446 instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
10447   match(Set dst (SubI src1 src2));
10448 
10449   ins_cost(INSN_COST);
10450   format %{ "subw $dst, $src1, $src2" %}
10451 
10452   // use opcode to indicate that this is a sub not an add
10453   opcode(0x1);
10454 
10455   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
10456 
10457   ins_pipe(ialu_reg_imm);
10458 %}
10459 
10460 // Long Subtraction
10461 instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10462 
10463   match(Set dst (SubL src1 src2));
10464 
10465   ins_cost(INSN_COST);
10466   format %{ "sub  $dst, $src1, $src2" %}
10467 
10468   ins_encode %{
10469     __ sub(as_Register($dst$$reg),
10470            as_Register($src1$$reg),
10471            as_Register($src2$$reg));
10472   %}
10473 
10474   ins_pipe(ialu_reg_reg);
10475 %}
10476 
10477 // No constant pool entries requiredLong Immediate Subtraction.
10478 instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
10479   match(Set dst (SubL src1 src2));
10480 
10481   ins_cost(INSN_COST);
10482   format %{ "sub$dst, $src1, $src2" %}
10483 
10484   // use opcode to indicate that this is a sub not an add
10485   opcode(0x1);
10486 
10487   ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
10488 
10489   ins_pipe(ialu_reg_imm);
10490 %}
10491 
10492 // Integer Negation (special case for sub)
10493 
10494 instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
10495   match(Set dst (SubI zero src));
10496 
10497   ins_cost(INSN_COST);
10498   format %{ "negw $dst, $src\t# int" %}
10499 
10500   ins_encode %{
10501     __ negw(as_Register($dst$$reg),
10502             as_Register($src$$reg));
10503   %}
10504 
10505   ins_pipe(ialu_reg);
10506 %}
10507 
10508 // Long Negation
10509 
10510 instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
10511   match(Set dst (SubL zero src));
10512 
10513   ins_cost(INSN_COST);
10514   format %{ "neg $dst, $src\t# long" %}
10515 
10516   ins_encode %{
10517     __ neg(as_Register($dst$$reg),
10518            as_Register($src$$reg));
10519   %}
10520 
10521   ins_pipe(ialu_reg);
10522 %}
10523 
10524 // Integer Multiply
10525 
10526 instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10527   match(Set dst (MulI src1 src2));
10528 
10529   ins_cost(INSN_COST * 3);
10530   format %{ "mulw  $dst, $src1, $src2" %}
10531 
10532   ins_encode %{
10533     __ mulw(as_Register($dst$$reg),
10534             as_Register($src1$$reg),
10535             as_Register($src2$$reg));
10536   %}
10537 
10538   ins_pipe(imul_reg_reg);
10539 %}
10540 
10541 instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10542   match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
10543 
10544   ins_cost(INSN_COST * 3);
10545   format %{ "smull  $dst, $src1, $src2" %}
10546 
10547   ins_encode %{
10548     __ smull(as_Register($dst$$reg),
10549              as_Register($src1$$reg),
10550              as_Register($src2$$reg));
10551   %}
10552 
10553   ins_pipe(imul_reg_reg);
10554 %}
10555 
10556 // Long Multiply
10557 
10558 instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10559   match(Set dst (MulL src1 src2));
10560 
10561   ins_cost(INSN_COST * 5);
10562   format %{ "mul  $dst, $src1, $src2" %}
10563 
10564   ins_encode %{
10565     __ mul(as_Register($dst$$reg),
10566            as_Register($src1$$reg),
10567            as_Register($src2$$reg));
10568   %}
10569 
10570   ins_pipe(lmul_reg_reg);
10571 %}
10572 
10573 instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
10574 %{
10575   match(Set dst (MulHiL src1 src2));
10576 
10577   ins_cost(INSN_COST * 7);
10578   format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}
10579 
10580   ins_encode %{
10581     __ smulh(as_Register($dst$$reg),
10582              as_Register($src1$$reg),
10583              as_Register($src2$$reg));
10584   %}
10585 
10586   ins_pipe(lmul_reg_reg);
10587 %}
10588 
10589 // Combined Integer Multiply & Add/Sub
10590 
10591 instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10592   match(Set dst (AddI src3 (MulI src1 src2)));
10593 
10594   ins_cost(INSN_COST * 3);
10595   format %{ "madd  $dst, $src1, $src2, $src3" %}
10596 
10597   ins_encode %{
10598     __ maddw(as_Register($dst$$reg),
10599              as_Register($src1$$reg),
10600              as_Register($src2$$reg),
10601              as_Register($src3$$reg));
10602   %}
10603 
10604   ins_pipe(imac_reg_reg);
10605 %}
10606 
10607 instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
10608   match(Set dst (SubI src3 (MulI src1 src2)));
10609 
10610   ins_cost(INSN_COST * 3);
10611   format %{ "msub  $dst, $src1, $src2, $src3" %}
10612 
10613   ins_encode %{
10614     __ msubw(as_Register($dst$$reg),
10615              as_Register($src1$$reg),
10616              as_Register($src2$$reg),
10617              as_Register($src3$$reg));
10618   %}
10619 
10620   ins_pipe(imac_reg_reg);
10621 %}
10622 
10623 // Combined Long Multiply & Add/Sub
10624 
10625 instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10626   match(Set dst (AddL src3 (MulL src1 src2)));
10627 
10628   ins_cost(INSN_COST * 5);
10629   format %{ "madd  $dst, $src1, $src2, $src3" %}
10630 
10631   ins_encode %{
10632     __ madd(as_Register($dst$$reg),
10633             as_Register($src1$$reg),
10634             as_Register($src2$$reg),
10635             as_Register($src3$$reg));
10636   %}
10637 
10638   ins_pipe(lmac_reg_reg);
10639 %}
10640 
10641 instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
10642   match(Set dst (SubL src3 (MulL src1 src2)));
10643 
10644   ins_cost(INSN_COST * 5);
10645   format %{ "msub  $dst, $src1, $src2, $src3" %}
10646 
10647   ins_encode %{
10648     __ msub(as_Register($dst$$reg),
10649             as_Register($src1$$reg),
10650             as_Register($src2$$reg),
10651             as_Register($src3$$reg));
10652   %}
10653 
10654   ins_pipe(lmac_reg_reg);
10655 %}
10656 
10657 // Integer Divide
10658 
10659 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10660   match(Set dst (DivI src1 src2));
10661 
10662   ins_cost(INSN_COST * 19);
10663   format %{ "sdivw  $dst, $src1, $src2" %}
10664 
10665   ins_encode(aarch64_enc_divw(dst, src1, src2));
10666   ins_pipe(idiv_reg_reg);
10667 %}
10668 
10669 instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
10670   match(Set dst (URShiftI (RShiftI src1 div1) div2));
10671   ins_cost(INSN_COST);
10672   format %{ "lsrw $dst, $src1, $div1" %}
10673   ins_encode %{
10674     __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
10675   %}
10676   ins_pipe(ialu_reg_shift);
10677 %}
10678 
10679 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
10680   match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
10681   ins_cost(INSN_COST);
10682   format %{ "addw $dst, $src, LSR $div1" %}
10683 
10684   ins_encode %{
10685     __ addw(as_Register($dst$$reg),
10686               as_Register($src$$reg),
10687               as_Register($src$$reg),
10688               Assembler::LSR, 31);
10689   %}
10690   ins_pipe(ialu_reg);
10691 %}
10692 
10693 // Long Divide
10694 
10695 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10696   match(Set dst (DivL src1 src2));
10697 
10698   ins_cost(INSN_COST * 35);
10699   format %{ "sdiv   $dst, $src1, $src2" %}
10700 
10701   ins_encode(aarch64_enc_div(dst, src1, src2));
10702   ins_pipe(ldiv_reg_reg);
10703 %}
10704 
10705 instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
10706   match(Set dst (URShiftL (RShiftL src1 div1) div2));
10707   ins_cost(INSN_COST);
10708   format %{ "lsr $dst, $src1, $div1" %}
10709   ins_encode %{
10710     __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
10711   %}
10712   ins_pipe(ialu_reg_shift);
10713 %}
10714 
10715 instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
10716   match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
10717   ins_cost(INSN_COST);
10718   format %{ "add $dst, $src, $div1" %}
10719 
10720   ins_encode %{
10721     __ add(as_Register($dst$$reg),
10722               as_Register($src$$reg),
10723               as_Register($src$$reg),
10724               Assembler::LSR, 63);
10725   %}
10726   ins_pipe(ialu_reg);
10727 %}
10728 
10729 // Integer Remainder
10730 
10731 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10732   match(Set dst (ModI src1 src2));
10733 
10734   ins_cost(INSN_COST * 22);
10735   format %{ "sdivw  rscratch1, $src1, $src2\n\t"
10736             "msubw($dst, rscratch1, $src2, $src1" %}
10737 
10738   ins_encode(aarch64_enc_modw(dst, src1, src2));
10739   ins_pipe(idiv_reg_reg);
10740 %}
10741 
10742 // Long Remainder
10743 
10744 instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
10745   match(Set dst (ModL src1 src2));
10746 
10747   ins_cost(INSN_COST * 38);
10748   format %{ "sdiv   rscratch1, $src1, $src2\n"
10749             "msub($dst, rscratch1, $src2, $src1" %}
10750 
10751   ins_encode(aarch64_enc_mod(dst, src1, src2));
10752   ins_pipe(ldiv_reg_reg);
10753 %}
10754 
10755 // Integer Shifts
10756 
10757 // Shift Left Register
10758 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10759   match(Set dst (LShiftI src1 src2));
10760 
10761   ins_cost(INSN_COST * 2);
10762   format %{ "lslvw  $dst, $src1, $src2" %}
10763 
10764   ins_encode %{
10765     __ lslvw(as_Register($dst$$reg),
10766              as_Register($src1$$reg),
10767              as_Register($src2$$reg));
10768   %}
10769 
10770   ins_pipe(ialu_reg_reg_vshift);
10771 %}
10772 
10773 // Shift Left Immediate
10774 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10775   match(Set dst (LShiftI src1 src2));
10776 
10777   ins_cost(INSN_COST);
10778   format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
10779 
10780   ins_encode %{
10781     __ lslw(as_Register($dst$$reg),
10782             as_Register($src1$$reg),
10783             $src2$$constant & 0x1f);
10784   %}
10785 
10786   ins_pipe(ialu_reg_shift);
10787 %}
10788 
10789 // Shift Right Logical Register
10790 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10791   match(Set dst (URShiftI src1 src2));
10792 
10793   ins_cost(INSN_COST * 2);
10794   format %{ "lsrvw  $dst, $src1, $src2" %}
10795 
10796   ins_encode %{
10797     __ lsrvw(as_Register($dst$$reg),
10798              as_Register($src1$$reg),
10799              as_Register($src2$$reg));
10800   %}
10801 
10802   ins_pipe(ialu_reg_reg_vshift);
10803 %}
10804 
10805 // Shift Right Logical Immediate
10806 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10807   match(Set dst (URShiftI src1 src2));
10808 
10809   ins_cost(INSN_COST);
10810   format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}
10811 
10812   ins_encode %{
10813     __ lsrw(as_Register($dst$$reg),
10814             as_Register($src1$$reg),
10815             $src2$$constant & 0x1f);
10816   %}
10817 
10818   ins_pipe(ialu_reg_shift);
10819 %}
10820 
10821 // Shift Right Arithmetic Register
10822 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
10823   match(Set dst (RShiftI src1 src2));
10824 
10825   ins_cost(INSN_COST * 2);
10826   format %{ "asrvw  $dst, $src1, $src2" %}
10827 
10828   ins_encode %{
10829     __ asrvw(as_Register($dst$$reg),
10830              as_Register($src1$$reg),
10831              as_Register($src2$$reg));
10832   %}
10833 
10834   ins_pipe(ialu_reg_reg_vshift);
10835 %}
10836 
10837 // Shift Right Arithmetic Immediate
10838 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
10839   match(Set dst (RShiftI src1 src2));
10840 
10841   ins_cost(INSN_COST);
10842   format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}
10843 
10844   ins_encode %{
10845     __ asrw(as_Register($dst$$reg),
10846             as_Register($src1$$reg),
10847             $src2$$constant & 0x1f);
10848   %}
10849 
10850   ins_pipe(ialu_reg_shift);
10851 %}
10852 
10853 // Combined Int Mask and Right Shift (using UBFM)
10854 // TODO
10855 
10856 // Long Shifts
10857 
10858 // Shift Left Register
10859 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10860   match(Set dst (LShiftL src1 src2));
10861 
10862   ins_cost(INSN_COST * 2);
10863   format %{ "lslv  $dst, $src1, $src2" %}
10864 
10865   ins_encode %{
10866     __ lslv(as_Register($dst$$reg),
10867             as_Register($src1$$reg),
10868             as_Register($src2$$reg));
10869   %}
10870 
10871   ins_pipe(ialu_reg_reg_vshift);
10872 %}
10873 
10874 // Shift Left Immediate
10875 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10876   match(Set dst (LShiftL src1 src2));
10877 
10878   ins_cost(INSN_COST);
10879   format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}
10880 
10881   ins_encode %{
10882     __ lsl(as_Register($dst$$reg),
10883             as_Register($src1$$reg),
10884             $src2$$constant & 0x3f);
10885   %}
10886 
10887   ins_pipe(ialu_reg_shift);
10888 %}
10889 
10890 // Shift Right Logical Register
10891 instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10892   match(Set dst (URShiftL src1 src2));
10893 
10894   ins_cost(INSN_COST * 2);
10895   format %{ "lsrv  $dst, $src1, $src2" %}
10896 
10897   ins_encode %{
10898     __ lsrv(as_Register($dst$$reg),
10899             as_Register($src1$$reg),
10900             as_Register($src2$$reg));
10901   %}
10902 
10903   ins_pipe(ialu_reg_reg_vshift);
10904 %}
10905 
10906 // Shift Right Logical Immediate
10907 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10908   match(Set dst (URShiftL src1 src2));
10909 
10910   ins_cost(INSN_COST);
10911   format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}
10912 
10913   ins_encode %{
10914     __ lsr(as_Register($dst$$reg),
10915            as_Register($src1$$reg),
10916            $src2$$constant & 0x3f);
10917   %}
10918 
10919   ins_pipe(ialu_reg_shift);
10920 %}
10921 
10922 // A special-case pattern for card table stores.
10923 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
10924   match(Set dst (URShiftL (CastP2X src1) src2));
10925 
10926   ins_cost(INSN_COST);
10927   format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}
10928 
10929   ins_encode %{
10930     __ lsr(as_Register($dst$$reg),
10931            as_Register($src1$$reg),
10932            $src2$$constant & 0x3f);
10933   %}
10934 
10935   ins_pipe(ialu_reg_shift);
10936 %}
10937 
10938 // Shift Right Arithmetic Register
10939 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
10940   match(Set dst (RShiftL src1 src2));
10941 
10942   ins_cost(INSN_COST * 2);
10943   format %{ "asrv  $dst, $src1, $src2" %}
10944 
10945   ins_encode %{
10946     __ asrv(as_Register($dst$$reg),
10947             as_Register($src1$$reg),
10948             as_Register($src2$$reg));
10949   %}
10950 
10951   ins_pipe(ialu_reg_reg_vshift);
10952 %}
10953 
10954 // Shift Right Arithmetic Immediate
10955 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10956   match(Set dst (RShiftL src1 src2));
10957 
10958   ins_cost(INSN_COST);
10959   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10960 
10961   ins_encode %{
10962     __ asr(as_Register($dst$$reg),
10963            as_Register($src1$$reg),
10964            $src2$$constant & 0x3f);
10965   %}
10966 
10967   ins_pipe(ialu_reg_shift);
10968 %}
10969 
10970 // BEGIN This section of the file is automatically generated. Do not edit --------------
10971 
10972 instruct regL_not_reg(iRegLNoSp dst,
10973                          iRegL src1, immL_M1 m1,
10974                          rFlagsReg cr) %{
10975   match(Set dst (XorL src1 m1));
10976   ins_cost(INSN_COST);
10977   format %{ "eon  $dst, $src1, zr" %}
10978 
10979   ins_encode %{
10980     __ eon(as_Register($dst$$reg),
10981               as_Register($src1$$reg),
10982               zr,
10983               Assembler::LSL, 0);
10984   %}
10985 
10986   ins_pipe(ialu_reg);
10987 %}
10988 instruct regI_not_reg(iRegINoSp dst,
10989                          iRegIorL2I src1, immI_M1 m1,
10990                          rFlagsReg cr) %{
10991   match(Set dst (XorI src1 m1));
10992   ins_cost(INSN_COST);
10993   format %{ "eonw  $dst, $src1, zr" %}
10994 
10995   ins_encode %{
10996     __ eonw(as_Register($dst$$reg),
10997               as_Register($src1$$reg),
10998               zr,
10999               Assembler::LSL, 0);
11000   %}
11001 
11002   ins_pipe(ialu_reg);
11003 %}
11004 
11005 instruct AndI_reg_not_reg(iRegINoSp dst,
11006                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11007                          rFlagsReg cr) %{
11008   match(Set dst (AndI src1 (XorI src2 m1)));
11009   ins_cost(INSN_COST);
11010   format %{ "bicw  $dst, $src1, $src2" %}
11011 
11012   ins_encode %{
11013     __ bicw(as_Register($dst$$reg),
11014               as_Register($src1$$reg),
11015               as_Register($src2$$reg),
11016               Assembler::LSL, 0);
11017   %}
11018 
11019   ins_pipe(ialu_reg_reg);
11020 %}
11021 
11022 instruct AndL_reg_not_reg(iRegLNoSp dst,
11023                          iRegL src1, iRegL src2, immL_M1 m1,
11024                          rFlagsReg cr) %{
11025   match(Set dst (AndL src1 (XorL src2 m1)));
11026   ins_cost(INSN_COST);
11027   format %{ "bic  $dst, $src1, $src2" %}
11028 
11029   ins_encode %{
11030     __ bic(as_Register($dst$$reg),
11031               as_Register($src1$$reg),
11032               as_Register($src2$$reg),
11033               Assembler::LSL, 0);
11034   %}
11035 
11036   ins_pipe(ialu_reg_reg);
11037 %}
11038 
11039 instruct OrI_reg_not_reg(iRegINoSp dst,
11040                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11041                          rFlagsReg cr) %{
11042   match(Set dst (OrI src1 (XorI src2 m1)));
11043   ins_cost(INSN_COST);
11044   format %{ "ornw  $dst, $src1, $src2" %}
11045 
11046   ins_encode %{
11047     __ ornw(as_Register($dst$$reg),
11048               as_Register($src1$$reg),
11049               as_Register($src2$$reg),
11050               Assembler::LSL, 0);
11051   %}
11052 
11053   ins_pipe(ialu_reg_reg);
11054 %}
11055 
11056 instruct OrL_reg_not_reg(iRegLNoSp dst,
11057                          iRegL src1, iRegL src2, immL_M1 m1,
11058                          rFlagsReg cr) %{
11059   match(Set dst (OrL src1 (XorL src2 m1)));
11060   ins_cost(INSN_COST);
11061   format %{ "orn  $dst, $src1, $src2" %}
11062 
11063   ins_encode %{
11064     __ orn(as_Register($dst$$reg),
11065               as_Register($src1$$reg),
11066               as_Register($src2$$reg),
11067               Assembler::LSL, 0);
11068   %}
11069 
11070   ins_pipe(ialu_reg_reg);
11071 %}
11072 
11073 instruct XorI_reg_not_reg(iRegINoSp dst,
11074                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11075                          rFlagsReg cr) %{
11076   match(Set dst (XorI m1 (XorI src2 src1)));
11077   ins_cost(INSN_COST);
11078   format %{ "eonw  $dst, $src1, $src2" %}
11079 
11080   ins_encode %{
11081     __ eonw(as_Register($dst$$reg),
11082               as_Register($src1$$reg),
11083               as_Register($src2$$reg),
11084               Assembler::LSL, 0);
11085   %}
11086 
11087   ins_pipe(ialu_reg_reg);
11088 %}
11089 
11090 instruct XorL_reg_not_reg(iRegLNoSp dst,
11091                          iRegL src1, iRegL src2, immL_M1 m1,
11092                          rFlagsReg cr) %{
11093   match(Set dst (XorL m1 (XorL src2 src1)));
11094   ins_cost(INSN_COST);
11095   format %{ "eon  $dst, $src1, $src2" %}
11096 
11097   ins_encode %{
11098     __ eon(as_Register($dst$$reg),
11099               as_Register($src1$$reg),
11100               as_Register($src2$$reg),
11101               Assembler::LSL, 0);
11102   %}
11103 
11104   ins_pipe(ialu_reg_reg);
11105 %}
11106 
11107 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
11108                          iRegIorL2I src1, iRegIorL2I src2,
11109                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11110   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
11111   ins_cost(1.9 * INSN_COST);
11112   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
11113 
11114   ins_encode %{
11115     __ bicw(as_Register($dst$$reg),
11116               as_Register($src1$$reg),
11117               as_Register($src2$$reg),
11118               Assembler::LSR,
11119               $src3$$constant & 0x1f);
11120   %}
11121 
11122   ins_pipe(ialu_reg_reg_shift);
11123 %}
11124 
11125 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
11126                          iRegL src1, iRegL src2,
11127                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11128   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
11129   ins_cost(1.9 * INSN_COST);
11130   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
11131 
11132   ins_encode %{
11133     __ bic(as_Register($dst$$reg),
11134               as_Register($src1$$reg),
11135               as_Register($src2$$reg),
11136               Assembler::LSR,
11137               $src3$$constant & 0x3f);
11138   %}
11139 
11140   ins_pipe(ialu_reg_reg_shift);
11141 %}
11142 
11143 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
11144                          iRegIorL2I src1, iRegIorL2I src2,
11145                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11146   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
11147   ins_cost(1.9 * INSN_COST);
11148   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
11149 
11150   ins_encode %{
11151     __ bicw(as_Register($dst$$reg),
11152               as_Register($src1$$reg),
11153               as_Register($src2$$reg),
11154               Assembler::ASR,
11155               $src3$$constant & 0x1f);
11156   %}
11157 
11158   ins_pipe(ialu_reg_reg_shift);
11159 %}
11160 
11161 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
11162                          iRegL src1, iRegL src2,
11163                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11164   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
11165   ins_cost(1.9 * INSN_COST);
11166   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
11167 
11168   ins_encode %{
11169     __ bic(as_Register($dst$$reg),
11170               as_Register($src1$$reg),
11171               as_Register($src2$$reg),
11172               Assembler::ASR,
11173               $src3$$constant & 0x3f);
11174   %}
11175 
11176   ins_pipe(ialu_reg_reg_shift);
11177 %}
11178 
11179 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
11180                          iRegIorL2I src1, iRegIorL2I src2,
11181                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11182   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
11183   ins_cost(1.9 * INSN_COST);
11184   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
11185 
11186   ins_encode %{
11187     __ bicw(as_Register($dst$$reg),
11188               as_Register($src1$$reg),
11189               as_Register($src2$$reg),
11190               Assembler::LSL,
11191               $src3$$constant & 0x1f);
11192   %}
11193 
11194   ins_pipe(ialu_reg_reg_shift);
11195 %}
11196 
11197 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
11198                          iRegL src1, iRegL src2,
11199                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11200   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
11201   ins_cost(1.9 * INSN_COST);
11202   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
11203 
11204   ins_encode %{
11205     __ bic(as_Register($dst$$reg),
11206               as_Register($src1$$reg),
11207               as_Register($src2$$reg),
11208               Assembler::LSL,
11209               $src3$$constant & 0x3f);
11210   %}
11211 
11212   ins_pipe(ialu_reg_reg_shift);
11213 %}
11214 
11215 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
11216                          iRegIorL2I src1, iRegIorL2I src2,
11217                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11218   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
11219   ins_cost(1.9 * INSN_COST);
11220   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
11221 
11222   ins_encode %{
11223     __ eonw(as_Register($dst$$reg),
11224               as_Register($src1$$reg),
11225               as_Register($src2$$reg),
11226               Assembler::LSR,
11227               $src3$$constant & 0x1f);
11228   %}
11229 
11230   ins_pipe(ialu_reg_reg_shift);
11231 %}
11232 
11233 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
11234                          iRegL src1, iRegL src2,
11235                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11236   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
11237   ins_cost(1.9 * INSN_COST);
11238   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
11239 
11240   ins_encode %{
11241     __ eon(as_Register($dst$$reg),
11242               as_Register($src1$$reg),
11243               as_Register($src2$$reg),
11244               Assembler::LSR,
11245               $src3$$constant & 0x3f);
11246   %}
11247 
11248   ins_pipe(ialu_reg_reg_shift);
11249 %}
11250 
11251 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11252                          iRegIorL2I src1, iRegIorL2I src2,
11253                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11254   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11255   ins_cost(1.9 * INSN_COST);
11256   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
11257 
11258   ins_encode %{
11259     __ eonw(as_Register($dst$$reg),
11260               as_Register($src1$$reg),
11261               as_Register($src2$$reg),
11262               Assembler::ASR,
11263               $src3$$constant & 0x1f);
11264   %}
11265 
11266   ins_pipe(ialu_reg_reg_shift);
11267 %}
11268 
11269 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11270                          iRegL src1, iRegL src2,
11271                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11272   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11273   ins_cost(1.9 * INSN_COST);
11274   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
11275 
11276   ins_encode %{
11277     __ eon(as_Register($dst$$reg),
11278               as_Register($src1$$reg),
11279               as_Register($src2$$reg),
11280               Assembler::ASR,
11281               $src3$$constant & 0x3f);
11282   %}
11283 
11284   ins_pipe(ialu_reg_reg_shift);
11285 %}
11286 
11287 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11288                          iRegIorL2I src1, iRegIorL2I src2,
11289                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11290   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11291   ins_cost(1.9 * INSN_COST);
11292   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
11293 
11294   ins_encode %{
11295     __ eonw(as_Register($dst$$reg),
11296               as_Register($src1$$reg),
11297               as_Register($src2$$reg),
11298               Assembler::LSL,
11299               $src3$$constant & 0x1f);
11300   %}
11301 
11302   ins_pipe(ialu_reg_reg_shift);
11303 %}
11304 
11305 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11306                          iRegL src1, iRegL src2,
11307                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11308   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11309   ins_cost(1.9 * INSN_COST);
11310   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
11311 
11312   ins_encode %{
11313     __ eon(as_Register($dst$$reg),
11314               as_Register($src1$$reg),
11315               as_Register($src2$$reg),
11316               Assembler::LSL,
11317               $src3$$constant & 0x3f);
11318   %}
11319 
11320   ins_pipe(ialu_reg_reg_shift);
11321 %}
11322 
11323 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
11324                          iRegIorL2I src1, iRegIorL2I src2,
11325                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11326   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
11327   ins_cost(1.9 * INSN_COST);
11328   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
11329 
11330   ins_encode %{
11331     __ ornw(as_Register($dst$$reg),
11332               as_Register($src1$$reg),
11333               as_Register($src2$$reg),
11334               Assembler::LSR,
11335               $src3$$constant & 0x1f);
11336   %}
11337 
11338   ins_pipe(ialu_reg_reg_shift);
11339 %}
11340 
11341 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
11342                          iRegL src1, iRegL src2,
11343                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11344   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
11345   ins_cost(1.9 * INSN_COST);
11346   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
11347 
11348   ins_encode %{
11349     __ orn(as_Register($dst$$reg),
11350               as_Register($src1$$reg),
11351               as_Register($src2$$reg),
11352               Assembler::LSR,
11353               $src3$$constant & 0x3f);
11354   %}
11355 
11356   ins_pipe(ialu_reg_reg_shift);
11357 %}
11358 
11359 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
11360                          iRegIorL2I src1, iRegIorL2I src2,
11361                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11362   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
11363   ins_cost(1.9 * INSN_COST);
11364   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
11365 
11366   ins_encode %{
11367     __ ornw(as_Register($dst$$reg),
11368               as_Register($src1$$reg),
11369               as_Register($src2$$reg),
11370               Assembler::ASR,
11371               $src3$$constant & 0x1f);
11372   %}
11373 
11374   ins_pipe(ialu_reg_reg_shift);
11375 %}
11376 
11377 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
11378                          iRegL src1, iRegL src2,
11379                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11380   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
11381   ins_cost(1.9 * INSN_COST);
11382   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
11383 
11384   ins_encode %{
11385     __ orn(as_Register($dst$$reg),
11386               as_Register($src1$$reg),
11387               as_Register($src2$$reg),
11388               Assembler::ASR,
11389               $src3$$constant & 0x3f);
11390   %}
11391 
11392   ins_pipe(ialu_reg_reg_shift);
11393 %}
11394 
11395 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
11396                          iRegIorL2I src1, iRegIorL2I src2,
11397                          immI src3, immI_M1 src4, rFlagsReg cr) %{
11398   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
11399   ins_cost(1.9 * INSN_COST);
11400   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
11401 
11402   ins_encode %{
11403     __ ornw(as_Register($dst$$reg),
11404               as_Register($src1$$reg),
11405               as_Register($src2$$reg),
11406               Assembler::LSL,
11407               $src3$$constant & 0x1f);
11408   %}
11409 
11410   ins_pipe(ialu_reg_reg_shift);
11411 %}
11412 
11413 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
11414                          iRegL src1, iRegL src2,
11415                          immI src3, immL_M1 src4, rFlagsReg cr) %{
11416   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
11417   ins_cost(1.9 * INSN_COST);
11418   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
11419 
11420   ins_encode %{
11421     __ orn(as_Register($dst$$reg),
11422               as_Register($src1$$reg),
11423               as_Register($src2$$reg),
11424               Assembler::LSL,
11425               $src3$$constant & 0x3f);
11426   %}
11427 
11428   ins_pipe(ialu_reg_reg_shift);
11429 %}
11430 
11431 instruct AndI_reg_URShift_reg(iRegINoSp dst,
11432                          iRegIorL2I src1, iRegIorL2I src2,
11433                          immI src3, rFlagsReg cr) %{
11434   match(Set dst (AndI src1 (URShiftI src2 src3)));
11435 
11436   ins_cost(1.9 * INSN_COST);
11437   format %{ "andw  $dst, $src1, $src2, LSR $src3" %}
11438 
11439   ins_encode %{
11440     __ andw(as_Register($dst$$reg),
11441               as_Register($src1$$reg),
11442               as_Register($src2$$reg),
11443               Assembler::LSR,
11444               $src3$$constant & 0x1f);
11445   %}
11446 
11447   ins_pipe(ialu_reg_reg_shift);
11448 %}
11449 
11450 instruct AndL_reg_URShift_reg(iRegLNoSp dst,
11451                          iRegL src1, iRegL src2,
11452                          immI src3, rFlagsReg cr) %{
11453   match(Set dst (AndL src1 (URShiftL src2 src3)));
11454 
11455   ins_cost(1.9 * INSN_COST);
11456   format %{ "andr  $dst, $src1, $src2, LSR $src3" %}
11457 
11458   ins_encode %{
11459     __ andr(as_Register($dst$$reg),
11460               as_Register($src1$$reg),
11461               as_Register($src2$$reg),
11462               Assembler::LSR,
11463               $src3$$constant & 0x3f);
11464   %}
11465 
11466   ins_pipe(ialu_reg_reg_shift);
11467 %}
11468 
11469 instruct AndI_reg_RShift_reg(iRegINoSp dst,
11470                          iRegIorL2I src1, iRegIorL2I src2,
11471                          immI src3, rFlagsReg cr) %{
11472   match(Set dst (AndI src1 (RShiftI src2 src3)));
11473 
11474   ins_cost(1.9 * INSN_COST);
11475   format %{ "andw  $dst, $src1, $src2, ASR $src3" %}
11476 
11477   ins_encode %{
11478     __ andw(as_Register($dst$$reg),
11479               as_Register($src1$$reg),
11480               as_Register($src2$$reg),
11481               Assembler::ASR,
11482               $src3$$constant & 0x1f);
11483   %}
11484 
11485   ins_pipe(ialu_reg_reg_shift);
11486 %}
11487 
11488 instruct AndL_reg_RShift_reg(iRegLNoSp dst,
11489                          iRegL src1, iRegL src2,
11490                          immI src3, rFlagsReg cr) %{
11491   match(Set dst (AndL src1 (RShiftL src2 src3)));
11492 
11493   ins_cost(1.9 * INSN_COST);
11494   format %{ "andr  $dst, $src1, $src2, ASR $src3" %}
11495 
11496   ins_encode %{
11497     __ andr(as_Register($dst$$reg),
11498               as_Register($src1$$reg),
11499               as_Register($src2$$reg),
11500               Assembler::ASR,
11501               $src3$$constant & 0x3f);
11502   %}
11503 
11504   ins_pipe(ialu_reg_reg_shift);
11505 %}
11506 
11507 instruct AndI_reg_LShift_reg(iRegINoSp dst,
11508                          iRegIorL2I src1, iRegIorL2I src2,
11509                          immI src3, rFlagsReg cr) %{
11510   match(Set dst (AndI src1 (LShiftI src2 src3)));
11511 
11512   ins_cost(1.9 * INSN_COST);
11513   format %{ "andw  $dst, $src1, $src2, LSL $src3" %}
11514 
11515   ins_encode %{
11516     __ andw(as_Register($dst$$reg),
11517               as_Register($src1$$reg),
11518               as_Register($src2$$reg),
11519               Assembler::LSL,
11520               $src3$$constant & 0x1f);
11521   %}
11522 
11523   ins_pipe(ialu_reg_reg_shift);
11524 %}
11525 
11526 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
11527                          iRegL src1, iRegL src2,
11528                          immI src3, rFlagsReg cr) %{
11529   match(Set dst (AndL src1 (LShiftL src2 src3)));
11530 
11531   ins_cost(1.9 * INSN_COST);
11532   format %{ "andr  $dst, $src1, $src2, LSL $src3" %}
11533 
11534   ins_encode %{
11535     __ andr(as_Register($dst$$reg),
11536               as_Register($src1$$reg),
11537               as_Register($src2$$reg),
11538               Assembler::LSL,
11539               $src3$$constant & 0x3f);
11540   %}
11541 
11542   ins_pipe(ialu_reg_reg_shift);
11543 %}
11544 
11545 instruct XorI_reg_URShift_reg(iRegINoSp dst,
11546                          iRegIorL2I src1, iRegIorL2I src2,
11547                          immI src3, rFlagsReg cr) %{
11548   match(Set dst (XorI src1 (URShiftI src2 src3)));
11549 
11550   ins_cost(1.9 * INSN_COST);
11551   format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}
11552 
11553   ins_encode %{
11554     __ eorw(as_Register($dst$$reg),
11555               as_Register($src1$$reg),
11556               as_Register($src2$$reg),
11557               Assembler::LSR,
11558               $src3$$constant & 0x1f);
11559   %}
11560 
11561   ins_pipe(ialu_reg_reg_shift);
11562 %}
11563 
11564 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
11565                          iRegL src1, iRegL src2,
11566                          immI src3, rFlagsReg cr) %{
11567   match(Set dst (XorL src1 (URShiftL src2 src3)));
11568 
11569   ins_cost(1.9 * INSN_COST);
11570   format %{ "eor  $dst, $src1, $src2, LSR $src3" %}
11571 
11572   ins_encode %{
11573     __ eor(as_Register($dst$$reg),
11574               as_Register($src1$$reg),
11575               as_Register($src2$$reg),
11576               Assembler::LSR,
11577               $src3$$constant & 0x3f);
11578   %}
11579 
11580   ins_pipe(ialu_reg_reg_shift);
11581 %}
11582 
11583 instruct XorI_reg_RShift_reg(iRegINoSp dst,
11584                          iRegIorL2I src1, iRegIorL2I src2,
11585                          immI src3, rFlagsReg cr) %{
11586   match(Set dst (XorI src1 (RShiftI src2 src3)));
11587 
11588   ins_cost(1.9 * INSN_COST);
11589   format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}
11590 
11591   ins_encode %{
11592     __ eorw(as_Register($dst$$reg),
11593               as_Register($src1$$reg),
11594               as_Register($src2$$reg),
11595               Assembler::ASR,
11596               $src3$$constant & 0x1f);
11597   %}
11598 
11599   ins_pipe(ialu_reg_reg_shift);
11600 %}
11601 
11602 instruct XorL_reg_RShift_reg(iRegLNoSp dst,
11603                          iRegL src1, iRegL src2,
11604                          immI src3, rFlagsReg cr) %{
11605   match(Set dst (XorL src1 (RShiftL src2 src3)));
11606 
11607   ins_cost(1.9 * INSN_COST);
11608   format %{ "eor  $dst, $src1, $src2, ASR $src3" %}
11609 
11610   ins_encode %{
11611     __ eor(as_Register($dst$$reg),
11612               as_Register($src1$$reg),
11613               as_Register($src2$$reg),
11614               Assembler::ASR,
11615               $src3$$constant & 0x3f);
11616   %}
11617 
11618   ins_pipe(ialu_reg_reg_shift);
11619 %}
11620 
11621 instruct XorI_reg_LShift_reg(iRegINoSp dst,
11622                          iRegIorL2I src1, iRegIorL2I src2,
11623                          immI src3, rFlagsReg cr) %{
11624   match(Set dst (XorI src1 (LShiftI src2 src3)));
11625 
11626   ins_cost(1.9 * INSN_COST);
11627   format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}
11628 
11629   ins_encode %{
11630     __ eorw(as_Register($dst$$reg),
11631               as_Register($src1$$reg),
11632               as_Register($src2$$reg),
11633               Assembler::LSL,
11634               $src3$$constant & 0x1f);
11635   %}
11636 
11637   ins_pipe(ialu_reg_reg_shift);
11638 %}
11639 
11640 instruct XorL_reg_LShift_reg(iRegLNoSp dst,
11641                          iRegL src1, iRegL src2,
11642                          immI src3, rFlagsReg cr) %{
11643   match(Set dst (XorL src1 (LShiftL src2 src3)));
11644 
11645   ins_cost(1.9 * INSN_COST);
11646   format %{ "eor  $dst, $src1, $src2, LSL $src3" %}
11647 
11648   ins_encode %{
11649     __ eor(as_Register($dst$$reg),
11650               as_Register($src1$$reg),
11651               as_Register($src2$$reg),
11652               Assembler::LSL,
11653               $src3$$constant & 0x3f);
11654   %}
11655 
11656   ins_pipe(ialu_reg_reg_shift);
11657 %}
11658 
11659 instruct OrI_reg_URShift_reg(iRegINoSp dst,
11660                          iRegIorL2I src1, iRegIorL2I src2,
11661                          immI src3, rFlagsReg cr) %{
11662   match(Set dst (OrI src1 (URShiftI src2 src3)));
11663 
11664   ins_cost(1.9 * INSN_COST);
11665   format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}
11666 
11667   ins_encode %{
11668     __ orrw(as_Register($dst$$reg),
11669               as_Register($src1$$reg),
11670               as_Register($src2$$reg),
11671               Assembler::LSR,
11672               $src3$$constant & 0x1f);
11673   %}
11674 
11675   ins_pipe(ialu_reg_reg_shift);
11676 %}
11677 
11678 instruct OrL_reg_URShift_reg(iRegLNoSp dst,
11679                          iRegL src1, iRegL src2,
11680                          immI src3, rFlagsReg cr) %{
11681   match(Set dst (OrL src1 (URShiftL src2 src3)));
11682 
11683   ins_cost(1.9 * INSN_COST);
11684   format %{ "orr  $dst, $src1, $src2, LSR $src3" %}
11685 
11686   ins_encode %{
11687     __ orr(as_Register($dst$$reg),
11688               as_Register($src1$$reg),
11689               as_Register($src2$$reg),
11690               Assembler::LSR,
11691               $src3$$constant & 0x3f);
11692   %}
11693 
11694   ins_pipe(ialu_reg_reg_shift);
11695 %}
11696 
11697 instruct OrI_reg_RShift_reg(iRegINoSp dst,
11698                          iRegIorL2I src1, iRegIorL2I src2,
11699                          immI src3, rFlagsReg cr) %{
11700   match(Set dst (OrI src1 (RShiftI src2 src3)));
11701 
11702   ins_cost(1.9 * INSN_COST);
11703   format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}
11704 
11705   ins_encode %{
11706     __ orrw(as_Register($dst$$reg),
11707               as_Register($src1$$reg),
11708               as_Register($src2$$reg),
11709               Assembler::ASR,
11710               $src3$$constant & 0x1f);
11711   %}
11712 
11713   ins_pipe(ialu_reg_reg_shift);
11714 %}
11715 
11716 instruct OrL_reg_RShift_reg(iRegLNoSp dst,
11717                          iRegL src1, iRegL src2,
11718                          immI src3, rFlagsReg cr) %{
11719   match(Set dst (OrL src1 (RShiftL src2 src3)));
11720 
11721   ins_cost(1.9 * INSN_COST);
11722   format %{ "orr  $dst, $src1, $src2, ASR $src3" %}
11723 
11724   ins_encode %{
11725     __ orr(as_Register($dst$$reg),
11726               as_Register($src1$$reg),
11727               as_Register($src2$$reg),
11728               Assembler::ASR,
11729               $src3$$constant & 0x3f);
11730   %}
11731 
11732   ins_pipe(ialu_reg_reg_shift);
11733 %}
11734 
11735 instruct OrI_reg_LShift_reg(iRegINoSp dst,
11736                          iRegIorL2I src1, iRegIorL2I src2,
11737                          immI src3, rFlagsReg cr) %{
11738   match(Set dst (OrI src1 (LShiftI src2 src3)));
11739 
11740   ins_cost(1.9 * INSN_COST);
11741   format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}
11742 
11743   ins_encode %{
11744     __ orrw(as_Register($dst$$reg),
11745               as_Register($src1$$reg),
11746               as_Register($src2$$reg),
11747               Assembler::LSL,
11748               $src3$$constant & 0x1f);
11749   %}
11750 
11751   ins_pipe(ialu_reg_reg_shift);
11752 %}
11753 
11754 instruct OrL_reg_LShift_reg(iRegLNoSp dst,
11755                          iRegL src1, iRegL src2,
11756                          immI src3, rFlagsReg cr) %{
11757   match(Set dst (OrL src1 (LShiftL src2 src3)));
11758 
11759   ins_cost(1.9 * INSN_COST);
11760   format %{ "orr  $dst, $src1, $src2, LSL $src3" %}
11761 
11762   ins_encode %{
11763     __ orr(as_Register($dst$$reg),
11764               as_Register($src1$$reg),
11765               as_Register($src2$$reg),
11766               Assembler::LSL,
11767               $src3$$constant & 0x3f);
11768   %}
11769 
11770   ins_pipe(ialu_reg_reg_shift);
11771 %}
11772 
11773 instruct AddI_reg_URShift_reg(iRegINoSp dst,
11774                          iRegIorL2I src1, iRegIorL2I src2,
11775                          immI src3, rFlagsReg cr) %{
11776   match(Set dst (AddI src1 (URShiftI src2 src3)));
11777 
11778   ins_cost(1.9 * INSN_COST);
11779   format %{ "addw  $dst, $src1, $src2, LSR $src3" %}
11780 
11781   ins_encode %{
11782     __ addw(as_Register($dst$$reg),
11783               as_Register($src1$$reg),
11784               as_Register($src2$$reg),
11785               Assembler::LSR,
11786               $src3$$constant & 0x1f);
11787   %}
11788 
11789   ins_pipe(ialu_reg_reg_shift);
11790 %}
11791 
11792 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
11793                          iRegL src1, iRegL src2,
11794                          immI src3, rFlagsReg cr) %{
11795   match(Set dst (AddL src1 (URShiftL src2 src3)));
11796 
11797   ins_cost(1.9 * INSN_COST);
11798   format %{ "add  $dst, $src1, $src2, LSR $src3" %}
11799 
11800   ins_encode %{
11801     __ add(as_Register($dst$$reg),
11802               as_Register($src1$$reg),
11803               as_Register($src2$$reg),
11804               Assembler::LSR,
11805               $src3$$constant & 0x3f);
11806   %}
11807 
11808   ins_pipe(ialu_reg_reg_shift);
11809 %}
11810 
11811 instruct AddI_reg_RShift_reg(iRegINoSp dst,
11812                          iRegIorL2I src1, iRegIorL2I src2,
11813                          immI src3, rFlagsReg cr) %{
11814   match(Set dst (AddI src1 (RShiftI src2 src3)));
11815 
11816   ins_cost(1.9 * INSN_COST);
11817   format %{ "addw  $dst, $src1, $src2, ASR $src3" %}
11818 
11819   ins_encode %{
11820     __ addw(as_Register($dst$$reg),
11821               as_Register($src1$$reg),
11822               as_Register($src2$$reg),
11823               Assembler::ASR,
11824               $src3$$constant & 0x1f);
11825   %}
11826 
11827   ins_pipe(ialu_reg_reg_shift);
11828 %}
11829 
11830 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
11831                          iRegL src1, iRegL src2,
11832                          immI src3, rFlagsReg cr) %{
11833   match(Set dst (AddL src1 (RShiftL src2 src3)));
11834 
11835   ins_cost(1.9 * INSN_COST);
11836   format %{ "add  $dst, $src1, $src2, ASR $src3" %}
11837 
11838   ins_encode %{
11839     __ add(as_Register($dst$$reg),
11840               as_Register($src1$$reg),
11841               as_Register($src2$$reg),
11842               Assembler::ASR,
11843               $src3$$constant & 0x3f);
11844   %}
11845 
11846   ins_pipe(ialu_reg_reg_shift);
11847 %}
11848 
11849 instruct AddI_reg_LShift_reg(iRegINoSp dst,
11850                          iRegIorL2I src1, iRegIorL2I src2,
11851                          immI src3, rFlagsReg cr) %{
11852   match(Set dst (AddI src1 (LShiftI src2 src3)));
11853 
11854   ins_cost(1.9 * INSN_COST);
11855   format %{ "addw  $dst, $src1, $src2, LSL $src3" %}
11856 
11857   ins_encode %{
11858     __ addw(as_Register($dst$$reg),
11859               as_Register($src1$$reg),
11860               as_Register($src2$$reg),
11861               Assembler::LSL,
11862               $src3$$constant & 0x1f);
11863   %}
11864 
11865   ins_pipe(ialu_reg_reg_shift);
11866 %}
11867 
11868 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
11869                          iRegL src1, iRegL src2,
11870                          immI src3, rFlagsReg cr) %{
11871   match(Set dst (AddL src1 (LShiftL src2 src3)));
11872 
11873   ins_cost(1.9 * INSN_COST);
11874   format %{ "add  $dst, $src1, $src2, LSL $src3" %}
11875 
11876   ins_encode %{
11877     __ add(as_Register($dst$$reg),
11878               as_Register($src1$$reg),
11879               as_Register($src2$$reg),
11880               Assembler::LSL,
11881               $src3$$constant & 0x3f);
11882   %}
11883 
11884   ins_pipe(ialu_reg_reg_shift);
11885 %}
11886 
11887 instruct SubI_reg_URShift_reg(iRegINoSp dst,
11888                          iRegIorL2I src1, iRegIorL2I src2,
11889                          immI src3, rFlagsReg cr) %{
11890   match(Set dst (SubI src1 (URShiftI src2 src3)));
11891 
11892   ins_cost(1.9 * INSN_COST);
11893   format %{ "subw  $dst, $src1, $src2, LSR $src3" %}
11894 
11895   ins_encode %{
11896     __ subw(as_Register($dst$$reg),
11897               as_Register($src1$$reg),
11898               as_Register($src2$$reg),
11899               Assembler::LSR,
11900               $src3$$constant & 0x1f);
11901   %}
11902 
11903   ins_pipe(ialu_reg_reg_shift);
11904 %}
11905 
11906 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
11907                          iRegL src1, iRegL src2,
11908                          immI src3, rFlagsReg cr) %{
11909   match(Set dst (SubL src1 (URShiftL src2 src3)));
11910 
11911   ins_cost(1.9 * INSN_COST);
11912   format %{ "sub  $dst, $src1, $src2, LSR $src3" %}
11913 
11914   ins_encode %{
11915     __ sub(as_Register($dst$$reg),
11916               as_Register($src1$$reg),
11917               as_Register($src2$$reg),
11918               Assembler::LSR,
11919               $src3$$constant & 0x3f);
11920   %}
11921 
11922   ins_pipe(ialu_reg_reg_shift);
11923 %}
11924 
11925 instruct SubI_reg_RShift_reg(iRegINoSp dst,
11926                          iRegIorL2I src1, iRegIorL2I src2,
11927                          immI src3, rFlagsReg cr) %{
11928   match(Set dst (SubI src1 (RShiftI src2 src3)));
11929 
11930   ins_cost(1.9 * INSN_COST);
11931   format %{ "subw  $dst, $src1, $src2, ASR $src3" %}
11932 
11933   ins_encode %{
11934     __ subw(as_Register($dst$$reg),
11935               as_Register($src1$$reg),
11936               as_Register($src2$$reg),
11937               Assembler::ASR,
11938               $src3$$constant & 0x1f);
11939   %}
11940 
11941   ins_pipe(ialu_reg_reg_shift);
11942 %}
11943 
11944 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
11945                          iRegL src1, iRegL src2,
11946                          immI src3, rFlagsReg cr) %{
11947   match(Set dst (SubL src1 (RShiftL src2 src3)));
11948 
11949   ins_cost(1.9 * INSN_COST);
11950   format %{ "sub  $dst, $src1, $src2, ASR $src3" %}
11951 
11952   ins_encode %{
11953     __ sub(as_Register($dst$$reg),
11954               as_Register($src1$$reg),
11955               as_Register($src2$$reg),
11956               Assembler::ASR,
11957               $src3$$constant & 0x3f);
11958   %}
11959 
11960   ins_pipe(ialu_reg_reg_shift);
11961 %}
11962 
11963 instruct SubI_reg_LShift_reg(iRegINoSp dst,
11964                          iRegIorL2I src1, iRegIorL2I src2,
11965                          immI src3, rFlagsReg cr) %{
11966   match(Set dst (SubI src1 (LShiftI src2 src3)));
11967 
11968   ins_cost(1.9 * INSN_COST);
11969   format %{ "subw  $dst, $src1, $src2, LSL $src3" %}
11970 
11971   ins_encode %{
11972     __ subw(as_Register($dst$$reg),
11973               as_Register($src1$$reg),
11974               as_Register($src2$$reg),
11975               Assembler::LSL,
11976               $src3$$constant & 0x1f);
11977   %}
11978 
11979   ins_pipe(ialu_reg_reg_shift);
11980 %}
11981 
11982 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
11983                          iRegL src1, iRegL src2,
11984                          immI src3, rFlagsReg cr) %{
11985   match(Set dst (SubL src1 (LShiftL src2 src3)));
11986 
11987   ins_cost(1.9 * INSN_COST);
11988   format %{ "sub  $dst, $src1, $src2, LSL $src3" %}
11989 
11990   ins_encode %{
11991     __ sub(as_Register($dst$$reg),
11992               as_Register($src1$$reg),
11993               as_Register($src2$$reg),
11994               Assembler::LSL,
11995               $src3$$constant & 0x3f);
11996   %}
11997 
11998   ins_pipe(ialu_reg_reg_shift);
11999 %}
12000 
12001 
12002 
12003 // Shift Left followed by Shift Right.
12004 // This idiom is used by the compiler for the i2b bytecode etc.
12005 instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12006 %{
12007   match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
12008   // Make sure we are not going to exceed what sbfm can do.
12009   predicate((unsigned int)n->in(2)->get_int() <= 63
12010             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12011 
12012   ins_cost(INSN_COST * 2);
12013   format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12014   ins_encode %{
12015     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12016     int s = 63 - lshift;
12017     int r = (rshift - lshift) & 63;
12018     __ sbfm(as_Register($dst$$reg),
12019             as_Register($src$$reg),
12020             r, s);
12021   %}
12022 
12023   ins_pipe(ialu_reg_shift);
12024 %}
12025 
12026 // Shift Left followed by Shift Right.
12027 // This idiom is used by the compiler for the i2b bytecode etc.
12028 instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12029 %{
12030   match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
12031   // Make sure we are not going to exceed what sbfmw can do.
12032   predicate((unsigned int)n->in(2)->get_int() <= 31
12033             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12034 
12035   ins_cost(INSN_COST * 2);
12036   format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12037   ins_encode %{
12038     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12039     int s = 31 - lshift;
12040     int r = (rshift - lshift) & 31;
12041     __ sbfmw(as_Register($dst$$reg),
12042             as_Register($src$$reg),
12043             r, s);
12044   %}
12045 
12046   ins_pipe(ialu_reg_shift);
12047 %}
12048 
12049 // Shift Left followed by Shift Right.
12050 // This idiom is used by the compiler for the i2b bytecode etc.
12051 instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12052 %{
12053   match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
12054   // Make sure we are not going to exceed what ubfm can do.
12055   predicate((unsigned int)n->in(2)->get_int() <= 63
12056             && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12057 
12058   ins_cost(INSN_COST * 2);
12059   format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12060   ins_encode %{
12061     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12062     int s = 63 - lshift;
12063     int r = (rshift - lshift) & 63;
12064     __ ubfm(as_Register($dst$$reg),
12065             as_Register($src$$reg),
12066             r, s);
12067   %}
12068 
12069   ins_pipe(ialu_reg_shift);
12070 %}
12071 
12072 // Shift Left followed by Shift Right.
12073 // This idiom is used by the compiler for the i2b bytecode etc.
12074 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12075 %{
12076   match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
12077   // Make sure we are not going to exceed what ubfmw can do.
12078   predicate((unsigned int)n->in(2)->get_int() <= 31
12079             && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12080 
12081   ins_cost(INSN_COST * 2);
12082   format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12083   ins_encode %{
12084     int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
12085     int s = 31 - lshift;
12086     int r = (rshift - lshift) & 31;
12087     __ ubfmw(as_Register($dst$$reg),
12088             as_Register($src$$reg),
12089             r, s);
12090   %}
12091 
12092   ins_pipe(ialu_reg_shift);
12093 %}
12094 // Bitfield extract with shift & mask
12095 
12096 instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12097 %{
12098   match(Set dst (AndI (URShiftI src rshift) mask));
12099 
12100   ins_cost(INSN_COST);
12101   format %{ "ubfxw $dst, $src, $mask" %}
12102   ins_encode %{
12103     int rshift = $rshift$$constant;
12104     long mask = $mask$$constant;
12105     int width = exact_log2(mask+1);
12106     __ ubfxw(as_Register($dst$$reg),
12107             as_Register($src$$reg), rshift, width);
12108   %}
12109   ins_pipe(ialu_reg_shift);
12110 %}
12111 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
12112 %{
12113   match(Set dst (AndL (URShiftL src rshift) mask));
12114 
12115   ins_cost(INSN_COST);
12116   format %{ "ubfx $dst, $src, $mask" %}
12117   ins_encode %{
12118     int rshift = $rshift$$constant;
12119     long mask = $mask$$constant;
12120     int width = exact_log2(mask+1);
12121     __ ubfx(as_Register($dst$$reg),
12122             as_Register($src$$reg), rshift, width);
12123   %}
12124   ins_pipe(ialu_reg_shift);
12125 %}
12126 
12127 // We can use ubfx when extending an And with a mask when we know mask
12128 // is positive.  We know that because immI_bitmask guarantees it.
12129 instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12130 %{
12131   match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
12132 
12133   ins_cost(INSN_COST * 2);
12134   format %{ "ubfx $dst, $src, $mask" %}
12135   ins_encode %{
12136     int rshift = $rshift$$constant;
12137     long mask = $mask$$constant;
12138     int width = exact_log2(mask+1);
12139     __ ubfx(as_Register($dst$$reg),
12140             as_Register($src$$reg), rshift, width);
12141   %}
12142   ins_pipe(ialu_reg_shift);
12143 %}
12144 
12145 // Rotations
12146 
12147 instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12148 %{
12149   match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12150   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12151 
12152   ins_cost(INSN_COST);
12153   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12154 
12155   ins_encode %{
12156     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12157             $rshift$$constant & 63);
12158   %}
12159   ins_pipe(ialu_reg_reg_extr);
12160 %}
12161 
12162 instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12163 %{
12164   match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12165   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12166 
12167   ins_cost(INSN_COST);
12168   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12169 
12170   ins_encode %{
12171     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12172             $rshift$$constant & 31);
12173   %}
12174   ins_pipe(ialu_reg_reg_extr);
12175 %}
12176 
12177 instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12178 %{
12179   match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12180   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12181 
12182   ins_cost(INSN_COST);
12183   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12184 
12185   ins_encode %{
12186     __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12187             $rshift$$constant & 63);
12188   %}
12189   ins_pipe(ialu_reg_reg_extr);
12190 %}
12191 
12192 instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12193 %{
12194   match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12195   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12196 
12197   ins_cost(INSN_COST);
12198   format %{ "extr $dst, $src1, $src2, #$rshift" %}
12199 
12200   ins_encode %{
12201     __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12202             $rshift$$constant & 31);
12203   %}
12204   ins_pipe(ialu_reg_reg_extr);
12205 %}
12206 
12207 
12208 // rol expander
12209 
12210 instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12211 %{
12212   effect(DEF dst, USE src, USE shift);
12213 
12214   format %{ "rol    $dst, $src, $shift" %}
12215   ins_cost(INSN_COST * 3);
12216   ins_encode %{
12217     __ subw(rscratch1, zr, as_Register($shift$$reg));
12218     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12219             rscratch1);
12220     %}
12221   ins_pipe(ialu_reg_reg_vshift);
12222 %}
12223 
12224 // rol expander
12225 
12226 instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12227 %{
12228   effect(DEF dst, USE src, USE shift);
12229 
12230   format %{ "rol    $dst, $src, $shift" %}
12231   ins_cost(INSN_COST * 3);
12232   ins_encode %{
12233     __ subw(rscratch1, zr, as_Register($shift$$reg));
12234     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12235             rscratch1);
12236     %}
12237   ins_pipe(ialu_reg_reg_vshift);
12238 %}
12239 
12240 instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12241 %{
12242   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
12243 
12244   expand %{
12245     rolL_rReg(dst, src, shift, cr);
12246   %}
12247 %}
12248 
12249 instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12250 %{
12251   match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
12252 
12253   expand %{
12254     rolL_rReg(dst, src, shift, cr);
12255   %}
12256 %}
12257 
12258 instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12259 %{
12260   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
12261 
12262   expand %{
12263     rolI_rReg(dst, src, shift, cr);
12264   %}
12265 %}
12266 
12267 instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12268 %{
12269   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
12270 
12271   expand %{
12272     rolI_rReg(dst, src, shift, cr);
12273   %}
12274 %}
12275 
12276 // ror expander
12277 
12278 instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12279 %{
12280   effect(DEF dst, USE src, USE shift);
12281 
12282   format %{ "ror    $dst, $src, $shift" %}
12283   ins_cost(INSN_COST);
12284   ins_encode %{
12285     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12286             as_Register($shift$$reg));
12287     %}
12288   ins_pipe(ialu_reg_reg_vshift);
12289 %}
12290 
12291 // ror expander
12292 
12293 instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12294 %{
12295   effect(DEF dst, USE src, USE shift);
12296 
12297   format %{ "ror    $dst, $src, $shift" %}
12298   ins_cost(INSN_COST);
12299   ins_encode %{
12300     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12301             as_Register($shift$$reg));
12302     %}
12303   ins_pipe(ialu_reg_reg_vshift);
12304 %}
12305 
12306 instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12307 %{
12308   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
12309 
12310   expand %{
12311     rorL_rReg(dst, src, shift, cr);
12312   %}
12313 %}
12314 
12315 instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12316 %{
12317   match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
12318 
12319   expand %{
12320     rorL_rReg(dst, src, shift, cr);
12321   %}
12322 %}
12323 
12324 instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12325 %{
12326   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
12327 
12328   expand %{
12329     rorI_rReg(dst, src, shift, cr);
12330   %}
12331 %}
12332 
12333 instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12334 %{
12335   match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
12336 
12337   expand %{
12338     rorI_rReg(dst, src, shift, cr);
12339   %}
12340 %}
12341 
12342 // Add/subtract (extended)
12343 
12344 instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12345 %{
12346   match(Set dst (AddL src1 (ConvI2L src2)));
12347   ins_cost(INSN_COST);
12348   format %{ "add  $dst, $src1, sxtw $src2" %}
12349 
12350    ins_encode %{
12351      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12352             as_Register($src2$$reg), ext::sxtw);
12353    %}
12354   ins_pipe(ialu_reg_reg);
12355 %};
12356 
12357 instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12358 %{
12359   match(Set dst (SubL src1 (ConvI2L src2)));
12360   ins_cost(INSN_COST);
12361   format %{ "sub  $dst, $src1, sxtw $src2" %}
12362 
12363    ins_encode %{
12364      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12365             as_Register($src2$$reg), ext::sxtw);
12366    %}
12367   ins_pipe(ialu_reg_reg);
12368 %};
12369 
12370 
12371 instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
12372 %{
12373   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12374   ins_cost(INSN_COST);
12375   format %{ "add  $dst, $src1, sxth $src2" %}
12376 
12377    ins_encode %{
12378      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12379             as_Register($src2$$reg), ext::sxth);
12380    %}
12381   ins_pipe(ialu_reg_reg);
12382 %}
12383 
12384 instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12385 %{
12386   match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12387   ins_cost(INSN_COST);
12388   format %{ "add  $dst, $src1, sxtb $src2" %}
12389 
12390    ins_encode %{
12391      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12392             as_Register($src2$$reg), ext::sxtb);
12393    %}
12394   ins_pipe(ialu_reg_reg);
12395 %}
12396 
12397 instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12398 %{
12399   match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
12400   ins_cost(INSN_COST);
12401   format %{ "add  $dst, $src1, uxtb $src2" %}
12402 
12403    ins_encode %{
12404      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12405             as_Register($src2$$reg), ext::uxtb);
12406    %}
12407   ins_pipe(ialu_reg_reg);
12408 %}
12409 
12410 instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
12411 %{
12412   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12413   ins_cost(INSN_COST);
12414   format %{ "add  $dst, $src1, sxth $src2" %}
12415 
12416    ins_encode %{
12417      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12418             as_Register($src2$$reg), ext::sxth);
12419    %}
12420   ins_pipe(ialu_reg_reg);
12421 %}
12422 
12423 instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
12424 %{
12425   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12426   ins_cost(INSN_COST);
12427   format %{ "add  $dst, $src1, sxtw $src2" %}
12428 
12429    ins_encode %{
12430      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12431             as_Register($src2$$reg), ext::sxtw);
12432    %}
12433   ins_pipe(ialu_reg_reg);
12434 %}
12435 
12436 instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12437 %{
12438   match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12439   ins_cost(INSN_COST);
12440   format %{ "add  $dst, $src1, sxtb $src2" %}
12441 
12442    ins_encode %{
12443      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12444             as_Register($src2$$reg), ext::sxtb);
12445    %}
12446   ins_pipe(ialu_reg_reg);
12447 %}
12448 
12449 instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12450 %{
12451   match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
12452   ins_cost(INSN_COST);
12453   format %{ "add  $dst, $src1, uxtb $src2" %}
12454 
12455    ins_encode %{
12456      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12457             as_Register($src2$$reg), ext::uxtb);
12458    %}
12459   ins_pipe(ialu_reg_reg);
12460 %}
12461 
12462 
12463 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12464 %{
12465   match(Set dst (AddI src1 (AndI src2 mask)));
12466   ins_cost(INSN_COST);
12467   format %{ "addw  $dst, $src1, $src2, uxtb" %}
12468 
12469    ins_encode %{
12470      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12471             as_Register($src2$$reg), ext::uxtb);
12472    %}
12473   ins_pipe(ialu_reg_reg);
12474 %}
12475 
12476 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12477 %{
12478   match(Set dst (AddI src1 (AndI src2 mask)));
12479   ins_cost(INSN_COST);
12480   format %{ "addw  $dst, $src1, $src2, uxth" %}
12481 
12482    ins_encode %{
12483      __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12484             as_Register($src2$$reg), ext::uxth);
12485    %}
12486   ins_pipe(ialu_reg_reg);
12487 %}
12488 
12489 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12490 %{
12491   match(Set dst (AddL src1 (AndL src2 mask)));
12492   ins_cost(INSN_COST);
12493   format %{ "add  $dst, $src1, $src2, uxtb" %}
12494 
12495    ins_encode %{
12496      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12497             as_Register($src2$$reg), ext::uxtb);
12498    %}
12499   ins_pipe(ialu_reg_reg);
12500 %}
12501 
12502 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12503 %{
12504   match(Set dst (AddL src1 (AndL src2 mask)));
12505   ins_cost(INSN_COST);
12506   format %{ "add  $dst, $src1, $src2, uxth" %}
12507 
12508    ins_encode %{
12509      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12510             as_Register($src2$$reg), ext::uxth);
12511    %}
12512   ins_pipe(ialu_reg_reg);
12513 %}
12514 
12515 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12516 %{
12517   match(Set dst (AddL src1 (AndL src2 mask)));
12518   ins_cost(INSN_COST);
12519   format %{ "add  $dst, $src1, $src2, uxtw" %}
12520 
12521    ins_encode %{
12522      __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12523             as_Register($src2$$reg), ext::uxtw);
12524    %}
12525   ins_pipe(ialu_reg_reg);
12526 %}
12527 
12528 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12529 %{
12530   match(Set dst (SubI src1 (AndI src2 mask)));
12531   ins_cost(INSN_COST);
12532   format %{ "subw  $dst, $src1, $src2, uxtb" %}
12533 
12534    ins_encode %{
12535      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12536             as_Register($src2$$reg), ext::uxtb);
12537    %}
12538   ins_pipe(ialu_reg_reg);
12539 %}
12540 
12541 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12542 %{
12543   match(Set dst (SubI src1 (AndI src2 mask)));
12544   ins_cost(INSN_COST);
12545   format %{ "subw  $dst, $src1, $src2, uxth" %}
12546 
12547    ins_encode %{
12548      __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12549             as_Register($src2$$reg), ext::uxth);
12550    %}
12551   ins_pipe(ialu_reg_reg);
12552 %}
12553 
12554 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12555 %{
12556   match(Set dst (SubL src1 (AndL src2 mask)));
12557   ins_cost(INSN_COST);
12558   format %{ "sub  $dst, $src1, $src2, uxtb" %}
12559 
12560    ins_encode %{
12561      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12562             as_Register($src2$$reg), ext::uxtb);
12563    %}
12564   ins_pipe(ialu_reg_reg);
12565 %}
12566 
12567 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12568 %{
12569   match(Set dst (SubL src1 (AndL src2 mask)));
12570   ins_cost(INSN_COST);
12571   format %{ "sub  $dst, $src1, $src2, uxth" %}
12572 
12573    ins_encode %{
12574      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12575             as_Register($src2$$reg), ext::uxth);
12576    %}
12577   ins_pipe(ialu_reg_reg);
12578 %}
12579 
12580 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12581 %{
12582   match(Set dst (SubL src1 (AndL src2 mask)));
12583   ins_cost(INSN_COST);
12584   format %{ "sub  $dst, $src1, $src2, uxtw" %}
12585 
12586    ins_encode %{
12587      __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12588             as_Register($src2$$reg), ext::uxtw);
12589    %}
12590   ins_pipe(ialu_reg_reg);
12591 %}
12592 
12593 // END This section of the file is automatically generated. Do not edit --------------
12594 
12595 // ============================================================================
12596 // Floating Point Arithmetic Instructions
12597 
12598 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12599   match(Set dst (AddF src1 src2));
12600 
12601   ins_cost(INSN_COST * 5);
12602   format %{ "fadds   $dst, $src1, $src2" %}
12603 
12604   ins_encode %{
12605     __ fadds(as_FloatRegister($dst$$reg),
12606              as_FloatRegister($src1$$reg),
12607              as_FloatRegister($src2$$reg));
12608   %}
12609 
12610   ins_pipe(fp_dop_reg_reg_s);
12611 %}
12612 
12613 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12614   match(Set dst (AddD src1 src2));
12615 
12616   ins_cost(INSN_COST * 5);
12617   format %{ "faddd   $dst, $src1, $src2" %}
12618 
12619   ins_encode %{
12620     __ faddd(as_FloatRegister($dst$$reg),
12621              as_FloatRegister($src1$$reg),
12622              as_FloatRegister($src2$$reg));
12623   %}
12624 
12625   ins_pipe(fp_dop_reg_reg_d);
12626 %}
12627 
12628 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12629   match(Set dst (SubF src1 src2));
12630 
12631   ins_cost(INSN_COST * 5);
12632   format %{ "fsubs   $dst, $src1, $src2" %}
12633 
12634   ins_encode %{
12635     __ fsubs(as_FloatRegister($dst$$reg),
12636              as_FloatRegister($src1$$reg),
12637              as_FloatRegister($src2$$reg));
12638   %}
12639 
12640   ins_pipe(fp_dop_reg_reg_s);
12641 %}
12642 
12643 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12644   match(Set dst (SubD src1 src2));
12645 
12646   ins_cost(INSN_COST * 5);
12647   format %{ "fsubd   $dst, $src1, $src2" %}
12648 
12649   ins_encode %{
12650     __ fsubd(as_FloatRegister($dst$$reg),
12651              as_FloatRegister($src1$$reg),
12652              as_FloatRegister($src2$$reg));
12653   %}
12654 
12655   ins_pipe(fp_dop_reg_reg_d);
12656 %}
12657 
12658 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12659   match(Set dst (MulF src1 src2));
12660 
12661   ins_cost(INSN_COST * 6);
12662   format %{ "fmuls   $dst, $src1, $src2" %}
12663 
12664   ins_encode %{
12665     __ fmuls(as_FloatRegister($dst$$reg),
12666              as_FloatRegister($src1$$reg),
12667              as_FloatRegister($src2$$reg));
12668   %}
12669 
12670   ins_pipe(fp_dop_reg_reg_s);
12671 %}
12672 
12673 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12674   match(Set dst (MulD src1 src2));
12675 
12676   ins_cost(INSN_COST * 6);
12677   format %{ "fmuld   $dst, $src1, $src2" %}
12678 
12679   ins_encode %{
12680     __ fmuld(as_FloatRegister($dst$$reg),
12681              as_FloatRegister($src1$$reg),
12682              as_FloatRegister($src2$$reg));
12683   %}
12684 
12685   ins_pipe(fp_dop_reg_reg_d);
12686 %}
12687 
12688 // We cannot use these fused mul w add/sub ops because they don't
12689 // produce the same result as the equivalent separated ops
12690 // (essentially they don't round the intermediate result). that's a
12691 // shame. leaving them here in case we can idenitfy cases where it is
12692 // legitimate to use them
12693 
12694 
12695 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12696 //   match(Set dst (AddF (MulF src1 src2) src3));
12697 
12698 //   format %{ "fmadds   $dst, $src1, $src2, $src3" %}
12699 
12700 //   ins_encode %{
12701 //     __ fmadds(as_FloatRegister($dst$$reg),
12702 //              as_FloatRegister($src1$$reg),
12703 //              as_FloatRegister($src2$$reg),
12704 //              as_FloatRegister($src3$$reg));
12705 //   %}
12706 
12707 //   ins_pipe(pipe_class_default);
12708 // %}
12709 
12710 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12711 //   match(Set dst (AddD (MulD src1 src2) src3));
12712 
12713 //   format %{ "fmaddd   $dst, $src1, $src2, $src3" %}
12714 
12715 //   ins_encode %{
12716 //     __ fmaddd(as_FloatRegister($dst$$reg),
12717 //              as_FloatRegister($src1$$reg),
12718 //              as_FloatRegister($src2$$reg),
12719 //              as_FloatRegister($src3$$reg));
12720 //   %}
12721 
12722 //   ins_pipe(pipe_class_default);
12723 // %}
12724 
12725 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12726 //   match(Set dst (AddF (MulF (NegF src1) src2) src3));
12727 //   match(Set dst (AddF (NegF (MulF src1 src2)) src3));
12728 
12729 //   format %{ "fmsubs   $dst, $src1, $src2, $src3" %}
12730 
12731 //   ins_encode %{
12732 //     __ fmsubs(as_FloatRegister($dst$$reg),
12733 //               as_FloatRegister($src1$$reg),
12734 //               as_FloatRegister($src2$$reg),
12735 //              as_FloatRegister($src3$$reg));
12736 //   %}
12737 
12738 //   ins_pipe(pipe_class_default);
12739 // %}
12740 
12741 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12742 //   match(Set dst (AddD (MulD (NegD src1) src2) src3));
12743 //   match(Set dst (AddD (NegD (MulD src1 src2)) src3));
12744 
12745 //   format %{ "fmsubd   $dst, $src1, $src2, $src3" %}
12746 
12747 //   ins_encode %{
12748 //     __ fmsubd(as_FloatRegister($dst$$reg),
12749 //               as_FloatRegister($src1$$reg),
12750 //               as_FloatRegister($src2$$reg),
12751 //               as_FloatRegister($src3$$reg));
12752 //   %}
12753 
12754 //   ins_pipe(pipe_class_default);
12755 // %}
12756 
12757 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
12758 //   match(Set dst (SubF (MulF (NegF src1) src2) src3));
12759 //   match(Set dst (SubF (NegF (MulF src1 src2)) src3));
12760 
12761 //   format %{ "fnmadds  $dst, $src1, $src2, $src3" %}
12762 
12763 //   ins_encode %{
12764 //     __ fnmadds(as_FloatRegister($dst$$reg),
12765 //                as_FloatRegister($src1$$reg),
12766 //                as_FloatRegister($src2$$reg),
12767 //                as_FloatRegister($src3$$reg));
12768 //   %}
12769 
12770 //   ins_pipe(pipe_class_default);
12771 // %}
12772 
12773 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
12774 //   match(Set dst (SubD (MulD (NegD src1) src2) src3));
12775 //   match(Set dst (SubD (NegD (MulD src1 src2)) src3));
12776 
12777 //   format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}
12778 
12779 //   ins_encode %{
12780 //     __ fnmaddd(as_FloatRegister($dst$$reg),
12781 //                as_FloatRegister($src1$$reg),
12782 //                as_FloatRegister($src2$$reg),
12783 //                as_FloatRegister($src3$$reg));
12784 //   %}
12785 
12786 //   ins_pipe(pipe_class_default);
12787 // %}
12788 
12789 // instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
12790 //   match(Set dst (SubF (MulF src1 src2) src3));
12791 
12792 //   format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}
12793 
12794 //   ins_encode %{
12795 //     __ fnmsubs(as_FloatRegister($dst$$reg),
12796 //                as_FloatRegister($src1$$reg),
12797 //                as_FloatRegister($src2$$reg),
12798 //                as_FloatRegister($src3$$reg));
12799 //   %}
12800 
12801 //   ins_pipe(pipe_class_default);
12802 // %}
12803 
12804 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
12805 //   match(Set dst (SubD (MulD src1 src2) src3));
12806 
12807 //   format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}
12808 
12809 //   ins_encode %{
12810 //   // n.b. insn name should be fnmsubd
12811 //     __ fnmsub(as_FloatRegister($dst$$reg),
12812 //                as_FloatRegister($src1$$reg),
12813 //                as_FloatRegister($src2$$reg),
12814 //                as_FloatRegister($src3$$reg));
12815 //   %}
12816 
12817 //   ins_pipe(pipe_class_default);
12818 // %}
12819 
12820 
12821 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
12822   match(Set dst (DivF src1  src2));
12823 
12824   ins_cost(INSN_COST * 18);
12825   format %{ "fdivs   $dst, $src1, $src2" %}
12826 
12827   ins_encode %{
12828     __ fdivs(as_FloatRegister($dst$$reg),
12829              as_FloatRegister($src1$$reg),
12830              as_FloatRegister($src2$$reg));
12831   %}
12832 
12833   ins_pipe(fp_div_s);
12834 %}
12835 
12836 instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
12837   match(Set dst (DivD src1  src2));
12838 
12839   ins_cost(INSN_COST * 32);
12840   format %{ "fdivd   $dst, $src1, $src2" %}
12841 
12842   ins_encode %{
12843     __ fdivd(as_FloatRegister($dst$$reg),
12844              as_FloatRegister($src1$$reg),
12845              as_FloatRegister($src2$$reg));
12846   %}
12847 
12848   ins_pipe(fp_div_d);
12849 %}
12850 
12851 instruct negF_reg_reg(vRegF dst, vRegF src) %{
12852   match(Set dst (NegF src));
12853 
12854   ins_cost(INSN_COST * 3);
12855   format %{ "fneg   $dst, $src" %}
12856 
12857   ins_encode %{
12858     __ fnegs(as_FloatRegister($dst$$reg),
12859              as_FloatRegister($src$$reg));
12860   %}
12861 
12862   ins_pipe(fp_uop_s);
12863 %}
12864 
12865 instruct negD_reg_reg(vRegD dst, vRegD src) %{
12866   match(Set dst (NegD src));
12867 
12868   ins_cost(INSN_COST * 3);
12869   format %{ "fnegd   $dst, $src" %}
12870 
12871   ins_encode %{
12872     __ fnegd(as_FloatRegister($dst$$reg),
12873              as_FloatRegister($src$$reg));
12874   %}
12875 
12876   ins_pipe(fp_uop_d);
12877 %}
12878 
12879 instruct absF_reg(vRegF dst, vRegF src) %{
12880   match(Set dst (AbsF src));
12881 
12882   ins_cost(INSN_COST * 3);
12883   format %{ "fabss   $dst, $src" %}
12884   ins_encode %{
12885     __ fabss(as_FloatRegister($dst$$reg),
12886              as_FloatRegister($src$$reg));
12887   %}
12888 
12889   ins_pipe(fp_uop_s);
12890 %}
12891 
12892 instruct absD_reg(vRegD dst, vRegD src) %{
12893   match(Set dst (AbsD src));
12894 
12895   ins_cost(INSN_COST * 3);
12896   format %{ "fabsd   $dst, $src" %}
12897   ins_encode %{
12898     __ fabsd(as_FloatRegister($dst$$reg),
12899              as_FloatRegister($src$$reg));
12900   %}
12901 
12902   ins_pipe(fp_uop_d);
12903 %}
12904 
12905 instruct sqrtD_reg(vRegD dst, vRegD src) %{
12906   match(Set dst (SqrtD src));
12907 
12908   ins_cost(INSN_COST * 50);
12909   format %{ "fsqrtd  $dst, $src" %}
12910   ins_encode %{
12911     __ fsqrtd(as_FloatRegister($dst$$reg),
12912              as_FloatRegister($src$$reg));
12913   %}
12914 
12915   ins_pipe(fp_div_s);
12916 %}
12917 
12918 instruct sqrtF_reg(vRegF dst, vRegF src) %{
12919   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
12920 
12921   ins_cost(INSN_COST * 50);
12922   format %{ "fsqrts  $dst, $src" %}
12923   ins_encode %{
12924     __ fsqrts(as_FloatRegister($dst$$reg),
12925              as_FloatRegister($src$$reg));
12926   %}
12927 
12928   ins_pipe(fp_div_d);
12929 %}
12930 
12931 // ============================================================================
12932 // Logical Instructions
12933 
12934 // Integer Logical Instructions
12935 
12936 // And Instructions
12937 
12938 
12939 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
12940   match(Set dst (AndI src1 src2));
12941 
12942   format %{ "andw  $dst, $src1, $src2\t# int" %}
12943 
12944   ins_cost(INSN_COST);
12945   ins_encode %{
12946     __ andw(as_Register($dst$$reg),
12947             as_Register($src1$$reg),
12948             as_Register($src2$$reg));
12949   %}
12950 
12951   ins_pipe(ialu_reg_reg);
12952 %}
12953 
12954 instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
12955   match(Set dst (AndI src1 src2));
12956 
12957   format %{ "andsw  $dst, $src1, $src2\t# int" %}
12958 
12959   ins_cost(INSN_COST);
12960   ins_encode %{
12961     __ andw(as_Register($dst$$reg),
12962             as_Register($src1$$reg),
12963             (unsigned long)($src2$$constant));
12964   %}
12965 
12966   ins_pipe(ialu_reg_imm);
12967 %}
12968 
12969 // Or Instructions
12970 
12971 instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
12972   match(Set dst (OrI src1 src2));
12973 
12974   format %{ "orrw  $dst, $src1, $src2\t# int" %}
12975 
12976   ins_cost(INSN_COST);
12977   ins_encode %{
12978     __ orrw(as_Register($dst$$reg),
12979             as_Register($src1$$reg),
12980             as_Register($src2$$reg));
12981   %}
12982 
12983   ins_pipe(ialu_reg_reg);
12984 %}
12985 
12986 instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
12987   match(Set dst (OrI src1 src2));
12988 
12989   format %{ "orrw  $dst, $src1, $src2\t# int" %}
12990 
12991   ins_cost(INSN_COST);
12992   ins_encode %{
12993     __ orrw(as_Register($dst$$reg),
12994             as_Register($src1$$reg),
12995             (unsigned long)($src2$$constant));
12996   %}
12997 
12998   ins_pipe(ialu_reg_imm);
12999 %}
13000 
13001 // Xor Instructions
13002 
13003 instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
13004   match(Set dst (XorI src1 src2));
13005 
13006   format %{ "eorw  $dst, $src1, $src2\t# int" %}
13007 
13008   ins_cost(INSN_COST);
13009   ins_encode %{
13010     __ eorw(as_Register($dst$$reg),
13011             as_Register($src1$$reg),
13012             as_Register($src2$$reg));
13013   %}
13014 
13015   ins_pipe(ialu_reg_reg);
13016 %}
13017 
13018 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
13019   match(Set dst (XorI src1 src2));
13020 
13021   format %{ "eorw  $dst, $src1, $src2\t# int" %}
13022 
13023   ins_cost(INSN_COST);
13024   ins_encode %{
13025     __ eorw(as_Register($dst$$reg),
13026             as_Register($src1$$reg),
13027             (unsigned long)($src2$$constant));
13028   %}
13029 
13030   ins_pipe(ialu_reg_imm);
13031 %}
13032 
13033 // Long Logical Instructions
13034 // TODO
13035 
13036 instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
13037   match(Set dst (AndL src1 src2));
13038 
13039   format %{ "and  $dst, $src1, $src2\t# int" %}
13040 
13041   ins_cost(INSN_COST);
13042   ins_encode %{
13043     __ andr(as_Register($dst$$reg),
13044             as_Register($src1$$reg),
13045             as_Register($src2$$reg));
13046   %}
13047 
13048   ins_pipe(ialu_reg_reg);
13049 %}
13050 
13051 instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
13052   match(Set dst (AndL src1 src2));
13053 
13054   format %{ "and  $dst, $src1, $src2\t# int" %}
13055 
13056   ins_cost(INSN_COST);
13057   ins_encode %{
13058     __ andr(as_Register($dst$$reg),
13059             as_Register($src1$$reg),
13060             (unsigned long)($src2$$constant));
13061   %}
13062 
13063   ins_pipe(ialu_reg_imm);
13064 %}
13065 
13066 // Or Instructions
13067 
13068 instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13069   match(Set dst (OrL src1 src2));
13070 
13071   format %{ "orr  $dst, $src1, $src2\t# int" %}
13072 
13073   ins_cost(INSN_COST);
13074   ins_encode %{
13075     __ orr(as_Register($dst$$reg),
13076            as_Register($src1$$reg),
13077            as_Register($src2$$reg));
13078   %}
13079 
13080   ins_pipe(ialu_reg_reg);
13081 %}
13082 
13083 instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13084   match(Set dst (OrL src1 src2));
13085 
13086   format %{ "orr  $dst, $src1, $src2\t# int" %}
13087 
13088   ins_cost(INSN_COST);
13089   ins_encode %{
13090     __ orr(as_Register($dst$$reg),
13091            as_Register($src1$$reg),
13092            (unsigned long)($src2$$constant));
13093   %}
13094 
13095   ins_pipe(ialu_reg_imm);
13096 %}
13097 
13098 // Xor Instructions
13099 
13100 instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
13101   match(Set dst (XorL src1 src2));
13102 
13103   format %{ "eor  $dst, $src1, $src2\t# int" %}
13104 
13105   ins_cost(INSN_COST);
13106   ins_encode %{
13107     __ eor(as_Register($dst$$reg),
13108            as_Register($src1$$reg),
13109            as_Register($src2$$reg));
13110   %}
13111 
13112   ins_pipe(ialu_reg_reg);
13113 %}
13114 
13115 instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
13116   match(Set dst (XorL src1 src2));
13117 
13118   ins_cost(INSN_COST);
13119   format %{ "eor  $dst, $src1, $src2\t# int" %}
13120 
13121   ins_encode %{
13122     __ eor(as_Register($dst$$reg),
13123            as_Register($src1$$reg),
13124            (unsigned long)($src2$$constant));
13125   %}
13126 
13127   ins_pipe(ialu_reg_imm);
13128 %}
13129 
13130 instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
13131 %{
13132   match(Set dst (ConvI2L src));
13133 
13134   ins_cost(INSN_COST);
13135   format %{ "sxtw  $dst, $src\t# i2l" %}
13136   ins_encode %{
13137     __ sbfm($dst$$Register, $src$$Register, 0, 31);
13138   %}
13139   ins_pipe(ialu_reg_shift);
13140 %}
13141 
13142 // this pattern occurs in bigmath arithmetic
13143 instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
13144 %{
13145   match(Set dst (AndL (ConvI2L src) mask));
13146 
13147   ins_cost(INSN_COST);
13148   format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
13149   ins_encode %{
13150     __ ubfm($dst$$Register, $src$$Register, 0, 31);
13151   %}
13152 
13153   ins_pipe(ialu_reg_shift);
13154 %}
13155 
13156 instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
13157   match(Set dst (ConvL2I src));
13158 
13159   ins_cost(INSN_COST);
13160   format %{ "movw  $dst, $src \t// l2i" %}
13161 
13162   ins_encode %{
13163     __ movw(as_Register($dst$$reg), as_Register($src$$reg));
13164   %}
13165 
13166   ins_pipe(ialu_reg);
13167 %}
13168 
13169 instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
13170 %{
13171   match(Set dst (Conv2B src));
13172   effect(KILL cr);
13173 
13174   format %{
13175     "cmpw $src, zr\n\t"
13176     "cset $dst, ne"
13177   %}
13178 
13179   ins_encode %{
13180     __ cmpw(as_Register($src$$reg), zr);
13181     __ cset(as_Register($dst$$reg), Assembler::NE);
13182   %}
13183 
13184   ins_pipe(ialu_reg);
13185 %}
13186 
13187 instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
13188 %{
13189   match(Set dst (Conv2B src));
13190   effect(KILL cr);
13191 
13192   format %{
13193     "cmp  $src, zr\n\t"
13194     "cset $dst, ne"
13195   %}
13196 
13197   ins_encode %{
13198     __ cmp(as_Register($src$$reg), zr);
13199     __ cset(as_Register($dst$$reg), Assembler::NE);
13200   %}
13201 
13202   ins_pipe(ialu_reg);
13203 %}
13204 
13205 instruct convD2F_reg(vRegF dst, vRegD src) %{
13206   match(Set dst (ConvD2F src));
13207 
13208   ins_cost(INSN_COST * 5);
13209   format %{ "fcvtd  $dst, $src \t// d2f" %}
13210 
13211   ins_encode %{
13212     __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
13213   %}
13214 
13215   ins_pipe(fp_d2f);
13216 %}
13217 
13218 instruct convF2D_reg(vRegD dst, vRegF src) %{
13219   match(Set dst (ConvF2D src));
13220 
13221   ins_cost(INSN_COST * 5);
13222   format %{ "fcvts  $dst, $src \t// f2d" %}
13223 
13224   ins_encode %{
13225     __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
13226   %}
13227 
13228   ins_pipe(fp_f2d);
13229 %}
13230 
13231 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
13232   match(Set dst (ConvF2I src));
13233 
13234   ins_cost(INSN_COST * 5);
13235   format %{ "fcvtzsw  $dst, $src \t// f2i" %}
13236 
13237   ins_encode %{
13238     __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13239   %}
13240 
13241   ins_pipe(fp_f2i);
13242 %}
13243 
13244 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
13245   match(Set dst (ConvF2L src));
13246 
13247   ins_cost(INSN_COST * 5);
13248   format %{ "fcvtzs  $dst, $src \t// f2l" %}
13249 
13250   ins_encode %{
13251     __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13252   %}
13253 
13254   ins_pipe(fp_f2l);
13255 %}
13256 
13257 instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
13258   match(Set dst (ConvI2F src));
13259 
13260   ins_cost(INSN_COST * 5);
13261   format %{ "scvtfws  $dst, $src \t// i2f" %}
13262 
13263   ins_encode %{
13264     __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13265   %}
13266 
13267   ins_pipe(fp_i2f);
13268 %}
13269 
13270 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
13271   match(Set dst (ConvL2F src));
13272 
13273   ins_cost(INSN_COST * 5);
13274   format %{ "scvtfs  $dst, $src \t// l2f" %}
13275 
13276   ins_encode %{
13277     __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13278   %}
13279 
13280   ins_pipe(fp_l2f);
13281 %}
13282 
13283 instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
13284   match(Set dst (ConvD2I src));
13285 
13286   ins_cost(INSN_COST * 5);
13287   format %{ "fcvtzdw  $dst, $src \t// d2i" %}
13288 
13289   ins_encode %{
13290     __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13291   %}
13292 
13293   ins_pipe(fp_d2i);
13294 %}
13295 
13296 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
13297   match(Set dst (ConvD2L src));
13298 
13299   ins_cost(INSN_COST * 5);
13300   format %{ "fcvtzd  $dst, $src \t// d2l" %}
13301 
13302   ins_encode %{
13303     __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
13304   %}
13305 
13306   ins_pipe(fp_d2l);
13307 %}
13308 
13309 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
13310   match(Set dst (ConvI2D src));
13311 
13312   ins_cost(INSN_COST * 5);
13313   format %{ "scvtfwd  $dst, $src \t// i2d" %}
13314 
13315   ins_encode %{
13316     __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13317   %}
13318 
13319   ins_pipe(fp_i2d);
13320 %}
13321 
13322 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
13323   match(Set dst (ConvL2D src));
13324 
13325   ins_cost(INSN_COST * 5);
13326   format %{ "scvtfd  $dst, $src \t// l2d" %}
13327 
13328   ins_encode %{
13329     __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
13330   %}
13331 
13332   ins_pipe(fp_l2d);
13333 %}
13334 
13335 // stack <-> reg and reg <-> reg shuffles with no conversion
13336 
13337 instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
13338 
13339   match(Set dst (MoveF2I src));
13340 
13341   effect(DEF dst, USE src);
13342 
13343   ins_cost(4 * INSN_COST);
13344 
13345   format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}
13346 
13347   ins_encode %{
13348     __ ldrw($dst$$Register, Address(sp, $src$$disp));
13349   %}
13350 
13351   ins_pipe(iload_reg_reg);
13352 
13353 %}
13354 
13355 instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{
13356 
13357   match(Set dst (MoveI2F src));
13358 
13359   effect(DEF dst, USE src);
13360 
13361   ins_cost(4 * INSN_COST);
13362 
13363   format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}
13364 
13365   ins_encode %{
13366     __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
13367   %}
13368 
13369   ins_pipe(pipe_class_memory);
13370 
13371 %}
13372 
13373 instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
13374 
13375   match(Set dst (MoveD2L src));
13376 
13377   effect(DEF dst, USE src);
13378 
13379   ins_cost(4 * INSN_COST);
13380 
13381   format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}
13382 
13383   ins_encode %{
13384     __ ldr($dst$$Register, Address(sp, $src$$disp));
13385   %}
13386 
13387   ins_pipe(iload_reg_reg);
13388 
13389 %}
13390 
13391 instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{
13392 
13393   match(Set dst (MoveL2D src));
13394 
13395   effect(DEF dst, USE src);
13396 
13397   ins_cost(4 * INSN_COST);
13398 
13399   format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}
13400 
13401   ins_encode %{
13402     __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
13403   %}
13404 
13405   ins_pipe(pipe_class_memory);
13406 
13407 %}
13408 
13409 instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{
13410 
13411   match(Set dst (MoveF2I src));
13412 
13413   effect(DEF dst, USE src);
13414 
13415   ins_cost(INSN_COST);
13416 
13417   format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}
13418 
13419   ins_encode %{
13420     __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
13421   %}
13422 
13423   ins_pipe(pipe_class_memory);
13424 
13425 %}
13426 
13427 instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
13428 
13429   match(Set dst (MoveI2F src));
13430 
13431   effect(DEF dst, USE src);
13432 
13433   ins_cost(INSN_COST);
13434 
13435   format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}
13436 
13437   ins_encode %{
13438     __ strw($src$$Register, Address(sp, $dst$$disp));
13439   %}
13440 
13441   ins_pipe(istore_reg_reg);
13442 
13443 %}
13444 
13445 instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{
13446 
13447   match(Set dst (MoveD2L src));
13448 
13449   effect(DEF dst, USE src);
13450 
13451   ins_cost(INSN_COST);
13452 
13453   format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %}
13454 
13455   ins_encode %{
13456     __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
13457   %}
13458 
13459   ins_pipe(pipe_class_memory);
13460 
13461 %}
13462 
13463 instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
13464 
13465   match(Set dst (MoveL2D src));
13466 
13467   effect(DEF dst, USE src);
13468 
13469   ins_cost(INSN_COST);
13470 
13471   format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}
13472 
13473   ins_encode %{
13474     __ str($src$$Register, Address(sp, $dst$$disp));
13475   %}
13476 
13477   ins_pipe(istore_reg_reg);
13478 
13479 %}
13480 
13481 instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
13482 
13483   match(Set dst (MoveF2I src));
13484 
13485   effect(DEF dst, USE src);
13486 
13487   ins_cost(INSN_COST);
13488 
13489   format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}
13490 
13491   ins_encode %{
13492     __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
13493   %}
13494 
13495   ins_pipe(fp_f2i);
13496 
13497 %}
13498 
13499 instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
13500 
13501   match(Set dst (MoveI2F src));
13502 
13503   effect(DEF dst, USE src);
13504 
13505   ins_cost(INSN_COST);
13506 
13507   format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}
13508 
13509   ins_encode %{
13510     __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
13511   %}
13512 
13513   ins_pipe(fp_i2f);
13514 
13515 %}
13516 
13517 instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
13518 
13519   match(Set dst (MoveD2L src));
13520 
13521   effect(DEF dst, USE src);
13522 
13523   ins_cost(INSN_COST);
13524 
13525   format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}
13526 
13527   ins_encode %{
13528     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
13529   %}
13530 
13531   ins_pipe(fp_d2l);
13532 
13533 %}
13534 
13535 instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
13536 
13537   match(Set dst (MoveL2D src));
13538 
13539   effect(DEF dst, USE src);
13540 
13541   ins_cost(INSN_COST);
13542 
13543   format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}
13544 
13545   ins_encode %{
13546     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
13547   %}
13548 
13549   ins_pipe(fp_l2d);
13550 
13551 %}
13552 
13553 // ============================================================================
13554 // clearing of an array
13555 
13556 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
13557 %{
13558   match(Set dummy (ClearArray cnt base));
13559   effect(USE_KILL cnt, USE_KILL base);
13560 
13561   ins_cost(4 * INSN_COST);
13562   format %{ "ClearArray $cnt, $base" %}
13563 
13564   ins_encode %{
13565     __ zero_words($base$$Register, $cnt$$Register);
13566   %}
13567 
13568   ins_pipe(pipe_class_memory);
13569 %}
13570 
13571 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
13572 %{
13573   match(Set dummy (ClearArray cnt base));
13574   effect(USE_KILL base, TEMP tmp);
13575 
13576   ins_cost(4 * INSN_COST);
13577   format %{ "ClearArray $cnt, $base" %}
13578 
13579   ins_encode %{
13580     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
13581   %}
13582 
13583   ins_pipe(pipe_class_memory);
13584 %}
13585 
13586 // ============================================================================
13587 // Overflow Math Instructions
13588 
13589 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13590 %{
13591   match(Set cr (OverflowAddI op1 op2));
13592 
13593   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13594   ins_cost(INSN_COST);
13595   ins_encode %{
13596     __ cmnw($op1$$Register, $op2$$Register);
13597   %}
13598 
13599   ins_pipe(icmp_reg_reg);
13600 %}
13601 
13602 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13603 %{
13604   match(Set cr (OverflowAddI op1 op2));
13605 
13606   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13607   ins_cost(INSN_COST);
13608   ins_encode %{
13609     __ cmnw($op1$$Register, $op2$$constant);
13610   %}
13611 
13612   ins_pipe(icmp_reg_imm);
13613 %}
13614 
13615 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13616 %{
13617   match(Set cr (OverflowAddL op1 op2));
13618 
13619   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13620   ins_cost(INSN_COST);
13621   ins_encode %{
13622     __ cmn($op1$$Register, $op2$$Register);
13623   %}
13624 
13625   ins_pipe(icmp_reg_reg);
13626 %}
13627 
13628 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13629 %{
13630   match(Set cr (OverflowAddL op1 op2));
13631 
13632   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13633   ins_cost(INSN_COST);
13634   ins_encode %{
13635     __ cmn($op1$$Register, $op2$$constant);
13636   %}
13637 
13638   ins_pipe(icmp_reg_imm);
13639 %}
13640 
13641 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13642 %{
13643   match(Set cr (OverflowSubI op1 op2));
13644 
13645   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13646   ins_cost(INSN_COST);
13647   ins_encode %{
13648     __ cmpw($op1$$Register, $op2$$Register);
13649   %}
13650 
13651   ins_pipe(icmp_reg_reg);
13652 %}
13653 
13654 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13655 %{
13656   match(Set cr (OverflowSubI op1 op2));
13657 
13658   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13659   ins_cost(INSN_COST);
13660   ins_encode %{
13661     __ cmpw($op1$$Register, $op2$$constant);
13662   %}
13663 
13664   ins_pipe(icmp_reg_imm);
13665 %}
13666 
13667 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13668 %{
13669   match(Set cr (OverflowSubL op1 op2));
13670 
13671   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13672   ins_cost(INSN_COST);
13673   ins_encode %{
13674     __ cmp($op1$$Register, $op2$$Register);
13675   %}
13676 
13677   ins_pipe(icmp_reg_reg);
13678 %}
13679 
13680 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13681 %{
13682   match(Set cr (OverflowSubL op1 op2));
13683 
13684   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13685   ins_cost(INSN_COST);
13686   ins_encode %{
13687     __ cmp($op1$$Register, $op2$$constant);
13688   %}
13689 
13690   ins_pipe(icmp_reg_imm);
13691 %}
13692 
13693 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
13694 %{
13695   match(Set cr (OverflowSubI zero op1));
13696 
13697   format %{ "cmpw  zr, $op1\t# overflow check int" %}
13698   ins_cost(INSN_COST);
13699   ins_encode %{
13700     __ cmpw(zr, $op1$$Register);
13701   %}
13702 
13703   ins_pipe(icmp_reg_imm);
13704 %}
13705 
13706 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
13707 %{
13708   match(Set cr (OverflowSubL zero op1));
13709 
13710   format %{ "cmp   zr, $op1\t# overflow check long" %}
13711   ins_cost(INSN_COST);
13712   ins_encode %{
13713     __ cmp(zr, $op1$$Register);
13714   %}
13715 
13716   ins_pipe(icmp_reg_imm);
13717 %}
13718 
13719 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13720 %{
13721   match(Set cr (OverflowMulI op1 op2));
13722 
13723   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13724             "cmp   rscratch1, rscratch1, sxtw\n\t"
13725             "movw  rscratch1, #0x80000000\n\t"
13726             "cselw rscratch1, rscratch1, zr, NE\n\t"
13727             "cmpw  rscratch1, #1" %}
13728   ins_cost(5 * INSN_COST);
13729   ins_encode %{
13730     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13731     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13732     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13733     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13734     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13735   %}
13736 
13737   ins_pipe(pipe_slow);
13738 %}
13739 
13740 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
13741 %{
13742   match(If cmp (OverflowMulI op1 op2));
13743   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13744             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13745   effect(USE labl, KILL cr);
13746 
13747   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13748             "cmp   rscratch1, rscratch1, sxtw\n\t"
13749             "b$cmp   $labl" %}
13750   ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
13751   ins_encode %{
13752     Label* L = $labl$$label;
13753     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13754     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13755     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13756     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13757   %}
13758 
13759   ins_pipe(pipe_serial);
13760 %}
13761 
13762 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13763 %{
13764   match(Set cr (OverflowMulL op1 op2));
13765 
13766   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13767             "smulh rscratch2, $op1, $op2\n\t"
13768             "cmp   rscratch2, rscratch1, ASR #31\n\t"
13769             "movw  rscratch1, #0x80000000\n\t"
13770             "cselw rscratch1, rscratch1, zr, NE\n\t"
13771             "cmpw  rscratch1, #1" %}
13772   ins_cost(6 * INSN_COST);
13773   ins_encode %{
13774     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13775     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13776     __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
13777     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13778     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13779     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13780   %}
13781 
13782   ins_pipe(pipe_slow);
13783 %}
13784 
13785 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
13786 %{
13787   match(If cmp (OverflowMulL op1 op2));
13788   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
13789             || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
13790   effect(USE labl, KILL cr);
13791 
13792   format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
13793             "smulh rscratch2, $op1, $op2\n\t"
13794             "cmp   rscratch2, rscratch1, ASR #31\n\t"
13795             "b$cmp $labl" %}
13796   ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
13797   ins_encode %{
13798     Label* L = $labl$$label;
13799     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
13800     __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
13801     __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
13802     __ cmp(rscratch2, rscratch1, Assembler::ASR, 31);    // Top is pure sign ext
13803     __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
13804   %}
13805 
13806   ins_pipe(pipe_serial);
13807 %}
13808 
13809 // ============================================================================
13810 // Compare Instructions
13811 
13812 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
13813 %{
13814   match(Set cr (CmpI op1 op2));
13815 
13816   effect(DEF cr, USE op1, USE op2);
13817 
13818   ins_cost(INSN_COST);
13819   format %{ "cmpw  $op1, $op2" %}
13820 
13821   ins_encode(aarch64_enc_cmpw(op1, op2));
13822 
13823   ins_pipe(icmp_reg_reg);
13824 %}
13825 
13826 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
13827 %{
13828   match(Set cr (CmpI op1 zero));
13829 
13830   effect(DEF cr, USE op1);
13831 
13832   ins_cost(INSN_COST);
13833   format %{ "cmpw $op1, 0" %}
13834 
13835   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13836 
13837   ins_pipe(icmp_reg_imm);
13838 %}
13839 
13840 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
13841 %{
13842   match(Set cr (CmpI op1 op2));
13843 
13844   effect(DEF cr, USE op1);
13845 
13846   ins_cost(INSN_COST);
13847   format %{ "cmpw  $op1, $op2" %}
13848 
13849   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13850 
13851   ins_pipe(icmp_reg_imm);
13852 %}
13853 
13854 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
13855 %{
13856   match(Set cr (CmpI op1 op2));
13857 
13858   effect(DEF cr, USE op1);
13859 
13860   ins_cost(INSN_COST * 2);
13861   format %{ "cmpw  $op1, $op2" %}
13862 
13863   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13864 
13865   ins_pipe(icmp_reg_imm);
13866 %}
13867 
13868 // Unsigned compare Instructions; really, same as signed compare
13869 // except it should only be used to feed an If or a CMovI which takes a
13870 // cmpOpU.
13871 
13872 instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
13873 %{
13874   match(Set cr (CmpU op1 op2));
13875 
13876   effect(DEF cr, USE op1, USE op2);
13877 
13878   ins_cost(INSN_COST);
13879   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13880 
13881   ins_encode(aarch64_enc_cmpw(op1, op2));
13882 
13883   ins_pipe(icmp_reg_reg);
13884 %}
13885 
13886 instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
13887 %{
13888   match(Set cr (CmpU op1 zero));
13889 
13890   effect(DEF cr, USE op1);
13891 
13892   ins_cost(INSN_COST);
13893   format %{ "cmpw $op1, #0\t# unsigned" %}
13894 
13895   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
13896 
13897   ins_pipe(icmp_reg_imm);
13898 %}
13899 
13900 instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
13901 %{
13902   match(Set cr (CmpU op1 op2));
13903 
13904   effect(DEF cr, USE op1);
13905 
13906   ins_cost(INSN_COST);
13907   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13908 
13909   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
13910 
13911   ins_pipe(icmp_reg_imm);
13912 %}
13913 
13914 instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
13915 %{
13916   match(Set cr (CmpU op1 op2));
13917 
13918   effect(DEF cr, USE op1);
13919 
13920   ins_cost(INSN_COST * 2);
13921   format %{ "cmpw  $op1, $op2\t# unsigned" %}
13922 
13923   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
13924 
13925   ins_pipe(icmp_reg_imm);
13926 %}
13927 
13928 instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13929 %{
13930   match(Set cr (CmpL op1 op2));
13931 
13932   effect(DEF cr, USE op1, USE op2);
13933 
13934   ins_cost(INSN_COST);
13935   format %{ "cmp  $op1, $op2" %}
13936 
13937   ins_encode(aarch64_enc_cmp(op1, op2));
13938 
13939   ins_pipe(icmp_reg_reg);
13940 %}
13941 
13942 instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
13943 %{
13944   match(Set cr (CmpL op1 zero));
13945 
13946   effect(DEF cr, USE op1);
13947 
13948   ins_cost(INSN_COST);
13949   format %{ "tst  $op1" %}
13950 
13951   ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));
13952 
13953   ins_pipe(icmp_reg_imm);
13954 %}
13955 
13956 instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
13957 %{
13958   match(Set cr (CmpL op1 op2));
13959 
13960   effect(DEF cr, USE op1);
13961 
13962   ins_cost(INSN_COST);
13963   format %{ "cmp  $op1, $op2" %}
13964 
13965   ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));
13966 
13967   ins_pipe(icmp_reg_imm);
13968 %}
13969 
13970 instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
13971 %{
13972   match(Set cr (CmpL op1 op2));
13973 
13974   effect(DEF cr, USE op1);
13975 
13976   ins_cost(INSN_COST * 2);
13977   format %{ "cmp  $op1, $op2" %}
13978 
13979   ins_encode(aarch64_enc_cmp_imm(op1, op2));
13980 
13981   ins_pipe(icmp_reg_imm);
13982 %}
13983 
13984 instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
13985 %{
13986   match(Set cr (CmpP op1 op2));
13987 
13988   effect(DEF cr, USE op1, USE op2);
13989 
13990   ins_cost(INSN_COST);
13991   format %{ "cmp  $op1, $op2\t // ptr" %}
13992 
13993   ins_encode(aarch64_enc_cmpp(op1, op2));
13994 
13995   ins_pipe(icmp_reg_reg);
13996 %}
13997 
13998 instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
13999 %{
14000   match(Set cr (CmpN op1 op2));
14001 
14002   effect(DEF cr, USE op1, USE op2);
14003 
14004   ins_cost(INSN_COST);
14005   format %{ "cmp  $op1, $op2\t // compressed ptr" %}
14006 
14007   ins_encode(aarch64_enc_cmpn(op1, op2));
14008 
14009   ins_pipe(icmp_reg_reg);
14010 %}
14011 
14012 instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
14013 %{
14014   match(Set cr (CmpP op1 zero));
14015 
14016   effect(DEF cr, USE op1, USE zero);
14017 
14018   ins_cost(INSN_COST);
14019   format %{ "cmp  $op1, 0\t // ptr" %}
14020 
14021   ins_encode(aarch64_enc_testp(op1));
14022 
14023   ins_pipe(icmp_reg_imm);
14024 %}
14025 
14026 instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
14027 %{
14028   match(Set cr (CmpN op1 zero));
14029 
14030   effect(DEF cr, USE op1, USE zero);
14031 
14032   ins_cost(INSN_COST);
14033   format %{ "cmp  $op1, 0\t // compressed ptr" %}
14034 
14035   ins_encode(aarch64_enc_testn(op1));
14036 
14037   ins_pipe(icmp_reg_imm);
14038 %}
14039 
14040 // FP comparisons
14041 //
14042 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14043 // using normal cmpOp. See declaration of rFlagsReg for details.
14044 
14045 instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
14046 %{
14047   match(Set cr (CmpF src1 src2));
14048 
14049   ins_cost(3 * INSN_COST);
14050   format %{ "fcmps $src1, $src2" %}
14051 
14052   ins_encode %{
14053     __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14054   %}
14055 
14056   ins_pipe(pipe_class_compare);
14057 %}
14058 
14059 instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
14060 %{
14061   match(Set cr (CmpF src1 src2));
14062 
14063   ins_cost(3 * INSN_COST);
14064   format %{ "fcmps $src1, 0.0" %}
14065 
14066   ins_encode %{
14067     __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
14068   %}
14069 
14070   ins_pipe(pipe_class_compare);
14071 %}
14072 // FROM HERE
14073 
14074 instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
14075 %{
14076   match(Set cr (CmpD src1 src2));
14077 
14078   ins_cost(3 * INSN_COST);
14079   format %{ "fcmpd $src1, $src2" %}
14080 
14081   ins_encode %{
14082     __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
14083   %}
14084 
14085   ins_pipe(pipe_class_compare);
14086 %}
14087 
14088 instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
14089 %{
14090   match(Set cr (CmpD src1 src2));
14091 
14092   ins_cost(3 * INSN_COST);
14093   format %{ "fcmpd $src1, 0.0" %}
14094 
14095   ins_encode %{
14096     __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
14097   %}
14098 
14099   ins_pipe(pipe_class_compare);
14100 %}
14101 
14102 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
14103 %{
14104   match(Set dst (CmpF3 src1 src2));
14105   effect(KILL cr);
14106 
14107   ins_cost(5 * INSN_COST);
14108   format %{ "fcmps $src1, $src2\n\t"
14109             "csinvw($dst, zr, zr, eq\n\t"
14110             "csnegw($dst, $dst, $dst, lt)"
14111   %}
14112 
14113   ins_encode %{
14114     Label done;
14115     FloatRegister s1 = as_FloatRegister($src1$$reg);
14116     FloatRegister s2 = as_FloatRegister($src2$$reg);
14117     Register d = as_Register($dst$$reg);
14118     __ fcmps(s1, s2);
14119     // installs 0 if EQ else -1
14120     __ csinvw(d, zr, zr, Assembler::EQ);
14121     // keeps -1 if less or unordered else installs 1
14122     __ csnegw(d, d, d, Assembler::LT);
14123     __ bind(done);
14124   %}
14125 
14126   ins_pipe(pipe_class_default);
14127 
14128 %}
14129 
14130 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
14131 %{
14132   match(Set dst (CmpD3 src1 src2));
14133   effect(KILL cr);
14134 
14135   ins_cost(5 * INSN_COST);
14136   format %{ "fcmpd $src1, $src2\n\t"
14137             "csinvw($dst, zr, zr, eq\n\t"
14138             "csnegw($dst, $dst, $dst, lt)"
14139   %}
14140 
14141   ins_encode %{
14142     Label done;
14143     FloatRegister s1 = as_FloatRegister($src1$$reg);
14144     FloatRegister s2 = as_FloatRegister($src2$$reg);
14145     Register d = as_Register($dst$$reg);
14146     __ fcmpd(s1, s2);
14147     // installs 0 if EQ else -1
14148     __ csinvw(d, zr, zr, Assembler::EQ);
14149     // keeps -1 if less or unordered else installs 1
14150     __ csnegw(d, d, d, Assembler::LT);
14151     __ bind(done);
14152   %}
14153   ins_pipe(pipe_class_default);
14154 
14155 %}
14156 
14157 instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
14158 %{
14159   match(Set dst (CmpF3 src1 zero));
14160   effect(KILL cr);
14161 
14162   ins_cost(5 * INSN_COST);
14163   format %{ "fcmps $src1, 0.0\n\t"
14164             "csinvw($dst, zr, zr, eq\n\t"
14165             "csnegw($dst, $dst, $dst, lt)"
14166   %}
14167 
14168   ins_encode %{
14169     Label done;
14170     FloatRegister s1 = as_FloatRegister($src1$$reg);
14171     Register d = as_Register($dst$$reg);
14172     __ fcmps(s1, 0.0D);
14173     // installs 0 if EQ else -1
14174     __ csinvw(d, zr, zr, Assembler::EQ);
14175     // keeps -1 if less or unordered else installs 1
14176     __ csnegw(d, d, d, Assembler::LT);
14177     __ bind(done);
14178   %}
14179 
14180   ins_pipe(pipe_class_default);
14181 
14182 %}
14183 
14184 instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
14185 %{
14186   match(Set dst (CmpD3 src1 zero));
14187   effect(KILL cr);
14188 
14189   ins_cost(5 * INSN_COST);
14190   format %{ "fcmpd $src1, 0.0\n\t"
14191             "csinvw($dst, zr, zr, eq\n\t"
14192             "csnegw($dst, $dst, $dst, lt)"
14193   %}
14194 
14195   ins_encode %{
14196     Label done;
14197     FloatRegister s1 = as_FloatRegister($src1$$reg);
14198     Register d = as_Register($dst$$reg);
14199     __ fcmpd(s1, 0.0D);
14200     // installs 0 if EQ else -1
14201     __ csinvw(d, zr, zr, Assembler::EQ);
14202     // keeps -1 if less or unordered else installs 1
14203     __ csnegw(d, d, d, Assembler::LT);
14204     __ bind(done);
14205   %}
14206   ins_pipe(pipe_class_default);
14207 
14208 %}
14209 
14210 instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
14211 %{
14212   match(Set dst (CmpLTMask p q));
14213   effect(KILL cr);
14214 
14215   ins_cost(3 * INSN_COST);
14216 
14217   format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
14218             "csetw $dst, lt\n\t"
14219             "subw $dst, zr, $dst"
14220   %}
14221 
14222   ins_encode %{
14223     __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
14224     __ csetw(as_Register($dst$$reg), Assembler::LT);
14225     __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
14226   %}
14227 
14228   ins_pipe(ialu_reg_reg);
14229 %}
14230 
14231 instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
14232 %{
14233   match(Set dst (CmpLTMask src zero));
14234   effect(KILL cr);
14235 
14236   ins_cost(INSN_COST);
14237 
14238   format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}
14239 
14240   ins_encode %{
14241     __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
14242   %}
14243 
14244   ins_pipe(ialu_reg_shift);
14245 %}
14246 
14247 // ============================================================================
14248 // Max and Min
14249 
14250 instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14251 %{
14252   match(Set dst (MinI src1 src2));
14253 
14254   effect(DEF dst, USE src1, USE src2, KILL cr);
14255   size(8);
14256 
14257   ins_cost(INSN_COST * 3);
14258   format %{
14259     "cmpw $src1 $src2\t signed int\n\t"
14260     "cselw $dst, $src1, $src2 lt\t"
14261   %}
14262 
14263   ins_encode %{
14264     __ cmpw(as_Register($src1$$reg),
14265             as_Register($src2$$reg));
14266     __ cselw(as_Register($dst$$reg),
14267              as_Register($src1$$reg),
14268              as_Register($src2$$reg),
14269              Assembler::LT);
14270   %}
14271 
14272   ins_pipe(ialu_reg_reg);
14273 %}
14274 // FROM HERE
14275 
14276 instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
14277 %{
14278   match(Set dst (MaxI src1 src2));
14279 
14280   effect(DEF dst, USE src1, USE src2, KILL cr);
14281   size(8);
14282 
14283   ins_cost(INSN_COST * 3);
14284   format %{
14285     "cmpw $src1 $src2\t signed int\n\t"
14286     "cselw $dst, $src1, $src2 gt\t"
14287   %}
14288 
14289   ins_encode %{
14290     __ cmpw(as_Register($src1$$reg),
14291             as_Register($src2$$reg));
14292     __ cselw(as_Register($dst$$reg),
14293              as_Register($src1$$reg),
14294              as_Register($src2$$reg),
14295              Assembler::GT);
14296   %}
14297 
14298   ins_pipe(ialu_reg_reg);
14299 %}
14300 
14301 // ============================================================================
14302 // Branch Instructions
14303 
14304 // Direct Branch.
14305 instruct branch(label lbl)
14306 %{
14307   match(Goto);
14308 
14309   effect(USE lbl);
14310 
14311   ins_cost(BRANCH_COST);
14312   format %{ "b  $lbl" %}
14313 
14314   ins_encode(aarch64_enc_b(lbl));
14315 
14316   ins_pipe(pipe_branch);
14317 %}
14318 
14319 // Conditional Near Branch
14320 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
14321 %{
14322   // Same match rule as `branchConFar'.
14323   match(If cmp cr);
14324 
14325   effect(USE lbl);
14326 
14327   ins_cost(BRANCH_COST);
14328   // If set to 1 this indicates that the current instruction is a
14329   // short variant of a long branch. This avoids using this
14330   // instruction in first-pass matching. It will then only be used in
14331   // the `Shorten_branches' pass.
14332   // ins_short_branch(1);
14333   format %{ "b$cmp  $lbl" %}
14334 
14335   ins_encode(aarch64_enc_br_con(cmp, lbl));
14336 
14337   ins_pipe(pipe_branch_cond);
14338 %}
14339 
14340 // Conditional Near Branch Unsigned
14341 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14342 %{
14343   // Same match rule as `branchConFar'.
14344   match(If cmp cr);
14345 
14346   effect(USE lbl);
14347 
14348   ins_cost(BRANCH_COST);
14349   // If set to 1 this indicates that the current instruction is a
14350   // short variant of a long branch. This avoids using this
14351   // instruction in first-pass matching. It will then only be used in
14352   // the `Shorten_branches' pass.
14353   // ins_short_branch(1);
14354   format %{ "b$cmp  $lbl\t# unsigned" %}
14355 
14356   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14357 
14358   ins_pipe(pipe_branch_cond);
14359 %}
14360 
14361 // Make use of CBZ and CBNZ.  These instructions, as well as being
14362 // shorter than (cmp; branch), have the additional benefit of not
14363 // killing the flags.
14364 
14365 instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
14366   match(If cmp (CmpI op1 op2));
14367   effect(USE labl);
14368 
14369   ins_cost(BRANCH_COST);
14370   format %{ "cbw$cmp   $op1, $labl" %}
14371   ins_encode %{
14372     Label* L = $labl$$label;
14373     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14374     if (cond == Assembler::EQ)
14375       __ cbzw($op1$$Register, *L);
14376     else
14377       __ cbnzw($op1$$Register, *L);
14378   %}
14379   ins_pipe(pipe_cmp_branch);
14380 %}
14381 
14382 instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
14383   match(If cmp (CmpL op1 op2));
14384   effect(USE labl);
14385 
14386   ins_cost(BRANCH_COST);
14387   format %{ "cb$cmp   $op1, $labl" %}
14388   ins_encode %{
14389     Label* L = $labl$$label;
14390     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14391     if (cond == Assembler::EQ)
14392       __ cbz($op1$$Register, *L);
14393     else
14394       __ cbnz($op1$$Register, *L);
14395   %}
14396   ins_pipe(pipe_cmp_branch);
14397 %}
14398 
14399 instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
14400   match(If cmp (CmpP op1 op2));
14401   effect(USE labl);
14402 
14403   ins_cost(BRANCH_COST);
14404   format %{ "cb$cmp   $op1, $labl" %}
14405   ins_encode %{
14406     Label* L = $labl$$label;
14407     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14408     if (cond == Assembler::EQ)
14409       __ cbz($op1$$Register, *L);
14410     else
14411       __ cbnz($op1$$Register, *L);
14412   %}
14413   ins_pipe(pipe_cmp_branch);
14414 %}
14415 
14416 instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
14417   match(If cmp (CmpN op1 op2));
14418   effect(USE labl);
14419 
14420   ins_cost(BRANCH_COST);
14421   format %{ "cbw$cmp   $op1, $labl" %}
14422   ins_encode %{
14423     Label* L = $labl$$label;
14424     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14425     if (cond == Assembler::EQ)
14426       __ cbzw($op1$$Register, *L);
14427     else
14428       __ cbnzw($op1$$Register, *L);
14429   %}
14430   ins_pipe(pipe_cmp_branch);
14431 %}
14432 
14433 instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
14434   match(If cmp (CmpP (DecodeN oop) zero));
14435   effect(USE labl);
14436 
14437   ins_cost(BRANCH_COST);
14438   format %{ "cb$cmp   $oop, $labl" %}
14439   ins_encode %{
14440     Label* L = $labl$$label;
14441     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14442     if (cond == Assembler::EQ)
14443       __ cbzw($oop$$Register, *L);
14444     else
14445       __ cbnzw($oop$$Register, *L);
14446   %}
14447   ins_pipe(pipe_cmp_branch);
14448 %}
14449 
14450 instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
14451   match(If cmp (CmpU op1 op2));
14452   effect(USE labl);
14453 
14454   ins_cost(BRANCH_COST);
14455   format %{ "cbw$cmp   $op1, $labl" %}
14456   ins_encode %{
14457     Label* L = $labl$$label;
14458     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14459     if (cond == Assembler::EQ || cond == Assembler::LS)
14460       __ cbzw($op1$$Register, *L);
14461     else
14462       __ cbnzw($op1$$Register, *L);
14463   %}
14464   ins_pipe(pipe_cmp_branch);
14465 %}
14466 
14467 instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
14468   match(If cmp (CmpU op1 op2));
14469   effect(USE labl);
14470 
14471   ins_cost(BRANCH_COST);
14472   format %{ "cb$cmp   $op1, $labl" %}
14473   ins_encode %{
14474     Label* L = $labl$$label;
14475     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14476     if (cond == Assembler::EQ || cond == Assembler::LS)
14477       __ cbz($op1$$Register, *L);
14478     else
14479       __ cbnz($op1$$Register, *L);
14480   %}
14481   ins_pipe(pipe_cmp_branch);
14482 %}
14483 
14484 // Test bit and Branch
14485 
14486 // Patterns for short (< 32KiB) variants
14487 instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
14488   match(If cmp (CmpL op1 op2));
14489   effect(USE labl);
14490 
14491   ins_cost(BRANCH_COST);
14492   format %{ "cb$cmp   $op1, $labl # long" %}
14493   ins_encode %{
14494     Label* L = $labl$$label;
14495     Assembler::Condition cond =
14496       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14497     __ tbr(cond, $op1$$Register, 63, *L);
14498   %}
14499   ins_pipe(pipe_cmp_branch);
14500   ins_short_branch(1);
14501 %}
14502 
14503 instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
14504   match(If cmp (CmpI op1 op2));
14505   effect(USE labl);
14506 
14507   ins_cost(BRANCH_COST);
14508   format %{ "cb$cmp   $op1, $labl # int" %}
14509   ins_encode %{
14510     Label* L = $labl$$label;
14511     Assembler::Condition cond =
14512       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14513     __ tbr(cond, $op1$$Register, 31, *L);
14514   %}
14515   ins_pipe(pipe_cmp_branch);
14516   ins_short_branch(1);
14517 %}
14518 
14519 instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
14520   match(If cmp (CmpL (AndL op1 op2) op3));
14521   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
14522   effect(USE labl);
14523 
14524   ins_cost(BRANCH_COST);
14525   format %{ "tb$cmp   $op1, $op2, $labl" %}
14526   ins_encode %{
14527     Label* L = $labl$$label;
14528     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14529     int bit = exact_log2($op2$$constant);
14530     __ tbr(cond, $op1$$Register, bit, *L);
14531   %}
14532   ins_pipe(pipe_cmp_branch);
14533   ins_short_branch(1);
14534 %}
14535 
14536 instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
14537   match(If cmp (CmpI (AndI op1 op2) op3));
14538   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
14539   effect(USE labl);
14540 
14541   ins_cost(BRANCH_COST);
14542   format %{ "tb$cmp   $op1, $op2, $labl" %}
14543   ins_encode %{
14544     Label* L = $labl$$label;
14545     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14546     int bit = exact_log2($op2$$constant);
14547     __ tbr(cond, $op1$$Register, bit, *L);
14548   %}
14549   ins_pipe(pipe_cmp_branch);
14550   ins_short_branch(1);
14551 %}
14552 
14553 // And far variants
14554 instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
14555   match(If cmp (CmpL op1 op2));
14556   effect(USE labl);
14557 
14558   ins_cost(BRANCH_COST);
14559   format %{ "cb$cmp   $op1, $labl # long" %}
14560   ins_encode %{
14561     Label* L = $labl$$label;
14562     Assembler::Condition cond =
14563       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14564     __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
14565   %}
14566   ins_pipe(pipe_cmp_branch);
14567 %}
14568 
14569 instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
14570   match(If cmp (CmpI op1 op2));
14571   effect(USE labl);
14572 
14573   ins_cost(BRANCH_COST);
14574   format %{ "cb$cmp   $op1, $labl # int" %}
14575   ins_encode %{
14576     Label* L = $labl$$label;
14577     Assembler::Condition cond =
14578       ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14579     __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
14580   %}
14581   ins_pipe(pipe_cmp_branch);
14582 %}
14583 
14584 instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
14585   match(If cmp (CmpL (AndL op1 op2) op3));
14586   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
14587   effect(USE labl);
14588 
14589   ins_cost(BRANCH_COST);
14590   format %{ "tb$cmp   $op1, $op2, $labl" %}
14591   ins_encode %{
14592     Label* L = $labl$$label;
14593     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14594     int bit = exact_log2($op2$$constant);
14595     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14596   %}
14597   ins_pipe(pipe_cmp_branch);
14598 %}
14599 
14600 instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
14601   match(If cmp (CmpI (AndI op1 op2) op3));
14602   predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
14603   effect(USE labl);
14604 
14605   ins_cost(BRANCH_COST);
14606   format %{ "tb$cmp   $op1, $op2, $labl" %}
14607   ins_encode %{
14608     Label* L = $labl$$label;
14609     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14610     int bit = exact_log2($op2$$constant);
14611     __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14612   %}
14613   ins_pipe(pipe_cmp_branch);
14614 %}
14615 
14616 // Test bits
14617 
14618 instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
14619   match(Set cr (CmpL (AndL op1 op2) op3));
14620   predicate(Assembler::operand_valid_for_logical_immediate
14621             (/*is_32*/false, n->in(1)->in(2)->get_long()));
14622 
14623   ins_cost(INSN_COST);
14624   format %{ "tst $op1, $op2 # long" %}
14625   ins_encode %{
14626     __ tst($op1$$Register, $op2$$constant);
14627   %}
14628   ins_pipe(ialu_reg_reg);
14629 %}
14630 
14631 instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
14632   match(Set cr (CmpI (AndI op1 op2) op3));
14633   predicate(Assembler::operand_valid_for_logical_immediate
14634             (/*is_32*/true, n->in(1)->in(2)->get_int()));
14635 
14636   ins_cost(INSN_COST);
14637   format %{ "tst $op1, $op2 # int" %}
14638   ins_encode %{
14639     __ tstw($op1$$Register, $op2$$constant);
14640   %}
14641   ins_pipe(ialu_reg_reg);
14642 %}
14643 
14644 instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
14645   match(Set cr (CmpL (AndL op1 op2) op3));
14646 
14647   ins_cost(INSN_COST);
14648   format %{ "tst $op1, $op2 # long" %}
14649   ins_encode %{
14650     __ tst($op1$$Register, $op2$$Register);
14651   %}
14652   ins_pipe(ialu_reg_reg);
14653 %}
14654 
14655 instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
14656   match(Set cr (CmpI (AndI op1 op2) op3));
14657 
14658   ins_cost(INSN_COST);
14659   format %{ "tstw $op1, $op2 # int" %}
14660   ins_encode %{
14661     __ tstw($op1$$Register, $op2$$Register);
14662   %}
14663   ins_pipe(ialu_reg_reg);
14664 %}
14665 
14666 
14667 // Conditional Far Branch
14668 // Conditional Far Branch Unsigned
14669 // TODO: fixme
14670 
14671 // counted loop end branch near
14672 instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
14673 %{
14674   match(CountedLoopEnd cmp cr);
14675 
14676   effect(USE lbl);
14677 
14678   ins_cost(BRANCH_COST);
14679   // short variant.
14680   // ins_short_branch(1);
14681   format %{ "b$cmp $lbl \t// counted loop end" %}
14682 
14683   ins_encode(aarch64_enc_br_con(cmp, lbl));
14684 
14685   ins_pipe(pipe_branch);
14686 %}
14687 
14688 // counted loop end branch near Unsigned
14689 instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14690 %{
14691   match(CountedLoopEnd cmp cr);
14692 
14693   effect(USE lbl);
14694 
14695   ins_cost(BRANCH_COST);
14696   // short variant.
14697   // ins_short_branch(1);
14698   format %{ "b$cmp $lbl \t// counted loop end unsigned" %}
14699 
14700   ins_encode(aarch64_enc_br_conU(cmp, lbl));
14701 
14702   ins_pipe(pipe_branch);
14703 %}
14704 
14705 // counted loop end branch far
14706 // counted loop end branch far unsigned
14707 // TODO: fixme
14708 
14709 // ============================================================================
14710 // inlined locking and unlocking
14711 
14712 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
14713 %{
14714   match(Set cr (FastLock object box));
14715   effect(TEMP tmp, TEMP tmp2);
14716 
14717   // TODO
14718   // identify correct cost
14719   ins_cost(5 * INSN_COST);
14720   format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}
14721 
14722   ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));
14723 
14724   ins_pipe(pipe_serial);
14725 %}
14726 
14727 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
14728 %{
14729   match(Set cr (FastUnlock object box));
14730   effect(TEMP tmp, TEMP tmp2);
14731 
14732   ins_cost(5 * INSN_COST);
14733   format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}
14734 
14735   ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));
14736 
14737   ins_pipe(pipe_serial);
14738 %}
14739 
14740 
14741 // ============================================================================
14742 // Safepoint Instructions
14743 
14744 // TODO
14745 // provide a near and far version of this code
14746 
14747 instruct safePoint(iRegP poll)
14748 %{
14749   match(SafePoint poll);
14750 
14751   format %{
14752     "ldrw zr, [$poll]\t# Safepoint: poll for GC"
14753   %}
14754   ins_encode %{
14755     __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
14756   %}
14757   ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
14758 %}
14759 
14760 
14761 // ============================================================================
14762 // Procedure Call/Return Instructions
14763 
14764 // Call Java Static Instruction
14765 
14766 instruct CallStaticJavaDirect(method meth)
14767 %{
14768   match(CallStaticJava);
14769 
14770   effect(USE meth);
14771 
14772   ins_cost(CALL_COST);
14773 
14774   format %{ "call,static $meth \t// ==> " %}
14775 
14776   ins_encode( aarch64_enc_java_static_call(meth),
14777               aarch64_enc_call_epilog );
14778 
14779   ins_pipe(pipe_class_call);
14780 %}
14781 
14782 // TO HERE
14783 
14784 // Call Java Dynamic Instruction
14785 instruct CallDynamicJavaDirect(method meth)
14786 %{
14787   match(CallDynamicJava);
14788 
14789   effect(USE meth);
14790 
14791   ins_cost(CALL_COST);
14792 
14793   format %{ "CALL,dynamic $meth \t// ==> " %}
14794 
14795   ins_encode( aarch64_enc_java_dynamic_call(meth),
14796                aarch64_enc_call_epilog );
14797 
14798   ins_pipe(pipe_class_call);
14799 %}
14800 
14801 // Call Runtime Instruction
14802 
14803 instruct CallRuntimeDirect(method meth)
14804 %{
14805   match(CallRuntime);
14806 
14807   effect(USE meth);
14808 
14809   ins_cost(CALL_COST);
14810 
14811   format %{ "CALL, runtime $meth" %}
14812 
14813   ins_encode( aarch64_enc_java_to_runtime(meth) );
14814 
14815   ins_pipe(pipe_class_call);
14816 %}
14817 
14818 // Call Runtime Instruction
14819 
14820 instruct CallLeafDirect(method meth)
14821 %{
14822   match(CallLeaf);
14823 
14824   effect(USE meth);
14825 
14826   ins_cost(CALL_COST);
14827 
14828   format %{ "CALL, runtime leaf $meth" %}
14829 
14830   ins_encode( aarch64_enc_java_to_runtime(meth) );
14831 
14832   ins_pipe(pipe_class_call);
14833 %}
14834 
14835 // Call Runtime Instruction
14836 
14837 instruct CallLeafNoFPDirect(method meth)
14838 %{
14839   match(CallLeafNoFP);
14840 
14841   effect(USE meth);
14842 
14843   ins_cost(CALL_COST);
14844 
14845   format %{ "CALL, runtime leaf nofp $meth" %}
14846 
14847   ins_encode( aarch64_enc_java_to_runtime(meth) );
14848 
14849   ins_pipe(pipe_class_call);
14850 %}
14851 
14852 // Tail Call; Jump from runtime stub to Java code.
14853 // Also known as an 'interprocedural jump'.
14854 // Target of jump will eventually return to caller.
14855 // TailJump below removes the return address.
14856 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
14857 %{
14858   match(TailCall jump_target method_oop);
14859 
14860   ins_cost(CALL_COST);
14861 
14862   format %{ "br $jump_target\t# $method_oop holds method oop" %}
14863 
14864   ins_encode(aarch64_enc_tail_call(jump_target));
14865 
14866   ins_pipe(pipe_class_call);
14867 %}
14868 
14869 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
14870 %{
14871   match(TailJump jump_target ex_oop);
14872 
14873   ins_cost(CALL_COST);
14874 
14875   format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
14876 
14877   ins_encode(aarch64_enc_tail_jmp(jump_target));
14878 
14879   ins_pipe(pipe_class_call);
14880 %}
14881 
14882 // Create exception oop: created by stack-crawling runtime code.
14883 // Created exception is now available to this handler, and is setup
14884 // just prior to jumping to this handler. No code emitted.
14885 // TODO check
14886 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
14887 instruct CreateException(iRegP_R0 ex_oop)
14888 %{
14889   match(Set ex_oop (CreateEx));
14890 
14891   format %{ " -- \t// exception oop; no code emitted" %}
14892 
14893   size(0);
14894 
14895   ins_encode( /*empty*/ );
14896 
14897   ins_pipe(pipe_class_empty);
14898 %}
14899 
14900 // Rethrow exception: The exception oop will come in the first
14901 // argument position. Then JUMP (not call) to the rethrow stub code.
14902 instruct RethrowException() %{
14903   match(Rethrow);
14904   ins_cost(CALL_COST);
14905 
14906   format %{ "b rethrow_stub" %}
14907 
14908   ins_encode( aarch64_enc_rethrow() );
14909 
14910   ins_pipe(pipe_class_call);
14911 %}
14912 
14913 
14914 // Return Instruction
14915 // epilog node loads ret address into lr as part of frame pop
14916 instruct Ret()
14917 %{
14918   match(Return);
14919 
14920   format %{ "ret\t// return register" %}
14921 
14922   ins_encode( aarch64_enc_ret() );
14923 
14924   ins_pipe(pipe_branch);
14925 %}
14926 
14927 // Die now.
14928 instruct ShouldNotReachHere() %{
14929   match(Halt);
14930 
14931   ins_cost(CALL_COST);
14932   format %{ "ShouldNotReachHere" %}
14933 
14934   ins_encode %{
14935     // TODO
14936     // implement proper trap call here
14937     __ brk(999);
14938   %}
14939 
14940   ins_pipe(pipe_class_default);
14941 %}
14942 
14943 // ============================================================================
14944 // Partial Subtype Check
14945 //
14946 // superklass array for an instance of the superklass.  Set a hidden
14947 // internal cache on a hit (cache is checked with exposed code in
14948 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14949 // encoding ALSO sets flags.
14950 
14951 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
14952 %{
14953   match(Set result (PartialSubtypeCheck sub super));
14954   effect(KILL cr, KILL temp);
14955 
14956   ins_cost(1100);  // slightly larger than the next version
14957   format %{ "partialSubtypeCheck $result, $sub, $super" %}
14958 
14959   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14960 
14961   opcode(0x1); // Force zero of result reg on hit
14962 
14963   ins_pipe(pipe_class_memory);
14964 %}
14965 
14966 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
14967 %{
14968   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
14969   effect(KILL temp, KILL result);
14970 
14971   ins_cost(1100);  // slightly larger than the next version
14972   format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
14973 
14974   ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
14975 
14976   opcode(0x0); // Don't zero result reg on hit
14977 
14978   ins_pipe(pipe_class_memory);
14979 %}
14980 
14981 instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
14982                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
14983 %{
14984   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
14985   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14986   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14987 
14988   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
14989   ins_encode %{
14990     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
14991     __ asrw($cnt1$$Register, $cnt1$$Register, 1);
14992     __ asrw($cnt2$$Register, $cnt2$$Register, 1);
14993     __ string_compare($str1$$Register, $str2$$Register,
14994                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14995                       $tmp1$$Register);
14996   %}
14997   ins_pipe(pipe_class_memory);
14998 %}
14999 
15000 instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15001        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15002 %{
15003   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15004   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15005   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15006          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15007   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
15008 
15009   ins_encode %{
15010     __ string_indexof($str1$$Register, $str2$$Register,
15011                       $cnt1$$Register, $cnt2$$Register,
15012                       $tmp1$$Register, $tmp2$$Register,
15013                       $tmp3$$Register, $tmp4$$Register,
15014                       -1, $result$$Register, StrIntrinsicNode::UU);
15015   %}
15016   ins_pipe(pipe_class_memory);
15017 %}
15018 
15019 instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15020        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15021 %{
15022   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15023   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15024   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15025          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15026   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
15027 
15028   ins_encode %{
15029     __ string_indexof($str1$$Register, $str2$$Register,
15030                       $cnt1$$Register, $cnt2$$Register,
15031                       $tmp1$$Register, $tmp2$$Register,
15032                       $tmp3$$Register, $tmp4$$Register,
15033                       -1, $result$$Register, StrIntrinsicNode::LL);
15034   %}
15035   ins_pipe(pipe_class_memory);
15036 %}
15037 
15038 instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15039        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15040 %{
15041   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15042   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15043   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15044          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15045   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
15046 
15047   ins_encode %{
15048     __ string_indexof($str1$$Register, $str2$$Register,
15049                       $cnt1$$Register, $cnt2$$Register,
15050                       $tmp1$$Register, $tmp2$$Register,
15051                       $tmp3$$Register, $tmp4$$Register,
15052                       -1, $result$$Register, StrIntrinsicNode::UL);
15053   %}
15054   ins_pipe(pipe_class_memory);
15055 %}
15056 
15057 instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15058        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15059 %{
15060   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15061   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15062   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15063          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15064   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
15065 
15066   ins_encode %{
15067     __ string_indexof($str1$$Register, $str2$$Register,
15068                       $cnt1$$Register, $cnt2$$Register,
15069                       $tmp1$$Register, $tmp2$$Register,
15070                       $tmp3$$Register, $tmp4$$Register,
15071                       -1, $result$$Register, StrIntrinsicNode::LU);
15072   %}
15073   ins_pipe(pipe_class_memory);
15074 %}
15075 
15076 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15077                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15078                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15079 %{
15080   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15081   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15082   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15083          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15084   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
15085 
15086   ins_encode %{
15087     int icnt2 = (int)$int_cnt2$$constant;
15088     __ string_indexof($str1$$Register, $str2$$Register,
15089                       $cnt1$$Register, zr,
15090                       $tmp1$$Register, $tmp2$$Register,
15091                       $tmp3$$Register, $tmp4$$Register,
15092                       icnt2, $result$$Register, StrIntrinsicNode::UU);
15093   %}
15094   ins_pipe(pipe_class_memory);
15095 %}
15096 
15097 instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15098                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15099                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15100 %{
15101   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15102   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15103   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15104          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15105   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
15106 
15107   ins_encode %{
15108     int icnt2 = (int)$int_cnt2$$constant;
15109     __ string_indexof($str1$$Register, $str2$$Register,
15110                       $cnt1$$Register, zr,
15111                       $tmp1$$Register, $tmp2$$Register,
15112                       $tmp3$$Register, $tmp4$$Register,
15113                       icnt2, $result$$Register, StrIntrinsicNode::LL);
15114   %}
15115   ins_pipe(pipe_class_memory);
15116 %}
15117 
15118 instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15119                  immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15120                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15121 %{
15122   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15123   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15124   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15125          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15126   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
15127 
15128   ins_encode %{
15129     int icnt2 = (int)$int_cnt2$$constant;
15130     __ string_indexof($str1$$Register, $str2$$Register,
15131                       $cnt1$$Register, zr,
15132                       $tmp1$$Register, $tmp2$$Register,
15133                       $tmp3$$Register, $tmp4$$Register,
15134                       icnt2, $result$$Register, StrIntrinsicNode::UL);
15135   %}
15136   ins_pipe(pipe_class_memory);
15137 %}
15138 
15139 instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15140                  immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15141                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15142 %{
15143   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15144   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15145   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15146          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15147   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}
15148 
15149   ins_encode %{
15150     int icnt2 = (int)$int_cnt2$$constant;
15151     __ string_indexof($str1$$Register, $str2$$Register,
15152                       $cnt1$$Register, zr,
15153                       $tmp1$$Register, $tmp2$$Register,
15154                       $tmp3$$Register, $tmp4$$Register,
15155                       icnt2, $result$$Register, StrIntrinsicNode::LU);
15156   %}
15157   ins_pipe(pipe_class_memory);
15158 %}
15159 
15160 instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15161                         iRegI_R0 result, rFlagsReg cr)
15162 %{
15163   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
15164   match(Set result (StrEquals (Binary str1 str2) cnt));
15165   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15166 
15167   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15168   ins_encode %{
15169     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15170     __ arrays_equals($str1$$Register, $str2$$Register,
15171                      $result$$Register, $cnt$$Register,
15172                      1, /*is_string*/true);
15173   %}
15174   ins_pipe(pipe_class_memory);
15175 %}
15176 
15177 instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15178                         iRegI_R0 result, rFlagsReg cr)
15179 %{
15180   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
15181   match(Set result (StrEquals (Binary str1 str2) cnt));
15182   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15183 
15184   format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15185   ins_encode %{
15186     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15187     __ asrw($cnt$$Register, $cnt$$Register, 1);
15188     __ arrays_equals($str1$$Register, $str2$$Register,
15189                      $result$$Register, $cnt$$Register,
15190                      2, /*is_string*/true);
15191   %}
15192   ins_pipe(pipe_class_memory);
15193 %}
15194 
15195 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15196                       iRegP_R10 tmp, rFlagsReg cr)
15197 %{
15198   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15199   match(Set result (AryEq ary1 ary2));
15200   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15201 
15202   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
15203   ins_encode %{
15204     __ arrays_equals($ary1$$Register, $ary2$$Register,
15205                      $result$$Register, $tmp$$Register,
15206                      1, /*is_string*/false);
15207     %}
15208   ins_pipe(pipe_class_memory);
15209 %}
15210 
15211 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15212                       iRegP_R10 tmp, rFlagsReg cr)
15213 %{
15214   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15215   match(Set result (AryEq ary1 ary2));
15216   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15217 
15218   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
15219   ins_encode %{
15220     __ arrays_equals($ary1$$Register, $ary2$$Register,
15221                      $result$$Register, $tmp$$Register,
15222                      2, /*is_string*/false);
15223   %}
15224   ins_pipe(pipe_class_memory);
15225 %}
15226 
15227 
15228 // fast char[] to byte[] compression
15229 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15230                          vRegD_V0 tmp1, vRegD_V1 tmp2,
15231                          vRegD_V2 tmp3, vRegD_V3 tmp4,
15232                          iRegI_R0 result, rFlagsReg cr)
15233 %{
15234   match(Set result (StrCompressedCopy src (Binary dst len)));
15235   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15236 
15237   format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
15238   ins_encode %{
15239     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15240                            $tmp1$$FloatRegister, $tmp2$$FloatRegister,
15241                            $tmp3$$FloatRegister, $tmp4$$FloatRegister,
15242                            $result$$Register);
15243   %}
15244   ins_pipe( pipe_slow );
15245 %}
15246 
15247 // fast byte[] to char[] inflation
15248 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
15249                         vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
15250 %{
15251   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15252   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15253 
15254   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15255   ins_encode %{
15256     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15257                           $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
15258   %}
15259   ins_pipe(pipe_class_memory);
15260 %}
15261 
15262 // encode char[] to byte[] in ISO_8859_1
15263 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15264                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
15265                           vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
15266                           iRegI_R0 result, rFlagsReg cr)
15267 %{
15268   match(Set result (EncodeISOArray src (Binary dst len)));
15269   effect(USE_KILL src, USE_KILL dst, USE_KILL len,
15270          KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
15271 
15272   format %{ "Encode array $src,$dst,$len -> $result" %}
15273   ins_encode %{
15274     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15275          $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
15276          $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
15277   %}
15278   ins_pipe( pipe_class_memory );
15279 %}
15280 
15281 // ============================================================================
15282 // This name is KNOWN by the ADLC and cannot be changed.
15283 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15284 // for this guy.
15285 instruct tlsLoadP(thread_RegP dst)
15286 %{
15287   match(Set dst (ThreadLocal));
15288 
15289   ins_cost(0);
15290 
15291   format %{ " -- \t// $dst=Thread::current(), empty" %}
15292 
15293   size(0);
15294 
15295   ins_encode( /*empty*/ );
15296 
15297   ins_pipe(pipe_class_empty);
15298 %}
15299 
15300 // ====================VECTOR INSTRUCTIONS=====================================
15301 
15302 // Load vector (32 bits)
15303 instruct loadV4(vecD dst, vmem4 mem)
15304 %{
15305   predicate(n->as_LoadVector()->memory_size() == 4);
15306   match(Set dst (LoadVector mem));
15307   ins_cost(4 * INSN_COST);
15308   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
15309   ins_encode( aarch64_enc_ldrvS(dst, mem) );
15310   ins_pipe(vload_reg_mem64);
15311 %}
15312 
15313 // Load vector (64 bits)
15314 instruct loadV8(vecD dst, vmem8 mem)
15315 %{
15316   predicate(n->as_LoadVector()->memory_size() == 8);
15317   match(Set dst (LoadVector mem));
15318   ins_cost(4 * INSN_COST);
15319   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
15320   ins_encode( aarch64_enc_ldrvD(dst, mem) );
15321   ins_pipe(vload_reg_mem64);
15322 %}
15323 
15324 // Load Vector (128 bits)
15325 instruct loadV16(vecX dst, vmem16 mem)
15326 %{
15327   predicate(n->as_LoadVector()->memory_size() == 16);
15328   match(Set dst (LoadVector mem));
15329   ins_cost(4 * INSN_COST);
15330   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
15331   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
15332   ins_pipe(vload_reg_mem128);
15333 %}
15334 
15335 // Store Vector (32 bits)
15336 instruct storeV4(vecD src, vmem4 mem)
15337 %{
15338   predicate(n->as_StoreVector()->memory_size() == 4);
15339   match(Set mem (StoreVector mem src));
15340   ins_cost(4 * INSN_COST);
15341   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
15342   ins_encode( aarch64_enc_strvS(src, mem) );
15343   ins_pipe(vstore_reg_mem64);
15344 %}
15345 
15346 // Store Vector (64 bits)
15347 instruct storeV8(vecD src, vmem8 mem)
15348 %{
15349   predicate(n->as_StoreVector()->memory_size() == 8);
15350   match(Set mem (StoreVector mem src));
15351   ins_cost(4 * INSN_COST);
15352   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
15353   ins_encode( aarch64_enc_strvD(src, mem) );
15354   ins_pipe(vstore_reg_mem64);
15355 %}
15356 
15357 // Store Vector (128 bits)
15358 instruct storeV16(vecX src, vmem16 mem)
15359 %{
15360   predicate(n->as_StoreVector()->memory_size() == 16);
15361   match(Set mem (StoreVector mem src));
15362   ins_cost(4 * INSN_COST);
15363   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
15364   ins_encode( aarch64_enc_strvQ(src, mem) );
15365   ins_pipe(vstore_reg_mem128);
15366 %}
15367 
15368 instruct replicate8B(vecD dst, iRegIorL2I src)
15369 %{
15370   predicate(n->as_Vector()->length() == 4 ||
15371             n->as_Vector()->length() == 8);
15372   match(Set dst (ReplicateB src));
15373   ins_cost(INSN_COST);
15374   format %{ "dup  $dst, $src\t# vector (8B)" %}
15375   ins_encode %{
15376     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
15377   %}
15378   ins_pipe(vdup_reg_reg64);
15379 %}
15380 
15381 instruct replicate16B(vecX dst, iRegIorL2I src)
15382 %{
15383   predicate(n->as_Vector()->length() == 16);
15384   match(Set dst (ReplicateB src));
15385   ins_cost(INSN_COST);
15386   format %{ "dup  $dst, $src\t# vector (16B)" %}
15387   ins_encode %{
15388     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
15389   %}
15390   ins_pipe(vdup_reg_reg128);
15391 %}
15392 
15393 instruct replicate8B_imm(vecD dst, immI con)
15394 %{
15395   predicate(n->as_Vector()->length() == 4 ||
15396             n->as_Vector()->length() == 8);
15397   match(Set dst (ReplicateB con));
15398   ins_cost(INSN_COST);
15399   format %{ "movi  $dst, $con\t# vector(8B)" %}
15400   ins_encode %{
15401     __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
15402   %}
15403   ins_pipe(vmovi_reg_imm64);
15404 %}
15405 
15406 instruct replicate16B_imm(vecX dst, immI con)
15407 %{
15408   predicate(n->as_Vector()->length() == 16);
15409   match(Set dst (ReplicateB con));
15410   ins_cost(INSN_COST);
15411   format %{ "movi  $dst, $con\t# vector(16B)" %}
15412   ins_encode %{
15413     __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
15414   %}
15415   ins_pipe(vmovi_reg_imm128);
15416 %}
15417 
15418 instruct replicate4S(vecD dst, iRegIorL2I src)
15419 %{
15420   predicate(n->as_Vector()->length() == 2 ||
15421             n->as_Vector()->length() == 4);
15422   match(Set dst (ReplicateS src));
15423   ins_cost(INSN_COST);
15424   format %{ "dup  $dst, $src\t# vector (4S)" %}
15425   ins_encode %{
15426     __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
15427   %}
15428   ins_pipe(vdup_reg_reg64);
15429 %}
15430 
15431 instruct replicate8S(vecX dst, iRegIorL2I src)
15432 %{
15433   predicate(n->as_Vector()->length() == 8);
15434   match(Set dst (ReplicateS src));
15435   ins_cost(INSN_COST);
15436   format %{ "dup  $dst, $src\t# vector (8S)" %}
15437   ins_encode %{
15438     __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
15439   %}
15440   ins_pipe(vdup_reg_reg128);
15441 %}
15442 
15443 instruct replicate4S_imm(vecD dst, immI con)
15444 %{
15445   predicate(n->as_Vector()->length() == 2 ||
15446             n->as_Vector()->length() == 4);
15447   match(Set dst (ReplicateS con));
15448   ins_cost(INSN_COST);
15449   format %{ "movi  $dst, $con\t# vector(4H)" %}
15450   ins_encode %{
15451     __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
15452   %}
15453   ins_pipe(vmovi_reg_imm64);
15454 %}
15455 
15456 instruct replicate8S_imm(vecX dst, immI con)
15457 %{
15458   predicate(n->as_Vector()->length() == 8);
15459   match(Set dst (ReplicateS con));
15460   ins_cost(INSN_COST);
15461   format %{ "movi  $dst, $con\t# vector(8H)" %}
15462   ins_encode %{
15463     __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
15464   %}
15465   ins_pipe(vmovi_reg_imm128);
15466 %}
15467 
15468 instruct replicate2I(vecD dst, iRegIorL2I src)
15469 %{
15470   predicate(n->as_Vector()->length() == 2);
15471   match(Set dst (ReplicateI src));
15472   ins_cost(INSN_COST);
15473   format %{ "dup  $dst, $src\t# vector (2I)" %}
15474   ins_encode %{
15475     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
15476   %}
15477   ins_pipe(vdup_reg_reg64);
15478 %}
15479 
15480 instruct replicate4I(vecX dst, iRegIorL2I src)
15481 %{
15482   predicate(n->as_Vector()->length() == 4);
15483   match(Set dst (ReplicateI src));
15484   ins_cost(INSN_COST);
15485   format %{ "dup  $dst, $src\t# vector (4I)" %}
15486   ins_encode %{
15487     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
15488   %}
15489   ins_pipe(vdup_reg_reg128);
15490 %}
15491 
15492 instruct replicate2I_imm(vecD dst, immI con)
15493 %{
15494   predicate(n->as_Vector()->length() == 2);
15495   match(Set dst (ReplicateI con));
15496   ins_cost(INSN_COST);
15497   format %{ "movi  $dst, $con\t# vector(2I)" %}
15498   ins_encode %{
15499     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
15500   %}
15501   ins_pipe(vmovi_reg_imm64);
15502 %}
15503 
15504 instruct replicate4I_imm(vecX dst, immI con)
15505 %{
15506   predicate(n->as_Vector()->length() == 4);
15507   match(Set dst (ReplicateI con));
15508   ins_cost(INSN_COST);
15509   format %{ "movi  $dst, $con\t# vector(4I)" %}
15510   ins_encode %{
15511     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
15512   %}
15513   ins_pipe(vmovi_reg_imm128);
15514 %}
15515 
15516 instruct replicate2L(vecX dst, iRegL src)
15517 %{
15518   predicate(n->as_Vector()->length() == 2);
15519   match(Set dst (ReplicateL src));
15520   ins_cost(INSN_COST);
15521   format %{ "dup  $dst, $src\t# vector (2L)" %}
15522   ins_encode %{
15523     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
15524   %}
15525   ins_pipe(vdup_reg_reg128);
15526 %}
15527 
15528 instruct replicate2L_zero(vecX dst, immI0 zero)
15529 %{
15530   predicate(n->as_Vector()->length() == 2);
15531   match(Set dst (ReplicateI zero));
15532   ins_cost(INSN_COST);
15533   format %{ "movi  $dst, $zero\t# vector(4I)" %}
15534   ins_encode %{
15535     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15536            as_FloatRegister($dst$$reg),
15537            as_FloatRegister($dst$$reg));
15538   %}
15539   ins_pipe(vmovi_reg_imm128);
15540 %}
15541 
15542 instruct replicate2F(vecD dst, vRegF src)
15543 %{
15544   predicate(n->as_Vector()->length() == 2);
15545   match(Set dst (ReplicateF src));
15546   ins_cost(INSN_COST);
15547   format %{ "dup  $dst, $src\t# vector (2F)" %}
15548   ins_encode %{
15549     __ dup(as_FloatRegister($dst$$reg), __ T2S,
15550            as_FloatRegister($src$$reg));
15551   %}
15552   ins_pipe(vdup_reg_freg64);
15553 %}
15554 
15555 instruct replicate4F(vecX dst, vRegF src)
15556 %{
15557   predicate(n->as_Vector()->length() == 4);
15558   match(Set dst (ReplicateF src));
15559   ins_cost(INSN_COST);
15560   format %{ "dup  $dst, $src\t# vector (4F)" %}
15561   ins_encode %{
15562     __ dup(as_FloatRegister($dst$$reg), __ T4S,
15563            as_FloatRegister($src$$reg));
15564   %}
15565   ins_pipe(vdup_reg_freg128);
15566 %}
15567 
15568 instruct replicate2D(vecX dst, vRegD src)
15569 %{
15570   predicate(n->as_Vector()->length() == 2);
15571   match(Set dst (ReplicateD src));
15572   ins_cost(INSN_COST);
15573   format %{ "dup  $dst, $src\t# vector (2D)" %}
15574   ins_encode %{
15575     __ dup(as_FloatRegister($dst$$reg), __ T2D,
15576            as_FloatRegister($src$$reg));
15577   %}
15578   ins_pipe(vdup_reg_dreg128);
15579 %}
15580 
15581 // ====================REDUCTION ARITHMETIC====================================
15582 
15583 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
15584 %{
15585   match(Set dst (AddReductionVI src1 src2));
15586   ins_cost(INSN_COST);
15587   effect(TEMP tmp, TEMP tmp2);
15588   format %{ "umov  $tmp, $src2, S, 0\n\t"
15589             "umov  $tmp2, $src2, S, 1\n\t"
15590             "addw  $dst, $src1, $tmp\n\t"
15591             "addw  $dst, $dst, $tmp2\t add reduction2i"
15592   %}
15593   ins_encode %{
15594     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15595     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15596     __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
15597     __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
15598   %}
15599   ins_pipe(pipe_class_default);
15600 %}
15601 
15602 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
15603 %{
15604   match(Set dst (AddReductionVI src1 src2));
15605   ins_cost(INSN_COST);
15606   effect(TEMP tmp, TEMP tmp2);
15607   format %{ "addv  $tmp, T4S, $src2\n\t"
15608             "umov  $tmp2, $tmp, S, 0\n\t"
15609             "addw  $dst, $tmp2, $src1\t add reduction4i"
15610   %}
15611   ins_encode %{
15612     __ addv(as_FloatRegister($tmp$$reg), __ T4S,
15613             as_FloatRegister($src2$$reg));
15614     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
15615     __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
15616   %}
15617   ins_pipe(pipe_class_default);
15618 %}
15619 
15620 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
15621 %{
15622   match(Set dst (MulReductionVI src1 src2));
15623   ins_cost(INSN_COST);
15624   effect(TEMP tmp, TEMP dst);
15625   format %{ "umov  $tmp, $src2, S, 0\n\t"
15626             "mul   $dst, $tmp, $src1\n\t"
15627             "umov  $tmp, $src2, S, 1\n\t"
15628             "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
15629   %}
15630   ins_encode %{
15631     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15632     __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
15633     __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15634     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
15635   %}
15636   ins_pipe(pipe_class_default);
15637 %}
15638 
15639 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
15640 %{
15641   match(Set dst (MulReductionVI src1 src2));
15642   ins_cost(INSN_COST);
15643   effect(TEMP tmp, TEMP tmp2, TEMP dst);
15644   format %{ "ins   $tmp, $src2, 0, 1\n\t"
15645             "mul   $tmp, $tmp, $src2\n\t"
15646             "umov  $tmp2, $tmp, S, 0\n\t"
15647             "mul   $dst, $tmp2, $src1\n\t"
15648             "umov  $tmp2, $tmp, S, 1\n\t"
15649             "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
15650   %}
15651   ins_encode %{
15652     __ ins(as_FloatRegister($tmp$$reg), __ D,
15653            as_FloatRegister($src2$$reg), 0, 1);
15654     __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
15655            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
15656     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
15657     __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
15658     __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
15659     __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
15660   %}
15661   ins_pipe(pipe_class_default);
15662 %}
15663 
15664 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
15665 %{
15666   match(Set dst (AddReductionVF src1 src2));
15667   ins_cost(INSN_COST);
15668   effect(TEMP tmp, TEMP dst);
15669   format %{ "fadds $dst, $src1, $src2\n\t"
15670             "ins   $tmp, S, $src2, 0, 1\n\t"
15671             "fadds $dst, $dst, $tmp\t add reduction2f"
15672   %}
15673   ins_encode %{
15674     __ fadds(as_FloatRegister($dst$$reg),
15675              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15676     __ ins(as_FloatRegister($tmp$$reg), __ S,
15677            as_FloatRegister($src2$$reg), 0, 1);
15678     __ fadds(as_FloatRegister($dst$$reg),
15679              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15680   %}
15681   ins_pipe(pipe_class_default);
15682 %}
15683 
15684 instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
15685 %{
15686   match(Set dst (AddReductionVF src1 src2));
15687   ins_cost(INSN_COST);
15688   effect(TEMP tmp, TEMP dst);
15689   format %{ "fadds $dst, $src1, $src2\n\t"
15690             "ins   $tmp, S, $src2, 0, 1\n\t"
15691             "fadds $dst, $dst, $tmp\n\t"
15692             "ins   $tmp, S, $src2, 0, 2\n\t"
15693             "fadds $dst, $dst, $tmp\n\t"
15694             "ins   $tmp, S, $src2, 0, 3\n\t"
15695             "fadds $dst, $dst, $tmp\t add reduction4f"
15696   %}
15697   ins_encode %{
15698     __ fadds(as_FloatRegister($dst$$reg),
15699              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15700     __ ins(as_FloatRegister($tmp$$reg), __ S,
15701            as_FloatRegister($src2$$reg), 0, 1);
15702     __ fadds(as_FloatRegister($dst$$reg),
15703              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15704     __ ins(as_FloatRegister($tmp$$reg), __ S,
15705            as_FloatRegister($src2$$reg), 0, 2);
15706     __ fadds(as_FloatRegister($dst$$reg),
15707              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15708     __ ins(as_FloatRegister($tmp$$reg), __ S,
15709            as_FloatRegister($src2$$reg), 0, 3);
15710     __ fadds(as_FloatRegister($dst$$reg),
15711              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15712   %}
15713   ins_pipe(pipe_class_default);
15714 %}
15715 
15716 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
15717 %{
15718   match(Set dst (MulReductionVF src1 src2));
15719   ins_cost(INSN_COST);
15720   effect(TEMP tmp, TEMP dst);
15721   format %{ "fmuls $dst, $src1, $src2\n\t"
15722             "ins   $tmp, S, $src2, 0, 1\n\t"
15723             "fmuls $dst, $dst, $tmp\t add reduction4f"
15724   %}
15725   ins_encode %{
15726     __ fmuls(as_FloatRegister($dst$$reg),
15727              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15728     __ ins(as_FloatRegister($tmp$$reg), __ S,
15729            as_FloatRegister($src2$$reg), 0, 1);
15730     __ fmuls(as_FloatRegister($dst$$reg),
15731              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15732   %}
15733   ins_pipe(pipe_class_default);
15734 %}
15735 
15736 instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
15737 %{
15738   match(Set dst (MulReductionVF src1 src2));
15739   ins_cost(INSN_COST);
15740   effect(TEMP tmp, TEMP dst);
15741   format %{ "fmuls $dst, $src1, $src2\n\t"
15742             "ins   $tmp, S, $src2, 0, 1\n\t"
15743             "fmuls $dst, $dst, $tmp\n\t"
15744             "ins   $tmp, S, $src2, 0, 2\n\t"
15745             "fmuls $dst, $dst, $tmp\n\t"
15746             "ins   $tmp, S, $src2, 0, 3\n\t"
15747             "fmuls $dst, $dst, $tmp\t add reduction4f"
15748   %}
15749   ins_encode %{
15750     __ fmuls(as_FloatRegister($dst$$reg),
15751              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15752     __ ins(as_FloatRegister($tmp$$reg), __ S,
15753            as_FloatRegister($src2$$reg), 0, 1);
15754     __ fmuls(as_FloatRegister($dst$$reg),
15755              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15756     __ ins(as_FloatRegister($tmp$$reg), __ S,
15757            as_FloatRegister($src2$$reg), 0, 2);
15758     __ fmuls(as_FloatRegister($dst$$reg),
15759              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15760     __ ins(as_FloatRegister($tmp$$reg), __ S,
15761            as_FloatRegister($src2$$reg), 0, 3);
15762     __ fmuls(as_FloatRegister($dst$$reg),
15763              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15764   %}
15765   ins_pipe(pipe_class_default);
15766 %}
15767 
15768 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
15769 %{
15770   match(Set dst (AddReductionVD src1 src2));
15771   ins_cost(INSN_COST);
15772   effect(TEMP tmp, TEMP dst);
15773   format %{ "faddd $dst, $src1, $src2\n\t"
15774             "ins   $tmp, D, $src2, 0, 1\n\t"
15775             "faddd $dst, $dst, $tmp\t add reduction2d"
15776   %}
15777   ins_encode %{
15778     __ faddd(as_FloatRegister($dst$$reg),
15779              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15780     __ ins(as_FloatRegister($tmp$$reg), __ D,
15781            as_FloatRegister($src2$$reg), 0, 1);
15782     __ faddd(as_FloatRegister($dst$$reg),
15783              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15784   %}
15785   ins_pipe(pipe_class_default);
15786 %}
15787 
15788 instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
15789 %{
15790   match(Set dst (MulReductionVD src1 src2));
15791   ins_cost(INSN_COST);
15792   effect(TEMP tmp, TEMP dst);
15793   format %{ "fmuld $dst, $src1, $src2\n\t"
15794             "ins   $tmp, D, $src2, 0, 1\n\t"
15795             "fmuld $dst, $dst, $tmp\t add reduction2d"
15796   %}
15797   ins_encode %{
15798     __ fmuld(as_FloatRegister($dst$$reg),
15799              as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
15800     __ ins(as_FloatRegister($tmp$$reg), __ D,
15801            as_FloatRegister($src2$$reg), 0, 1);
15802     __ fmuld(as_FloatRegister($dst$$reg),
15803              as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
15804   %}
15805   ins_pipe(pipe_class_default);
15806 %}
15807 
15808 // ====================VECTOR ARITHMETIC=======================================
15809 
15810 // --------------------------------- ADD --------------------------------------
15811 
15812 instruct vadd8B(vecD dst, vecD src1, vecD src2)
15813 %{
15814   predicate(n->as_Vector()->length() == 4 ||
15815             n->as_Vector()->length() == 8);
15816   match(Set dst (AddVB src1 src2));
15817   ins_cost(INSN_COST);
15818   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
15819   ins_encode %{
15820     __ addv(as_FloatRegister($dst$$reg), __ T8B,
15821             as_FloatRegister($src1$$reg),
15822             as_FloatRegister($src2$$reg));
15823   %}
15824   ins_pipe(vdop64);
15825 %}
15826 
15827 instruct vadd16B(vecX dst, vecX src1, vecX src2)
15828 %{
15829   predicate(n->as_Vector()->length() == 16);
15830   match(Set dst (AddVB src1 src2));
15831   ins_cost(INSN_COST);
15832   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
15833   ins_encode %{
15834     __ addv(as_FloatRegister($dst$$reg), __ T16B,
15835             as_FloatRegister($src1$$reg),
15836             as_FloatRegister($src2$$reg));
15837   %}
15838   ins_pipe(vdop128);
15839 %}
15840 
15841 instruct vadd4S(vecD dst, vecD src1, vecD src2)
15842 %{
15843   predicate(n->as_Vector()->length() == 2 ||
15844             n->as_Vector()->length() == 4);
15845   match(Set dst (AddVS src1 src2));
15846   ins_cost(INSN_COST);
15847   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
15848   ins_encode %{
15849     __ addv(as_FloatRegister($dst$$reg), __ T4H,
15850             as_FloatRegister($src1$$reg),
15851             as_FloatRegister($src2$$reg));
15852   %}
15853   ins_pipe(vdop64);
15854 %}
15855 
15856 instruct vadd8S(vecX dst, vecX src1, vecX src2)
15857 %{
15858   predicate(n->as_Vector()->length() == 8);
15859   match(Set dst (AddVS src1 src2));
15860   ins_cost(INSN_COST);
15861   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
15862   ins_encode %{
15863     __ addv(as_FloatRegister($dst$$reg), __ T8H,
15864             as_FloatRegister($src1$$reg),
15865             as_FloatRegister($src2$$reg));
15866   %}
15867   ins_pipe(vdop128);
15868 %}
15869 
15870 instruct vadd2I(vecD dst, vecD src1, vecD src2)
15871 %{
15872   predicate(n->as_Vector()->length() == 2);
15873   match(Set dst (AddVI src1 src2));
15874   ins_cost(INSN_COST);
15875   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
15876   ins_encode %{
15877     __ addv(as_FloatRegister($dst$$reg), __ T2S,
15878             as_FloatRegister($src1$$reg),
15879             as_FloatRegister($src2$$reg));
15880   %}
15881   ins_pipe(vdop64);
15882 %}
15883 
15884 instruct vadd4I(vecX dst, vecX src1, vecX src2)
15885 %{
15886   predicate(n->as_Vector()->length() == 4);
15887   match(Set dst (AddVI src1 src2));
15888   ins_cost(INSN_COST);
15889   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
15890   ins_encode %{
15891     __ addv(as_FloatRegister($dst$$reg), __ T4S,
15892             as_FloatRegister($src1$$reg),
15893             as_FloatRegister($src2$$reg));
15894   %}
15895   ins_pipe(vdop128);
15896 %}
15897 
15898 instruct vadd2L(vecX dst, vecX src1, vecX src2)
15899 %{
15900   predicate(n->as_Vector()->length() == 2);
15901   match(Set dst (AddVL src1 src2));
15902   ins_cost(INSN_COST);
15903   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
15904   ins_encode %{
15905     __ addv(as_FloatRegister($dst$$reg), __ T2D,
15906             as_FloatRegister($src1$$reg),
15907             as_FloatRegister($src2$$reg));
15908   %}
15909   ins_pipe(vdop128);
15910 %}
15911 
15912 instruct vadd2F(vecD dst, vecD src1, vecD src2)
15913 %{
15914   predicate(n->as_Vector()->length() == 2);
15915   match(Set dst (AddVF src1 src2));
15916   ins_cost(INSN_COST);
15917   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
15918   ins_encode %{
15919     __ fadd(as_FloatRegister($dst$$reg), __ T2S,
15920             as_FloatRegister($src1$$reg),
15921             as_FloatRegister($src2$$reg));
15922   %}
15923   ins_pipe(vdop_fp64);
15924 %}
15925 
15926 instruct vadd4F(vecX dst, vecX src1, vecX src2)
15927 %{
15928   predicate(n->as_Vector()->length() == 4);
15929   match(Set dst (AddVF src1 src2));
15930   ins_cost(INSN_COST);
15931   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
15932   ins_encode %{
15933     __ fadd(as_FloatRegister($dst$$reg), __ T4S,
15934             as_FloatRegister($src1$$reg),
15935             as_FloatRegister($src2$$reg));
15936   %}
15937   ins_pipe(vdop_fp128);
15938 %}
15939 
15940 instruct vadd2D(vecX dst, vecX src1, vecX src2)
15941 %{
15942   match(Set dst (AddVD src1 src2));
15943   ins_cost(INSN_COST);
15944   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
15945   ins_encode %{
15946     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
15947             as_FloatRegister($src1$$reg),
15948             as_FloatRegister($src2$$reg));
15949   %}
15950   ins_pipe(vdop_fp128);
15951 %}
15952 
15953 // --------------------------------- SUB --------------------------------------
15954 
15955 instruct vsub8B(vecD dst, vecD src1, vecD src2)
15956 %{
15957   predicate(n->as_Vector()->length() == 4 ||
15958             n->as_Vector()->length() == 8);
15959   match(Set dst (SubVB src1 src2));
15960   ins_cost(INSN_COST);
15961   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
15962   ins_encode %{
15963     __ subv(as_FloatRegister($dst$$reg), __ T8B,
15964             as_FloatRegister($src1$$reg),
15965             as_FloatRegister($src2$$reg));
15966   %}
15967   ins_pipe(vdop64);
15968 %}
15969 
15970 instruct vsub16B(vecX dst, vecX src1, vecX src2)
15971 %{
15972   predicate(n->as_Vector()->length() == 16);
15973   match(Set dst (SubVB src1 src2));
15974   ins_cost(INSN_COST);
15975   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
15976   ins_encode %{
15977     __ subv(as_FloatRegister($dst$$reg), __ T16B,
15978             as_FloatRegister($src1$$reg),
15979             as_FloatRegister($src2$$reg));
15980   %}
15981   ins_pipe(vdop128);
15982 %}
15983 
15984 instruct vsub4S(vecD dst, vecD src1, vecD src2)
15985 %{
15986   predicate(n->as_Vector()->length() == 2 ||
15987             n->as_Vector()->length() == 4);
15988   match(Set dst (SubVS src1 src2));
15989   ins_cost(INSN_COST);
15990   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
15991   ins_encode %{
15992     __ subv(as_FloatRegister($dst$$reg), __ T4H,
15993             as_FloatRegister($src1$$reg),
15994             as_FloatRegister($src2$$reg));
15995   %}
15996   ins_pipe(vdop64);
15997 %}
15998 
15999 instruct vsub8S(vecX dst, vecX src1, vecX src2)
16000 %{
16001   predicate(n->as_Vector()->length() == 8);
16002   match(Set dst (SubVS src1 src2));
16003   ins_cost(INSN_COST);
16004   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
16005   ins_encode %{
16006     __ subv(as_FloatRegister($dst$$reg), __ T8H,
16007             as_FloatRegister($src1$$reg),
16008             as_FloatRegister($src2$$reg));
16009   %}
16010   ins_pipe(vdop128);
16011 %}
16012 
16013 instruct vsub2I(vecD dst, vecD src1, vecD src2)
16014 %{
16015   predicate(n->as_Vector()->length() == 2);
16016   match(Set dst (SubVI src1 src2));
16017   ins_cost(INSN_COST);
16018   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
16019   ins_encode %{
16020     __ subv(as_FloatRegister($dst$$reg), __ T2S,
16021             as_FloatRegister($src1$$reg),
16022             as_FloatRegister($src2$$reg));
16023   %}
16024   ins_pipe(vdop64);
16025 %}
16026 
16027 instruct vsub4I(vecX dst, vecX src1, vecX src2)
16028 %{
16029   predicate(n->as_Vector()->length() == 4);
16030   match(Set dst (SubVI src1 src2));
16031   ins_cost(INSN_COST);
16032   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
16033   ins_encode %{
16034     __ subv(as_FloatRegister($dst$$reg), __ T4S,
16035             as_FloatRegister($src1$$reg),
16036             as_FloatRegister($src2$$reg));
16037   %}
16038   ins_pipe(vdop128);
16039 %}
16040 
16041 instruct vsub2L(vecX dst, vecX src1, vecX src2)
16042 %{
16043   predicate(n->as_Vector()->length() == 2);
16044   match(Set dst (SubVL src1 src2));
16045   ins_cost(INSN_COST);
16046   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
16047   ins_encode %{
16048     __ subv(as_FloatRegister($dst$$reg), __ T2D,
16049             as_FloatRegister($src1$$reg),
16050             as_FloatRegister($src2$$reg));
16051   %}
16052   ins_pipe(vdop128);
16053 %}
16054 
16055 instruct vsub2F(vecD dst, vecD src1, vecD src2)
16056 %{
16057   predicate(n->as_Vector()->length() == 2);
16058   match(Set dst (SubVF src1 src2));
16059   ins_cost(INSN_COST);
16060   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
16061   ins_encode %{
16062     __ fsub(as_FloatRegister($dst$$reg), __ T2S,
16063             as_FloatRegister($src1$$reg),
16064             as_FloatRegister($src2$$reg));
16065   %}
16066   ins_pipe(vdop_fp64);
16067 %}
16068 
16069 instruct vsub4F(vecX dst, vecX src1, vecX src2)
16070 %{
16071   predicate(n->as_Vector()->length() == 4);
16072   match(Set dst (SubVF src1 src2));
16073   ins_cost(INSN_COST);
16074   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
16075   ins_encode %{
16076     __ fsub(as_FloatRegister($dst$$reg), __ T4S,
16077             as_FloatRegister($src1$$reg),
16078             as_FloatRegister($src2$$reg));
16079   %}
16080   ins_pipe(vdop_fp128);
16081 %}
16082 
16083 instruct vsub2D(vecX dst, vecX src1, vecX src2)
16084 %{
16085   predicate(n->as_Vector()->length() == 2);
16086   match(Set dst (SubVD src1 src2));
16087   ins_cost(INSN_COST);
16088   format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
16089   ins_encode %{
16090     __ fsub(as_FloatRegister($dst$$reg), __ T2D,
16091             as_FloatRegister($src1$$reg),
16092             as_FloatRegister($src2$$reg));
16093   %}
16094   ins_pipe(vdop_fp128);
16095 %}
16096 
16097 // --------------------------------- MUL --------------------------------------
16098 
16099 instruct vmul4S(vecD dst, vecD src1, vecD src2)
16100 %{
16101   predicate(n->as_Vector()->length() == 2 ||
16102             n->as_Vector()->length() == 4);
16103   match(Set dst (MulVS src1 src2));
16104   ins_cost(INSN_COST);
16105   format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
16106   ins_encode %{
16107     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
16108             as_FloatRegister($src1$$reg),
16109             as_FloatRegister($src2$$reg));
16110   %}
16111   ins_pipe(vmul64);
16112 %}
16113 
16114 instruct vmul8S(vecX dst, vecX src1, vecX src2)
16115 %{
16116   predicate(n->as_Vector()->length() == 8);
16117   match(Set dst (MulVS src1 src2));
16118   ins_cost(INSN_COST);
16119   format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
16120   ins_encode %{
16121     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
16122             as_FloatRegister($src1$$reg),
16123             as_FloatRegister($src2$$reg));
16124   %}
16125   ins_pipe(vmul128);
16126 %}
16127 
16128 instruct vmul2I(vecD dst, vecD src1, vecD src2)
16129 %{
16130   predicate(n->as_Vector()->length() == 2);
16131   match(Set dst (MulVI src1 src2));
16132   ins_cost(INSN_COST);
16133   format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
16134   ins_encode %{
16135     __ mulv(as_FloatRegister($dst$$reg), __ T2S,
16136             as_FloatRegister($src1$$reg),
16137             as_FloatRegister($src2$$reg));
16138   %}
16139   ins_pipe(vmul64);
16140 %}
16141 
16142 instruct vmul4I(vecX dst, vecX src1, vecX src2)
16143 %{
16144   predicate(n->as_Vector()->length() == 4);
16145   match(Set dst (MulVI src1 src2));
16146   ins_cost(INSN_COST);
16147   format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
16148   ins_encode %{
16149     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
16150             as_FloatRegister($src1$$reg),
16151             as_FloatRegister($src2$$reg));
16152   %}
16153   ins_pipe(vmul128);
16154 %}
16155 
16156 instruct vmul2F(vecD dst, vecD src1, vecD src2)
16157 %{
16158   predicate(n->as_Vector()->length() == 2);
16159   match(Set dst (MulVF src1 src2));
16160   ins_cost(INSN_COST);
16161   format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
16162   ins_encode %{
16163     __ fmul(as_FloatRegister($dst$$reg), __ T2S,
16164             as_FloatRegister($src1$$reg),
16165             as_FloatRegister($src2$$reg));
16166   %}
16167   ins_pipe(vmuldiv_fp64);
16168 %}
16169 
16170 instruct vmul4F(vecX dst, vecX src1, vecX src2)
16171 %{
16172   predicate(n->as_Vector()->length() == 4);
16173   match(Set dst (MulVF src1 src2));
16174   ins_cost(INSN_COST);
16175   format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
16176   ins_encode %{
16177     __ fmul(as_FloatRegister($dst$$reg), __ T4S,
16178             as_FloatRegister($src1$$reg),
16179             as_FloatRegister($src2$$reg));
16180   %}
16181   ins_pipe(vmuldiv_fp128);
16182 %}
16183 
16184 instruct vmul2D(vecX dst, vecX src1, vecX src2)
16185 %{
16186   predicate(n->as_Vector()->length() == 2);
16187   match(Set dst (MulVD src1 src2));
16188   ins_cost(INSN_COST);
16189   format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
16190   ins_encode %{
16191     __ fmul(as_FloatRegister($dst$$reg), __ T2D,
16192             as_FloatRegister($src1$$reg),
16193             as_FloatRegister($src2$$reg));
16194   %}
16195   ins_pipe(vmuldiv_fp128);
16196 %}
16197 
16198 // --------------------------------- MLA --------------------------------------
16199 
16200 instruct vmla4S(vecD dst, vecD src1, vecD src2)
16201 %{
16202   predicate(n->as_Vector()->length() == 2 ||
16203             n->as_Vector()->length() == 4);
16204   match(Set dst (AddVS dst (MulVS src1 src2)));
16205   ins_cost(INSN_COST);
16206   format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
16207   ins_encode %{
16208     __ mlav(as_FloatRegister($dst$$reg), __ T4H,
16209             as_FloatRegister($src1$$reg),
16210             as_FloatRegister($src2$$reg));
16211   %}
16212   ins_pipe(vmla64);
16213 %}
16214 
16215 instruct vmla8S(vecX dst, vecX src1, vecX src2)
16216 %{
16217   predicate(n->as_Vector()->length() == 8);
16218   match(Set dst (AddVS dst (MulVS src1 src2)));
16219   ins_cost(INSN_COST);
16220   format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
16221   ins_encode %{
16222     __ mlav(as_FloatRegister($dst$$reg), __ T8H,
16223             as_FloatRegister($src1$$reg),
16224             as_FloatRegister($src2$$reg));
16225   %}
16226   ins_pipe(vmla128);
16227 %}
16228 
16229 instruct vmla2I(vecD dst, vecD src1, vecD src2)
16230 %{
16231   predicate(n->as_Vector()->length() == 2);
16232   match(Set dst (AddVI dst (MulVI src1 src2)));
16233   ins_cost(INSN_COST);
16234   format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
16235   ins_encode %{
16236     __ mlav(as_FloatRegister($dst$$reg), __ T2S,
16237             as_FloatRegister($src1$$reg),
16238             as_FloatRegister($src2$$reg));
16239   %}
16240   ins_pipe(vmla64);
16241 %}
16242 
16243 instruct vmla4I(vecX dst, vecX src1, vecX src2)
16244 %{
16245   predicate(n->as_Vector()->length() == 4);
16246   match(Set dst (AddVI dst (MulVI src1 src2)));
16247   ins_cost(INSN_COST);
16248   format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
16249   ins_encode %{
16250     __ mlav(as_FloatRegister($dst$$reg), __ T4S,
16251             as_FloatRegister($src1$$reg),
16252             as_FloatRegister($src2$$reg));
16253   %}
16254   ins_pipe(vmla128);
16255 %}
16256 
16257 // --------------------------------- MLS --------------------------------------
16258 
16259 instruct vmls4S(vecD dst, vecD src1, vecD src2)
16260 %{
16261   predicate(n->as_Vector()->length() == 2 ||
16262             n->as_Vector()->length() == 4);
16263   match(Set dst (SubVS dst (MulVS src1 src2)));
16264   ins_cost(INSN_COST);
16265   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
16266   ins_encode %{
16267     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
16268             as_FloatRegister($src1$$reg),
16269             as_FloatRegister($src2$$reg));
16270   %}
16271   ins_pipe(vmla64);
16272 %}
16273 
16274 instruct vmls8S(vecX dst, vecX src1, vecX src2)
16275 %{
16276   predicate(n->as_Vector()->length() == 8);
16277   match(Set dst (SubVS dst (MulVS src1 src2)));
16278   ins_cost(INSN_COST);
16279   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
16280   ins_encode %{
16281     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
16282             as_FloatRegister($src1$$reg),
16283             as_FloatRegister($src2$$reg));
16284   %}
16285   ins_pipe(vmla128);
16286 %}
16287 
16288 instruct vmls2I(vecD dst, vecD src1, vecD src2)
16289 %{
16290   predicate(n->as_Vector()->length() == 2);
16291   match(Set dst (SubVI dst (MulVI src1 src2)));
16292   ins_cost(INSN_COST);
16293   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
16294   ins_encode %{
16295     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
16296             as_FloatRegister($src1$$reg),
16297             as_FloatRegister($src2$$reg));
16298   %}
16299   ins_pipe(vmla64);
16300 %}
16301 
16302 instruct vmls4I(vecX dst, vecX src1, vecX src2)
16303 %{
16304   predicate(n->as_Vector()->length() == 4);
16305   match(Set dst (SubVI dst (MulVI src1 src2)));
16306   ins_cost(INSN_COST);
16307   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
16308   ins_encode %{
16309     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
16310             as_FloatRegister($src1$$reg),
16311             as_FloatRegister($src2$$reg));
16312   %}
16313   ins_pipe(vmla128);
16314 %}
16315 
16316 // --------------------------------- DIV --------------------------------------
16317 
16318 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
16319 %{
16320   predicate(n->as_Vector()->length() == 2);
16321   match(Set dst (DivVF src1 src2));
16322   ins_cost(INSN_COST);
16323   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
16324   ins_encode %{
16325     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
16326             as_FloatRegister($src1$$reg),
16327             as_FloatRegister($src2$$reg));
16328   %}
16329   ins_pipe(vmuldiv_fp64);
16330 %}
16331 
16332 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
16333 %{
16334   predicate(n->as_Vector()->length() == 4);
16335   match(Set dst (DivVF src1 src2));
16336   ins_cost(INSN_COST);
16337   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
16338   ins_encode %{
16339     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
16340             as_FloatRegister($src1$$reg),
16341             as_FloatRegister($src2$$reg));
16342   %}
16343   ins_pipe(vmuldiv_fp128);
16344 %}
16345 
16346 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
16347 %{
16348   predicate(n->as_Vector()->length() == 2);
16349   match(Set dst (DivVD src1 src2));
16350   ins_cost(INSN_COST);
16351   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
16352   ins_encode %{
16353     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
16354             as_FloatRegister($src1$$reg),
16355             as_FloatRegister($src2$$reg));
16356   %}
16357   ins_pipe(vmuldiv_fp128);
16358 %}
16359 
16360 // --------------------------------- SQRT -------------------------------------
16361 
16362 instruct vsqrt2D(vecX dst, vecX src)
16363 %{
16364   predicate(n->as_Vector()->length() == 2);
16365   match(Set dst (SqrtVD src));
16366   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16367   ins_encode %{
16368     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16369              as_FloatRegister($src$$reg));
16370   %}
16371   ins_pipe(vsqrt_fp128);
16372 %}
16373 
16374 // --------------------------------- ABS --------------------------------------
16375 
16376 instruct vabs2F(vecD dst, vecD src)
16377 %{
16378   predicate(n->as_Vector()->length() == 2);
16379   match(Set dst (AbsVF src));
16380   ins_cost(INSN_COST * 3);
16381   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16382   ins_encode %{
16383     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16384             as_FloatRegister($src$$reg));
16385   %}
16386   ins_pipe(vunop_fp64);
16387 %}
16388 
16389 instruct vabs4F(vecX dst, vecX src)
16390 %{
16391   predicate(n->as_Vector()->length() == 4);
16392   match(Set dst (AbsVF src));
16393   ins_cost(INSN_COST * 3);
16394   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16395   ins_encode %{
16396     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16397             as_FloatRegister($src$$reg));
16398   %}
16399   ins_pipe(vunop_fp128);
16400 %}
16401 
16402 instruct vabs2D(vecX dst, vecX src)
16403 %{
16404   predicate(n->as_Vector()->length() == 2);
16405   match(Set dst (AbsVD src));
16406   ins_cost(INSN_COST * 3);
16407   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16408   ins_encode %{
16409     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16410             as_FloatRegister($src$$reg));
16411   %}
16412   ins_pipe(vunop_fp128);
16413 %}
16414 
16415 // --------------------------------- NEG --------------------------------------
16416 
16417 instruct vneg2F(vecD dst, vecD src)
16418 %{
16419   predicate(n->as_Vector()->length() == 2);
16420   match(Set dst (NegVF src));
16421   ins_cost(INSN_COST * 3);
16422   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16423   ins_encode %{
16424     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16425             as_FloatRegister($src$$reg));
16426   %}
16427   ins_pipe(vunop_fp64);
16428 %}
16429 
16430 instruct vneg4F(vecX dst, vecX src)
16431 %{
16432   predicate(n->as_Vector()->length() == 4);
16433   match(Set dst (NegVF src));
16434   ins_cost(INSN_COST * 3);
16435   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16436   ins_encode %{
16437     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16438             as_FloatRegister($src$$reg));
16439   %}
16440   ins_pipe(vunop_fp128);
16441 %}
16442 
16443 instruct vneg2D(vecX dst, vecX src)
16444 %{
16445   predicate(n->as_Vector()->length() == 2);
16446   match(Set dst (NegVD src));
16447   ins_cost(INSN_COST * 3);
16448   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16449   ins_encode %{
16450     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16451             as_FloatRegister($src$$reg));
16452   %}
16453   ins_pipe(vunop_fp128);
16454 %}
16455 
16456 // --------------------------------- AND --------------------------------------
16457 
16458 instruct vand8B(vecD dst, vecD src1, vecD src2)
16459 %{
16460   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16461             n->as_Vector()->length_in_bytes() == 8);
16462   match(Set dst (AndV src1 src2));
16463   ins_cost(INSN_COST);
16464   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16465   ins_encode %{
16466     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16467             as_FloatRegister($src1$$reg),
16468             as_FloatRegister($src2$$reg));
16469   %}
16470   ins_pipe(vlogical64);
16471 %}
16472 
16473 instruct vand16B(vecX dst, vecX src1, vecX src2)
16474 %{
16475   predicate(n->as_Vector()->length_in_bytes() == 16);
16476   match(Set dst (AndV src1 src2));
16477   ins_cost(INSN_COST);
16478   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16479   ins_encode %{
16480     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16481             as_FloatRegister($src1$$reg),
16482             as_FloatRegister($src2$$reg));
16483   %}
16484   ins_pipe(vlogical128);
16485 %}
16486 
16487 // --------------------------------- OR ---------------------------------------
16488 
16489 instruct vor8B(vecD dst, vecD src1, vecD src2)
16490 %{
16491   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16492             n->as_Vector()->length_in_bytes() == 8);
16493   match(Set dst (OrV src1 src2));
16494   ins_cost(INSN_COST);
16495   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16496   ins_encode %{
16497     __ orr(as_FloatRegister($dst$$reg), __ T8B,
16498             as_FloatRegister($src1$$reg),
16499             as_FloatRegister($src2$$reg));
16500   %}
16501   ins_pipe(vlogical64);
16502 %}
16503 
16504 instruct vor16B(vecX dst, vecX src1, vecX src2)
16505 %{
16506   predicate(n->as_Vector()->length_in_bytes() == 16);
16507   match(Set dst (OrV src1 src2));
16508   ins_cost(INSN_COST);
16509   format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
16510   ins_encode %{
16511     __ orr(as_FloatRegister($dst$$reg), __ T16B,
16512             as_FloatRegister($src1$$reg),
16513             as_FloatRegister($src2$$reg));
16514   %}
16515   ins_pipe(vlogical128);
16516 %}
16517 
16518 // --------------------------------- XOR --------------------------------------
16519 
16520 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16521 %{
16522   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16523             n->as_Vector()->length_in_bytes() == 8);
16524   match(Set dst (XorV src1 src2));
16525   ins_cost(INSN_COST);
16526   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
16527   ins_encode %{
16528     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16529             as_FloatRegister($src1$$reg),
16530             as_FloatRegister($src2$$reg));
16531   %}
16532   ins_pipe(vlogical64);
16533 %}
16534 
16535 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16536 %{
16537   predicate(n->as_Vector()->length_in_bytes() == 16);
16538   match(Set dst (XorV src1 src2));
16539   ins_cost(INSN_COST);
16540   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
16541   ins_encode %{
16542     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16543             as_FloatRegister($src1$$reg),
16544             as_FloatRegister($src2$$reg));
16545   %}
16546   ins_pipe(vlogical128);
16547 %}
16548 
16549 // ------------------------------ Shift ---------------------------------------
16550 
16551 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
16552   match(Set dst (LShiftCntV cnt));
16553   format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
16554   ins_encode %{
16555     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16556   %}
16557   ins_pipe(vdup_reg_reg128);
16558 %}
16559 
16560 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
16561 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
16562   match(Set dst (RShiftCntV cnt));
16563   format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
16564   ins_encode %{
16565     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16566     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
16567   %}
16568   ins_pipe(vdup_reg_reg128);
16569 %}
16570 
16571 instruct vsll8B(vecD dst, vecD src, vecX shift) %{
16572   predicate(n->as_Vector()->length() == 4 ||
16573             n->as_Vector()->length() == 8);
16574   match(Set dst (LShiftVB src shift));
16575   match(Set dst (RShiftVB src shift));
16576   ins_cost(INSN_COST);
16577   format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
16578   ins_encode %{
16579     __ sshl(as_FloatRegister($dst$$reg), __ T8B,
16580             as_FloatRegister($src$$reg),
16581             as_FloatRegister($shift$$reg));
16582   %}
16583   ins_pipe(vshift64);
16584 %}
16585 
16586 instruct vsll16B(vecX dst, vecX src, vecX shift) %{
16587   predicate(n->as_Vector()->length() == 16);
16588   match(Set dst (LShiftVB src shift));
16589   match(Set dst (RShiftVB src shift));
16590   ins_cost(INSN_COST);
16591   format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
16592   ins_encode %{
16593     __ sshl(as_FloatRegister($dst$$reg), __ T16B,
16594             as_FloatRegister($src$$reg),
16595             as_FloatRegister($shift$$reg));
16596   %}
16597   ins_pipe(vshift128);
16598 %}
16599 
16600 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
16601   predicate(n->as_Vector()->length() == 4 ||
16602             n->as_Vector()->length() == 8);
16603   match(Set dst (URShiftVB src shift));
16604   ins_cost(INSN_COST);
16605   format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
16606   ins_encode %{
16607     __ ushl(as_FloatRegister($dst$$reg), __ T8B,
16608             as_FloatRegister($src$$reg),
16609             as_FloatRegister($shift$$reg));
16610   %}
16611   ins_pipe(vshift64);
16612 %}
16613 
16614 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
16615   predicate(n->as_Vector()->length() == 16);
16616   match(Set dst (URShiftVB src shift));
16617   ins_cost(INSN_COST);
16618   format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
16619   ins_encode %{
16620     __ ushl(as_FloatRegister($dst$$reg), __ T16B,
16621             as_FloatRegister($src$$reg),
16622             as_FloatRegister($shift$$reg));
16623   %}
16624   ins_pipe(vshift128);
16625 %}
16626 
16627 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
16628   predicate(n->as_Vector()->length() == 4 ||
16629             n->as_Vector()->length() == 8);
16630   match(Set dst (LShiftVB src shift));
16631   ins_cost(INSN_COST);
16632   format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
16633   ins_encode %{
16634     int sh = (int)$shift$$constant & 31;
16635     if (sh >= 8) {
16636       __ eor(as_FloatRegister($dst$$reg), __ T8B,
16637              as_FloatRegister($src$$reg),
16638              as_FloatRegister($src$$reg));
16639     } else {
16640       __ shl(as_FloatRegister($dst$$reg), __ T8B,
16641              as_FloatRegister($src$$reg), sh);
16642     }
16643   %}
16644   ins_pipe(vshift64_imm);
16645 %}
16646 
16647 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
16648   predicate(n->as_Vector()->length() == 16);
16649   match(Set dst (LShiftVB src shift));
16650   ins_cost(INSN_COST);
16651   format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
16652   ins_encode %{
16653     int sh = (int)$shift$$constant & 31;
16654     if (sh >= 8) {
16655       __ eor(as_FloatRegister($dst$$reg), __ T16B,
16656              as_FloatRegister($src$$reg),
16657              as_FloatRegister($src$$reg));
16658     } else {
16659       __ shl(as_FloatRegister($dst$$reg), __ T16B,
16660              as_FloatRegister($src$$reg), sh);
16661     }
16662   %}
16663   ins_pipe(vshift128_imm);
16664 %}
16665 
16666 instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
16667   predicate(n->as_Vector()->length() == 4 ||
16668             n->as_Vector()->length() == 8);
16669   match(Set dst (RShiftVB src shift));
16670   ins_cost(INSN_COST);
16671   format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
16672   ins_encode %{
16673     int sh = (int)$shift$$constant & 31;
16674     if (sh >= 8) sh = 7;
16675     sh = -sh & 7;
16676     __ sshr(as_FloatRegister($dst$$reg), __ T8B,
16677            as_FloatRegister($src$$reg), sh);
16678   %}
16679   ins_pipe(vshift64_imm);
16680 %}
16681 
16682 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
16683   predicate(n->as_Vector()->length() == 16);
16684   match(Set dst (RShiftVB src shift));
16685   ins_cost(INSN_COST);
16686   format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
16687   ins_encode %{
16688     int sh = (int)$shift$$constant & 31;
16689     if (sh >= 8) sh = 7;
16690     sh = -sh & 7;
16691     __ sshr(as_FloatRegister($dst$$reg), __ T16B,
16692            as_FloatRegister($src$$reg), sh);
16693   %}
16694   ins_pipe(vshift128_imm);
16695 %}
16696 
16697 instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
16698   predicate(n->as_Vector()->length() == 4 ||
16699             n->as_Vector()->length() == 8);
16700   match(Set dst (URShiftVB src shift));
16701   ins_cost(INSN_COST);
16702   format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
16703   ins_encode %{
16704     int sh = (int)$shift$$constant & 31;
16705     if (sh >= 8) {
16706       __ eor(as_FloatRegister($dst$$reg), __ T8B,
16707              as_FloatRegister($src$$reg),
16708              as_FloatRegister($src$$reg));
16709     } else {
16710       __ ushr(as_FloatRegister($dst$$reg), __ T8B,
16711              as_FloatRegister($src$$reg), -sh & 7);
16712     }
16713   %}
16714   ins_pipe(vshift64_imm);
16715 %}
16716 
16717 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
16718   predicate(n->as_Vector()->length() == 16);
16719   match(Set dst (URShiftVB src shift));
16720   ins_cost(INSN_COST);
16721   format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
16722   ins_encode %{
16723     int sh = (int)$shift$$constant & 31;
16724     if (sh >= 8) {
16725       __ eor(as_FloatRegister($dst$$reg), __ T16B,
16726              as_FloatRegister($src$$reg),
16727              as_FloatRegister($src$$reg));
16728     } else {
16729       __ ushr(as_FloatRegister($dst$$reg), __ T16B,
16730              as_FloatRegister($src$$reg), -sh & 7);
16731     }
16732   %}
16733   ins_pipe(vshift128_imm);
16734 %}
16735 
16736 instruct vsll4S(vecD dst, vecD src, vecX shift) %{
16737   predicate(n->as_Vector()->length() == 2 ||
16738             n->as_Vector()->length() == 4);
16739   match(Set dst (LShiftVS src shift));
16740   match(Set dst (RShiftVS src shift));
16741   ins_cost(INSN_COST);
16742   format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
16743   ins_encode %{
16744     __ sshl(as_FloatRegister($dst$$reg), __ T4H,
16745             as_FloatRegister($src$$reg),
16746             as_FloatRegister($shift$$reg));
16747   %}
16748   ins_pipe(vshift64);
16749 %}
16750 
16751 instruct vsll8S(vecX dst, vecX src, vecX shift) %{
16752   predicate(n->as_Vector()->length() == 8);
16753   match(Set dst (LShiftVS src shift));
16754   match(Set dst (RShiftVS src shift));
16755   ins_cost(INSN_COST);
16756   format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
16757   ins_encode %{
16758     __ sshl(as_FloatRegister($dst$$reg), __ T8H,
16759             as_FloatRegister($src$$reg),
16760             as_FloatRegister($shift$$reg));
16761   %}
16762   ins_pipe(vshift128);
16763 %}
16764 
16765 instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
16766   predicate(n->as_Vector()->length() == 2 ||
16767             n->as_Vector()->length() == 4);
16768   match(Set dst (URShiftVS src shift));
16769   ins_cost(INSN_COST);
16770   format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
16771   ins_encode %{
16772     __ ushl(as_FloatRegister($dst$$reg), __ T4H,
16773             as_FloatRegister($src$$reg),
16774             as_FloatRegister($shift$$reg));
16775   %}
16776   ins_pipe(vshift64);
16777 %}
16778 
16779 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
16780   predicate(n->as_Vector()->length() == 8);
16781   match(Set dst (URShiftVS src shift));
16782   ins_cost(INSN_COST);
16783   format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
16784   ins_encode %{
16785     __ ushl(as_FloatRegister($dst$$reg), __ T8H,
16786             as_FloatRegister($src$$reg),
16787             as_FloatRegister($shift$$reg));
16788   %}
16789   ins_pipe(vshift128);
16790 %}
16791 
16792 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
16793   predicate(n->as_Vector()->length() == 2 ||
16794             n->as_Vector()->length() == 4);
16795   match(Set dst (LShiftVS src shift));
16796   ins_cost(INSN_COST);
16797   format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
16798   ins_encode %{
16799     int sh = (int)$shift$$constant & 31;
16800     if (sh >= 16) {
16801       __ eor(as_FloatRegister($dst$$reg), __ T8B,
16802              as_FloatRegister($src$$reg),
16803              as_FloatRegister($src$$reg));
16804     } else {
16805       __ shl(as_FloatRegister($dst$$reg), __ T4H,
16806              as_FloatRegister($src$$reg), sh);
16807     }
16808   %}
16809   ins_pipe(vshift64_imm);
16810 %}
16811 
16812 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
16813   predicate(n->as_Vector()->length() == 8);
16814   match(Set dst (LShiftVS src shift));
16815   ins_cost(INSN_COST);
16816   format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
16817   ins_encode %{
16818     int sh = (int)$shift$$constant & 31;
16819     if (sh >= 16) {
16820       __ eor(as_FloatRegister($dst$$reg), __ T16B,
16821              as_FloatRegister($src$$reg),
16822              as_FloatRegister($src$$reg));
16823     } else {
16824       __ shl(as_FloatRegister($dst$$reg), __ T8H,
16825              as_FloatRegister($src$$reg), sh);
16826     }
16827   %}
16828   ins_pipe(vshift128_imm);
16829 %}
16830 
16831 instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
16832   predicate(n->as_Vector()->length() == 2 ||
16833             n->as_Vector()->length() == 4);
16834   match(Set dst (RShiftVS src shift));
16835   ins_cost(INSN_COST);
16836   format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
16837   ins_encode %{
16838     int sh = (int)$shift$$constant & 31;
16839     if (sh >= 16) sh = 15;
16840     sh = -sh & 15;
16841     __ sshr(as_FloatRegister($dst$$reg), __ T4H,
16842            as_FloatRegister($src$$reg), sh);
16843   %}
16844   ins_pipe(vshift64_imm);
16845 %}
16846 
16847 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
16848   predicate(n->as_Vector()->length() == 8);
16849   match(Set dst (RShiftVS src shift));
16850   ins_cost(INSN_COST);
16851   format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
16852   ins_encode %{
16853     int sh = (int)$shift$$constant & 31;
16854     if (sh >= 16) sh = 15;
16855     sh = -sh & 15;
16856     __ sshr(as_FloatRegister($dst$$reg), __ T8H,
16857            as_FloatRegister($src$$reg), sh);
16858   %}
16859   ins_pipe(vshift128_imm);
16860 %}
16861 
16862 instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
16863   predicate(n->as_Vector()->length() == 2 ||
16864             n->as_Vector()->length() == 4);
16865   match(Set dst (URShiftVS src shift));
16866   ins_cost(INSN_COST);
16867   format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
16868   ins_encode %{
16869     int sh = (int)$shift$$constant & 31;
16870     if (sh >= 16) {
16871       __ eor(as_FloatRegister($dst$$reg), __ T8B,
16872              as_FloatRegister($src$$reg),
16873              as_FloatRegister($src$$reg));
16874     } else {
16875       __ ushr(as_FloatRegister($dst$$reg), __ T4H,
16876              as_FloatRegister($src$$reg), -sh & 15);
16877     }
16878   %}
16879   ins_pipe(vshift64_imm);
16880 %}
16881 
16882 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
16883   predicate(n->as_Vector()->length() == 8);
16884   match(Set dst (URShiftVS src shift));
16885   ins_cost(INSN_COST);
16886   format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
16887   ins_encode %{
16888     int sh = (int)$shift$$constant & 31;
16889     if (sh >= 16) {
16890       __ eor(as_FloatRegister($dst$$reg), __ T16B,
16891              as_FloatRegister($src$$reg),
16892              as_FloatRegister($src$$reg));
16893     } else {
16894       __ ushr(as_FloatRegister($dst$$reg), __ T8H,
16895              as_FloatRegister($src$$reg), -sh & 15);
16896     }
16897   %}
16898   ins_pipe(vshift128_imm);
16899 %}
16900 
16901 instruct vsll2I(vecD dst, vecD src, vecX shift) %{
16902   predicate(n->as_Vector()->length() == 2);
16903   match(Set dst (LShiftVI src shift));
16904   match(Set dst (RShiftVI src shift));
16905   ins_cost(INSN_COST);
16906   format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
16907   ins_encode %{
16908     __ sshl(as_FloatRegister($dst$$reg), __ T2S,
16909             as_FloatRegister($src$$reg),
16910             as_FloatRegister($shift$$reg));
16911   %}
16912   ins_pipe(vshift64);
16913 %}
16914 
16915 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
16916   predicate(n->as_Vector()->length() == 4);
16917   match(Set dst (LShiftVI src shift));
16918   match(Set dst (RShiftVI src shift));
16919   ins_cost(INSN_COST);
16920   format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
16921   ins_encode %{
16922     __ sshl(as_FloatRegister($dst$$reg), __ T4S,
16923             as_FloatRegister($src$$reg),
16924             as_FloatRegister($shift$$reg));
16925   %}
16926   ins_pipe(vshift128);
16927 %}
16928 
16929 instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
16930   predicate(n->as_Vector()->length() == 2);
16931   match(Set dst (URShiftVI src shift));
16932   ins_cost(INSN_COST);
16933   format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
16934   ins_encode %{
16935     __ ushl(as_FloatRegister($dst$$reg), __ T2S,
16936             as_FloatRegister($src$$reg),
16937             as_FloatRegister($shift$$reg));
16938   %}
16939   ins_pipe(vshift64);
16940 %}
16941 
16942 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
16943   predicate(n->as_Vector()->length() == 4);
16944   match(Set dst (URShiftVI src shift));
16945   ins_cost(INSN_COST);
16946   format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
16947   ins_encode %{
16948     __ ushl(as_FloatRegister($dst$$reg), __ T4S,
16949             as_FloatRegister($src$$reg),
16950             as_FloatRegister($shift$$reg));
16951   %}
16952   ins_pipe(vshift128);
16953 %}
16954 
16955 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
16956   predicate(n->as_Vector()->length() == 2);
16957   match(Set dst (LShiftVI src shift));
16958   ins_cost(INSN_COST);
16959   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
16960   ins_encode %{
16961     __ shl(as_FloatRegister($dst$$reg), __ T2S,
16962            as_FloatRegister($src$$reg),
16963            (int)$shift$$constant & 31);
16964   %}
16965   ins_pipe(vshift64_imm);
16966 %}
16967 
16968 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
16969   predicate(n->as_Vector()->length() == 4);
16970   match(Set dst (LShiftVI src shift));
16971   ins_cost(INSN_COST);
16972   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
16973   ins_encode %{
16974     __ shl(as_FloatRegister($dst$$reg), __ T4S,
16975            as_FloatRegister($src$$reg),
16976            (int)$shift$$constant & 31);
16977   %}
16978   ins_pipe(vshift128_imm);
16979 %}
16980 
16981 instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
16982   predicate(n->as_Vector()->length() == 2);
16983   match(Set dst (RShiftVI src shift));
16984   ins_cost(INSN_COST);
16985   format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
16986   ins_encode %{
16987     __ sshr(as_FloatRegister($dst$$reg), __ T2S,
16988             as_FloatRegister($src$$reg),
16989             -(int)$shift$$constant & 31);
16990   %}
16991   ins_pipe(vshift64_imm);
16992 %}
16993 
16994 instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
16995   predicate(n->as_Vector()->length() == 4);
16996   match(Set dst (RShiftVI src shift));
16997   ins_cost(INSN_COST);
16998   format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
16999   ins_encode %{
17000     __ sshr(as_FloatRegister($dst$$reg), __ T4S,
17001             as_FloatRegister($src$$reg),
17002             -(int)$shift$$constant & 31);
17003   %}
17004   ins_pipe(vshift128_imm);
17005 %}
17006 
17007 instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
17008   predicate(n->as_Vector()->length() == 2);
17009   match(Set dst (URShiftVI src shift));
17010   ins_cost(INSN_COST);
17011   format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
17012   ins_encode %{
17013     __ ushr(as_FloatRegister($dst$$reg), __ T2S,
17014             as_FloatRegister($src$$reg),
17015             -(int)$shift$$constant & 31);
17016   %}
17017   ins_pipe(vshift64_imm);
17018 %}
17019 
17020 instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
17021   predicate(n->as_Vector()->length() == 4);
17022   match(Set dst (URShiftVI src shift));
17023   ins_cost(INSN_COST);
17024   format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
17025   ins_encode %{
17026     __ ushr(as_FloatRegister($dst$$reg), __ T4S,
17027             as_FloatRegister($src$$reg),
17028             -(int)$shift$$constant & 31);
17029   %}
17030   ins_pipe(vshift128_imm);
17031 %}
17032 
17033 instruct vsll2L(vecX dst, vecX src, vecX shift) %{
17034   predicate(n->as_Vector()->length() == 2);
17035   match(Set dst (LShiftVL src shift));
17036   match(Set dst (RShiftVL src shift));
17037   ins_cost(INSN_COST);
17038   format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
17039   ins_encode %{
17040     __ sshl(as_FloatRegister($dst$$reg), __ T2D,
17041             as_FloatRegister($src$$reg),
17042             as_FloatRegister($shift$$reg));
17043   %}
17044   ins_pipe(vshift128);
17045 %}
17046 
17047 instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
17048   predicate(n->as_Vector()->length() == 2);
17049   match(Set dst (URShiftVL src shift));
17050   ins_cost(INSN_COST);
17051   format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
17052   ins_encode %{
17053     __ ushl(as_FloatRegister($dst$$reg), __ T2D,
17054             as_FloatRegister($src$$reg),
17055             as_FloatRegister($shift$$reg));
17056   %}
17057   ins_pipe(vshift128);
17058 %}
17059 
17060 instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
17061   predicate(n->as_Vector()->length() == 2);
17062   match(Set dst (LShiftVL src shift));
17063   ins_cost(INSN_COST);
17064   format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
17065   ins_encode %{
17066     __ shl(as_FloatRegister($dst$$reg), __ T2D,
17067            as_FloatRegister($src$$reg),
17068            (int)$shift$$constant & 63);
17069   %}
17070   ins_pipe(vshift128_imm);
17071 %}
17072 
17073 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
17074   predicate(n->as_Vector()->length() == 2);
17075   match(Set dst (RShiftVL src shift));
17076   ins_cost(INSN_COST);
17077   format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
17078   ins_encode %{
17079     __ sshr(as_FloatRegister($dst$$reg), __ T2D,
17080             as_FloatRegister($src$$reg),
17081             -(int)$shift$$constant & 63);
17082   %}
17083   ins_pipe(vshift128_imm);
17084 %}
17085 
17086 instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
17087   predicate(n->as_Vector()->length() == 2);
17088   match(Set dst (URShiftVL src shift));
17089   ins_cost(INSN_COST);
17090   format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
17091   ins_encode %{
17092     __ ushr(as_FloatRegister($dst$$reg), __ T2D,
17093             as_FloatRegister($src$$reg),
17094             -(int)$shift$$constant & 63);
17095   %}
17096   ins_pipe(vshift128_imm);
17097 %}
17098 
17099 //----------PEEPHOLE RULES-----------------------------------------------------
17100 // These must follow all instruction definitions as they use the names
17101 // defined in the instructions definitions.
17102 //
17103 // peepmatch ( root_instr_name [preceding_instruction]* );
17104 //
17105 // peepconstraint %{
17106 // (instruction_number.operand_name relational_op instruction_number.operand_name
17107 //  [, ...] );
17108 // // instruction numbers are zero-based using left to right order in peepmatch
17109 //
17110 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17111 // // provide an instruction_number.operand_name for each operand that appears
17112 // // in the replacement instruction's match rule
17113 //
17114 // ---------VM FLAGS---------------------------------------------------------
17115 //
17116 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17117 //
17118 // Each peephole rule is given an identifying number starting with zero and
17119 // increasing by one in the order seen by the parser.  An individual peephole
17120 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17121 // on the command-line.
17122 //
17123 // ---------CURRENT LIMITATIONS----------------------------------------------
17124 //
17125 // Only match adjacent instructions in same basic block
17126 // Only equality constraints
17127 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17128 // Only one replacement instruction
17129 //
17130 // ---------EXAMPLE----------------------------------------------------------
17131 //
17132 // // pertinent parts of existing instructions in architecture description
17133 // instruct movI(iRegINoSp dst, iRegI src)
17134 // %{
17135 //   match(Set dst (CopyI src));
17136 // %}
17137 //
17138 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17139 // %{
17140 //   match(Set dst (AddI dst src));
17141 //   effect(KILL cr);
17142 // %}
17143 //
17144 // // Change (inc mov) to lea
17145 // peephole %{
17146 //   // increment preceeded by register-register move
17147 //   peepmatch ( incI_iReg movI );
17148 //   // require that the destination register of the increment
17149 //   // match the destination register of the move
17150 //   peepconstraint ( 0.dst == 1.dst );
17151 //   // construct a replacement instruction that sets
17152 //   // the destination to ( move's source register + one )
17153 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17154 // %}
17155 //
17156 
17157 // Implementation no longer uses movX instructions since
17158 // machine-independent system no longer uses CopyX nodes.
17159 //
17160 // peephole
17161 // %{
17162 //   peepmatch (incI_iReg movI);
17163 //   peepconstraint (0.dst == 1.dst);
17164 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17165 // %}
17166 
17167 // peephole
17168 // %{
17169 //   peepmatch (decI_iReg movI);
17170 //   peepconstraint (0.dst == 1.dst);
17171 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17172 // %}
17173 
17174 // peephole
17175 // %{
17176 //   peepmatch (addI_iReg_imm movI);
17177 //   peepconstraint (0.dst == 1.dst);
17178 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17179 // %}
17180 
17181 // peephole
17182 // %{
17183 //   peepmatch (incL_iReg movL);
17184 //   peepconstraint (0.dst == 1.dst);
17185 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17186 // %}
17187 
17188 // peephole
17189 // %{
17190 //   peepmatch (decL_iReg movL);
17191 //   peepconstraint (0.dst == 1.dst);
17192 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17193 // %}
17194 
17195 // peephole
17196 // %{
17197 //   peepmatch (addL_iReg_imm movL);
17198 //   peepconstraint (0.dst == 1.dst);
17199 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17200 // %}
17201 
17202 // peephole
17203 // %{
17204 //   peepmatch (addP_iReg_imm movP);
17205 //   peepconstraint (0.dst == 1.dst);
17206 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17207 // %}
17208 
17209 // // Change load of spilled value to only a spill
17210 // instruct storeI(memory mem, iRegI src)
17211 // %{
17212 //   match(Set mem (StoreI mem src));
17213 // %}
17214 //
17215 // instruct loadI(iRegINoSp dst, memory mem)
17216 // %{
17217 //   match(Set dst (LoadI mem));
17218 // %}
17219 //
17220 
17221 //----------SMARTSPILL RULES---------------------------------------------------
17222 // These must follow all instruction definitions as they use the names
17223 // defined in the instructions definitions.
17224 
17225 // Local Variables:
17226 // mode: c++
17227 // End: